《Pandas Cookbook》第09章 合併Pandas對象

 In[1]: import pandas as pd
        import numpy as np

        import matplotlib.pyplot as plt
        %matplotlib inline

1. DataFrame添加新的行

# 讀取names數據集
 In[2]: names = pd.read_csv('data/names.csv')
        names
Out[2]: 
# 用loc直接賦值新的行
 In[3]: new_data_list = ['Aria', 1]
        names.loc[4] = new_data_list
        names
Out[3]: 
# 用loc的標籤直接賦值新的行
 In[4]: names.loc['five'] = ['Zach', 3]
        names
Out[4]: 
# 也可以用字典賦值新行
 In[5]: names.loc[len(names)] = {'Name':'Zayd', 'Age':2}
        names
Out[5]: 
 In[6]: names
Out[6]: 
# 字典可以打亂列名的順序
 In[7]: names.loc[len(names)] = pd.Series({'Age':32, 'Name':'Dean'})
        names
Out[7]: 
# 直接append一個字典
 In[8]: names = pd.read_csv('data/names.csv')
        names.append({'Name':'Aria', 'Age':1})
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-8-562aecc73587> in <module>()
      1 # Use append with fresh copy of names
      2 names = pd.read_csv('data/names.csv')
----> 3 names.append({'Name':'Aria', 'Age':1})

/Users/Ted/anaconda/lib/python3.6/site-packages/pandas/core/frame.py in append(self, other, ignore_index, verify_integrity)
   4515                 other = Series(other)
   4516             if other.name is None and not ignore_index:
-> 4517                 raise TypeError('Can only append a Series if ignore_index=True'
   4518                                 ' or if the Series has a name')
   4519 

TypeError: Can only append a Series if ignore_index=True or if the Series has a name
# 按照錯誤提示,加上ignore_index=True
 In[9]: names.append({'Name':'Aria', 'Age':1}, ignore_index=True)
Out[9]: 
# 設定行索引
 In[10]: names.index = ['Canada', 'Canada', 'USA', 'USA']
         names
Out[10]: 
# 添加一行
 In[11]: names.append({'Name':'Aria', 'Age':1}, ignore_index=True)
Out[11]: 
# 創建一個Series對象
 In[12]: s = pd.Series({'Name': 'Zach', 'Age': 3}, name=len(names))
         s
Out[12]: Age        3
         Name    Zach
         Name: 4, dtype: object
# append方法可以將DataFrame和Series相連
 In[13]: names.append(s)
Out[13]: 
# append方法可以同時連接多行,只要將對象放到列表中
 In[14]: s1 = pd.Series({'Name': 'Zach', 'Age': 3}, name=len(names))
         s2 = pd.Series({'Name': 'Zayd', 'Age': 2}, name='USA')
         names.append([s1, s2])
Out[14]: 
# 讀取baseball16數據集
 In[15]: bball_16 = pd.read_csv('data/baseball16.csv')
         bball_16.head()
Out[15]: 
# 選取一行,並將其轉換爲字典
 In[16]: data_dict = bball_16.iloc[0].to_dict()
         print(data_dict)
{'playerID': 'altuvjo01', 'yearID': 2016, 'stint': 1, 'teamID': 'HOU', 'lgID': 'AL', 'G': 161, 'AB': 640, 'R': 108, 'H': 216, '2B': 42, '3B': 5, 'HR': 24, 'RBI': 96.0, 'SB': 30.0, 'CS': 10.0, 'BB': 60, 'SO': 70.0, 'IBB': 11.0, 'HBP': 7.0, 'SH': 3.0, 'SF': 7.0, 'GIDP': 15.0}
# 對這個字典做格式處理:值是字符串的替換爲空字符串,其餘的替換爲缺失值np.nan
 In[17]: new_data_dict = {k: '' if isinstance(v, str) else np.nan for k, v in data_dict.items()}
         print(new_data_dict)
{'playerID': '', 'yearID': nan, 'stint': nan, 'teamID': '', 'lgID': '', 'G': nan, 'AB': nan, 'R': nan, 'H': nan, '2B': nan, '3B': nan, 'HR': nan, 'RBI': nan, 'SB': nan, 'CS': nan, 'BB': nan, 'SO': nan, 'IBB': nan, 'HBP': nan, 'SH': nan, 'SF': nan, 'GIDP': nan}

更多

# 逐行向DataFrame添加數據非常消耗資源,應避免在循環中逐行append。下面先創建一千行新數據,保存爲一個由Series組成的列表:
 In[18]: random_data = []
         for i in range(1000):
             d = dict()
             for k, v in data_dict.items():
                 if isinstance(v, str):
                     d[k] = np.random.choice(list('abcde'))
                 else:
                     d[k] = np.random.randint(10)
             random_data.append(pd.Series(d, name=i + len(bball_16)))
    
         random_data[0].head()
Out[18]: 2B    2
         3B    6
         AB    8
         BB    2
         CS    0
         Name: 16, dtype: object
# 用循環逐行append,並給這個操作計時,1000行的數據用了約5秒鐘
 In[19]: %%timeit
         bball_16_copy = bball_16.copy()
         for row in random_data:
             bball_16_copy = bball_16_copy.append(row)
5.36 s ± 298 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
# 如果是通過列表的方式append,可以大大節省時間
 In[20]: %%timeit
         bball_16_copy = bball_16.copy()
         bball_16_copy = bball_16_copy.append(random_data)
86.2 ms ± 3.71 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)

2. 連接多個DataFrame

# 讀取stocks_2016和stocks_2017兩個數據集,用Symbol作爲行索引名
 In[21]: stocks_2016 = pd.read_csv('data/stocks_2016.csv', index_col='Symbol')
         stocks_2017 = pd.read_csv('data/stocks_2017.csv', index_col='Symbol')
 In[22]: stocks_2016
Out[22]: 
 In[23]: stocks_2017
Out[23]: 
# 將兩個DataFrame放到一個列表中,用pandas的concat方法將它們連接起來
 In[24]: s_list = [stocks_2016, stocks_2017]
         pd.concat(s_list)
Out[24]: 
# keys參數可以給兩個DataFrame命名,該標籤會出現在行索引的最外層,會生成多層索引,names參數可以重命名每個索引層
 In[25]: pd.concat(s_list, keys=['2016', '2017'], names=['Year', 'Symbol'])
Out[25]: 
# 也可以橫向連接。只要將axis參數設爲columns或1
 In[26]: pd.concat(s_list, keys=['2016', '2017'], axis='columns', names=['Year', None])
Out[26]: 
# concat函數默認使用的是外連接,會保留每個DataFrame中的所有行。也可以通過設定join參數,使用內連接:
 In[27]: pd.concat(s_list, join='inner', keys=['2016', '2017'], axis='columns', names=['Year', None])
Out[27]: 

更多

# append是concat方法的超簡化版本,append內部其實就是調用concat。例如,本節前面的pd.concat(s_list),也可以用append如下實現:
 In[28]: stocks_2016.append(stocks_2017)
Out[28]: 
# 原書沒有下面三行代碼
 In[29]: stocks_2015 = stocks_2016.copy()
 In[30]: stocks_2017
Out[30]: 

3. 比較特朗普和奧巴馬的支持率

# pandas的read_html函數可以從網頁抓取表格數據
 In[31]: base_url = 'http://www.presidency.ucsb.edu/data/popularity.php?pres={}'
         trump_url = base_url.format(45)

         df_list = pd.read_html(trump_url)
         len(df_list)
Out[31]: 14
# 一共返回了14個表的DataFrame,取第一個
 In[32]: df0 = df_list[0]
         df0.shape
Out[32]: (324, 1906)

 In[33]: df0.head(7)
Out[33]: 
# 用match參數匹配table中的字符串
 In[34]: df_list = pd.read_html(trump_url, match='Start Date')
         len(df_list)
Out[34]: 3
# 通過檢查頁面元素的屬性,用attrs參數進行匹配
 In[35]: df_list = pd.read_html(trump_url, match='Start Date', attrs={'align':'center'})
         len(df_list)
Out[35]: 1
# 查看DataFrame的形狀
 In[36]: trump = df_list[0]
         trump.shape
Out[36]: (265, 19)
 In[37]: trump.head(8)
Out[37]: 
# skiprows可以指定跳過一些行,header參數可以指定列名,用parse_dates指定開始和結束日期
 In[38]: df_list = pd.read_html(trump_url, match='Start Date', attrs={'align':'center'}, 
                       header=0, skiprows=[0,1,2,3,5], parse_dates=['Start Date', 'End Date'])
         trump = df_list[0]
         trump.head()
Out[38]: 
# 刪除所有值都是缺失值的列
 In[39]: trump = trump.dropna(axis=1, how='all')
         trump.head()
Out[39]: 
# 統計各列的缺失值個數
 In[40]: trump.isnull().sum()
Out[40]: President         258
         Start Date          0
         End Date            0
         Approving           0
         Disapproving        0
         unsure/no data      0
         dtype: int64

# 缺失值向前填充
 In[41]: trump = trump.ffill()
         trump.head()
Out[41]: 
# 確認數據類型
 In[42]: trump.dtypes
Out[42]: President                 object
         Start Date        datetime64[ns]
         End Date          datetime64[ns]
         Approving                  int64
         Disapproving               int64
         unsure/no data             int64
         dtype: object
# 將前面的步驟做成一個函數,用於獲取任意總統的信息
 In[43]: def get_pres_appr(pres_num):
             # Build the approval-rating table for a single president.
             # pres_num is substituted into the `pres=` query parameter of the URL
             # (elsewhere in this notebook 45 is used for Trump and 44 for Obama).
             # Returns a DataFrame sorted by 'End Date' with a fresh integer index.
             # NOTE: pd.read_html is given a URL here, so calling this function
             # requires network access and depends on the page keeping its layout.
             base_url = 'http://www.presidency.ucsb.edu/data/popularity.php?pres={}'
             pres_url = base_url.format(pres_num)
             # Keep only tables containing the text 'Start Date' whose <table> tag
             # has align="center". skiprows drops rows 0-3 and 5, header=0 takes the
             # column names from the first row that is kept, and both date columns
             # are parsed as datetimes.
             df_list = pd.read_html(pres_url, match='Start Date', attrs={'align':'center'}, 
                                    header=0, skiprows=[0,1,2,3,5], parse_dates=['Start Date', 'End Date'])
             # Work on a copy of the first matching table.
             pres = df_list[0].copy()
             # Drop columns in which every value is missing.
             pres = pres.dropna(axis=1, how='all')
             # Forward-fill only the President column: each missing name is replaced
             # by the last non-missing name above it.
             pres['President'] = pres['President'].ffill()
             # Order rows by 'End Date' and discard the old index.
             return pres.sort_values('End Date').reset_index(drop=True)

# 括號中的數字是總統的編號,奧巴馬是44
 In[44]: obama = get_pres_appr(44)
         obama.head()
Out[44]: 
# 獲取最近五位總統的數據,輸出每位的前三行數據
 In[45]: pres_41_45 = pd.concat([get_pres_appr(x) for x in range(41,46)], ignore_index=True)
         pres_41_45.groupby('President').head(3)
Out[45]: 
# 確認一下是否有一個日期對應多個支持率
 In[46]: pres_41_45['End Date'].value_counts().head(8)
Out[46]: 1990-03-11    2
         1990-08-12    2
         1990-08-26    2
         2013-10-10    2
         1999-02-09    2
         1992-11-22    2
         1990-05-22    2
         2005-01-05    1
         Name: End Date, dtype: int64
# 去除重複值
 In[47]: pres_41_45 = pres_41_45.drop_duplicates(subset='End Date')
 In[48]: pres_41_45.shape
Out[48]: (3695, 6)
# 對數據做簡單的統計
 In[49]: pres_41_45['President'].value_counts()
Out[49]: Barack Obama          2786
         George W. Bush         270
         Donald J. Trump        259
         William J. Clinton     227
         George Bush            153
         Name: President, dtype: int64

 In[50]: pres_41_45.groupby('President', sort=False).median().round(1)
Out[50]: 
# 畫出每任總統的支持率變化
 In[51]: from matplotlib import cm
         fig, ax = plt.subplots(figsize=(16,6))

         styles = ['-.', '-', ':', '-', ':']
         colors = [.9, .3, .7, .3, .9]
         groups = pres_41_45.groupby('President', sort=False)

         for style, color, (pres, df) in zip(styles, colors, groups):
             df.plot('End Date', 'Approving', ax=ax, label=pres, style=style, color=cm.Greys(color), 
                     title='Presedential Approval Rating')
# 上面的圖是將數據前後串起來,也可以用支持率對在職天數作圖
 In[52]: days_func = lambda x: x - x.iloc[0]
         pres_41_45['Days in Office'] = pres_41_45.groupby('President') \
                                                  ['End Date'] \
                                                  .transform(days_func)
 In[82]: pres_41_45['Days in Office'] = pres_41_45.groupby('President')['End Date'].transform(lambda x: x - x.iloc[0])
         pres_41_45.groupby('President').head(3)
Out[82]: 
# 查看數據類型
 In[83]: pres_41_45.dtypes
Out[83]: President                  object
         Start Date         datetime64[ns]
         End Date           datetime64[ns]
         Approving                   int64
         Disapproving                int64
         unsure/no data              int64
         Days in Office    timedelta64[ns]
         dtype: object
# Days in Office的數據類型是timedelta64[ns](精度爲納秒),用dt.days將其轉換爲整數天數
 In[86]: pres_41_45['Days in Office'] = pres_41_45['Days in Office'].dt.days
         pres_41_45['Days in Office'].head()
Out[86]: 0     0
         1    32
         2    35
         3    43
         4    46
         Name: Days in Office, dtype: int64
# 轉換數據,使每位總統的支持率各成一列
 In[87]: pres_pivot = pres_41_45.pivot(index='Days in Office', columns='President', values='Approving')
         pres_pivot.head()
Out[87]: 
# 只畫出特朗普和奧巴馬的支持率
 In[88]: plot_kwargs = dict(figsize=(16,6), color=cm.gray([.3, .7]), style=['-', '--'], title='Approval Rating')
         pres_pivot.loc[:250, ['Donald J. Trump', 'Barack Obama']].ffill().plot(**plot_kwargs)
Out[88]: <matplotlib.axes._subplots.AxesSubplot at 0x1152254a8>

更多

# rolling average方法可以平滑曲線,在這個例子中,使用的是90天求平均,參數on指明瞭滾動窗口是從哪列計算的
 In[89]: pres_rm = pres_41_45.groupby('President', sort=False) \
                             .rolling('90D', on='End Date')['Approving'] \
                             .mean()
         pres_rm.head()
Out[89]: President    End Date  
         George Bush  1989-01-26    51.000000
                      1989-02-27    55.500000
                      1989-03-02    57.666667
                      1989-03-10    58.750000
                      1989-03-13    58.200000
         Name: Approving, dtype: float64
# 對數據的行和列做調整,然後作圖
 In[90]: styles = ['-.', '-', ':', '-', ':']
         colors = [.9, .3, .7, .3, .9]
         color = cm.Greys(colors)
         title='90 Day Approval Rating Rolling Average'
         plot_kwargs = dict(figsize=(16,6), style=styles, color = color, title=title)
         correct_col_order = pres_41_45.President.unique()
         pres_rm.unstack('President')[correct_col_order].plot(**plot_kwargs)
Out[90]: <matplotlib.axes._subplots.AxesSubplot at 0x1162d0780>

4. concat, join, 和merge的區別

concat

  • Pandas函數
  • 可以垂直和水平地連接兩個或多個pandas對象
  • 只用索引對齊
  • 索引出現重複值時會報錯
  • 默認是外連接(也可以設爲內連接)

join

  • DataFrame方法
  • 只能水平連接兩個或多個pandas對象
  • 對齊是靠被調用的DataFrame的列索引或行索引和另一個對象的行索引(不能是列索引)
  • 通過笛卡爾積處理重複的索引值
  • 默認是左連接(也可以設爲內連接、外連接和右連接)

merge

  • DataFrame方法
  • 只能水平連接兩個DataFrame對象
  • 對齊是靠被調用的DataFrame的列或行索引和另一個DataFrame的列或行索引
  • 通過笛卡爾積處理重複的索引值
  • 默認是內連接(也可以設爲左連接、外連接、右連接)
# 用戶自定義的display_frames函數,可以接收一個由DataFrame組成的列表,然後將它們並排顯示在同一行中:
 In[91]: from IPython.display import display_html

         years = 2016, 2017, 2018
         stock_tables = [pd.read_csv('data/stocks_{}.csv'.format(year), index_col='Symbol') 
                         for year in years]

         def display_frames(frames, num_spaces=0):
             # Show several DataFrames side by side in one notebook output.
             # frames: iterable of objects exposing to_html() (DataFrames here).
             # num_spaces: number of '&nbsp;' entities inserted between adjacent tables.
             # Has no return statement; the result is emitted through display_html.
             # Opening tag that replaces the plain '<table' so each table is styled
             # "display: inline;" and can sit next to the previous one.
             t_style = '<table style="display: inline;"'
             tables_html = [df.to_html().replace('<table', t_style) for df in frames]

             # Separator placed between tables (empty string when num_spaces is 0).
             space = '&nbsp;' * num_spaces
             # raw=True: the argument is passed as an HTML string, not as an object
             # to be formatted.
             display_html(space.join(tables_html), raw=True)

         display_frames(stock_tables, 30)
         stocks_2016, stocks_2017, stocks_2018 = stock_tables
# 在concat、join和merge三者中,concat是唯一一個可以將DataFrame垂直連接起來的
 In[92]: pd.concat(stock_tables, keys=[2016, 2017, 2018])
Out[92]: 
# concat也可以將DataFrame水平連起來
 In[93]: pd.concat(dict(zip(years,stock_tables)), axis='columns')
Out[93]: 
# 用join將DataFrame連起來;如果列名有相同的,需要設置lsuffix或rsuffix以進行區分
 In[94]: stocks_2016.join(stocks_2017, lsuffix='_2016', rsuffix='_2017', how='outer')
Out[94]: 
 In[95]: stocks_2016
Out[95]: 
# 要重現前面的concat方法,可以將一個DataFrame列表傳入join
 In[96]: other = [stocks_2017.add_suffix('_2017'), stocks_2018.add_suffix('_2018')]
         stocks_2016.add_suffix('_2016').join(other, how='outer')
Out[96]: 
# 檢驗join和concat這兩種方法的結果是否相同:先將stock_concat的多層列索引壓平成與stock_join相同形式的列名,之後可用stock_join.equals(stock_concat)比較
 In[97]: stock_join = stocks_2016.add_suffix('_2016').join(other, how='outer')
         stock_concat = pd.concat(dict(zip(years,stock_tables)), axis='columns')
 In[98]: stock_concat.columns = stock_concat.columns.get_level_values(1) + '_' + \
                                     stock_concat.columns.get_level_values(0).astype(str)
 In[99]: stock_concat
Out[99]: 
 In[100]: step1 = stocks_2016.merge(stocks_2017, left_index=True, right_index=True, 
                                    how='outer', suffixes=('_2016', '_2017'))
          stock_merge = step1.merge(stocks_2018.add_suffix('_2018'), 
                                    left_index=True, right_index=True, how='outer')

          stock_concat.equals(stock_merge)
Out[100]: True
# 查看food_prices和food_transactions兩個小數據集
 In[101]: names = ['prices', 'transactions']
          food_tables = [pd.read_csv('data/food_{}.csv'.format(name)) for name in names]
          food_prices, food_transactions = food_tables
          display_frames(food_tables, 30)
# 通過鍵item和store,將food_transactions和food_prices兩個數據集融合
 In[102]: food_transactions.merge(food_prices, on=['item', 'store'])
Out[102]: 
# 因爲steak在兩張表中分別出現了兩次,融合時產生了笛卡爾積,造成結果中出現了四行steak;因爲coconut沒有對應的價格,造成結果中沒有coconut
# 下面只融合2017年的數據
 In[103]: food_transactions.merge(food_prices.query('Date == 2017'), how='left')
Out[103]: 
# 使用join復現上面的方法,需要將food_prices中用於連接的列(item和store)轉換爲行索引
 In[104]: food_prices_join = food_prices.query('Date == 2017').set_index(['item', 'store'])
          food_prices_join
Out[104]: 
# join方法只對齊傳入DataFrame的行索引,但可以對齊調用DataFrame的行索引和列索引;
# 要使用列做對齊,需要將其傳給參數on
 In[105]: food_transactions.join(food_prices_join, on=['item', 'store'])
Out[105]: 
# 要使用concat,需要將item和store兩列放入兩個DataFrame的行索引。但是,因爲行索引值有重複,造成了錯誤
 In[106]: pd.concat([food_transactions.set_index(['item', 'store']), 
                     food_prices.set_index(['item', 'store'])], axis='columns')
---------------------------------------------------------------------------
Exception                                 Traceback (most recent call last)
<ipython-input-106-8aa3223bf3d1> in <module>()
      1 pd.concat([food_transactions.set_index(['item', 'store']), 
----> 2            food_prices.set_index(['item', 'store'])], axis='columns')

/Users/Ted/anaconda/lib/python3.6/site-packages/pandas/core/reshape/concat.py in concat(objs, axis, join, join_axes, ignore_index, keys, levels, names, verify_integrity, copy)
    205                        verify_integrity=verify_integrity,
    206                        copy=copy)
--> 207     return op.get_result()
    208 
    209 

/Users/Ted/anaconda/lib/python3.6/site-packages/pandas/core/reshape/concat.py in get_result(self)
    399                     obj_labels = mgr.axes[ax]
    400                     if not new_labels.equals(obj_labels):
--> 401                         indexers[ax] = obj_labels.reindex(new_labels)[1]
    402 
    403                 mgrs_indexers.append((obj._data, indexers))

/Users/Ted/anaconda/lib/python3.6/site-packages/pandas/core/indexes/multi.py in reindex(self, target, method, level, limit, tolerance)
   1861                                                tolerance=tolerance)
   1862                 else:
-> 1863                     raise Exception("cannot handle a non-unique multi-index!")
   1864 
   1865         if not isinstance(target, MultiIndex):

Exception: cannot handle a non-unique multi-index!
# glob模塊的glob函數可以取出文件夾中匹配的文件,返回的是文件名字符串的列表,其中每個文件名都可以直接傳給read_csv函數
 In[107]: import glob

          df_list = []
          for filename in glob.glob('data/gas prices/*.csv'):
              df_list.append(pd.read_csv(filename, index_col='Week', parse_dates=['Week']))

          gas = pd.concat(df_list, axis='columns')
          gas.head()
Out[107]: 

5. 連接SQL數據庫

# 在讀取chinook數據庫之前,需要創建SQLAlchemy引擎
 In[108]: from sqlalchemy import create_engine
          engine = create_engine('sqlite:///data/chinook.db')
 In[109]: tracks = pd.read_sql_table('tracks', engine)
          tracks.head()
Out[109]: 
# read_sql_table函數可以讀取一張表,第一個參數是表名,第二個參數是引擎
 In[110]: genres = pd.read_sql_table('genres', engine)
          genres.head()
Out[110]: 
# 找到每種類型歌曲的平均時長
 In[111]: genre_track = genres.merge(tracks[['GenreId', 'Milliseconds']], 
                                     on='GenreId', how='left') \
                              .drop('GenreId', axis='columns')
          genre_track.head()
Out[111]: 
# 將Milliseconds列轉變爲timedelta數據類型
 In[112]: genre_time = genre_track.groupby('Name')['Milliseconds'].mean()
          pd.to_timedelta(genre_time, unit='ms').dt.floor('s').sort_values()
Out[112]: 
Name
Rock And Roll        00:02:14
Opera                00:02:54
Hip Hop/Rap          00:02:58
Easy Listening       00:03:09
Bossa Nova           00:03:39
R&B/Soul             00:03:40
World                00:03:44
Pop                  00:03:49
Latin                00:03:52
Alternative & Punk   00:03:54
Soundtrack           00:04:04
Reggae               00:04:07
Alternative          00:04:24
Blues                00:04:30
Rock                 00:04:43
Jazz                 00:04:51
Classical            00:04:53
Heavy Metal          00:04:57
Electronica/Dance    00:05:02
Metal                00:05:09
Comedy               00:26:25
TV Shows             00:35:45
Drama                00:42:55
Science Fiction      00:43:45
Sci Fi & Fantasy     00:48:31
Name: Milliseconds, dtype: timedelta64[ns]
# 爲了找到每名顧客的總消費,先讀取customers、invoices和invoice_items三張表中需要的列
 In[113]: cust = pd.read_sql_table('customers', engine, 
                                    columns=['CustomerId', 'FirstName', 'LastName'])
          invoice = pd.read_sql_table('invoices', engine, 
                                       columns=['InvoiceId','CustomerId'])
          ii = pd.read_sql_table('invoice_items', engine, 
                                  columns=['InvoiceId', 'UnitPrice', 'Quantity'])
 In[114]: cust_inv = cust.merge(invoice, on='CustomerId') \
                         .merge(ii, on='InvoiceId')
          cust_inv.head()
Out[114]: 
# 現在可以用數量乘以單價,找到每名顧客的總消費
 In[115]: total = cust_inv['Quantity'] * cust_inv['UnitPrice']
          cols = ['CustomerId', 'FirstName', 'LastName']
          cust_inv.assign(Total = total).groupby(cols)['Total'] \
                                        .sum() \
                                        .sort_values(ascending=False).head()
Out[115]: 

更多

# sql語句查詢方法read_sql_query
 In[116]: pd.read_sql_query('select * from tracks limit 5', engine)
Out[116]: 
# 可以將長字符串傳給read_sql_query
 In[117]: sql_string1 = '''
          select 
              Name, 
              time(avg(Milliseconds) / 1000, 'unixepoch') as avg_time
          from (
                  select 
                      g.Name, 
                      t.Milliseconds
                  from 
                      genres as g 
                  join
                      tracks as t
                      on 
                          g.genreid == t.genreid
              )
          group by 
              Name
          order by 
              avg_time
          '''
          pd.read_sql_query(sql_string1, engine)
Out[117]: 
 In[118]: sql_string2 = '''
          select 
                c.customerid, 
                c.FirstName, 
                c.LastName, 
                sum(ii.quantity *  ii.unitprice) as Total
          from
                customers as c
          join
                invoices as i
                     on c.customerid = i.customerid
          join
                invoice_items as ii
                     on i.invoiceid = ii.invoiceid
          group by
                c.customerid, c.FirstName, c.LastName
          order by
                Total desc
          '''
          pd.read_sql_query(sql_string2, engine)
Out[118]: 
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章