一、問題
問題一:
源碼:
most_egregious = table.order_by('Total (%)', reverse=True).limit(10)
報錯:
KeyError: 'Total (%)'
可能是自己寫錯了,但是 agate 打印出來的列名看上去也是 'Total (%)',
一時沒有找出哪裏錯了(很可能是列名首尾帶有空格,例如 'Total (%) ',可以用 print(table.column_names) 覈對)
只能修改爲
most_egregious = table.order_by(table.column_names[1], reverse=True).limit(10)
問題二:
整體比較混亂:
cpi_types = get_types(cpi_sheet.row(3))
位於
get_types(cpi_sheet.row(3))導致閱讀具有一定障礙
這一節感覺整體閱讀具有障礙,變量太多而沒有羅列清楚,
且
cpi_rows = get_new_array(cpi_rows,float_to_str)
中的float_to_str一直沒有找到
加上之前試圖封裝9.1.1、9.1.2的內容導致函數調用非常麻煩,整整折騰了一下午和一個晚上
錯誤示例:
from 數據導入 import Data_explore
import xlrd
import agate
from 探索表函數 import Data_Sort
#兩個中文就是前兩個封裝的內容
def xlrd_cpi():
    """Run the exploratory checks on the CPI workbook.

    Creates a ``Data_explore`` for the workbook, calls ``xls_type()``,
    prints the result of ``agate_data_check()`` and finally calls
    ``get_new_array()`` (its result is not used here).
    """
    workbook_path = 'I:\\360下載\\data-wrangling\\data\\chp9\\corruption_perception_index.xls'
    explorer = Data_explore(workbook_path)
    explorer.xls_type()
    check_result = explorer.agate_data_check()
    print(check_result)
    # The returned table is intentionally discarded; only the call itself is kept.
    explorer.get_new_array()
# Module-level call: runs the exploratory check as soon as this script executes.
xlrd_cpi()
def table_():
    """Print the column names of the table built from the CPI workbook."""
    workbook_path = 'I:\\360下載\\data-wrangling\\data\\chp9\\corruption_perception_index.xls'
    built_table = Data_explore(workbook_path).get_new_array()
    print(built_table.column_names)
#table_()
def get_table():
    """Build the CPI table and print it, printing any exception instead of raising.

    Observed when running this (recorded by the author):
    DuplicateColumnWarning: Column name "1.0 3.0" already exists in Table.
    Column will be renamed to "1.0 3.0_2".
    """
    workbook_path = 'I:\\360下載\\data-wrangling\\data\\chp9\\corruption_perception_index.xls'
    try:
        explorer = Data_explore(workbook_path)
        built_table = explorer.get_new_array()
        print(built_table)
    except Exception as e:
        # Best-effort exploration helper: report the problem and carry on.
        print(e)
#get_table()
def repeat_row_manage():
    """Join the CPI table with the ranked table and print the first ten rows.

    Steps:
      1. Read the combined titles from the workbook, drop empty ones and
         append a trailing space to each.
      2. Re-create the ``Data_explore`` with those titles and build the table.
      3. Inner-join with ``Data_Sort().data_table_rank()`` on the CPI table's
         second column and the ranked table's ' Countries and areas' column.
      4. Print the joined column names, then the first ten rows ordered by
         'CPI 2013 Score'.
    """
    workbook_path = 'I:\\360下載\\data-wrangling\\data\\chp9\\corruption_perception_index.xls'

    tm = Data_explore(workbook_path)
    # tm.combine_title() yields a list of titles; empty entries are skipped.
    title_ = [titles + ' ' for titles in tm.combine_title() if titles]

    # The table must be built from the instance that carries the new titles.
    tm_ = Data_explore(workbook_path, title_)
    print()
    table = tm_.get_new_array()

    ranked = Data_Sort().data_table_rank()

    # Resolve the join column once and reuse it for the row lookup below.
    # The original looked it up via `tm_.get_new_array.column_names`, i.e. on
    # the bound method itself instead of on the table, which raises
    # AttributeError.
    country_column = table.column_names[1]
    cpi_and_cl = table.join(ranked, country_column, ' Countries and areas', inner=True)
    print(cpi_and_cl.column_names)

    # NOTE(review): the ranked table's column may actually be named
    # 'Total (%) ' (trailing space) - confirm against the column names
    # printed above before relying on the 'Total (%)' key.
    for r in cpi_and_cl.order_by('CPI 2013 Score').limit(10).rows:
        print('{}: {} - {}%'.format(r[country_column], r['CPI 2013 Score'], r['Total (%)']))

    print('是否運行至此測試------------------------------------------')
#repeat_row_manage()
理解的關鍵:
agate.Table 需要三樣東西:rows、title、type。Table 是 agate 的基礎:它就是 xlrd 讀出的標題與數據行的聯合,也就是把 xlrd 的內容轉換成 agate 的內容;而 type(每一列的數據類型)是需要自己設置的內容。
正常的源碼:
import xlrd
from xlrd.sheet import ctype_text
import agate
import pprint
from 探索表函數 import Data_Sort
'''table需要rows ,title,type.其中rows'''
def xlrd_open(path='I:\\360下載\\data-wrangling\\data\\chp9\\corruption_perception_index.xls'):
    """Open an Excel workbook with xlrd and return its first sheet.

    Args:
        path: Location of the workbook. Defaults to the CPI workbook used
            throughout these notes, so existing ``xlrd_open()`` calls keep
            working unchanged.

    Returns:
        The first sheet of the workbook.
    """
    cpi_workbook = xlrd.open_workbook(path)
    return cpi_workbook.sheets()[0]
def cpi_titles():
    """Return the column titles built from the sheet's two header rows.

    Workbook -> sheet -> header rows (row values, not column values).
    The values of row 1 and row 2 are paired cell by cell, joined with a
    single space and stripped of surrounding whitespace.

    Note from earlier experiments: a previous attempt failed because the
    titles were assembled at a different position; changing how they are
    put together breaks lookups that rely on the original titles.
    """
    sheet = xlrd_open()
    upper_row = sheet.row_values(1)
    lower_row = sheet.row_values(2)
    return [
        (upper + ' ' + lower).strip()
        for upper, lower in zip(upper_row, lower_row)
    ]
print(cpi_titles())

# Values of the actual data rows (everything from row 3 to the end of the
# sheet); the header rows are not included.
# Author's notes: this was tried as a `cpi_rows()` function, but printing it
# then only showed a memory address, so it stays at module level. The row
# range also differs from the "valid rows" used in the 數據導入 module.
cpi_sheet = xlrd_open()
cpi_rows = [
    cpi_sheet.row_values(row_index)
    for row_index in range(3, cpi_sheet.nrows)
]
def cpi_types():
    """Return one agate data type per column, based on the first data row.

    The xlrd cell type of every cell in row 3 is translated through
    ``ctype_text`` (a dict mapping xlrd ctype codes to names) and mapped to
    an agate type:

      * 'text'   -> agate.Text
      * 'number' -> agate.Number
      * 'xldate' -> agate.Date
      * anything else -> agate.Text

    Returns:
        A list of agate data type instances, one per cell of row 3.
    """
    text_type = agate.Text()
    type_for_ctype = {
        'text': text_type,
        'number': agate.Number(),
        # agate.Date is the concrete date type; the abstract agate.DataType
        # base class used previously cannot be used as a column type.
        # NOTE(review): xlrd returns date cells as floats - they presumably
        # need converting before agate can cast them; confirm if a sheet
        # with date columns is ever loaded.
        'xldate': agate.Date(),
    }

    first_data_row = xlrd_open().row(3)
    return [
        type_for_ctype.get(ctype_text[cell.ctype], text_type)
        for cell in first_data_row
    ]
def cpi_titles_update():
    """Return the sheet titles with 'Duplicate' appended to the first one.

    The titles are kept in a local list. The earlier version declared
    ``global cpi_titles`` and overwrote the ``cpi_titles`` function with the
    resulting list, so any later call to ``cpi_titles()`` or to this
    function raised ``TypeError: 'list' object is not callable``.

    Returns:
        A new list of titles whose first entry has 'Duplicate' appended.
    """
    titles = cpi_titles()
    titles[0] = titles[0] + 'Duplicate'
    return titles
# Build and print the table from the data rows, the updated titles and the
# per-column types.
cpi_table = agate.Table(cpi_rows, cpi_titles_update(), cpi_types())
cpi_table.print_table(max_columns=7)

# Join the two data sets.
ranked = Data_Sort().data_table_rank()
cpi_and_cl = cpi_table.join(
    ranked,
    'Country / Territory',
    ' Countries and areas',
    inner=True,
)
print(cpi_and_cl.column_names)

# Print the first ten rows after ordering by 'CPI 2013 Score'.
for r in cpi_and_cl.order_by('CPI 2013 Score').limit(10).rows:
    line = '{}: {} - {}%'.format(
        r['Country / Territory'],
        r['CPI 2013 Score'],
        r['Total (%) '],
    )
    print(line)