分享一個統計excel的關係統計腳本
1.xlwt操作,合併單元格
原表格: 需計算word1,word2,word3之間的關係
腳本處理後,可獲得每個詞語相關的詞語的次數
源碼:
import xlrd,re,os,xlwt
import operator
def exportExcel(path,field_attr=None):
    """Count how often words appear together in a row of a workbook's first sheet.

    Rows are read starting at index 1 (row 0 is never read; presumably it is
    a header row -- confirm against the input files).  Every non-empty cell
    is converted to a lower-cased string and treated as a word.  For a row
    holding exactly two or three words, each word's counter for every other
    word in that row is incremented by one.  Rows with any other number of
    words contribute their words as keys but add no counts.

    Args:
        path: Workbook location, handed to ``xlrd.open_workbook``.
        field_attr: Unused; kept so existing callers keep working.

    Returns:
        A dict mapping each word to a dict ``{related_word: count}``.  Keys
        appear in the order the words are first encountered in the sheet.
    """
    workbook = xlrd.open_workbook(path)
    sheet = workbook.sheet_by_index(0)

    big_dict = {}
    for row_idx in range(1, sheet.nrows):
        words = []
        for col_idx, value in enumerate(sheet.row_values(row_idx)):
            # ctype: 0 empty, 1 string, 2 number, 3 date, 4 boolean, 5 error
            if sheet.cell(row_idx, col_idx).ctype == 0 or value == '':
                continue
            # str() first: non-text cells are not guaranteed to be str and
            # would otherwise fail on .lower().
            word = str(value).lower()
            words.append(word)
            # Register every word, even those that end up with no relations.
            big_dict.setdefault(word, {})

        # Only rows holding two or three words are counted.
        if len(words) not in (2, 3):
            continue
        print(*words)

        # Visit every ordered pair of distinct cell positions, so each word
        # records every other word that shares its row.
        for src_pos, src_word in enumerate(words):
            related = big_dict[src_word]
            for dst_pos, dst_word in enumerate(words):
                if src_pos != dst_pos:
                    related[dst_word] = related.get(dst_word, 0) + 1
    return big_dict
def do_main(path,table_name):
    """Build the word-relation report for ``path`` and save it as an .xls file.

    For each word returned by ``exportExcel`` the related words are ranked by
    count (highest first), the total of all counts is computed, and the top
    five related words are kept.  Words are written to the sheet from the
    highest total to the lowest.  The workbook is saved as
    ``table_name + '.xls'``.

    Args:
        path: Source workbook, forwarded to ``exportExcel``.
        table_name: Used both as the sheet name and as the output file stem.

    Returns:
        None.  Progress and the outcome are reported with ``print``.
    """
    big_dict = exportExcel(path)

    big_arr = []
    for word, related in big_dict.items():
        ranked = sorted(related.items(), key=operator.itemgetter(1), reverse=True)
        total = sum(count for _, count in ranked)
        top_five = ranked[:5]
        print('total:', total, word, ':', top_five)
        big_arr.append({'word': word, 'detail': top_five, 'total': total})

    # Ascending stable sort walked backwards below: this keeps the original
    # ordering of words whose totals are equal.
    big_arr.sort(key=operator.itemgetter('total'))

    # 1. Create the workbook and a single sheet.
    workbook = xlwt.Workbook()
    sheet = workbook.add_sheet(table_name, cell_overwrite_ok=True)
    # NOTE(review): the leading 'g' in the second header looks like a typo;
    # left untouched because it is emitted into the output file.
    heads = ['詞語','g關係總數','詞語關聯(前五)']
    sheet.write(0,0,heads[0])
    sheet.write(0,1,heads[1])
    # The third header spans columns 3-7, the five columns used for details.
    sheet.write_merge(0,0,3,7,heads[2])

    # 2. One sheet row per word, highest total first.
    for row, entry in enumerate(reversed(big_arr), start=1):
        print(entry['word'], entry['total'], entry['detail'])
        sheet.write(row, 0, entry['word'])
        sheet.write(row, 1, entry['total'])
        for col, (related_word, count) in enumerate(entry['detail'], start=3):
            sheet.write(row, col, "{0} ({1})".format(related_word, count))

    save_path = table_name+'.xls'
    print('文件路徑爲' + save_path)
    saved = False
    try:
        workbook.save(save_path)
    except Exception as exc:
        # Best-effort export: report the real cause instead of hiding it.
        print(save_path + '目錄不存在', exc)
    else:
        saved = True

    # 3. Report the outcome.  A file left over from an earlier run must not
    # count as success when this save failed.
    if saved and os.path.isfile(save_path):
        print('導出成功,文件爲' + save_path)
    else:
        msg = '存儲文件發生異常,檢查{0}目錄是否存在'.format(save_path)
        print(msg)
if __name__ == '__main__':
    # Raw string: the previous literal contained the invalid escape "\D".
    # The resulting path value is unchanged.
    # NOTE(review): the path ends in .csv but is opened with
    # xlrd.open_workbook inside exportExcel -- confirm the file really is an
    # Excel workbook.
    path = r'C:\Users\SHEIN\Desktop\relation\UK.csv'
    table_name = 'UK-result'
    do_main(path, table_name)