python-處理問卷數據

分享一個統計excel的關係統計腳本

1.xlwt操作,合併單元格

原表格如下,目標是計算 word1、word2、word3 之間的關係

腳本處理後,可獲得每個詞語與其他詞語在同一行中共同出現的次數

 

源碼:

import xlrd,re,os,xlwt
import operator



def exportExcel(path,field_attr=None):
    """Count how often words appear together on the same row of a workbook.

    Opens the workbook at ``path`` with ``xlrd`` and reads its first sheet.
    Row 0 is skipped; every later row contributes its non-empty cells,
    lower-cased, as that row's words.  For each row holding at least two
    words, every ordered pair of distinct cell positions ``(x, y)`` adds one
    to ``result[x][y]``.

    Args:
        path: Location of the workbook handed to ``xlrd.open_workbook``.
        field_attr: Unused; kept so existing callers keep working.

    Returns:
        A dict mapping each word found in the sheet to a dict of
        ``{related_word: co-occurrence count}``.  Words that only ever appear
        alone on a row map to an empty dict.
    """
    workbook = xlrd.open_workbook(path)
    sheet = workbook.sheet_by_index(0)

    big_dict = {}
    for row_index in range(1, sheet.nrows):
        # Cell ctype: 0 empty, 1 string, 2 number, 3 date, 4 boolean, 5 error.
        # Non-text values are passed through str() first; calling .lower()
        # directly on a numeric cell value would raise AttributeError.
        row_words = [
            str(cell.value).lower()
            for cell in sheet.row(row_index)
            if not (cell.ctype == 0 or cell.value == '')
        ]

        # Every word gets an entry, even when its row has no partner word.
        for word in row_words:
            big_dict.setdefault(word, {})

        if len(row_words) < 2:
            continue
        print(*row_words)

        # Visit ordered pairs by position so that a word repeated within one
        # row is still counted against its other occurrence.
        for i, word in enumerate(row_words):
            related = big_dict[word]
            for j, other in enumerate(row_words):
                if i != j:
                    related[other] = related.get(other, 0) + 1
    return big_dict



def do_main(path,table_name):
    """Build the word-relation report for ``path`` and save it as an .xls file.

    Calls ``exportExcel(path)`` to obtain the co-occurrence counts, then for
    each word computes the total of all its counts and keeps its five most
    frequent related words.  Words are written to a new ``xlwt`` sheet named
    ``table_name`` in descending order of total: column 0 holds the word,
    column 1 the total, and columns 3 onward the related words formatted as
    ``"word (count)"``.  The workbook is saved to ``table_name + '.xls'``.

    Args:
        path: Workbook path forwarded to ``exportExcel``.
        table_name: Used both as the sheet name and as the output file stem.

    Returns:
        None.  Progress and the outcome of the save are printed.
    """
    big_dict = exportExcel(path)
    big_arr = []
    for word, related in big_dict.items():
        ranked = sorted(related.items(), key=operator.itemgetter(1), reverse=True)
        total = sum(count for _, count in ranked)
        top_related = ranked[:5]
        print('total:', total, word, ':', top_related)
        big_arr.append({'word': word, 'detail': top_related, 'total': total})

    # Stable ascending sort followed by a reversal (rather than a descending
    # sort) keeps words with equal totals in the order the report has always
    # used.
    big_arr.sort(key=operator.itemgetter('total'))
    big_arr.reverse()

    # Create the workbook and its single sheet.
    workbook = xlwt.Workbook()
    sheet = workbook.add_sheet(table_name, cell_overwrite_ok=True)

    # NOTE(review): the leading 'g' in the second header looks like a typo;
    # left untouched because it is part of the generated output.
    heads = ['詞語','g關係總數','詞語關聯(前五)']

    sheet.write(0, 0, heads[0])
    sheet.write(0, 1, heads[1])
    # The third header spans columns 3-7, the five columns used for details.
    sheet.write_merge(0, 0, 3, 7, heads[2])

    # One sheet row per word, starting below the header row.
    for row, obj in enumerate(big_arr, start=1):
        print(obj['word'], obj['total'], obj['detail'])
        sheet.write(row, 0, obj['word'])
        sheet.write(row, 1, obj['total'])
        for col, (related_word, count) in enumerate(obj['detail']):
            sheet.write(row, col + 3, "{0} ({1})".format(related_word, count))

    save_path = table_name+'.xls'
    print('文件路徑爲' + save_path)
    try:
        workbook.save(save_path)
    except OSError as exc:
        # Report the failure directly instead of probing for the file
        # afterwards: a stale file from an earlier run would otherwise be
        # reported as a successful export.
        print(save_path + '目錄不存在')
        print('存儲文件發生異常,檢查{0}目錄是否存在'.format(save_path), exc)
        return

    print('導出成功,文件爲' + save_path)



if __name__ == '__main__':
    # Raw string: the previous literal contained '\D', an invalid escape
    # sequence that Python warns about.  The resulting path is unchanged.
    # NOTE(review): the path ends in .csv but is opened with
    # xlrd.open_workbook; confirm the file really is an Excel workbook.
    path = r'C:\Users\SHEIN\Desktop\relation\UK.csv'
    table_name = 'UK-result'

    do_main(path, table_name)


 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章