貓眼電影-分析

  此篇文章承接(貓眼電影-爬取)。

  將電影數據儲存到MySQL中後,發現評論人數和票房的數據當中存在漢字,後期不好分析,所以需要將漢字轉化爲數值。

  保險起見,我先將films表裏面的結構和數據複製成了一個新表films_copy,然後新增了2列,people和box_price。

將數據轉化爲便於分析的數據,代碼如下:

import pymysql

# Read every (score_hum, box_office) pair from films_copy and build two lists
# of (converted value, original text) tuples: data1 for the rater count and
# data2 for the box office. The original text is kept alongside the converted
# value because the UPDATE statements later in this script locate rows by it.
USD_TO_CNY = 6.8  # fixed exchange rate applied to figures quoted in 美元

data1 = []
data2 = []
db = pymysql.connect(host='localhost', user='root', passwd='password', db='maoyan', port=3306)
try:
    cursor = db.cursor()
    sql = "SELECT score_hum,box_office FROM films_copy"
    cursor.execute(sql)
    results = cursor.fetchall()
    for score_hum, box_office in results:
        # NULL columns cannot be converted, and "WHERE column = NULL" would
        # never match a row anyway, so they are skipped instead of aborting
        # the whole loop.
        if score_hum is not None:
            try:
                if '萬' in score_hum:
                    # round() rather than int(): a float product can land a
                    # hair below the true integer, and truncation would then
                    # store a value that is off by one.
                    people = round(float(score_hum.replace('萬', '')) * 10000)
                else:
                    # No unit suffix: keep the text unchanged.
                    people = score_hum
                data1.append((people, score_hum))
            except (TypeError, ValueError) as exc:
                # One malformed value must not stop the remaining rows.
                print("something wrong", repr(score_hum), exc)

        if box_office is not None:
            try:
                # Dollar amounts are converted to yuan; everything else is
                # multiplied by 1, which leaves the float product untouched.
                rate = USD_TO_CNY if '美元' in box_office else 1
                number = box_office.replace('美元', '')
                if '萬' in number:
                    price = round(float(number.replace('萬', '')) * 10000 * rate)
                elif '億' in number:
                    price = round(float(number.replace('億', '')) * 100000000 * rate)
                else:
                    # Neither 萬 nor 億 present: keep the text unchanged.
                    price = box_office
                data2.append((price, box_office))
            except (TypeError, ValueError) as exc:
                print("something wrong", repr(box_office), exc)
except pymysql.MySQLError as exc:
    # Only database errors are handled here; programming errors propagate.
    print("something wrong", exc)
finally:
    # Close the connection on every path, including unexpected exceptions.
    db.close()

# Write each converted rater count into films_copy.people, locating the rows
# by their original score_hum text.
# The statement is parameterized: the values come from scraped text, so they
# must be escaped by the driver rather than spliced into the SQL string.
sql1 = "UPDATE films_copy SET people = %s WHERE score_hum = %s"
# One connection serves the whole loop instead of one connection per row.
db = pymysql.connect(host='localhost', user='root', passwd='password', db='maoyan', port=3306)
try:
    cursor = db.cursor()
    for row in data1:
        people, score_hum = row[0], row[1]
        print(people, score_hum)
        try:
            # execute() returns the affected-row count; commit (and report
            # success) only when at least one row was actually changed.
            if cursor.execute(sql1, (people, score_hum)):
                print('Successful')
                db.commit()
        except pymysql.MySQLError as exc:
            db.rollback()
            print('Falied', exc)
finally:
    db.close()
    
# Write each converted box-office value into films_copy.box_price, locating
# the rows by their original box_office text.
# The statement is parameterized: the values come from scraped text, so they
# must be escaped by the driver rather than spliced into the SQL string.
sql2 = "UPDATE films_copy SET box_price = %s WHERE box_office = %s"
# One connection serves the whole loop instead of one connection per row.
db = pymysql.connect(host='localhost', user='root', passwd='password', db='maoyan', port=3306)
try:
    cursor = db.cursor()
    for row in data2:
        box_price, box_office = row[0], row[1]
        try:
            # execute() returns the affected-row count; commit (and report
            # success) only when at least one row was actually changed.
            if cursor.execute(sql2, (box_price, box_office)):
                print('Successful')
                db.commit()
        except pymysql.MySQLError as exc:
            db.rollback()
            print('Falied', exc)
finally:
    db.close()
import pymysql


def _people_count(text):
    """Turn a rater-count string ending in 萬 into an integer; else return it unchanged."""
    if '萬' in text:
        # round() rather than int(): a float product can land a hair below
        # the true integer, and truncation would then be off by one.
        return round(float(text.replace('萬', '')) * 10000)
    return text


def _box_price(text, usd_rate):
    """Turn a box-office string in 萬/億 (optionally 美元) into an integer yuan amount."""
    # Dollar amounts are converted with usd_rate; multiplying by 1 otherwise
    # leaves the float product untouched.
    rate = usd_rate if '美元' in text else 1
    number = text.replace('美元', '')
    # 萬 is checked before 億, matching the original branch order.
    for unit, factor in (('萬', 10000), ('億', 100000000)):
        if unit in number:
            return round(float(number.replace(unit, '')) * factor * rate)
    # No recognised unit: keep the text unchanged.
    return text


def get_data(usd_rate=6.8):
    """Fetch score_hum and box_office from films_copy and convert them.

    Args:
        usd_rate: exchange rate applied to box-office values quoted in 美元.
            Defaults to 6.8.

    Returns:
        A ``(data1, data2)`` tuple. ``data1`` holds ``(people, score_hum)``
        pairs and ``data2`` holds ``(box_price, box_office)`` pairs, where the
        second element is the original column text and the first is its
        converted value (or the same text when it carries no 萬/億 unit).
        NULL columns and values that cannot be parsed are left out. Both
        lists are empty or partial if the query itself fails.
    """
    data1 = []
    data2 = []
    db = pymysql.connect(host='localhost', user='root', passwd='password', db='maoyan', port=3306)
    try:
        cursor = db.cursor()
        cursor.execute("SELECT score_hum,box_office FROM films_copy")
        for score_hum, box_office in cursor.fetchall():
            # NULL columns cannot be converted, and "WHERE column = NULL"
            # would never match a row, so they are skipped.
            if score_hum is not None:
                try:
                    data1.append((_people_count(score_hum), score_hum))
                except (TypeError, ValueError) as exc:
                    # One malformed value must not stop the remaining rows.
                    print("something wrong", repr(score_hum), exc)
            if box_office is not None:
                try:
                    data2.append((_box_price(box_office, usd_rate), box_office))
                except (TypeError, ValueError) as exc:
                    print("something wrong", repr(box_office), exc)
    except pymysql.MySQLError as exc:
        # Only database errors are handled here; programming errors propagate.
        print("something wrong", exc)
    finally:
        # Close the connection on every path, including unexpected exceptions.
        db.close()
    return data1, data2

def change_hum(data1):
    """Write converted rater counts into ``films_copy.people``.

    Args:
        data1: sequence of rows whose first element is the converted value
            and whose second element is the original ``score_hum`` text used
            to locate the rows to update.
    """
    if not data1:
        # Nothing to write; do not open a connection.
        return
    # Parameterized statement: the values come from scraped text, so the
    # driver must escape them instead of the SQL being built with "%".
    sql1 = "UPDATE films_copy SET people = %s WHERE score_hum = %s"
    # One connection serves the whole loop instead of one per row.
    db = pymysql.connect(host='localhost', user='root', passwd='password', db='maoyan', port=3306)
    try:
        cursor = db.cursor()
        for row in data1:
            people, score_hum = row[0], row[1]
            print(people, score_hum)
            try:
                # execute() returns the affected-row count; commit (and
                # report success) only when a row was actually changed.
                if cursor.execute(sql1, (people, score_hum)):
                    print('Successful')
                    db.commit()
            except pymysql.MySQLError as exc:
                db.rollback()
                print('Falied', exc)
    finally:
        db.close()
 
def change_prices(data2):
    """Write converted box-office values into ``films_copy.box_price``.

    Args:
        data2: sequence of rows whose first element is the converted value
            and whose second element is the original ``box_office`` text used
            to locate the rows to update.
    """
    if not data2:
        # Nothing to write; do not open a connection.
        return
    # Parameterized statement: the values come from scraped text, so the
    # driver must escape them instead of the SQL being built with "%".
    sql2 = "UPDATE films_copy SET box_price = %s WHERE box_office = %s"
    # One connection serves the whole loop instead of one per row.
    db = pymysql.connect(host='localhost', user='root', passwd='password', db='maoyan', port=3306)
    try:
        cursor = db.cursor()
        for row in data2:
            box_price, box_office = row[0], row[1]
            try:
                # execute() returns the affected-row count; commit (and
                # report success) only when a row was actually changed.
                if cursor.execute(sql2, (box_price, box_office)):
                    print('Successful')
                    db.commit()
            except pymysql.MySQLError as exc:
                db.rollback()
                print('Falied', exc)
    finally:
        db.close()

def main():
    """Convert the text columns of films_copy and store the numeric values."""
    # Query once and unpack both lists: calling get_data() twice would run
    # the SELECT twice and could pair lists taken from different snapshots.
    data1, data2 = get_data()
    change_hum(data1)
    change_prices(data2)

# Run the conversion only when this file is executed as a script.
# __name__ must be the module variable, not the string literal '__name__':
# comparing two different literals is always False, so main() never ran.
if __name__ == '__main__':
    main()
    
    

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章