此篇文章承接(貓眼電影-爬取)。
將電影數據儲存到MySQL中後,發現評論人數和票房的數據當中存在漢字,後期不好分析,所以需要將漢字轉化爲數值。
保險起見,我先將films表的結構和數據複製成一個新表films_copy,然後新增了2列:people和box_price。
將數據轉化爲便於分析的數據,代碼如下:
import pymysql
# Step 1: read the raw text columns and pair every converted number with the
# original text; the UPDATE statements further down use the original text as
# their WHERE key.
data1 = []  # (people, score_hum) pairs
data2 = []  # (box_price, box_office) pairs

# (marker, multiplier, exchange rate), checked in this order. The USD markers
# come first because the plain markers are substrings of them. 6.8 is the
# fixed USD -> CNY rate used by this script.
box_units = (
    ('萬美元', 10000, 6.8),
    ('億美元', 100000000, 6.8),
    ('萬', 10000, 1),
    ('億', 100000000, 1),
)

db = pymysql.connect(host='localhost', user='root', passwd='password',
                     db='maoyan', port=3306)
try:
    cursor = db.cursor()
    cursor.execute("SELECT score_hum,box_office FROM films_copy")
    for score_hum, box_office in cursor.fetchall():
        # Each column is converted on its own so that one unparsable value
        # (e.g. NULL) skips only that value instead of aborting every row.
        try:
            if '萬' in score_hum:
                # round() rather than bare int(): int() truncates, so a float
                # product landing just below the exact value would lose one.
                people = int(round(float(score_hum.replace('萬', '')) * 10000))
            else:
                people = score_hum  # no unit marker: keep the value as-is
            data1.append((people, score_hum))
        except (TypeError, ValueError) as exc:
            print("something wrong", repr(score_hum), exc)

        try:
            box_price = box_office  # default: no unit marker, keep as-is
            for marker, multiplier, rate in box_units:
                if marker in box_office:
                    number = float(box_office.replace(marker, ''))
                    box_price = int(round(number * multiplier * rate))
                    break
            data2.append((box_price, box_office))
        except (TypeError, ValueError) as exc:
            print("something wrong", repr(box_office), exc)
except Exception as exc:
    # Best-effort like the original, but report what actually failed.
    print("something wrong", exc)
finally:
    # Always release the connection, even when the query fails.
    db.close()
# Step 2: write every converted audience count into the `people` column,
# matching rows by their original `score_hum` text.
if data1:
    # One connection for the whole batch instead of reconnecting per row.
    db = pymysql.connect(host='localhost', user='root', passwd='password',
                         db='maoyan', port=3306)
    try:
        cursor = db.cursor()
        # Placeholders let the driver escape the values; building the SQL with
        # string formatting breaks on quotes and allows SQL injection.
        sql1 = "UPDATE films_copy SET people = %s WHERE score_hum = %s"
        for people, score_hum in data1:
            print(people, score_hum)
            try:
                # execute() returns the number of affected rows.
                if cursor.execute(sql1, (people, score_hum)):
                    print('Successful')
                db.commit()
            except pymysql.MySQLError as exc:
                # Undo only this row's statement and carry on with the rest.
                db.rollback()
                print('Falied', exc)
    finally:
        db.close()
# Step 3: write every converted box-office amount into the `box_price`
# column, matching rows by their original `box_office` text.
if data2:
    # One connection for the whole batch instead of reconnecting per row.
    db = pymysql.connect(host='localhost', user='root', passwd='password',
                         db='maoyan', port=3306)
    try:
        cursor = db.cursor()
        # Placeholders let the driver escape the values; building the SQL with
        # string formatting breaks on quotes and allows SQL injection.
        sql2 = "UPDATE films_copy SET box_price = %s WHERE box_office = %s"
        for box_price, box_office in data2:
            try:
                # execute() returns the number of affected rows.
                if cursor.execute(sql2, (box_price, box_office)):
                    print('Successful')
                db.commit()
            except pymysql.MySQLError as exc:
                # Undo only this row's statement and carry on with the rest.
                db.rollback()
                print('Falied', exc)
    finally:
        db.close()
import pymysql
# (marker, multiplier, is_usd), checked in this order. The USD markers come
# first because the plain markers are substrings of them.
_BOX_OFFICE_UNITS = (
    ('萬美元', 10000, True),
    ('億美元', 100000000, True),
    ('萬', 10000, False),
    ('億', 100000000, False),
)


def _people_to_int(text):
    """Convert a ``score_hum`` text into an integer head-count.

    A value containing '萬' has the marker removed and the remaining number
    multiplied by 10000; any other value is returned unchanged.
    """
    if '萬' not in text:
        return text
    # round() rather than bare int(): int() truncates, so a float product
    # landing just below the exact value would lose one.
    return int(round(float(text.replace('萬', '')) * 10000))


def _box_office_to_int(text, usd_rate):
    """Convert a ``box_office`` text into an integer amount.

    The first marker from ``_BOX_OFFICE_UNITS`` found in *text* is removed and
    the remaining number is scaled by its multiplier (and by *usd_rate* for
    the '美元' markers). A value without any marker is returned unchanged.
    """
    for marker, multiplier, is_usd in _BOX_OFFICE_UNITS:
        if marker in text:
            amount = float(text.replace(marker, '')) * multiplier
            if is_usd:
                amount *= usd_rate
            return int(round(amount))
    return text


def get_data(usd_rate=6.8):
    """Read ``score_hum`` and ``box_office`` from ``films_copy`` and convert them.

    Args:
        usd_rate: Multiplier applied to amounts marked with '美元'. Defaults
            to 6.8, the rate that was previously hard-coded.

    Returns:
        A ``(data1, data2)`` tuple. ``data1`` holds ``(people, score_hum)``
        pairs and ``data2`` holds ``(box_price, box_office)`` pairs; the
        second element of every pair is the untouched database text.

    Values that cannot be converted (for example NULL columns or text that is
    not a number after removing the unit) are reported with ``print`` and
    skipped, so one bad value no longer stops the remaining rows from being
    processed.
    """
    data1 = []
    data2 = []
    db = pymysql.connect(host='localhost', user='root', passwd='password',
                         db='maoyan', port=3306)
    try:
        cursor = db.cursor()
        cursor.execute("SELECT score_hum,box_office FROM films_copy")
        for score_hum, box_office in cursor.fetchall():
            # The two columns are handled independently: a failure in one
            # must not discard the other.
            try:
                data1.append((_people_to_int(score_hum), score_hum))
            except (TypeError, ValueError) as exc:
                print("something wrong", repr(score_hum), exc)
            try:
                data2.append(
                    (_box_office_to_int(box_office, usd_rate), box_office))
            except (TypeError, ValueError) as exc:
                print("something wrong", repr(box_office), exc)
    except Exception as exc:
        # Best-effort like the original (return whatever was collected), but
        # report what actually failed.
        print("something wrong", exc)
    finally:
        # Always release the connection, even when the query fails.
        db.close()
    return data1, data2
def change_hum(data1):
    """Write converted audience counts into ``films_copy.people``.

    Args:
        data1: Iterable of ``(people, score_hum)`` pairs. Every row whose
            ``score_hum`` equals the second element gets ``people`` set to
            the first element.

    Each pair is printed before its UPDATE runs. Every statement is committed
    on its own; a failing statement is rolled back, reported with ``print``,
    and the remaining pairs are still processed.
    """
    if not data1:
        return  # nothing to write, so do not open a connection at all

    # One connection for the whole batch instead of reconnecting per row.
    db = pymysql.connect(host='localhost', user='root', passwd='password',
                         db='maoyan', port=3306)
    try:
        cursor = db.cursor()
        # Placeholders let the driver escape the values; building the SQL with
        # string formatting breaks on quotes and allows SQL injection.
        sql1 = "UPDATE films_copy SET people = %s WHERE score_hum = %s"
        for people, score_hum in data1:
            print(people, score_hum)
            try:
                # execute() returns the number of affected rows.
                if cursor.execute(sql1, (people, score_hum)):
                    print('Successful')
                db.commit()
            except pymysql.MySQLError as exc:
                db.rollback()
                print('Falied', exc)
    finally:
        db.close()
def change_prices(data2):
    """Write converted box-office amounts into ``films_copy.box_price``.

    Args:
        data2: Iterable of ``(box_price, box_office)`` pairs. Every row whose
            ``box_office`` equals the second element gets ``box_price`` set
            to the first element.

    Every statement is committed on its own; a failing statement is rolled
    back, reported with ``print``, and the remaining pairs are still
    processed.
    """
    if not data2:
        return  # nothing to write, so do not open a connection at all

    # One connection for the whole batch instead of reconnecting per row.
    db = pymysql.connect(host='localhost', user='root', passwd='password',
                         db='maoyan', port=3306)
    try:
        cursor = db.cursor()
        # Placeholders let the driver escape the values; building the SQL with
        # string formatting breaks on quotes and allows SQL injection.
        sql2 = "UPDATE films_copy SET box_price = %s WHERE box_office = %s"
        for box_price, box_office in data2:
            try:
                # execute() returns the number of affected rows.
                if cursor.execute(sql2, (box_price, box_office)):
                    print('Successful')
                db.commit()
            except pymysql.MySQLError as exc:
                db.rollback()
                print('Falied', exc)
    finally:
        db.close()
def main():
    """Convert the text columns of ``films_copy`` and store the numbers."""
    # Call get_data() once and unpack both lists; calling it twice would
    # query and convert the whole table twice.
    data1, data2 = get_data()
    change_hum(data1)
    change_prices(data2)
# Run only when executed as a script. The module attribute __name__ must be
# compared, not the string literal '__name__', which never equals '__main__'.
if __name__ == '__main__':
    main()