Python抓取網易雲音樂指定歌手的歌曲和評論數量

之前簡單學了一下Python,還沒實際做過東西,這次心血來潮有了個idea,就寫了一個抓取網易雲音樂指定歌手的歌曲和評論數量的腳本。

代碼如下,如果缺少包則用pip安裝一下,不過AES加密用到的pycrypto包,編譯安裝需要有c++環境,所以建議下載編譯好的版本,我這裏是Python35的:https://github.com/nsrathjen/pycrypto-py3.5-win64-binary

代碼

import os
import json
import hashlib
import base64
import binascii
from Crypto.Cipher import AES
import requests
import prettytable


default_timeout = 100 #timeout value handed to the requests.get / requests.post calls below


# Hex-encoded RSA modulus; rsaEncrypt() parses it with int(modulus, 16).
modulus = ('00e0b509f6259df8642dbc35662901477df22677ec152b5ff68ace615bb7'
           'b725152b3ab17a876aea8a5aa76d2e417629ec4ee341f56135fccf695280'
           '104e0312ecbda92557c93870114af6c9d05c4f7f0c3685b7a46bee255932'
           '575cce10b424d813cfe4875d3e82047b97ddef52741d546b8e289dc6935b'
           '3ece0462db0a22b8e7')
# Fixed key used for the first of the two AES passes in encrypted_request().
nonce = '0CoJUm6Qyw8W8jud'
# Hex-encoded RSA public exponent (0x010001 == 65537), used by rsaEncrypt().
pubKey = '010001'
# Song-id obfuscation, based on https://github.com/yanunon/NeteaseCloudMusic
def encrypted_id(id):
    """Return the URL-safe obfuscated form of a song id.

    The id is XOR-ed byte by byte with a repeating magic key, the result is
    MD5-hashed, and the digest is base64-encoded with ``/`` replaced by ``_``
    and ``+`` replaced by ``-``.

    Args:
        id: The song id, as a string or anything ``str()`` can render
            (for example an int).

    Returns:
        A 24-character ``str`` containing the transformed base64 digest.
    """
    magic = b'3go8&$8*3*3h0k(2)2'
    # The original passed the codec name 'cd u8', which does not exist and
    # raised LookupError on every call; 'utf-8' is the intended encoding.
    song_id = bytearray(str(id), 'utf-8')
    magic_len = len(magic)
    for i, sid in enumerate(song_id):
        song_id[i] = sid ^ magic[i % magic_len]
    digest = hashlib.md5(song_id).digest()
    result = base64.b64encode(digest)
    result = result.replace(b'/', b'_').replace(b'+', b'-')
    return result.decode('utf-8')




# Request encryption, based on https://github.com/stkevintan/nw_musicbox
def encrypted_request(text):
    """Build the encrypted form fields for a ``weapi`` request.

    ``text`` is serialized to JSON, AES-encrypted once with the fixed
    ``nonce`` and then again with a freshly generated secret key. That secret
    key is itself RSA-encrypted with ``pubKey`` / ``modulus``.

    Args:
        text: A JSON-serializable object holding the request parameters.

    Returns:
        A dict with the keys ``'params'`` (the doubly AES-encrypted payload)
        and ``'encSecKey'`` (the RSA-encrypted secret key).
    """
    payload = json.dumps(text)
    session_key = createSecretKey(16)
    first_pass = aesEncrypt(payload, nonce)
    second_pass = aesEncrypt(first_pass, session_key)
    wrapped_key = rsaEncrypt(session_key, pubKey, modulus)
    return {'params': second_pass, 'encSecKey': wrapped_key}




def aesEncrypt(text, secKey):
    """AES-CBC encrypt ``text`` with ``secKey`` and return it base64-encoded.

    The plaintext is padded to a multiple of 16 bytes, where every padding
    byte holds the padding length. The IV is the fixed value
    ``0102030405060708``.

    Args:
        text: Plaintext as ``str`` (encoded as UTF-8) or bytes.
        secKey: AES key as ``str`` (encoded as UTF-8) or bytes.

    Returns:
        The ciphertext as a base64 ``str``.
    """
    def _to_bytes(value):
        # Work on bytes throughout: block size and padding are defined in
        # bytes, and newer AES implementations reject str arguments.
        return value.encode('utf-8') if isinstance(value, str) else value

    data = _to_bytes(text)
    # Pad on the encoded length; padding on the character count produces a
    # wrong block length as soon as the text contains non-ASCII characters.
    pad = 16 - len(data) % 16
    data = data + bytes([pad]) * pad
    encryptor = AES.new(_to_bytes(secKey), AES.MODE_CBC, b'0102030405060708')
    ciphertext = encryptor.encrypt(data)
    return base64.b64encode(ciphertext).decode('utf-8')




def rsaEncrypt(text, pubKey, modulus):
    """Apply raw (unpadded) RSA to the byte-reversed ``text``.

    Args:
        text: Bytes to encrypt; they are reversed before being read as a
            big-endian integer.
        pubKey: Public exponent as a hexadecimal string.
        modulus: Modulus as a hexadecimal string.

    Returns:
        The result as lowercase hexadecimal, left-padded with zeros to at
        least 256 characters.
    """
    reversed_text = text[::-1]
    message = int(binascii.hexlify(reversed_text), 16)
    exponent = int(pubKey, 16)
    n = int(modulus, 16)
    cipher = pow(message, exponent, n)
    return '{:x}'.format(cipher).zfill(256)




def createSecretKey(size):
    """Return ``size`` random lowercase hexadecimal characters as bytes.

    Args:
        size: Desired key length in characters.

    Returns:
        A ``bytes`` object of length ``size`` drawn from ``0-9a-f``.
    """
    # hexlify() doubles the length, so os.urandom(size) always yields enough
    # characters. Truncate to the requested size rather than a fixed 16, so
    # the parameter is honoured for every value.
    return binascii.hexlify(os.urandom(size))[:size]




# Client class; song searches are sent as POST requests.
class NetEase:
    """Minimal HTTP client for the music.163.com endpoints used by this script."""

    def __init__(self):
        # Headers sent with every request.
        self.header = {
            'Accept': '*/*',
            'Accept-Encoding': 'gzip,deflate,sdch',
            'Accept-Language': 'zh-CN,zh;q=0.8,gl;q=0.6,zh-TW;q=0.4',
            'Connection': 'keep-alive',
            'Content-Type': 'application/x-www-form-urlencoded',
            'Host': 'music.163.com',
            'Referer': 'http://music.163.com/search/',
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.152 Safari/537.36'
        }
        # Stored on the instance; httpRequest() does not send these.
        self.cookies = {'appver': '1.5.2'}

    def search(self, s, stype=1, offset=0, total='true', limit=100):
        """Search the catalogue and return the decoded JSON response.

        Args:
            s: Search keywords.
            stype: Result type: song (1), artist (100), album (10),
                playlist (1000) or user (1002).
            offset: Index of the first result to return.
            total: Sent verbatim as the ``total`` form field.
            limit: Maximum number of results per request.
        """
        action = 'http://music.163.com/api/search/get/web'
        data = {
            's': s,
            'type': stype,
            'offset': offset,
            'total': total,
            'limit': limit
        }
        return self.httpRequest('POST', action, data)

    def httpRequest(self, method, action, query=None, urlencoded=None, callback=None, timeout=None):
        """Send an HTTP request and return the response body parsed as JSON.

        Args:
            method: ``'GET'`` for a GET request; any other value sends a POST.
            action: Target URL.
            query: For GET, either a ready-made query string appended after
                ``?`` or a dict of query parameters. For POST, the form data.
            urlencoded: Unused; kept so existing callers keep working.
            callback: Unused; kept so existing callers keep working.
            timeout: Timeout for this request. Falls back to the module-level
                ``default_timeout`` when omitted.

        Returns:
            The object produced by ``json.loads`` on the response text.
        """
        # Honour an explicit timeout instead of silently ignoring it.
        effective_timeout = default_timeout if timeout is None else timeout
        if method == 'GET':
            if isinstance(query, dict):
                # Let requests build and escape the query string; string
                # concatenation would raise TypeError for a dict.
                connection = requests.get(
                    action,
                    params=query,
                    headers=self.header,
                    timeout=effective_timeout
                )
            else:
                url = action if query is None else (action + '?' + query)
                connection = requests.get(url, headers=self.header, timeout=effective_timeout)
        else:
            connection = requests.post(
                action,
                data=query,
                headers=self.header,
                timeout=effective_timeout
            )

        # Force UTF-8 decoding of the body before parsing it.
        connection.encoding = "UTF-8"
        return json.loads(connection.text)

    def getCommentNum(self, id):
        """Fetch the comment listing for song ``id`` and return the decoded JSON.

        The script's caller reads the comment count from the ``'total'`` key
        of the returned object.
        """
        action = 'http://music.163.com/weapi/v1/resource/comments/R_SO_4_'+str(id)+'?csrf_token='
        csrf = ''
        req = {'csrf_token': csrf}
        data = encrypted_request(req)
        return self.httpRequest('POST', action, data)


###############################################################################
# Configuration
singer = '周杰倫'  # artist whose songs are looked up
page = 2  # fetch at most this many result pages
limit = 100  # number of songs requested per page


########################## Main script ##############################
netEase = NetEase()
# This first search is only used to read the total number of matching songs.
musics = netEase.search(singer, stype=1)
songCount = musics['result']['songCount']
# Total number of pages: integer ceiling division, no float round-trip.
pageNum = (songCount + limit - 1) // limit
songs = []
for currentPage in range(min(page, pageNum)):
    print('正在處理第' + str(currentPage + 1) + '頁數據...')
    offset = currentPage * limit
    musics = netEase.search(singer, stype=1, offset=offset, limit=limit)
    # Walk the songs actually returned instead of a precomputed count. The
    # old count compared the page index with `limit`, so a short last page
    # raised IndexError.
    for song in musics['result'].get('songs') or []:
        songId = song['id']
        songName = song['name']
        # Fetch the comment count for this song.
        result = netEase.getCommentNum(songId)
        commentNum = result['total']
        # Append rather than fill a preallocated list, so no empty
        # placeholder rows are left over to break the sort below.
        songs.append([songId, songName, singer, commentNum])
songs.sort(key=lambda x: x[3], reverse=True)  # highest comment count first
# Print the result as a table.
table = prettytable.PrettyTable()
table.field_names = ["ID", "歌名", "歌手", "評論數量"]
for row in songs[:20]:  # top 20 only; slicing copes with fewer than 20 songs
    table.add_row(row)
print(table)


效果


發佈了74 篇原創文章 · 獲贊 15 · 訪問量 14萬+
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章