# Web learning notes (網絡學習筆記)
import json

import requests

# Fetch one page of comment data for a single news article.  The code below
# expects the body to be JavaScript of the form "var data={...}" rather than
# bare JSON, which is why a prefix is removed before parsing.
comments = requests.get(
    'http://comment5.news.sina.com.cn/page/info?version=1&format=js&channel=gn&newsid=comos-fxvctcc8121090&group=&compress=0&ie=utf-8&oe=utf-8&page=1&page_size=20',
    timeout=10,  # never hang forever on an unresponsive server
)
comments.raise_for_status()  # fail loudly on HTTP errors instead of parsing an error page

# str.strip('var data=') would remove any of those *characters* from both
# ends of the text, not the literal prefix, so drop the prefix explicitly.
js_prefix = 'var data='
payload = comments.text.strip()
if payload.startswith(js_prefix):
    payload = payload[len(js_prefix):]
jd = json.loads(payload)

# Show the total count reported under result -> count in the response.
print(jd['result']['count']['total'])
newsurl = 'http://news.sina.com.cn/c/nd/2016-08-20/doc-ifxvctcc8121090.shtml'

# Approach 1: plain string operations.
# str.lstrip/str.rstrip treat their argument as a set of characters, not as a
# literal prefix/suffix, so an id that begins or ends with one of those
# characters would be silently truncated.  Remove the exact prefix and suffix
# instead.
id_prefix = 'doc-i'
id_suffix = '.shtml'
newsid_from_split = newsurl.split('/')[-1]
if newsid_from_split.startswith(id_prefix):
    newsid_from_split = newsid_from_split[len(id_prefix):]
if newsid_from_split.endswith(id_suffix):
    newsid_from_split = newsid_from_split[:-len(id_suffix)]
print(newsid_from_split)  # fxvctcc8121090

# Approach 2: regular expression.
import re

# Raw string with the dot escaped so that only a literal ".shtml" matches;
# ".+" requires a non-empty id between the prefix and the extension.
m = re.search(r'doc-i(.+)\.shtml', newsurl)
if m is None:
    # Without this guard a non-matching URL would fail later with an
    # unhelpful AttributeError on m.group().
    raise ValueError('no news id found in URL: ' + newsurl)
print(m)  # match object for 'doc-ifxvctcc8121090.shtml', span (40, 65)
newsid = m.group(1)
print(newsid)  # fxvctcc8121090