Python 爬蟲基礎 Class 2(中國大學排名)

 

 

import requests
import bs4


def getdata(url):
    """Download *url* and return the response body as text.

    The response encoding is replaced by the encoding guessed from the
    body (``apparent_encoding``) before decoding.

    Args:
        url: Address of the page to fetch.

    Returns:
        The decoded page text, or an empty string when the request fails
        (connection problem, timeout, or a non-2xx status code).
    """
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException as exc:
        # Only request-related failures are treated as "no data"; anything
        # else (KeyboardInterrupt, programming errors) must propagate.
        print('error', exc)
        return ''


def _cell_text(node):
    """Return ``node.string``, or the node's stripped text when that is None."""
    text = node.string
    return text if text is not None else node.get_text(strip=True)


def filllist(soup, ulist):
    """Append one ``[col0, col1, col2, col3]`` entry per table row to *ulist*.

    Every tag that is a direct child of the first ``<tbody>`` in *soup* is
    treated as a row. The second column is read from the ``<div>`` inside
    the cell when there is one, otherwise from the cell itself.

    Args:
        soup: Parsed document to search for the ``<tbody>``.
        ulist: List that is extended in place.

    Returns:
        None. When the document has no ``<tbody>`` (for example because the
        download failed and an empty page was parsed), *ulist* is left
        unchanged. Rows with fewer than four ``<td>`` cells are skipped.
    """
    tbody = soup.find('tbody')
    if tbody is None:
        return
    for line in tbody.children:
        # Children also include whitespace text nodes between rows.
        if not isinstance(line, bs4.element.Tag):
            continue
        tds = line.find_all('td')
        if len(tds) < 4:
            continue
        name_cell = tds[1].find('div')
        if name_cell is None:
            name_cell = tds[1]
        ulist.append([
            _cell_text(tds[0]),
            _cell_text(name_cell),
            _cell_text(tds[2]),
            _cell_text(tds[3]),
        ])


def printlist(ulist, num=101):
    """Print the first *num* rows of *ulist* and save them to ``uinfo.txt``.

    A header line is emitted first, then one tab-separated line per row.
    The second column is centred using the full-width space ``chr(12288)``
    as the fill character. The same text goes to standard output and to
    the file.

    Args:
        ulist: Sequence of rows; each row needs at least four items.
        num: Maximum number of rows to output. If *ulist* is shorter, all
            of its rows are output instead of raising ``IndexError``.

    Returns:
        None. ``uinfo.txt`` in the current directory is overwritten and is
        always written as UTF-8, independent of the platform default.
    """
    header = '{:^8}\t{:^20}\t{:^10}\t{:^10}\n'.format('排名', '學校', '地區', '分數')
    row_format = '{0:^8}\t{1:{2}^15}\t{3:^8}\t{4:^8}'
    # "with" guarantees the file is closed even if formatting a row fails.
    with open('uinfo.txt', 'w', encoding='utf-8') as fp:
        fp.write(header)
        print(header)
        # max() keeps a negative num meaning "no rows", as range(num) did.
        for u in ulist[:max(num, 0)]:
            line = row_format.format(u[0], u[1], chr(12288), u[2], u[3])
            print(line)
            fp.write(line + '\n')


# Script entry: download the ranking page, parse the table and output it.
url = 'http://www.zuihaodaxue.cn/Greater_China_Ranking2019_0.html'
uinfo = list()
data = getdata(url)
soup = bs4.BeautifulSoup(data, 'html.parser')
if soup.find('tbody') is None:
    # getdata() returns '' on failure, so there may be no table to read.
    print('no ranking table found')
else:
    filllist(soup, uinfo)
    # Never ask for more rows than were actually parsed.
    printlist(uinfo, min(101, len(uinfo)))

 

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼?請在上方評論欄輸入並且點擊發布。
相關文章