import os
from urllib import parse

import requests
from fake_useragent import UserAgent
from lxml import etree
class TBImage():
    """Interactive crawler that saves images found in Baidu Tieba threads.

    The flow is: ``purpose`` asks for a keyword and a page range, builds one
    listing URL per page and hands it to ``downLoadPage``; that method
    collects thread links and passes each to ``downLoadImageLink``; which in
    turn collects image URLs and passes each to ``dowLoadImage``, which
    writes the image bytes into ``IMAGE_DIR``.
    """

    # The ``pn`` query parameter advances by this much per listing page.
    PAGE_STEP = 50
    # Prefix prepended to the hrefs scraped from a listing page.
    THREAD_BASE_URL = "http://www.tieba.com"
    # Directory that downloaded images are written to (created on demand).
    IMAGE_DIR = "./Image/"
    # Seconds to wait on any single HTTP request before giving up, so that
    # one stalled connection cannot hang the whole crawl.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        """Prepare the request headers and the base listing URL."""
        self.ua = UserAgent().ie
        self.headers = {
            "User-Agent": self.ua
        }
        self.url = "https://tieba.baidu.com/f?kw="

    @staticmethod
    def _page_offsets(first_page, last_page):
        """Return the ``pn`` offsets for pages ``first_page..last_page``.

        Pages are 1-based and the range is inclusive; an empty list is
        returned when ``last_page`` is smaller than ``first_page``.
        """
        return [
            index * TBImage.PAGE_STEP
            for index in range(first_page - 1, last_page)
        ]

    @staticmethod
    def _filename_from_url(url):
        """Derive a local file name from an image URL.

        The last segment of the URL *path* is used, so query strings and
        fragments never end up in the file name. If the path has no final
        segment, the percent-encoded tail of the URL is used instead so the
        result never contains a path separator.
        """
        name = os.path.basename(parse.urlsplit(url).path)
        if name:
            return name
        return parse.quote(url[-9:], safe="")

    def _fetch(self, url):
        """GET ``url`` with the crawler's headers and request timeout."""
        return requests.get(
            url=url, headers=self.headers, timeout=self.REQUEST_TIMEOUT
        )

    def purpose(self):
        """Prompt for a keyword and page range, then crawl every page.

        Raises:
            ValueError: if either page number entered is not an integer.
        """
        key = input("請輸入你要查詢的關鍵字:")
        first_input = input("請輸入你要查詢的起始頁:")
        last_input = input("請輸入你要查詢的終止頁:")
        first_page = int(first_input)
        last_page = int(last_input)

        # The keyword goes into the query string, so it must be URL-quoted.
        keyword = parse.quote(key)
        offsets = self._page_offsets(first_page, last_page)
        for page_no, offset in enumerate(offsets, start=first_page):
            url = self.url + keyword + "&ie=utf-8&pn=" + str(offset)
            print("正在加載第{}頁。。。".format(page_no))
            self.downLoadPage(url)
        print("下載已完成,謝謝使用!")

    def downLoadPage(self, url):
        """Fetch one listing page and process every thread link on it."""
        html = etree.HTML(self._fetch(url).text)
        # The trailing space inside the class value is part of the match.
        links = html.xpath('//a[@class="j_th_tit "]/@href')
        for href in links:
            fullurl = self.THREAD_BASE_URL + href
            print("正在查找{}中圖片鏈接".format(fullurl))
            self.downLoadImageLink(fullurl)

    def downLoadImageLink(self, url):
        """Fetch one thread page and download every matching image in it."""
        html = etree.HTML(self._fetch(url).text)
        image_urls = html.xpath('//img[@class="BDE_Image"]//@src')
        for image in image_urls:
            print("正在下載圖片{}".format(image))
            self.dowLoadImage(image)

    def dowLoadImage(self, url):
        """Download a single image and save it under ``IMAGE_DIR``.

        A failed or non-2xx request is reported and skipped so that one bad
        image does not abort the rest of the crawl, and so that an error
        page is never written to disk as if it were an image.
        """
        try:
            response = self._fetch(url)
            response.raise_for_status()
        except requests.RequestException as err:
            print("圖片下載失敗,已跳過:{} ({})".format(url, err))
            return

        # The output directory may not exist yet on a fresh checkout.
        os.makedirs(self.IMAGE_DIR, exist_ok=True)
        target = os.path.join(self.IMAGE_DIR, self._filename_from_url(url))
        with open(target, "wb") as f:
            f.write(response.content)
# Script entry point: start the interactive crawl only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    TBImage().purpose()