Python 爬蟲入門

import re
import requests
def fun(first):
    """Download the .jpg images referenced on one listing page.

    Builds the page URL ``http://www.mmonly.cc/gqbz/list_41_<first>.html``,
    extracts image URLs from its HTML with the regular expression stored in
    ``zhenze.txt`` (read from the current working directory), and writes each
    selected image to ``<first>_<index>.jpg`` in the current directory.

    Pages or images that cannot be fetched (network error, timeout, non-2xx
    status, malformed URL) are reported on stdout and skipped, so one bad
    link does not abort a long crawl.

    Args:
        first: Page number as a string; it is concatenated into the page URL
            and used as the prefix of the saved file names.

    Raises:
        OSError: If ``zhenze.txt`` cannot be read or an image file cannot be
            written.
    """
    timeout = 10  # seconds; without a timeout a stalled server blocks forever

    # Load the pattern first so a missing pattern file fails before any
    # network traffic. Trailing newlines are dropped because they would
    # otherwise be part of the regex and have to match literally.
    with open("zhenze.txt", "rt", encoding="utf-8") as in_file:
        zhenze = in_file.read().rstrip("\r\n")

    # Example page: http://www.mmonly.cc/gqbz/list_41_345.html
    url = "http://www.mmonly.cc/gqbz/list_41_" + first + ".html"  # 10-70 1-7
    try:
        res = requests.get(url=url, timeout=timeout)
        res.raise_for_status()
    except requests.RequestException as err:
        print("skip page {}: {}".format(url, err))
        return

    pic_url = re.findall(zhenze, res.text, re.S)
    print(pic_url)

    # NOTE(review): the first and last matches are skipped, exactly as in the
    # original ``range(1, len(pic_url) - 1)``; presumably they are non-gallery
    # images on this site -- confirm before changing.
    for i, link in enumerate(pic_url[1:-1], start=1):
        try:
            picture = requests.get(link, timeout=timeout)
            picture.raise_for_status()
        except requests.RequestException as err:
            # Do not write an error page to disk under a .jpg name.
            print("skip image {}: {}".format(link, err))
            continue
        with open(first + "_" + str(i) + ".jpg", "wb") as f:
            f.write(picture.content)

def getfun(start=10, stop=300):
    """Crawl a range of listing pages by calling ``fun`` on each page number.

    Args:
        start: First page number to fetch (inclusive). Defaults to 10.
        stop: Page number to stop before (exclusive). Defaults to 300.
    """
    for page in range(start, stop):
        # ``fun`` builds the URL by string concatenation, so pass a string.
        fun(str(page))

# Start the crawl only when this file is executed as a script, not on import.
if __name__ == "__main__":
    getfun()

下面是 zhenze.txt 的內容(供 re.findall 使用的正則表達式,用來抓取頁面中的 .jpg 圖片網址):

<img.*?src="(.*?\.jpg)"

運行,爬蟲完畢,繼續學習~

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章