import re
from urllib import parse
from urllib import request

from bs4 import BeautifulSoup

"""Crawler hello-world demo.

Sends a GET request, sends a POST request, then scrapes the image URLs
from the http://www.dev4free.com front page.
"""

# Browser User-Agent shared by every request so the target servers do not
# reject the script as an obvious bot.  (The original duplicated this
# string verbatim for each request.)
USER_AGENT = (
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/51.0.2704.106 Safari/537.36"
)

# --- Send a request with the GET method ---
req_get = request.Request("http://www.baidu.com")
req_get.add_header("User-Agent", USER_AGENT)
# `with` closes the response socket; the original leaked it.
with request.urlopen(req_get) as resp_get:
    # print(resp_get.read().decode("utf-8"))
    pass

# --- Send a request with the POST method ---
req_post = request.Request(
    "http://www.dev4free.com/devbuy_web/java/manageplatform/login.action"
)
post_data = parse.urlencode([("name", "test"), ("password", "test")])
req_post.add_header("User-Agent", USER_AGENT)
# Passing `data=` makes urlopen issue a POST instead of a GET.
with request.urlopen(req_post, data=post_data.encode("utf-8")) as resp_post:
    # print(resp_post.read().decode("utf-8"))
    pass

# --- Scrape every image URL from the http://www.dev4free.com front page ---
with request.urlopen("http://www.dev4free.com") as resp:
    html = resp.read().decode("utf-8")

soup = BeautifulSoup(html, "html.parser")
# find_all() is the current BeautifulSoup 4 API; findAll is a legacy alias.
# The regex keeps only relative image paths that start with "images".
for img in soup.find_all("img", src=re.compile("^(images)")):
    print("http://www.dev4free.com/" + img["src"])
Python 爬蟲抓取 — Hello World
發表評論
所有評論
還沒有人評論，想成為第一個評論的人麼？請在上方評論欄輸入並且點擊發布。