# Print data from a given web page
# Fetch the page and echo its first ten lines, numbered from 1.
# urlopen lives in urllib on Python 2 and in urllib.request on Python 3,
# so use whichever one this interpreter provides.
try:
    from urllib.request import urlopen  # Python 3
except ImportError:
    from urllib import urlopen  # Python 2

# NOTE: `s` is intentionally left open; statements further down keep reading from it.
s = urlopen("http://www.sina.com")
for i in range(10):
    # readline() returns one raw line with its trailing newline; on Python 3
    # that is a bytes object, so it is shown in its b'...' form.
    print("line %d: %s " % (i + 1, s.readline()))
# Print a list
def print_list(list):
    """Print each element of ``list`` on its own line, in iteration order.

    Any iterable is accepted. The parameter keeps its original name (which
    hides the built-in ``list`` inside this function) so keyword callers
    continue to work.
    """
    for entry in list:
        print(entry)
# Print all lines
# Read whatever is left of the response `s` into a list of lines
# (the earlier readline() calls have already consumed the first ten).
lists = s.readlines()
# Echo each remaining line on its own output line.
print_list(lists)
# Finally show the HTTP status code of the response.
print(s.getcode())
# Download data (with progress display)
def retrieve():
    """Download http://www.sina.com to ``index.html`` while reporting progress.

    The name of the saved file is printed first, followed by every
    ``(header, value)`` pair of the response headers, one pair per line.
    Progress is reported through the module-level ``progress`` hook.
    """
    # urlretrieve moved from urllib (Python 2) to urllib.request (Python 3);
    # import it locally so this function works on either interpreter.
    try:
        from urllib.request import urlretrieve  # Python 3
    except ImportError:
        from urllib import urlretrieve  # Python 2

    fname, msg = urlretrieve("http://www.sina.com", "index.html", reporthook=progress)
    print(fname)
    print_list(msg.items())
def progress(blk, blk_size, total_size):
    """Report download progress; usable as a ``urlretrieve`` report hook.

    Args:
        blk: Number of blocks transferred so far.
        blk_size: Size of one block in bytes.
        total_size: Total size of the download in bytes, or a value <= 0
            when no size is available.

    Prints ``"<received>/<total> - <percent>%"``. When the total is unknown
    only the received byte count is printed, since no percentage exists.
    """
    received = blk * blk_size
    if total_size <= 0:
        # No usable total (urlretrieve passes -1 when the size is unknown).
        # Dividing by it would crash on 0 or print a negative percentage.
        print("%d/unknown" % received)
        return
    # The final block is usually only partly filled, so blk * blk_size can
    # overshoot the real size; cap it so the report never exceeds 100%.
    received = min(received, total_size)
    print("%d/%d - %.02f%%" % (received, total_size, float(received) * 100 / total_size))
# Encode data into a URL query string
def urlencode():
    """Print a sample parameter dict encoded as a URL query string."""
    # The library urlencode moved from urllib (Python 2) to urllib.parse
    # (Python 3). It is imported under an alias because this function
    # carries the same name.
    try:
        from urllib.parse import urlencode as _encode_query  # Python 3
    except ImportError:
        from urllib import urlencode as _encode_query  # Python 2

    params = {"score": 100, "name": "爬蟲", "comment": "very good"}
    qs = _encode_query(params)
    print(qs)
# Decode a URL into data
try:
    import urlparse  # Python 2
except ImportError:
    import urllib.parse as urlparse  # Python 3 moved these functions into urllib.parse
def parse_qs(url=None):
    """Split a URL into its components and decode its query string.

    Prints the parsed URL components, then the decoded query parameters.

    Args:
        url: URL to analyse. When omitted, a sample Baidu search URL is used.

    Returns:
        A dict mapping each query parameter name to the list of its values.
    """
    if url is None:
        url = "https://www.baidu.com/s?ie=utf-8&f=8&rsv_bp=0&rsv_idx=1&tn=baidu&wd=python&rsv_pq=a1a3e55800046021&rsv_t=3bcfmLc%2Fd4M1X4qWGjXjGFAEJByJA1q92Gki%2BNMAgWdmrg8xcfkWgJM6t74&rsv_enter=1&rsv_sug3=9&rsv_sug1=7&rsv_sug7=100&rsv_sug2=0&inputT=7085&rsv_sug4=7123"
    result = urlparse.urlparse(url)
    print(result)
    # Only the query component is decoded; percent-escapes become plain text.
    params = urlparse.parse_qs(result.query)
    print(params)
    return params