#coding=utf-8
'''
Created on 2013-7-17
@author: zinan.zhang
'''
import re
import time
import httplib2
import urllib
from bs4 import BeautifulSoup
# Directory that downloaded files are written into. The download code builds
# the target path with plain string concatenation (savePath + file name), so
# this value has to end with a path separator.
savePath = 'F://TDDOWNLOAD//aNO.4//'
def url_xunhuan(url, list):
    """Build one download URL by appending a list entry to a base URL.

    Args:
        url: Base URL; used as the left operand of ``+``.
        list: Suffix to append (for example a file name). The name shadows
            the builtin but is kept so keyword callers keep working.

    Returns:
        The result of ``url + list``.
    """
    full_url = url + list
    return full_url
# Pause before downloading images: sleep first, then read the data.
# NOTE(review): as a top-level statement this sleeps once, when the module is
# loaded, not before each download -- confirm that this is what was intended.
time.sleep(0.5)  # sleep first, then read the data
# The bare string below is a no-op expression used as a heading for gDownload.
# In English: "Download a file by url; the file name is derived from the url."
"""根據url下載文件,文件名自動從url獲取"""
def gDownload(url, savePath):
    """Download ``url`` and store it under the file name taken from the URL.

    Args:
        url: Address of the file to fetch.
        savePath: Destination prefix, passed on unchanged to
            ``gDownloadWithFilename``.

    Returns:
        None. The work is delegated to ``gDownloadWithFilename``.
    """
    # Arguments are not validated here (validation is skipped for now).
    gDownloadWithFilename(url, savePath, gGetFileName(url))
"""根據url獲取文件名"""
def gGetFileName(url):
    """Return the file-name part of ``url``.

    The name is the text after the last ``/`` of the URL path. A query
    string (``?...``) or fragment (``#...``) is removed first, so it neither
    ends up in the file name nor confuses the search for the last ``/``.

    Args:
        url: URL string, or ``None``.

    Returns:
        ``None`` when ``url`` is ``None``; otherwise the last path segment,
        which is ``""`` for an empty URL or a URL whose path ends in ``/``.
    """
    if url is None:
        return None
    # Drop the fragment first, then the query, leaving only the path part.
    path = url.split("#", 1)[0].split("?", 1)[0]
    # rsplit with maxsplit=1 always yields at least one element, so [-1] is
    # safe even when there is no "/" at all.
    return path.rsplit("/", 1)[-1]
"""根據url下載文件,文件名參數指定"""
def gDownloadWithFilename(url, savePath, file):
    """Download ``url`` and write it to ``savePath + file``.

    The whole response body is read into memory and then written in binary
    mode. One status line is printed: a success message after the file has
    been written, or a failure message when an ``IOError`` occurs while
    opening, reading or writing.

    Args:
        url: Address of the file to fetch.
        savePath: Destination prefix; joined to ``file`` by plain string
            concatenation, so it must already end with a path separator.
        file: Name of the file to create under ``savePath``. The name shadows
            the Python 2 builtin but is kept for keyword callers; it is no
            longer rebound to the file object.

    Returns:
        None. ``IOError`` is reported with a message and not re-raised; other
        exceptions propagate.
    """
    # No argument validation is performed here (validation is skipped for now).
    try:
        opener = urllib.URLopener()
        remote = opener.open(url)
        try:
            data = remote.read()
        finally:
            # Release the connection even when read() fails.
            remote.close()
        # The with-block closes the output file even when write() fails.
        with open(savePath + file, 'w+b') as out:
            out.write(data)
        # A parenthesised single argument prints the same text on Python 2
        # and Python 3.
        print("下載成功:" + url)
    except IOError:
        print("下載失敗:" + url)
def getPage(url):
    """Fetch a page with browser-like request headers and return its body.

    Args:
        url: Address of the page to request with HTTP GET.

    Returns:
        The second element of the pair returned by ``httplib2``'s
        ``Http.request`` (the response content); the first element is
        discarded.
    """
    request_headers = {}
    request_headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko)'
    request_headers['Accept-Language'] = 'zh-CN,zh;q=0.8'
    request_headers['Accept'] = 'text/css,*/*;q=0.1'

    # ".cache" is handed to httplib2.Http as its cache argument.
    client = httplib2.Http(".cache")
    _response, body = client.request(url, 'GET', headers=request_headers)
    return body
def xuanhuan_down_list():
    """Download a fixed run of numbered images (``0.jpg`` .. ``9.jpg``).

    Intended for sites with predictable file names (wallpaper / enterdesk
    style listings). Each file name is appended to the hard-coded base URL
    and the result is downloaded into the module-level ``savePath``.

    Returns:
        None.
    """
    base_url = 'http://tupian.enterdesk.com/2013/mxy/0311/4/'
    count = 10
    file_names = [str(index) + '.jpg' for index in range(count)]
    for file_name in file_names:
        gDownload(url_xunhuan(base_url, file_name), savePath)
        # Short pause after each download before requesting the next file.
        time.sleep(0.2)
def spider_url(url):
    """Fetch a page and return the ``src`` of its first usable ``<img>``.

    The page is downloaded with ``getPage`` and parsed with BeautifulSoup.
    ``<img>`` tags are scanned in document order and the first one that has
    a ``src`` attribute wins. Tags without ``src`` are skipped, so one
    malformed tag no longer aborts the lookup with ``KeyError``.

    Args:
        url: Address of the page to scan.

    Returns:
        The ``src`` attribute value of the first ``<img>`` tag that has one.

    Raises:
        IndexError: If the page contains no ``<img>`` tag with a ``src``
            attribute (the same exception type the empty-list case raised
            before).
    """
    page = getPage(url)
    dom = BeautifulSoup(page)
    for image in dom.findAll('img'):
        src = image.get('src')
        # Only a missing attribute is skipped; an empty src is still returned.
        if src is not None:
            return src
    raise IndexError("no <img> tag with a src attribute found at " + str(url))
def xuanhuan_down_suiji():
    """Download images whose real URLs must be scraped from viewer pages.

    Used for ZOL desktop wallpapers: page numbers 88 through 113 (inclusive)
    are substituted into the viewer-page URL, each page is scanned with
    ``spider_url`` for the image address, and that image is downloaded into
    the module-level ``savePath``.

    The whole loop sits inside a single ``try``, so the first ``IOError``
    prints a failure message and ends the batch.

    Returns:
        None.
    """
    # Example viewer page:
    # http://desk.zol.com.cn/showpic/1920x1200_30688_33.html
    first_page = 88
    extra_pages = 25
    try:
        # The upper bound is inclusive, hence the + 1.
        for page_no in range(first_page, first_page + extra_pages + 1):
            page_url = ('http://desk.zol.com.cn/showpic/1920x1200_12'
                        + str(page_no) + '_37.html')
            gDownload(spider_url(page_url), savePath)
            time.sleep(0.1)
    except IOError:
        print("url獲取失敗!")
if __name__ == "__main__":
    # Script entry point. Both batch modes are currently disabled; uncomment
    # one of the calls below to run it.
    #
    # Batch-download images with fixed, sequential file names:
    #     xuanhuan_down_list()
    #
    # Batch-download images whose .jpg address is hidden in a viewer page:
    #     xuanhuan_down_suiji()

    # Final status line, printed after the selected batch (if any) has run.
    print('下載完成!')