最近一直在 OpenGrok 上看代碼,該工具的搜索、查看調用等功能都很方便,不好的地方是代碼放在服務器上,網絡不好時看起來很不方便。於是便想着把代碼下載到本地,網絡不好時就在本地看。首先在網上找下載工具,但都沒有合適的。最後沒辦法,只好自己動手:從網上找了個 Python 腳本,根據我的情況修改了一下,運行良好。
代碼如下。因爲是在我的環境上運行的,所以腳本中用來提取目錄和下載鏈接的關鍵詞(例如 `/Qualcomm/`、`class="p"`、`class="r"`、`/source/download`、`/source/history`)需要根據你的情況來修改。
#!/usr/bin/python
import requests
import re
import os
import sys
def help(script):
    """Print the command-line usage line for this script.

    NOTE: the name shadows the ``help`` builtin; it is kept because the
    script's entry point calls it under this name.

    Args:
        script: Name the script was invoked as (normally ``sys.argv[0]``).
    """
    text = 'python3 %s <link_address> <path>' % script
    print(text)
def get_file(url, path):
    """Download every file linked from one OpenGrok directory page.

    The page at ``url`` is fetched and scanned for ``class="p"`` entries
    whose ``href`` starts with ``/source/download``. Each linked file is
    saved below ``path``, keeping the part of the link that starts at
    ``/Qualcomm/`` as the relative location. Files that already exist
    locally are skipped.

    The markers ``class="p"``, ``/source/download`` and ``/Qualcomm/`` are
    specific to the server this script was written for; adapt them to your
    own OpenGrok instance.

    Args:
        url: Address of the OpenGrok directory listing page.
        path: Local directory under which the files are stored.

    Raises:
        requests.HTTPError: If a file download returns an error status.
    """
    # Function-scope import so this block does not depend on the file header.
    from urllib.parse import urljoin

    timeout = 30  # seconds; without it a stalled server blocks forever
    content = requests.get(url, timeout=timeout).text
    if 'class="p"' not in content:
        return

    # Get download link from html, you should change the key word according to your html
    sub_urls = re.findall('class="p".*?href="(/source/download.*?)"', content)
    print("\033[31m======Total %s files, will download=======\033[0m" % len(sub_urls))

    for sub_path in sub_urls:
        root = sub_path.rfind('/Qualcomm/')
        if root == -1:
            # Slicing with -1 would silently keep only the last character
            # and write the file to a bogus location.
            print("\033[33mSkipping unexpected link %s\033[0m" % sub_path)
            continue
        cpath = sub_path[root:]
        file_name = sub_path.split('/')[-1]
        print("downloading -> %-60s" % file_name, end=" ")

        # cpath starts with '/', so plain concatenation (not os.path.join,
        # which would discard ``path``) builds the destination.
        dest = path + cpath
        # To check whether file is exist or not, you can delete this if you want download all file again
        if os.path.isfile(dest):
            print("\033[33mFile is exists already, ignoring!\033[0m")
            continue

        # The href is host-relative; resolve it against the page URL instead
        # of cutting the URL at a hard-coded keyword.
        res = requests.get(urljoin(url, sub_path), timeout=timeout)
        res.raise_for_status()  # make sure the program stops when a download fails

        # The listing page may be the first thing touching this directory.
        parent = os.path.dirname(dest)
        if parent:
            os.makedirs(parent, exist_ok=True)
        with open(dest, 'wb') as out_file:
            for chunk in res.iter_content(100000):
                out_file.write(chunk)
        print("\033[32mDone\033[0m")
def get_dir(url, path):
    """Mirror an OpenGrok directory page, recursing into sub-directories.

    Sub-directories are recognised by ``class="r"`` entries whose ``href``
    starts with ``/source/history``; each one is created locally below
    ``path`` (using the part of the link starting at ``/Qualcomm/``) and
    then visited recursively. If the page also lists files
    (``class="p"``), they are downloaded with ``get_file``.

    The markers are specific to the server this script was written for;
    adapt them to your own OpenGrok instance.

    Args:
        url: Address of the OpenGrok directory listing page.
        path: Local directory under which the tree is stored.
    """
    content = requests.get(url, timeout=30).text

    if 'class="r"' in content:  # directory mark in html
        sub_urls = re.findall('class="r".*?href="(/source/history.*?)"', content)
        base_url = url.rstrip('/')  # avoid '//' when the caller passed a trailing slash
        for sub_path in sub_urls:
            root = sub_path.rfind('/Qualcomm/')
            if root == -1:
                # Slicing with -1 would create a directory named after the
                # link's last character; ignore links outside the tree.
                continue
            local_dir = path + sub_path[root:]
            if not os.path.exists(local_dir):
                print("will create directory %s" % local_dir)
                os.makedirs(local_dir, exist_ok=True)

            # A trailing '/' on the href would give an empty name and make
            # the recursion revisit the same page forever.
            name = sub_path.rstrip('/').split('/')[-1]
            if not name:
                continue
            # Pass ``path`` on, so the chosen destination applies to the whole tree.
            get_dir(base_url + "/" + name, path)

    if 'class="p"' in content:  # single file mark in html
        get_file(url, path)


if __name__ == '__main__':
    if len(sys.argv) < 2:
        help(sys.argv[0])
        # Missing the mandatory link address is a usage error.
        sys.exit(1)
    # <path> is optional and defaults to the current directory.
    target = sys.argv[2] if len(sys.argv) > 2 else "."
    get_dir(sys.argv[1], target)