def unGz(file_name):
    """
    Decompress a gzip file into a sibling file without the ``.gz`` suffix.

    The output path is ``file_name`` with its trailing ``.gz`` removed. For
    names that do not end in ``.gz`` the legacy behaviour (removing every
    ``.gz`` occurrence) is kept as a fallback.

    :param file_name: path of the gzip-compressed input file
    :return: None
    :raises ValueError: if the name contains no ``.gz``, because the output
        path would then be the input file itself
    """
    import shutil  # local import: only this function needs it

    suffix = ".gz"
    if file_name.endswith(suffix):
        # Strip only the trailing suffix so directory names such as
        # "backup.gz/data.gz" are left intact.
        f_name = file_name[:-len(suffix)]
    else:
        f_name = file_name.replace(suffix, "")

    if f_name == file_name:
        # Writing to the same path would truncate the input before it is read.
        raise ValueError(
            "cannot derive an output name from %r: no '.gz' to strip" % (file_name,)
        )

    # The decompressed payload is bytes, so the target must be opened in
    # binary mode; both handles are closed even if decompression fails.
    with gzip.GzipFile(file_name) as g_file, open(f_name, "wb") as out_file:
        shutil.copyfileobj(g_file, out_file)


def unZip(file_name):
    """
    Extract every member of a zip archive into ``<file_name>_files``.

    The target directory is created when it does not exist yet; an existing
    directory is reused.

    :param file_name: path of the zip archive
    :return: None
    """
    target_dir = file_name + "_files"
    # Open the archive first so an unreadable or invalid archive does not
    # leave an empty directory behind.
    with zipfile.ZipFile(file_name) as zip_file:
        os.makedirs(target_dir, exist_ok=True)
        zip_file.extractall(target_dir)
# Seconds to wait for the server before giving up; without an explicit
# timeout a stalled connection would block the script indefinitely.
_REQUEST_TIMEOUT = 30


def getLink(url: str):
    """
    Fetch the album JSON and return the URL of its first listed article.

    :param url: album endpoint that answers with JSON
    :return: the ``url`` field of the first entry of
        ``getalbum_resp.article_list``, or ``None`` when that list is empty
    """
    headers = {
        'Accept-Language': 'zh-CN,zh;q=0.9,en-CN;q=0.8,en;q=0.7,zh-TW;q=0.6',
        'Cookie': 'rewardsn=; wxtokenkey=777',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36'
    }
    # Request the album and pull the article link out of the JSON payload.
    response = requests.get(url, headers=headers, timeout=_REQUEST_TIMEOUT)
    # Fail on HTTP errors instead of trying to parse an error page as JSON.
    response.raise_for_status()
    data = json.loads(response.text)
    articles = data['getalbum_resp']['article_list']
    if not articles:
        # Nothing listed: let the caller report "no link" rather than crash.
        return None
    return articles[0]['url']


def getZipUrl(link: str) -> list:
    """
    Download an article page and collect the zip links found in its body text.

    :param link: URL of the article page
    :return: list of strings starting with ``https://`` and ending in
        ``.zip``; empty when the page has no ``js_content`` element or the
        text contains no such link
    """
    # Fetch the article page and parse the HTML.
    response = requests.get(link, timeout=_REQUEST_TIMEOUT)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    body = soup.find('div', {'id': 'js_content'})
    if body is None:
        # Page layout differs from what is expected; report "no links".
        return []
    # Match links that start with https:// and end with the .zip suffix.
    return re.findall(r'https://.*?\.zip', body.get_text())


def requestsDownload(url: str, newfile: str) -> None:
    """
    Download ``url`` and write the response body to ``newfile``.

    :param url: address of the file to download
    :param newfile: path of the local file to create or overwrite
    :return: None
    """
    # Stream the body so a large archive is never held in memory at once.
    with requests.get(url, stream=True, timeout=_REQUEST_TIMEOUT) as response:
        # Do not save an HTTP error page under the target file name.
        response.raise_for_status()
        with open(newfile, 'wb') as file:
            for chunk in response.iter_content(chunk_size=64 * 1024):
                file.write(chunk)


def print_hi(name):
    """
    Print a fixed greeting that includes ``name``.

    :param name: text inserted into the greeting
    :return: None
    """
    print(f'Hi, {name} world,geovindu,塗聚文')


if __name__ == '__main__':
    # Look up the article announcing the latest IP database release in the
    # album JSON, then download the first zip linked from that article.
    url = 'https://mp.weixin.qq.com/mp/appmsgalbum?__biz=Mzg3Mzc0NTA3NA==&action=getalbum&album_id=2329805780276838401&f=json'
    downurl = ""
    try:
        link = getLink(url)
        if link:
            zip_url = getZipUrl(link)
            if zip_url:
                for found in zip_url:
                    print(found)
                # Only the first link is downloaded.
                downurl = zip_url[0]
            else:
                print("沒有找到zip鏈接")
        else:
            print("沒有找到微信推文鏈接")
    except Exception as e:
        print("出現錯誤:", e)
    # Skip the download when no link was found or the lookup failed;
    # requesting an empty URL would only raise another error.
    if downurl:
        requestsDownload(downurl, "geovindu.zip")