# Python: download file

 

def unGz(file_name):
    """
    Decompress a gzip file into a sibling file on disk.

    The output path is ``file_name`` with a trailing ``.gz`` removed. When
    ``file_name`` does not end with ``.gz`` the output path is
    ``file_name + ".out"`` so the compressed source is never overwritten.

    :param file_name: path of the gzip-compressed file to read
    :return: None
    """
    # Function-scope imports keep this helper usable on its own.
    import gzip
    import shutil

    suffix = ".gz"
    if file_name.endswith(suffix):
        # Strip only the trailing suffix; ".gz" elsewhere in the path is kept.
        f_name = file_name[:-len(suffix)]
    else:
        f_name = file_name + ".out"

    # Decompressed data is bytes, so the target must be opened in binary mode.
    # Both handles are closed by the context manager even if copying fails.
    with gzip.open(file_name, "rb") as g_file, open(f_name, "wb") as out_file:
        # Copy in chunks instead of loading the whole payload into memory.
        shutil.copyfileobj(g_file, out_file)

def unZip(file_name):
    """
    Extract every member of a zip archive into ``file_name + "_files"``.

    The target directory is created when it does not exist yet; an existing
    directory is reused.

    :param file_name: path of the zip archive to extract
    :return: None
    """
    target_dir = file_name + "_files"
    # The context manager closes the archive even when extraction raises.
    with zipfile.ZipFile(file_name) as zip_file:
        # Created only after the archive opened successfully, so a bad path
        # or a corrupt archive does not leave an empty directory behind.
        os.makedirs(target_dir, exist_ok=True)
        zip_file.extractall(target_dir)

  

 

def getLink(url:str, timeout:float=30.0):
    """
    Fetch the album JSON at ``url`` and return the URL of its first article.

    The response body is parsed as JSON and read as
    ``data['getalbum_resp']['article_list'][0]['url']``.

    :param url: address of the JSON endpoint to query
    :param timeout: seconds to wait for the server before giving up
    :return: the ``url`` field of the first entry in ``article_list``, or
        None when ``article_list`` is empty
    :raises requests.HTTPError: when the server answers with an error status
    :raises KeyError: when the JSON lacks the expected keys
    """
    headers = {
        'Accept-Language': 'zh-CN,zh;q=0.9,en-CN;q=0.8,en;q=0.7,zh-TW;q=0.6',
        'Cookie': 'rewardsn=; wxtokenkey=777',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36'
    }

    # Request the endpoint; a timeout keeps a stalled server from hanging the script.
    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail on HTTP errors instead of trying to parse an error page as JSON.
    response.raise_for_status()

    # Extract the WeChat article link from the JSON payload.
    data = json.loads(response.text)
    articles = data['getalbum_resp']['article_list']
    if not articles:
        # Callers test the result for truthiness, so report "nothing found"
        # with None rather than an IndexError.
        return None
    return articles[0]['url']

def getZipUrl(link:str, timeout:float=30.0):
    """
    Download the page at ``link`` and collect the .zip links in its body text.

    Only the text of the ``<div id="js_content">`` element is searched.

    :param link: address of the article page to scan
    :param timeout: seconds to wait for the server before giving up
    :return: list of matched URL strings in order of appearance; empty when
        the page has no ``js_content`` div or no match is found
    :raises requests.HTTPError: when the server answers with an error status
    """
    # Fetch the WeChat article page and parse the HTML.
    response = requests.get(link, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    content_div = soup.find('div', {'id': 'js_content'})
    if content_div is None:
        # No article body on the page: report "no links" instead of raising
        # AttributeError on None.
        return []

    # Match links that start with https:// and end with the .zip suffix.
    content = content_div.get_text()
    return re.findall(r'https://.*?\.zip', content)

def requestsDownload(url:str,newfile:str, timeout:float=30.0, chunk_size:int=65536):
    """
    Download ``url`` and save the response body to ``newfile``.

    :param url: address of the resource to download
    :param newfile: path of the local file to write (overwritten if present)
    :param timeout: seconds to wait for the server before giving up
    :param chunk_size: number of bytes requested per streamed chunk
    :return: None
    :raises requests.HTTPError: when the server answers with an error status
    """
    # Stream the body so a large archive is never held fully in memory.
    with requests.get(url, stream=True, timeout=timeout) as response:
        # Check the status before opening the target, so an error page is
        # never saved under the download's file name.
        response.raise_for_status()
        with open(newfile, 'wb') as file:
            for chunk in response.iter_content(chunk_size=chunk_size):
                if chunk:
                    file.write(chunk)




def print_hi(name):
    """
    Print a fixed greeting line that embeds ``name``.

    :param name: value interpolated into the greeting
    :return: None
    """
    greeting = f'Hi, {name} world,geovindu,塗聚文'
    print(greeting)


# Press the green button in the gutter to run the script.
if __name__ == '__main__':
    # Album endpoint that returns JSON; getLink reads the first article URL
    # from it, i.e. the article announcing the latest IP database release.
    url = 'https://mp.weixin.qq.com/mp/appmsgalbum?__biz=Mzg3Mzc0NTA3NA==&action=getalbum&album_id=2329805780276838401&f=json'
    downurl = ""
    try:
        link = getLink(url)
        if link:
            zip_url = getZipUrl(link)
            if zip_url:
                # Show every candidate, then download the first one.
                for found in zip_url:
                    print(found)
                downurl = zip_url[0]
            else:
                print("沒有找到zip鏈接")
        else:
            print("沒有找到微信推文鏈接")
    except Exception as e:
        # Top-level boundary: report the failure and fall through without downloading.
        print("出現錯誤:", e)

    # Download only when a link was found; an empty URL would make the
    # request itself fail.
    if downurl:
        requestsDownload(downurl, "geovindu.zip")
    

  

# 發表評論
# 所有評論
# 還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
# 相關文章