Python: download file

def unGz(file_name):
    """
    ungz zip file  import gzip

    :param file_name: 
    :return: 
    """
    f_name = file_name.replace(".gz", "")
    #获取文件的名称，去掉
    g_file = gzip.GzipFile(file_name)
    #创建gzip对象
    open(f_name, "w+").write(g_file.read())
    #gzip对象用read()打开后，写入open()建立的文件里。
    g_file.close()
    #关闭gzip对象

def unZip(file_name):
    """
    unzip zip file  import zipfile
    :param file_name: 
    :return: 
    """
    zip_file = zipfile.ZipFile(file_name)
    if os.path.isdir(file_name + "_files"):
        pass
    else:
        os.mkdir(file_name + "_files")
    for names in zip_file.namelist():
        zip_file.extract(names,file_name + "_files/")
    zip_file.close()

def getLink(url:str):
    """
    
    :param url: 
    :return: 
    """
    headers = {
        'Accept-Language': 'zh-CN,zh;q=0.9,en-CN;q=0.8,en;q=0.7,zh-TW;q=0.6',
        'Cookie': 'rewardsn=; wxtokenkey=777',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36'
    }

    # 访问链接并从json中提取微信推文链接
    response = requests.get(url, headers=headers)
    data = json.loads(response.text)
    link = data['getalbum_resp']['article_list'][0]['url']
    return link

def getZipUrl(link:str):
    """
    
    :param link: 
    :return: 
    """
    
    # 访问微信推文链接并解析网页
    response = requests.get(link)
    soup = BeautifulSoup(response.text, 'html.parser')

    # 提取文本中的zip链接，正则匹配以https://开头以.zip后缀的链接
    content = soup.find('div', {'id': 'js_content'}).get_text()
    zipurl = re.findall(r'https://.*?\.zip', content)

    return zipurl

def requestsDownload(url:str,newfile:str):
    """
    
    :param url: 
    :param newfile: 
    :return: 
    """
    content = requests.get(url).content
    with open(newfile, 'wb') as file:
        file.write(content)




def print_hi(name):
    # Use a breakpoint in the code line below to debug your script.
    print(f'Hi, {name} world,geovindu,涂聚文')  # Press Ctrl+F8 to toggle the breakpoint.


# Press the green button in the gutter to run the script.
if __name__ == '__main__':
    #print_hi('PyCharm,python language')
    # 从微信推文json数据中获得最新一期IP库的发布文章链接
    url = 'https://mp.weixin.qq.com/mp/appmsgalbum?__biz=Mzg3Mzc0NTA3NA==&action=getalbum&album_id=2329805780276838401&f=json'
    downurl=""
    try:
        link = getLink(url)
        if link:
            zip_url = getZipUrl(link)
            if len(zip_url)>0:
                for i in range(0,len(zip_url)):
                    downurl=zip_url[0]
                    print(zip_url[i])
            else:
                print("没有找到zip链接")
        else:
            print("没有找到微信推文链接")
    except Exception as e:
        print("出现错误：", e)

    requestsDownload(downurl,"geovindu.zip")

發表評論

所有評論

還沒有人評論，想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.

Python: download file

再谈23种设计模式（3）：行为型模式（学习笔记）

Power Automate Desktop 安装完，登录后老是提示one driver 错误

微前端学习笔记(4):从微前端到微模块之EMP与hel-micro方案探索

微前端学习笔记（1）：微前端总体架构概述，从微服务发微

985 硕士程序员，空窗 4 个月没有 Offer！

一文搞懂 Spring 循环依赖

赛博斗地主——使用大语言模型扮演Agent智能体玩牌类游戏。

VScode右键打开(添加到右键)

记一次 .NET某工控视觉自动化系统卡死分析

WindowsServer--SQL Server搭建主从同步实现读写分离 - 事务性分发

Python: SunMoonTimeCalculator

CSharp: SunCalc for calculating

CSharp: SunTimeCalculator

python： Binary Number

JavaScript:根據經緯度計算太陽、月亮升起和落下時間

Mac下配置sublime實現LaTeX

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結