導出 GitLab 上所有 issues 生成列表

需求:導出 GitLab 某個項目的所有 issues。

分析:GitLab 並不具備導出所有問題列表的能力,提供的 API 也不友好,遂自己用 Python 爬取所有問題列表,具體源碼如下:


import requests
import json
import time
#import itchat
import random
import _thread
import os
from urllib.parse import urlencode
from bs4 import BeautifulSoup
import ssl

# Change this to the Cookie of your own logged-in session.
localCookie = "sidebar_collapsed=false; _gitlab_session=e0cff180434843a933739a0427763816; event_filter=push"
# Change this to the directory where the output file should be saved.
# The file name is appended directly, so keep the trailing slash.
outputdir = "D:/yuxl/"


# Replace the ssl module's default HTTPS context factory with the unverified
# one, so contexts created through it skip certificate verification.
# NOTE(review): whether the requests library used below honours this override
# is not visible here -- confirm before relying on it.
ssl._create_default_https_context = ssl._create_unverified_context


def parseContentList(content):
    """Extract issue titles and ids from the HTML of one issue-list page.

    Args:
        content: HTML text of an issue list page.

    Returns:
        A list with one entry per element carrying the ``issue-title-text``
        class. Each entry is a list whose first item is the title and whose
        remaining items are the last path segment of every link inside that
        element whose ``href`` is longer than 20 characters. An entry holds
        only the title when no such link is found.
    """
    soup = BeautifulSoup(content, 'html.parser')

    listret = []
    for title_node in soup.find_all(class_="issue-title-text"):
        # The title is taken from the second line of the element's text
        # (presumably the markup starts with a line break). Fall back to the
        # whole text when there is no line break instead of raising
        # IndexError.
        lines = title_node.text.split("\n")
        title = lines[1] if len(lines) > 1 else lines[0]

        row = [title]
        for link in title_node.find_all("a"):
            # .get() tolerates anchors without an href attribute, where
            # subscripting would raise KeyError.
            href = link.get("href")
            if href and len(href) > 20:
                # The last path segment of the link is used as the issue id.
                row.append(str(href).rsplit("/", 1)[-1])
        listret.append(row)
    return listret

def downLoad(filename, paraMac):
    """Fetch every issue-list page of one project and write the issues to a file.

    The output file is ``outputdir + filename + <YYYY_MM_DD> + ".txt"`` and
    receives one ``<id>:<title>`` line per issue. Pages 1 to 999 are requested
    in order; the export stops at the first page that yields no issues.

    Args:
        filename: Prefix of the output file name.
        paraMac: Dict of strings. Its values, joined in insertion order with
            "/", form the page URL; the ``{chijing}`` placeholder in that URL
            is replaced by the page number. Must contain a ``'host'`` key,
            which is also sent as the ``Host`` header.

    Returns:
        None.
    """
    headers = {
        "Host": paraMac['host'],
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3",
        "Accept-Encoding": "gzip, deflate",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Cache-Control": "max-age=0",
        "Connection": "keep-alive",
        "Cookie": localCookie,
        # NOTE(review): a fixed ETag is sent with every request; if the server
        # ever answers 304 with an empty body the export would stop early.
        "If-None-Match": "W/\"e92673383cfe9f2384cf6e65329603e2\"",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36",
        }

    # Assemble the URL template from the dict values, separated by "/".
    strurl = "/".join(paraMac.values())

    # Build the output path; the directory must already exist.
    txtpath = outputdir + filename + time.strftime("%Y_%m_%d") + ".txt"

    # Both the session and the file are closed even if a request, the parsing
    # or a write raises part-way through the export.
    with requests.session() as session, open(txtpath, "w", encoding="utf-8") as f:
        for page in range(1, 1000):
            page_url = strurl.replace("{chijing}", str(page))
            # NOTE(review): params=paraMac also appends every dict entry as a
            # query parameter; kept as-is to leave the request unchanged.
            response = session.get(page_url, params=paraMac, headers=headers)

            rows = parseContentList(response.text)
            if not rows:
                # A page without issues marks the end of the list.
                return

            for row in rows:
                # A row may carry only a title when no issue link was found;
                # write an empty id rather than failing with IndexError.
                issue_id = row[1] if len(row) > 1 else ""
                f.write(issue_id + ":" + row[0] + "\t\n")
            f.flush()

def start():
    """Export the issue list of every configured project via downLoad."""
    # Edit these values to export the issues of a different project.
    # Entry order matters: it is preserved when the dict is handed on.
    kernel_params = dict(
        http="http:/",
        host="192.168.31.238:10088",  # server address and port
        username="jing",  # name of the project's creator
        projectname="kernel_2018_05",  # project name
        issues="issues",  # issues section
        other="?scope=all&utf8=%E2%9C%93&state=all&page={chijing}",  # paging parameters
    )

    # Maps the output file name prefix to that project's parameters.
    projects = {"內核": kernel_params}

    for label, params in projects.items():
        downLoad(label, params)




# Things to check before running:
# 1: the cookie used by the downLoad function
# 2: every place that involves a path -- make sure each path exists

start()

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章