需求 : 導出 gitlab 某個項目的所有 issues ,
分析: gitlab 並不具備 導出所有問題列表的能力,提供的API 也不友好, 遂自己用python爬取所有問題列表,具體源碼如下:
import requests
import json
import time
#import itchat
import random
import _thread
import os
from urllib.parse import urlencode
from bs4 import BeautifulSoup
import ssl
# Replace with the Cookie string from your own logged-in GitLab browser session.
localCookie = "sidebar_collapsed=false; _gitlab_session=e0cff180434843a933739a0427763816; event_filter=push"
# Replace with the directory where output files are saved (directory must already exist).
outputdir = "D:/yuxl/"
# Disable TLS certificate verification process-wide — presumably the GitLab
# server uses a self-signed certificate. NOTE(review): this weakens security
# for every HTTPS request made by this process.
ssl._create_default_https_context = ssl._create_unverified_context
def parseContentList(content):
    """Parse one issue-list HTML page into ``[[title, issue_id], ...]``.

    Each entry pairs an issue title (text of an ``issue-title-text``
    element) with the issue id taken from the last path segment of the
    issue link's ``href`` (e.g. ``.../issues/936`` -> ``"936"``).
    Returns an empty list when the page contains no issues, which the
    caller uses as the end-of-pagination signal.
    """
    soup = BeautifulSoup(content, 'html.parser')
    rows = []
    for title_node in soup.find_all(class_="issue-title-text"):
        # The element text looks like "\n<title>\n..."; segment 1 is the title.
        # Guard against a title with no surrounding newlines (original raised
        # IndexError in that case).
        parts = title_node.text.split("\n")
        entry = [parts[1] if len(parts) > 1 else title_node.text.strip()]
        for link in title_node.find_all("a"):
            # .get() avoids KeyError on an <a> with no href attribute
            # (original used link["href"] and would crash).
            href = link.get("href")
            if href and len(href) > 20:
                entry.append(str(href).rsplit('/', 1)[-1])
        rows.append(entry)
    return rows
def downLoad(filename, paraMac):
    """Crawl every page of a project's issue list and save "id:title" lines.

    ``filename`` names the output file (a ``YYYY_MM_DD`` suffix is added);
    ``paraMac`` is a dict whose values, joined with "/", form the paginated
    issue-list URL — the ``{chijing}`` placeholder in the final segment is
    replaced with the page number on each request.

    Stops at the first page that yields no issues. Authenticates with the
    module-level ``localCookie`` and writes under the module-level
    ``outputdir`` (the directory must already exist).
    """
    headers = {
        "Host": paraMac['host'],
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3",
        "Accept-Encoding": "gzip, deflate",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Cache-Control": "max-age=0",
        "Connection": "keep-alive",
        "Cookie": localCookie,
        "If-None-Match": "W/\"e92673383cfe9f2384cf6e65329603e2\"",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36",
    }
    session = requests.session()
    # Join the config values into the URL template (dict insertion order
    # matters: scheme, host, user, project, "issues", query string).
    strurl = "/".join(paraMac[key] for key in paraMac)
    txtpath = outputdir + filename + time.strftime("%Y_%m_%d", time.localtime()) + ".txt"
    # "with" guarantees the file is closed on every exit path — the original
    # leaked the handle whenever a request raised mid-loop.
    with open(txtpath, "w", encoding="utf-8") as f:
        for page in range(1, 1000):
            url = strurl.replace("{chijing}", str(page))
            # NOTE: the original also passed params=paraMac, which appended
            # the whole config dict ("http=http:/&host=...") as bogus query
            # parameters on top of the query string already in the URL;
            # that has been dropped.
            response = session.get(url, headers=headers)
            listret = parseContentList(response.text)
            if not listret:
                return  # empty page -> pagination exhausted
            for entry in listret:
                # entry is [title, issue_id]; one "id:title" per line.
                f.write(entry[1])
                f.write(":")
                f.write(entry[0])
                f.write("\t\n")
            f.flush()
def start():
    """Kick off an issue export for every configured project."""
    # Edit this dict to export the issues of a different project.
    kernel_project = {
        "http": "http:/",                # URL scheme prefix
        "host": "192.168.31.238:10088",  # server address and port
        "username": "jing",              # project owner's name
        "projectname": "kernel_2018_05", # project name
        "issues": "issues",              # issues path segment
        "other": "?scope=all&utf8=%E2%9C%93&state=all&page={chijing}",  # paging query
    }
    projects = {
        "內核": kernel_project,
    }
    for output_name, config in projects.items():
        downLoad(output_name, config)
# Prerequisites before running:
#   1. Set localCookie (used by downLoad) to your own session cookie.
#   2. Make sure every configured path (outputdir) already exists.
# Guarded entry point so importing this module does not trigger a crawl.
if __name__ == "__main__":
    start()