有一個頭疼的問題一直困擾着我:進入教務系統要輸入兩次密碼,而且兩次的密碼還不一樣。還好學校用的是第三方的方正教務系統,只是各所學校的cookie不同,所以首先要得到本校的cookie。然後就是配置re
urllib.request
urllib.parse
http.cookiejar
bs4
getpass
pickle
os
platform
subprocess
bs4 BeautifulSoup
prettytable PrettyTable
這些庫文件。當然,作爲一個爬蟲,一定要僞裝:在請求頭中加上User-Agent、Referer等字段,把自己僞裝成瀏覽器。接下來就是代碼。
#song
import re
import urllib.request
import urllib.parse
import http.cookiejar
import bs4
import getpass
import pickle
import os
import platform
import subprocess
from bs4 import BeautifulSoup
from prettytable import PrettyTable
# Prepare the cookie jar and the opener. Cookies live inside the opener, so
# every later page request must go through this same opener.
cookie = http.cookiejar.CookieJar()
opener = urllib.request.build_opener(
    urllib.request.HTTPCookieProcessor(cookiejar=cookie)
)
#判斷操作系統類型
def getOpeningSystem():
    """Return the operating system name as reported by ``platform.system()``."""
    osName = platform.system()
    return osName
#判斷是否聯網
def isConnected():
    """Check network connectivity by pinging www.baidu.com twice.

    Returns:
        None. Completing without an exception means ping succeeded.

    Raises:
        subprocess.CalledProcessError: if ``ping`` exits with a non-zero
            status.
    """
    # Windows ping takes the packet count via -n; other systems use -c.
    countFlag = "-n" if getOpeningSystem() == "Windows" else "-c"
    # Discard ping's output with DEVNULL. The subprocess documentation warns
    # against stdout=PIPE with check_call(): nothing reads the pipe, so the
    # child process can block once the OS pipe buffer fills up.
    subprocess.check_call(
        ["ping", countFlag, "2", "www.baidu.com"],
        stdout=subprocess.DEVNULL,
    )
#登陸
def login():
    """Attempt one login to the educational administration system.

    Reads the module-level ``sid`` (student id) and ``spwd`` (password),
    downloads the captcha image, shows it to the user, asks for the code on
    stdin, and posts the login form through the shared ``opener`` so that the
    session cookie is kept for later requests.

    Returns:
        bool: False if the response URL is still the login page (a failure
        message is printed in that case), True otherwise.

    Raises:
        ValueError: if no ``__VIEWSTATE`` field is found in the login page.
    """
    codePath = r'D:\Program Files\ScoreHelper\code.jpg'
    loginurl = 'http://218.5.241.21/default2.aspx'

    # Build the login form.
    params = {
        'txtUserName': sid,
        'Textbox1': '',
        'Textbox2': spwd,
        'RadioButtonList1': '學生',
        'Button1': '',
        'lbLanguage': '',
        'hidPdrs': '',
        'hidsc': '',
    }

    # Download the captcha image and save it to disk.
    res = opener.open('http://218.5.241.21/checkcode.aspx').read()
    with open(codePath, 'wb') as file:
        file.write(res)

    # Show the captcha with the OS default viewer. The earlier code called
    # Image.open()/img.show(), but no ``Image`` name is imported in this
    # file, so it raised NameError; this uses only already-imported modules.
    try:
        if hasattr(os, 'startfile'):
            os.startfile(codePath)
        elif platform.system() == 'Darwin':
            subprocess.call(['open', codePath])
        else:
            subprocess.call(['xdg-open', codePath])
    except OSError:
        # Best effort only: the user can still open the saved file manually.
        print('無法自動打開驗證碼圖片,請手動查看:' + codePath)
    vcode = input('請輸入驗證碼:')
    params['txtSecretCode'] = vcode

    # Fetch the login page for its __VIEWSTATE. This goes through the shared
    # opener (not a bare urlopen) so it belongs to the same cookie session as
    # the captcha request and the login POST.
    response = opener.open('http://218.5.241.21')
    html = response.read().decode('gb2312')
    viewstate = re.search('<input type="hidden" name="__VIEWSTATE" value="(.+?)"',html)
    if viewstate is None:
        raise ValueError('__VIEWSTATE field not found in the login page')
    params['__VIEWSTATE'] = viewstate.group(1)

    # Try to log in.
    # NOTE(review): urlencode() percent-encodes non-ASCII values as UTF-8 by
    # default, while pages from this server are decoded as gb2312 here;
    # confirm which encoding the server expects for form values.
    data = urllib.parse.urlencode(params).encode('gb2312')
    response = opener.open(loginurl, data)
    # Still being on the login URL after the POST is treated as a failure.
    if response.geturl() == loginurl:
        print('登陸失敗,可能是姓名、學號、密碼、驗證碼填寫錯誤!')
        return False
    return True
#獲取成績
def getScore():
    """Fetch all scores of the logged-in student and print them as a table.

    Reads the module-level ``sid`` (student id) and ``sname`` (student name)
    and sends every request through the shared ``opener``. The score page is
    requested once to obtain its ``__VIEWSTATE``, then the query form is
    posted and the ``table`` element with class ``datelist`` is parsed.

    Returns:
        None. The resulting table is printed to stdout.

    Raises:
        ValueError: if the ``__VIEWSTATE`` field or the score table is
            missing from the response, or the table contains no rows.
    """
    # Build the score page URL.
    url = ''.join([
        'http://218.5.241.21/xscj.aspx',
        '?xh=',
        sid,
        '&xm=',
        urllib.parse.quote(sname),
        '&gnmkdm=N121604',
    ])
    # Form that queries all scores.
    params = {
        'ddlXN': '',
        'ddlXQ': '',
        'txtQSCJ': '0',
        'txtZZCJ': '100',
        'Button1': '在校學習成績',
    }

    # First request: send headers to avoid a 302 redirect and read the
    # page's fresh __VIEWSTATE.
    req = urllib.request.Request(url)
    req.add_header('Referer','http://218.5.241.21/default2.aspx')
    req.add_header('Origin','http://218.5.241.21')
    req.add_header('User-Agent','Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36')
    response = opener.open(req)
    html = response.read().decode('gb2312')
    viewstate = re.search('<input type="hidden" name="__VIEWSTATE" value="(.+?)"',html)
    if viewstate is None:
        # Fail with a clear message instead of AttributeError on None.
        raise ValueError('__VIEWSTATE field not found in the score page')
    params['__VIEWSTATE'] = viewstate.group(1)

    # Second request: post the form to query all scores.
    req = urllib.request.Request(url,urllib.parse.urlencode(params).encode('gb2312'))
    req.add_header('Referer','http://218.5.241.21/default2.aspx')
    req.add_header('Origin','http://218.5.241.21')
    response = opener.open(req)
    soup = BeautifulSoup(response.read().decode('gb2312'),'html.parser')
    scoreTable = soup.find('table',class_='datelist')
    if scoreTable is None:
        raise ValueError('score table (class "datelist") not found in the response')

    print('你的所有成績如下:')
    # Columns to output; column indexes of the page's table start at 0.
    outColumn = {1, 2, 3, 4, 6, 7, 8}
    # Stays None until the first (header) row has been seen.
    table = None
    # Walk the rows; only Tag children are rows, text nodes between them
    # (whitespace) are skipped.
    for row in scoreTable:
        if not isinstance(row, bs4.element.Tag):
            continue
        # Only Tag children are cells; skipping every text node keeps the
        # column index correct whatever whitespace separates the cells.
        cells = [cell for cell in row.contents if isinstance(cell, bs4.element.Tag)]
        # str() is required here, otherwise copy/deepcopy recurses endlessly.
        # An empty cell yields '' instead of raising IndexError.
        column = [
            str(cell.contents[0]).strip() if cell.contents else ''
            for index, cell in enumerate(cells)
            if index in outColumn
        ]
        if table is None:
            # The first row provides the table's field names.
            table = PrettyTable(column)
        else:
            table.add_row(column)
    if table is None:
        raise ValueError('score table contains no rows')
    print(table)
if __name__ == '__main__':
    # Entry point: check the network, load saved credentials if there are
    # any (otherwise ask for them), log in, print the scores, and on a first
    # run save the credentials for next time.
    dataDir = r'D:\Program Files\ScoreHelper'  # NOTE: Windows directory layout
    infoPath = r'D:\Program Files\ScoreHelper\uinfo.bin'
    try:
        print('歡迎使用昆明理工大學津橋學院成績查詢助手!')
        print('正在檢查網絡...')
        isConnected()

        # Only the read of the credentials file decides "first run"; a
        # FileNotFoundError from anywhere else must not trigger that flow.
        # NOTE(security): pickle.load can execute arbitrary code; this is
        # only acceptable while the file is written solely by this script.
        try:
            with open(infoPath, 'rb') as file:
                udick = pickle.load(file)
        except FileNotFoundError:
            udick = None

        if udick is not None:
            sname = udick['sname']
            sid = udick['sid']
            spwd = udick['spwd']
            # Retry until login succeeds (each attempt asks for a new captcha).
            while not login():
                pass
            getScore()
        else:
            # makedirs(exist_ok=True) tolerates an existing directory and
            # creates missing parents, where mkdir would raise.
            os.makedirs(dataDir, exist_ok=True)
            print('這是你第一次使用,請按提示輸入信息,以後可不必再次輸入~')
            sid = input('請輸入學號:')
            sname = input('請輸入姓名:')
            # getpass hides the password while it is typed.
            spwd = getpass.getpass('請輸入密碼:')
            while not login():
                sname = input('請輸入姓名:')
                sid = input('請輸入學號:')
                spwd = getpass.getpass('請輸入密碼:')
            getScore()
            # NOTE(security): the password is stored unencrypted on disk.
            udick = {'sname':sname,'sid':sid,'spwd':spwd}
            with open(infoPath, 'wb') as file:
                pickle.dump(udick, file)
    except subprocess.CalledProcessError:
        print("網絡連接不正常!請檢查網絡!")
    except Exception as err:
        # Catch Exception rather than using a bare except, so Ctrl-C still
        # interrupts, and show the real error next to the generic hint.
        print("失敗!可能是你沒有完成教學評價!沒有完成教學評價則無法查看成績!")
        print('錯誤詳情:', repr(err))
    finally:
        input('Done!請按任意鍵退出')
最後運行的結果是沒有爬出成績,估計是正則表達式沒有用對,但大體思路就是這樣。