一個簡單的XSS檢測工具

檢測的主要流程：
發送隨機flag -> 確定參數回顯 -> 確定回顯位置以及情況(html，js語法解析)
-> 根據回顯的位置與情況選擇不同的payload進行探測 -> 使用html，js語法解析確定是否多出來了標籤，屬性，js語句等等。

在拿到一個域名後開始深度檢索存在的所有鏈接(href,src)，在經過數據清洗後得到有用的頁面(去除js等其他目錄)

def SearchDir(TestUrl):
    """Collect candidate page URLs linked from ``TestUrl`` into ``urlList``.

    The page is fetched and every ``href="..."`` / ``src="..."`` attribute is
    pulled out with a regex. Links to static resources (scripts, styles,
    images, ``javascript:`` pseudo-links) are discarded; the rest are appended
    to the ``urlList`` list defined elsewhere in this file, skipping entries
    that are already present.

    Absolute links (``http://`` or ``https://``) are kept only when they
    contain ``TestUrl``; every other link is treated as relative and has
    ``TestUrl`` prepended.

    Args:
        TestUrl: Base URL of the site to crawl.

    Returns:
        None. Results are accumulated in ``urlList``.
    """
    header = {
        'User-Agent':
            'Mozilla / 5.0(Windows NT 10.0; Win64;x64) AppleWebKit / 537.36(KHTML, likeGecko) Chrome / 73.0.3683.75Safari / 537.36'
    }
    # Pass the dict by keyword: the second positional argument of
    # requests.get() is ``params``, which would put the User-Agent into the
    # query string instead of sending it as a request header.
    html = requests.get(TestUrl, headers=header)
    print(html.text)
    _html = BeautifulSoup(html.text, 'lxml')
    page = str(_html)
    NoNeedDir = ['.js', '.css', '.jpg', '.png', 'javascript', '.gif', '.bmp']

    # href links are processed first, then src links.
    sources = (
        ('href=', re.findall(r"href=\".+?\"", page)),
        ('src=', re.findall(r"src=\".+?\"", page)),
    )
    for prefix, matches in sources:
        for i in matches:
            # Only follow absolute links that stay on the tested site.
            is_absolute = 'http://' in i or 'https://' in i
            if is_absolute and TestUrl not in i:
                continue
            # Strip the attribute name and the surrounding quotes.
            i = i.replace(prefix, '').replace('"', '')
            # Skip static resources that cannot reflect parameters.
            if any(p in i for p in NoNeedDir):
                continue
            url = i if is_absolute else TestUrl + i
            if url not in urlList:
                urlList.append(url)

下一步開始檢測存在的參數。這裏主要以兩種形式。1.內置一些常見參數
這裏是根據大佬的一篇博客裏寫的作爲借鑑。
2.進行html分析。這裏爬蟲使用的是BeautifulSoup。(在確定回顯的時候還是HTML語法樹比較好用。)
在檢測參數時順序:
1).檢測url裏存在的參數
2).form標籤裏的參數，例如：提取input標籤裏的name值(如果參數是經過js動態生成的，則無法檢測出來)
同時提取參數提交方式(get post)

# Common parameter names to brute-force during parameter discovery.
blindParams = [
    'redirect', 'redir', 'url', 'link', 'goto',
    'debug', '_debug', 'test', 'get', 'index',
    'src', 'source', 'file', 'frame', 'config',
    'new', 'old', 'var', 'rurl', 'return_to',
    '_return', 'returl', 'last', 'text', 'load',
    'email', 'mail', 'user', 'username', 'password',
    'pass', 'passwd', 'first_name', 'last_name', 'back',
    'href', 'ref', 'data', 'input', 'out',
    'net', 'host', 'address', 'code', 'auth',
    'userid', 'auth_token', 'token', 'error', 'keyword',
    'key', 'q', 'query',
] + [
    # 'aid', 'bid', ..., 'zid'
    letter + 'id' for letter in 'abcdefghijklmnopqrstuvwxyz'
] + [
    'cal', 'country', 'x', 'y', 'topic',
    'title', 'head', 'higher', 'lower', 'width',
    'height', 'add', 'result', 'log', 'demo',
    'example', 'message',
]

#分別將提取的到參數存入字典中
def Collect_parameters(testUrl):
    """Record the parameters of every GET/POST form found on ``testUrl``.

    The page is fetched and parsed with BeautifulSoup. For each ``<form>``
    element, ``checkMethod`` is asked for the form's action and method
    (index 0 is the action and index 1 the method, per the original notes)
    and ``checkparameter`` for its parameters. A dict with the keys
    ``action``, ``method``, ``params`` and ``url`` is then appended to
    ``AMDic_get`` or ``AMDic_post`` unless an equal dict is already there.

    Forms for which ``checkMethod`` returns a falsy value, or whose method is
    neither ``get`` nor ``post``, are ignored.

    Args:
        testUrl: URL of the page to inspect,
            e.g. http://www.xylbh.cn/yzsp.aspx?w=83&code=100230

    Returns:
        None. Results are accumulated in ``AMDic_get`` / ``AMDic_post``.
    """
    response = requests.get(testUrl)
    soup = BeautifulSoup(response.text, 'lxml')

    for form in soup.find_all('form'):
        form_markup = str(form)
        Action_method = checkMethod(form_markup)
        if not Action_method:
            continue

        # GET parameters travel in the URL, POST parameters in the body;
        # they are stored in separate lists.
        method = Action_method[1]
        if method == "get":
            bucket = AMDic_get
        elif method == 'post':
            bucket = AMDic_post
        else:
            continue

        form_params = checkparameter(form_markup)
        record = {
            'action': Action_method[0],
            'method': method,
            'params': form_params,
            'url': testUrl,
        }
        if record not in bucket:
            bucket.append(record)

確定回顯:

class myHTMLParser(HTMLParser):
    """Locate where a probe string is reflected inside an HTML document.

    Feed the response body with ``feed()`` and then call ``close()``, which
    returns a list holding one entry per reflection found:

    * ``'intag'``  - the flag appears in the attributes of a start tag,
    * ``'script'`` - the flag appears in text inside a ``<script>`` element,
    * ``'outtag'`` - the flag appears in any other text node.

    Args:
        flag: The probe string to search for.
    """

    def __init__(self, flag):
        HTMLParser.__init__(self)
        self.startag = ''      # name of the most recent start tag
        self.endtag = ''       # name of the most recent end tag
        self.falg = flag       # probe string (attribute name kept as-is for callers)
        self.loaction = []     # collected reflection contexts
        # True only between <script> and </script>. Tracking this explicitly
        # is needed because ``startag`` is not cleared when an element ends,
        # so text following </script> would otherwise still look like script.
        self._in_script = False

    def handle_decl(self, decl):
        """Pass declarations such as ``<!DOCTYPE ...>`` to the base class."""
        HTMLParser.handle_decl(self, decl)

    def handle_starttag(self, tag, attrs):
        """Record the tag and report ``'intag'`` if the flag is in its attributes."""
        HTMLParser.handle_starttag(self, tag, attrs)
        self.startag = tag
        if tag == 'script':
            self._in_script = True
        if not attrs:
            return
        # Rebuild a flat "<tag name value name value ..." string to search in.
        pieces = ['<' + tag]
        for name, value in attrs:
            pieces.append(name)
            # Valueless attributes (e.g. ``disabled``) are reported as None.
            if value is not None:
                pieces.append(value)
        flagtag = ''.join(pieces)
        if self.falg in flagtag:
            print(flagtag)
            self.loaction.append('intag')

    def handle_endtag(self, tag):
        """Record the tag and leave script context on ``</script>``."""
        HTMLParser.handle_endtag(self, tag)
        self.endtag = tag
        if tag == 'script':
            self._in_script = False

    def handle_data(self, data):
        """Report ``'script'`` or ``'outtag'`` when the flag is in a text node."""
        HTMLParser.handle_data(self, data)
        if self.falg in data:
            if self._in_script:
                self.loaction.append('script')
            else:
                self.loaction.append('outtag')

    def handle_startendtag(self, tag, attrs):
        """Handle self-closing tags such as ``<br/>`` via the base class."""
        HTMLParser.handle_startendtag(self, tag, attrs)

    def handle_comment(self, data):
        """Pass comment content to the base class; comments are not inspected."""
        HTMLParser.handle_comment(self, data)

    def close(self):
        """Finish parsing and return the list of reflection contexts."""
        HTMLParser.close(self)
        return self.loaction