論文：

題目：Personal Information in Passwords and Its Security Implications

鏈接：https://ieeexplore.ieee.org/abstract/document/7931642

實驗重現：

# encoding: utf-8
# author: kaiyouhu


import pandas as pd
import numpy as np
import re


class Passenger:
    """Plain record holding the fields of one account entry.

    The constructor stores each argument unchanged on the instance under
    the same name; no validation or conversion is performed.

    NOTE(review): the parameter order presumably mirrors the column order
    of the '----'-separated data file -- confirm against the dataset.
    """

    def __init__(self, login_email, password, name, id_number, username, phone, email):
        # Credentials used to log in.
        self.login_email = login_email
        self.password = password

        # Personal details attached to the account.
        self.name = name
        self.id_number = id_number
        self.username = username
        self.phone = phone
        self.email = email


def read_data(path, encoding=None):
    """Print three frequency tables for a '----'-separated account dump.

    Every line of the file is split on '----' and the columns are read by
    position: 0 login e-mail, 1 password, 2 name, 3 ID number,
    4 account name, 5 phone.
    NOTE(review): the column meaning is assumed to follow the Passenger
    constructor order -- confirm against the dataset.

    The tables printed, in order, are:

    1. how many records use each of ten fixed passwords;
    2. how many passwords fully match each of ten fixed structures
       (``D`` = digits, ``L`` = ASCII letters, followed by a length);
    3. how many passwords contain a piece of the record's own
       personal information.

    ``RANK`` is the position in the hard-coded list; it is not recomputed
    from the counts. ``Percentage`` is the count divided by the number of
    usable records (0.0 when there are none).

    Args:
        path: Path of the text file to analyse.
        encoding: Text encoding handed to ``open``; ``None`` keeps the
            platform default.

    Returns:
        None. All results are written to standard output.
    """
    records, skipped = _load_records(path, encoding)
    total = len(records)
    print('sum: ' + str(total))
    if skipped:
        # Only reported when something was dropped, so clean input prints
        # exactly one summary line.
        print('skipped: ' + str(skipped))

    passwords = [record[1] for record in records]

    # Table 1: exact-match counts for the fixed password list.
    amounts = [passwords.count(candidate) for candidate in _TOP_PASSWORDS]
    print(_frequency_table('Password', _TOP_PASSWORDS, amounts, total))

    # Table 2: passwords whose whole text matches a structure pattern.
    amounts = [
        sum(1 for password in passwords if pattern.fullmatch(password))
        for _, pattern in _STRUCTURES
    ]
    labels = [label for label, _ in _STRUCTURES]
    print(_frequency_table('Structure', labels, amounts, total))

    # Table 3: passwords that contain the record's own personal information.
    amounts = [
        sum(1 for record in records if _contains(record[1], extract(record)))
        for _, extract in _INFO_EXTRACTORS
    ]
    labels = [label for label, _ in _INFO_EXTRACTORS]
    print(_frequency_table('Information', labels, amounts, total))


# Highest column index read by the analysis is 5 (phone).
_MIN_FIELDS = 6

# Fixed password list for table 1, in the order it is ranked.
_TOP_PASSWORDS = ('123456', 'a123456', '123456a', '5201314', '111111',
                  'woaini1314', 'qq123456', '123123', '000000', '1qaz2wsx')

# Fixed structures for table 2; patterns are applied with fullmatch, so
# they carry no anchors.
_STRUCTURES = (
    ('D7', re.compile(r'\d{7}')),
    ('D8', re.compile(r'\d{8}')),
    ('D6', re.compile(r'\d{6}')),
    ('L2D7', re.compile(r'[a-zA-Z]{2}\d{7}')),
    ('L3D6', re.compile(r'[a-zA-Z]{3}\d{6}')),
    ('L1D7', re.compile(r'[a-zA-Z]{1}\d{7}')),
    ('L2D6', re.compile(r'[a-zA-Z]{2}\d{6}')),
    ('L3D7', re.compile(r'[a-zA-Z]{3}\d{7}')),
    ('D9', re.compile(r'\d{9}')),
    ('L2D8', re.compile(r'[a-zA-Z]{2}\d{8}')),
)


def _birthdate_of(record):
    """Return characters 6-13 of the ID number, or '' when it is too short."""
    # presumably YYYYMMDD of an 18-character ID number -- verify with data
    digits = record[3][6:14]
    # A truncated slice would be a short fragment that matches by accident.
    return digits if len(digits) == 8 else ''


# (label, function returning the text to look for inside the password).
_INFO_EXTRACTORS = (
    ('Birthdate', _birthdate_of),
    ('AccountName', lambda record: record[4]),
    # NOTE(review): if names are stored in non-Latin script this will rarely
    # match; the paper's own matching rules are not visible here.
    ('Name', lambda record: record[2]),
    # Local part of the login e-mail (column 0), i.e. the text before '@'.
    ('Email', lambda record: record[0].split('@')[0]),
    ('IDNumber', lambda record: record[3]),
    ('CellPhone', lambda record: record[5]),
)


def _load_records(path, encoding=None):
    """Read the file and return (usable field lists, number of skipped lines)."""
    records = []
    skipped = 0
    with open(path, encoding=encoding) as handle:
        for line in handle.read().splitlines():
            fields = line.split('----')
            if len(fields) < _MIN_FIELDS:
                # Blank or truncated lines cannot be indexed safely.
                skipped += 1
                continue
            records.append(fields)
    return records, skipped


def _contains(password, needle):
    """Return True when a non-empty needle occurs inside password."""
    # An empty needle is "found" in every string, so it must not count.
    return bool(needle) and needle in password


def _frequency_table(label_column, labels, amounts, total):
    """Build the RANK / label / Amount / Percentage table as a DataFrame."""
    percentages = [amount / total if total else 0.0 for amount in amounts]
    # Built column-wise so counts stay integers and percentages stay floats.
    return pd.DataFrame({
        'RANK': list(range(1, len(labels) + 1)),
        label_column: list(labels),
        'Amount': list(amounts),
        'Percentage': percentages,
    })


# Run the analysis only when executed as a script, so importing this module
# does not try to open the data file.
if __name__ == '__main__':
    read_data('../data/12306.txt')

輸出結果：

sum: 131653
  RANK    Password Amount             Percentage
0    1      123456    392  0.0029775242493524645
1    2     a123456    281  0.0021343987603776593
2    3     123456a    165  0.0012532946457733587
3    4     5201314    161  0.0012229117452697623
4    5      111111    157  0.0011925288447661656
5    6  woaini1314    136  0.0010330186171222835
6    7    qq123456     98  0.0007443810623381161
7    8      123123     98  0.0007443810623381161
8    9      000000     97  0.0007367853372122169
9   10    1qaz2wsx     93  0.0007064024367086204
  RANK Password Amount            Percentage
0    1       D7  10906   0.08283897822305607
1    2       D8   9458    0.0718403682407541
2    3       D6   9102   0.06913629009593401
3    4     L2D7   5073  0.038533113563686355
4    5     L3D6   4832  0.036702543808344666
5    6     L1D7   4778   0.03629237465154611
6    7     L2D6   4275   0.03247172491321884
7    8     L3D7   3885  0.029509392114118176
8    9       D9   3594  0.027299036102481522
9   10     L2D8   3371  0.025605189399406016
  RANK     Password Amount             Percentage
0    1    Birthdate   5726     0.0434931220708985
1    2  AccountName   2565   0.019483034947931303
2    3         Name   2565   0.019483034947931303
3    4        Email   3979   0.030223390275952694
4    5     IDNumber   6835    0.05191678123552065
5    6    CellPhone     89  0.0006760195362050238

Process finished with exit code 0

備註：

1實驗數據自己去百度網盤找下載的，大概14M的txt，共131653條數據（有的版本可能會上下差一些條數，但基本上差不多）

2只重現到第三個表格：前兩個表格的結果和論文基本一致；第三個表格我不太理解，也不知道作者具體是怎麼實現的（匹配細節不清楚），後面的表格我就懶得再編寫輸出了

substring← get_all_substring(pwd) 
reverse_length_sort(substring)
for eachstring ∈ substring do 
if len(eachstring) ≥ 2 then 
if matchbd(eachstring,infolist) then

這裏按照作者代碼的理解思路是，獲取密碼的全部長度大於等於2的子串，然後去和身份證信息，電話號碼等匹配，我就呵呵了

3論文的內容看看就好了，個人感覺過程有點水，結論下得太草率（本人愚鈍之見，不要太在意）

發表評論

所有評論

還沒有人評論，想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.

復現Personal Information in Passwords and Its Security Implications的部分實驗

論文：

實驗重現：

輸出結果：

備註：

DAPPER 事務 TRANSACTION

Vue.js iView Page分頁組件之真分頁

R語言環境下，使用tspmeta的concorde方法出現：Can not find executables for concorde or linkern.

drf-yasg 靜態資源static404問題解決

復現Personal Information in Passwords and Its Security Implications的部分實驗

python:對一個數組隨機切片成幾個數組

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結