目錄
前言:
今天是入職第一天,被項目組長分配工作:寫一個Cookie池接口
Cookie池要求:
1、要有一個保存cookie的接口,保存文檔不限制,存入mysql,txt文本,redis都行
2、要有一個提取cookie的接口
3、要給cookie加一個“有效時間”的屬性,並且Cookie池可以清除掉超時的cookie
4、要留出足夠的擴展空間,方便後面的拓展開發
9點到公司報到,弄清情況後已經10點多了,到12點一直在構思怎麼簡潔快速地完成任務,最後決定使用mysql數據庫作爲存儲,下午開始寫代碼,於是乎有了第一個版本,如下
第一天修改
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import redis,time,json,pymysql
# from pymysql
class CookDea:
    """MySQL-backed cookie pool (first-day draft).

    Each row holds the caller-supplied attribute columns, the serialized
    ``cookies`` string and a validity window ``beg_time`` / ``end_time``
    (Unix seconds). All values are sent to the server as query parameters;
    only table and column names are interpolated, and those are
    backtick-quoted.
    """

    # Columns that create_table() always emits itself, so they must not be
    # generated a second time from the keys of ``data``.
    _RESERVED_COLUMNS = ("id", "cookies", "beg_time", "end_time")

    def __init__(self, host, port, database, user, password, charset="utf8"):
        """Open the database connection and create the shared cursor.

        :param host: database host name or IP address
        :param port: database port
        :param database: name of the database to use
        :param user: user name for the connection
        :param password: password for the connection
        :param charset: connection character set
        """
        self.db = pymysql.connect(host=host, port=port, database=database, user=user,
                                  password=password, charset=charset)
        self.cursor = self.db.cursor()

    @staticmethod
    def _quote(identifier):
        """Return ``identifier`` backtick-quoted, doubling embedded backticks."""
        return "`{}`".format(str(identifier).replace("`", "``"))

    @classmethod
    def _where(cls, data):
        """Build an AND-joined parameterised condition.

        :param data: mapping of column name to required value
        :return: ``(clause, params)``; ``clause`` is ``""`` for an empty mapping
        """
        columns = list(data)
        clause = " AND ".join("{}=%s".format(cls._quote(c)) for c in columns)
        return clause, [data[c] for c in columns]

    def create_table(self, table_name, data):
        """Create ``table_name`` (if missing) with one varchar column per key of ``data``.

        The ``id``, ``cookies``, ``beg_time`` and ``end_time`` columns are
        always created, so keys with those names are ignored.

        :param table_name: name of the table
        :param data: sample row; only its keys are used
        """
        columns = [k for k in data if k not in self._RESERVED_COLUMNS]
        sql_add = "".join("{} varchar(2000) DEFAULT NULL,".format(self._quote(k)) for k in columns)
        self.cursor.execute('''CREATE TABLE IF NOT EXISTS {} (
            `id` int(10) NOT NULL AUTO_INCREMENT, {}
            `cookies` varchar(2000) DEFAULT NULL,
            `beg_time` int(20) DEFAULT NULL,
            `end_time` int(20) DEFAULT NULL,
            PRIMARY KEY (`id`)
            ) DEFAULT CHARSET=utf8;'''.format(self._quote(table_name), sql_add))

    def save(self, table_name, data, alive_time=3600):
        """Store one cookie record, refreshing an existing row with the same cookies.

        The table must already exist (see create_table()). The caller's
        ``data`` dict is not modified.

        :param table_name: name of the table
        :param data: row data, e.g. {"province": "...", "tax": "1", "cookies": "<json>"}
        :param alive_time: validity period of the cookies, in seconds
        :return: number of rows affected by the update or, if none, by the insert
        """
        row = dict(data)  # work on a copy so the caller's dict stays untouched
        beg_time = int(time.time())
        row["beg_time"] = beg_time
        row["end_time"] = beg_time + alive_time
        # Update first and insert only when nothing matched; the earlier
        # unconditional insert produced two rows per call.
        result = self.__update(table_name, row)
        if result == 0:
            result = self.__insert(table_name, row)
        return result

    def get_one(self, table_name, data):
        """Return one random row matching every key/value pair in ``data``.

        :param table_name: name of the table
        :param data: conditions, e.g. {"province": "...", "tax": "1"}
        :return: the row as returned by the cursor, or None if no row matches
                 or the query fails
        """
        return self.__select(table_name, data)

    def __insert(self, table_name, data):
        """Insert ``data`` as a new row and commit.

        :param table_name: name of the table
        :param data: mapping of column name to value
        :return: result of ``cursor.execute`` (number of affected rows)
        """
        columns = list(data)
        sql = "INSERT INTO {} ({}) VALUES ({})".format(
            self._quote(table_name),
            ", ".join(self._quote(c) for c in columns),
            ", ".join(["%s"] * len(columns)))
        result = self.cursor.execute(sql, [data[c] for c in columns])
        self.db.commit()
        return result

    def __update(self, table_name, data):
        """Update the rows whose ``cookies`` column equals ``data["cookies"]``.

        Every other key of ``data`` is written to the matching rows.

        :param table_name: name of the table
        :param data: mapping of column name to value
        :return: number of affected rows; 0 when ``data`` has no ``cookies``
                 key or nothing else to set
        """
        if "cookies" not in data:
            return 0
        columns = [c for c in data if c != "cookies"]
        if not columns:
            return 0
        sql = "UPDATE {} SET {} WHERE `cookies`=%s;".format(
            self._quote(table_name),
            ", ".join("{}=%s".format(self._quote(c)) for c in columns))
        result = self.cursor.execute(sql, [data[c] for c in columns] + [data["cookies"]])
        self.db.commit()
        return result

    def __select(self, table_name, data):
        """Fetch one random row matching ``data``.

        :param table_name: name of the table
        :param data: mapping of column name to required value; an empty
                     mapping matches every row
        :return: the fetched row, or None when nothing matches or the
                 database reports an error
        """
        clause, params = self._where(data)
        sql = "SELECT * FROM {}{} ORDER BY RAND() LIMIT 1;".format(
            self._quote(table_name), " WHERE " + clause if clause else "")
        try:
            self.cursor.execute(sql, params)
            return self.cursor.fetchone()
        except pymysql.MySQLError:
            # Lookups are best effort: a database error is reported as "no row".
            return None

    def clear(self, table_name, data, orer_time=1):
        """Delete the rows matching ``data``.

        :param table_name: name of the table
        :param data: conditions, e.g. {"province": "...", "tax": "1"}
        :param orer_time: 1 deletes only rows whose ``end_time`` has passed;
                          any other value deletes every matching row
        :return: number of deleted rows
        :raises ValueError: if ``data`` is empty and ``orer_time`` is not 1,
                            which would otherwise delete the whole table
        """
        clause, params = self._where(data)
        conditions = [clause] if clause else []
        if orer_time == 1:
            # Expired means end_time lies in the past.
            conditions.append("`end_time`<%s")
            params.append(int(time.time()))
        if not conditions:
            raise ValueError("clear() needs at least one condition")
        sql = "DELETE FROM {} WHERE {};".format(self._quote(table_name), " AND ".join(conditions))
        result = self.cursor.execute(sql, params)
        self.db.commit()
        return result
if __name__ == '__main__':
    # Demo run against a local MySQL instance: create the table, store one
    # record, then look one up.
    url = "https://www.cnblogs.com"
    cookies = dict([
        ('_ga', 'GA1.2.368799281.1551860816'),
        ('__gads', 'ID'),
        ('Hm_lvt_159f5d3f9ae2bd304b8e99efa2e9ae34', '1551884072,1551937443'),
        ('Hm_lpvt_159f5d3f9ae2bd304b8e99efa2e9ae34', '1551937443'),
        ('sc_is_visitor_unique',
         'rx11644382.1553693305.EBA7A35830044F4743585FA345B2D54D.1.1.1.1.1.1.1.1.1'),
    ])
    data = {}
    data["province"] = "浙江3333333"
    data["tax"] = "1"
    data["cookies"] = json.dumps(cookies)

    ck = CookDea("localhost", 3306, "testx1", "root", "root")
    ck.create_table("cookies_pool", data)
    saved = ck.save("cookies_pool", data)
    print(saved)
    ck.get_one("cookies_pool", {"province": "浙江", "tax": "0"})
其中create_table(self, table_name, data)可以根據data字典在數據庫創建相應的數據表,然後可以利用__insert(self, table_name, data)插入相應的data字典數據,可以說非常靈活。(提示,如果看不懂代碼的SQL操作,可以看看我之前的博客:Python中pymysql通過字典插入數據)
“你怎麼保證cookie數據的唯一性?所以你可以加個cookieID作爲cookie數據標識,也可以對重要的屬性進行哈希處理作爲唯一主鍵”,這是大佬和我說的話,其中還有一些PEP8的格式問題就不一一說了,於是乎,第二天開始了新的修改,代碼如下:
第二天修改
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import time,json,os
import hashlib
try:
import pymysql
except:
os.system("pip install pymysql")
class CookieClass:
    """One cookie record together with the attributes that identify it.

    ``cookies`` and ``md5`` are mandatory attributes; ``md5`` is derived
    from ``tax``.
    """

    def __init__(self, province, tax, cookies):
        self.province = province
        self.tax = str(tax)
        self.cookies = str(cookies)
        # Hash of the identifying attribute(s); __md5_deal() accepts any
        # number of values should more of them become part of the key.
        self.md5 = self.__md5_deal(tax)

    def __md5_deal(self, *args):
        """Return the hex MD5 digest of the concatenated string forms of ``args``."""
        joined = "".join(map(str, args))
        return hashlib.md5(joined.encode()).hexdigest()
class CookDea:
    """MySQL-backed cookie pool keyed by the ``md5`` attribute of a cookie object.

    All values are sent to the server as query parameters; only table and
    column names are interpolated, and those are backtick-quoted.
    """

    # Columns that create_table() always emits itself.
    _RESERVED_COLUMNS = ("md5", "cookies", "beg_time", "end_time")

    def __init__(self, host, port, database, user, password, charset="utf8"):
        """Open the database connection and create the shared cursor.

        :param host: database host name or IP address
        :param port: database port
        :param database: name of the database to use
        :param user: user name for the connection
        :param password: password for the connection
        :param charset: connection character set
        """
        self.db = pymysql.connect(host=host, port=port, database=database, user=user,
                                  password=password, charset=charset)
        self.cursor = self.db.cursor()

    @staticmethod
    def _quote(identifier):
        """Return ``identifier`` backtick-quoted, doubling embedded backticks."""
        return "`{}`".format(str(identifier).replace("`", "``"))

    @classmethod
    def _where(cls, data):
        """Return ``(clause, params)`` for an AND-joined parameterised condition."""
        columns = list(data)
        clause = " AND ".join("{}=%s".format(cls._quote(c)) for c in columns)
        return clause, [data[c] for c in columns]

    def __fetch_one_by(self, table_name, column, value):
        """Return the first row of ``table_name`` whose ``column`` equals ``value``."""
        sql = "SELECT * FROM {} WHERE {}=%s".format(self._quote(table_name), self._quote(column))
        self.cursor.execute(sql, (value,))
        return self.cursor.fetchone()

    def create_table(self, table_name, cookie_class):
        """Create ``table_name`` (if missing) from the attributes of ``cookie_class``.

        Every attribute except ``md5``, ``cookies``, ``beg_time`` and
        ``end_time`` becomes a varchar column; those four are always created.

        :param table_name: name of the table
        :param cookie_class: object whose ``__dict__`` keys name the columns
        :return: result of ``cursor.execute``
        """
        columns = [k for k in vars(cookie_class) if k not in self._RESERVED_COLUMNS]
        sql_add = "".join("{} varchar(2000) DEFAULT NULL,".format(self._quote(k)) for k in columns)
        result = self.cursor.execute('''CREATE TABLE IF NOT EXISTS {} (
            `md5` varchar(200) NOT NULL, {}
            `cookies` varchar(2000) DEFAULT NULL,
            `beg_time` int(20) DEFAULT NULL,
            `end_time` int(20) DEFAULT NULL,
            PRIMARY KEY (`md5`)
            ) DEFAULT CHARSET=utf8;'''.format(self._quote(table_name), sql_add))
        return result

    def save(self, table_name, cookie_class, alive_time=3600):
        """Insert the record, or refresh the row that has the same ``md5``.

        The table must already exist (see create_table()). The attributes
        of ``cookie_class`` are copied, so the object itself is not modified.

        :param table_name: name of the table
        :param cookie_class: object carrying the row values, including ``md5``
        :param alive_time: validity period of the cookies, in seconds
        :return: number of rows affected by the update or, if none, by the insert
        """
        data = dict(vars(cookie_class))  # copy: never write timestamps back onto the object
        beg_time = int(time.time())
        data["beg_time"] = beg_time
        data["end_time"] = beg_time + alive_time
        result = self.__update(table_name, data)
        if result == 0:
            result = self.__insert(table_name, data)
        return result

    def get_by_tax(self, table_name, tax):
        """Return the first row whose ``tax`` column equals ``tax`` (or None).

        :param table_name: name of the table
        :param tax: tax number to look up
        """
        return self.__fetch_one_by(table_name, "tax", tax)

    def get_by_province(self, table_name, province):
        """Return the first row whose ``province`` column equals ``province`` (or None).

        :param table_name: name of the table
        :param province: province to look up
        """
        return self.__fetch_one_by(table_name, "province", province)

    def get_by_md5(self, table_name, *args):
        """Return the row whose ``md5`` equals the MD5 digest of the joined ``args``.

        :param table_name: name of the table
        :param args: values hashed the same way as CookieClass.__md5_deal()
        :return: the matching row, or None
        """
        txt = "".join(str(i) for i in args)
        md5_value = hashlib.md5(txt.encode()).hexdigest()
        return self.__fetch_one_by(table_name, "md5", md5_value)

    def get_by_other(self, table_name, data):
        """Return one random row matching every key/value pair in ``data``.

        :param table_name: name of the table
        :param data: conditions, e.g. {"province": "...", "tax": "12345"}
        :return: the row, or None if nothing matches or the query fails
        """
        return self.__select(table_name, data)

    def clear(self, table_name):
        """Delete every row whose ``end_time`` has passed.

        :param table_name: name of the table
        :return: number of deleted rows
        """
        sql = "DELETE FROM {} WHERE `end_time`<%s;".format(self._quote(table_name))
        result = self.cursor.execute(sql, (int(time.time()),))
        self.db.commit()
        return result

    def get_num(self, table_name):
        """Return the total number of rows in ``table_name`` (0 if none is reported).

        :param table_name: name of the table
        """
        self.cursor.execute("SELECT COUNT(*) FROM {};".format(self._quote(table_name)))
        row = self.cursor.fetchone()
        return row[0] if row else 0

    def clear_other(self, table_name, data, over_time=1):
        """Delete the rows matching ``data``.

        :param table_name: name of the table
        :param data: conditions, e.g. {"province": "...", "tax": "1"}
        :param over_time: 1 deletes only expired rows; any other value
                          deletes every matching row
        :return: number of deleted rows
        :raises ValueError: if ``data`` is empty and ``over_time`` is not 1,
                            which would otherwise delete the whole table
        """
        clause, params = self._where(data)
        conditions = [clause] if clause else []
        if over_time == 1:
            conditions.append("`end_time`<%s")
            params.append(int(time.time()))
        if not conditions:
            raise ValueError("clear_other() needs at least one condition")
        sql = "DELETE FROM {} WHERE {};".format(self._quote(table_name), " AND ".join(conditions))
        result = self.cursor.execute(sql, params)
        self.db.commit()
        return result

    def __insert(self, table_name, data):
        """Insert ``data`` as a new row.

        :param table_name: name of the table
        :param data: mapping of column name to value
        :return: number of inserted rows, or 0 when the database rejects the insert
        """
        columns = list(data)
        sql = "INSERT INTO {} ({}) VALUES ({})".format(
            self._quote(table_name),
            ", ".join(self._quote(c) for c in columns),
            ", ".join(["%s"] * len(columns)))
        try:
            result = self.cursor.execute(sql, [data[c] for c in columns])
            self.db.commit()
            return result
        except pymysql.MySQLError:
            # Keep the "0 means nothing was stored" contract, but leave the
            # connection in a clean state.
            self.db.rollback()
            return 0

    def __update(self, table_name, data):
        """Overwrite the row whose ``md5`` equals ``data["md5"]``.

        :param table_name: name of the table
        :param data: mapping of column name to value; must contain ``md5``
        :return: number of affected rows
        """
        columns = list(data)
        sql = "UPDATE {} SET {} WHERE `md5`=%s;".format(
            self._quote(table_name),
            " , ".join("{}=%s".format(self._quote(c)) for c in columns))
        result = self.cursor.execute(sql, [data[c] for c in columns] + [data["md5"]])
        self.db.commit()
        return result

    def __select(self, table_name, data):
        """Fetch one random row matching ``data``.

        :param table_name: name of the table
        :param data: mapping of column name to required value; an empty
                     mapping matches every row
        :return: the fetched row, or None when nothing matches or the
                 database reports an error
        """
        clause, params = self._where(data)
        sql = "SELECT * FROM {}{} ORDER BY RAND() LIMIT 1;".format(
            self._quote(table_name), " WHERE " + clause if clause else "")
        try:
            self.cursor.execute(sql, params)
            return self.cursor.fetchone()
        except pymysql.MySQLError:
            return None
if __name__ == '__main__':
    # Demo run against a local MySQL instance, exercising every public call once.
    url = "https://www.cnblogs.com"
    cookies = dict([
        ('_ga', 'GA1.2.368799281.1551860816'),
        ('__gads', 'ID'),
        ('Hm_lvt_159f5d3f9ae2bd304b8e99efa2e9ae34', '1551884072,155193744113'),
        ('Hm_lpvt_159f5d3f9ae2bd304b8e99efa2e9ae34', '1551937443'),
        ('sc_is_visitor_unique',
         'rx11644382.1553693305.EBA7A35830044F4743585FA345B2D54D.1.1.1.1.1.1.1.1.1'),
    ])
    sTax = "1234567896"
    table_name = "cookies_pool2"

    # Pool object and the cookie record to store.
    ck = CookDea("localhost", 3306, "testx1", "root", "root")
    j = CookieClass("北京6", sTax, cookies)

    # Create the table derived from the record's attributes.
    created = ck.create_table(table_name, j)
    print("生成CookieClass數據表:", created)

    # Store the record.
    stored = ck.save(table_name, j)
    print("插入:", stored)

    # Look the record up in the different supported ways.
    by_tax = ck.get_by_tax(table_name, sTax)
    print("\n查詢:", by_tax)
    by_province = ck.get_by_province(table_name, "北京6")
    print("\n查詢get_by_province:", by_province)
    by_other_tax = ck.get_by_other(table_name, {"tax": sTax})
    print("\nget_by_other1:", by_other_tax)
    by_other_province = ck.get_by_other(table_name, {"province": "北京1"})
    print("\nget_by_other2:", by_other_province)
    by_md5 = ck.get_by_md5(table_name, sTax)
    print("\nget_by_md5:", by_md5)

    # Remove expired rows, then report how many rows remain.
    removed = ck.clear(table_name)
    print("\nclear:", removed)
    total = ck.get_num(table_name)
    print("\nget_num:", total)
代碼中加了CookieClass類作爲Cookie數據標識,並且優化了部分代碼,找大佬看過後,以下是後面的部分對話:
(省略掉一些其他的細節,比如:cookie數據不應該封裝在CookieClass類中CookieClass應該只作爲Cookie的識別;我應該寫單元測試代碼(爲了方便閱讀,測試代碼就加在最後面了,雖然只有最終代碼的測試);給的接口應該儘可能的少(需要時可以擴展,但一定要保證原來的接口可用(所以這是一個難點));代碼規範問題等等)
(大佬)“多線程\多進程時能保證數據的準確性嗎?會不會產生錯亂?”
(我)思考了一下,答:“由於使用的是mysql,事務性數據庫,所以不會像存入txt文本那樣,多線程時會出現數據錯亂”
(大佬)“但是爲了以後的擴展,比如要求Cookie數據存入txt中,所以你設計的接口不應該太依賴底層的東西,你覺得怎麼設計?”
(我)再次思考了一下,答:“加鎖”
於是乎,開始了第三次修改-加讀寫鎖(參考文章:描述python實現讀寫鎖rwlock及場景+python readwritelock讀寫鎖的實現+python unittest之異常測試)
第三天修改
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import time, os, copy, hashlib, threading
try:
import pymysql
except:
os.system("pip install pymysql")
class CookieID:
    """Identifier of a cookie record; ``md5`` serves as the unique primary key."""

    def __init__(self, s_province, s_tax):
        if not isinstance(s_province, str) or not isinstance(s_tax, str):
            raise TypeError('CookieID()對象初始化參數類型錯誤!')
        self.province = s_province
        self.tax = s_tax
        # Hash of the identifying attribute(s); __md5_deal() accepts any
        # number of values. ``md5`` is a mandatory attribute.
        self.md5 = self.__md5_deal(s_tax)

    def __md5_deal(self, *args):
        """Return the hex MD5 digest of the concatenated string forms of ``args``."""
        digest = hashlib.md5("".join(str(part) for part in args).encode())
        return digest.hexdigest()
class CookiePool:
    """Cookie pool stored in MySQL, with a writer lock around save().

    Writes go through one shared connection (``self.db``) and are serialized
    by the write lock. Reads open a short-lived connection of their own.
    All values are sent as query parameters; only table and column names are
    interpolated, and those are backtick-quoted.
    """

    def __init__(self, s_host, n_port, s_database, s_user, s_password, s_table_name, cookie_id, s_charset="utf8"):
        """Connect to the database and create the pool table if it is missing.

        :param s_host: database host name or IP address
        :param n_port: database port
        :param s_database: name of the database to use
        :param s_user: user name for the connection
        :param s_password: password for the connection
        :param s_table_name: table that stores the pool's data
        :param cookie_id: CookieID instance; its attributes (except ``md5``)
                          define the table's extra columns
        :param s_charset: connection character set
        :raises TypeError: if an argument has the wrong type or ``cookie_id``
                           has no ``md5`` attribute
        """
        str_args = (s_host, s_database, s_user, s_password, s_table_name, s_charset)
        if not (isinstance(n_port, int) and all(isinstance(v, str) for v in str_args)):
            raise TypeError('CookiePool()對象初始化參數類型錯誤!')
        # Validate cookie_id before connecting so that a bad argument does
        # not leave an open connection behind.
        try:
            data = copy.deepcopy(cookie_id.__dict__)
            data.pop("md5")
        except (AttributeError, KeyError):
            raise TypeError('CookiePool()對象初始化參數類型錯誤! -- cookie_id')
        self.s_host = s_host
        self.n_port = n_port
        self.s_database = s_database
        self.s_user = s_user
        self.s_password = s_password
        self.s_table_name = s_table_name
        self.s_charset = s_charset
        self._setup_lock()
        self.db = self._connect()
        sql_add = "".join("{} varchar(2000) DEFAULT NULL,".format(self._quote(k)) for k in data)
        self.db.cursor().execute('''CREATE TABLE IF NOT EXISTS {} (
            `md5` varchar(200) NOT NULL, {}
            `cookies` varchar(2000) DEFAULT NULL,
            `beg_time` int(20) DEFAULT NULL,
            `end_time` int(20) DEFAULT NULL,
            PRIMARY KEY (`md5`)
            ) DEFAULT CHARSET=utf8;'''.format(self._quote(self.s_table_name), sql_add))

    def _setup_lock(self):
        """Create the lock state: one mutex shared by a reader and a writer condition."""
        self._lock = threading.Lock()
        self._read_ready = threading.Condition(self._lock)
        self._write_ready = threading.Condition(self._lock)
        self._writers = 0           # writers that are waiting or active
        self._write_active = False  # True while one writer owns the write lock

    def _connect(self):
        """Open and return a new database connection with the stored settings."""
        return pymysql.connect(host=self.s_host, port=self.n_port, database=self.s_database,
                               user=self.s_user, password=self.s_password, charset=self.s_charset)

    @staticmethod
    def _quote(identifier):
        """Return ``identifier`` backtick-quoted, doubling embedded backticks."""
        return "`{}`".format(str(identifier).replace("`", "``"))

    def save(self, cookie_id, s_cookies, n_alive_time=3600):
        """Insert the cookies, or refresh the row that has the same ``md5``.

        :param cookie_id: CookieID instance identifying the record
        :param s_cookies: cookie data; stored as ``str(s_cookies)``
        :param n_alive_time: validity period of the cookies, in seconds
        :return: number of rows affected by the update or, if none, by the insert
        :raises TypeError: if ``n_alive_time`` is not an int or ``cookie_id``
                           has no ``__dict__``
        """
        if not isinstance(n_alive_time, int):
            raise TypeError('參數類型錯誤!')
        try:
            dic_data = copy.deepcopy(cookie_id.__dict__)
        except AttributeError:
            raise TypeError('參數類型錯誤!')
        self.__acquire_write()
        try:
            n_beg_time = int(time.time())
            dic_data["beg_time"] = n_beg_time
            dic_data["end_time"] = n_beg_time + n_alive_time
            dic_data["cookies"] = str(s_cookies)
            n_result = self.__update(dic_data)
            if 0 == n_result:
                n_result = self.__insert(dic_data)
        finally:
            # Always hand the lock back; otherwise one failed write would
            # block every later writer forever.
            self.__release_write()
        return n_result

    def get_by_cookieid(self, cookie_id, n_get_sum=1, filter_overtime=1):
        """Return cookie rows matching the non-empty attributes of ``cookie_id``.

        Attributes whose value is empty are ignored, so
        ``CookieID(s_province="bj", s_tax="")`` selects by province only.
        The ``md5`` attribute is never used as a condition.

        :param cookie_id: CookieID instance used as the query condition
        :param n_get_sum: maximum number of rows, chosen at random; a value
                          <= 0 returns every matching row
        :param filter_overtime: 1 (default) skips expired rows
        :return: the rows returned by ``cursor.fetchall()``
        :raises TypeError: on wrong argument types
        """
        if not (isinstance(n_get_sum, int) and isinstance(filter_overtime, int)):
            raise TypeError('參數類型錯誤!')
        try:
            data = copy.deepcopy(cookie_id.__dict__)
            data.pop("md5")
        except (AttributeError, KeyError):
            raise TypeError('參數類型錯誤! -- cookie_id')
        data = {k: v for k, v in data.items() if v}
        self.__acquire_read()
        return self.__select(data, n_get_sum, filter_overtime)

    def clear_allovertime(self):
        """Delete every row whose ``end_time`` has passed.

        :return: number of deleted rows
        """
        db = self._connect()
        try:
            sql = "DELETE FROM {} WHERE `end_time`<%s;".format(self._quote(self.s_table_name))
            result = db.cursor().execute(sql, (int(time.time()),))
            db.commit()
            return result
        finally:
            db.close()

    def get_num(self):
        """Return the total number of rows in the pool table (0 if none is reported)."""
        db = self._connect()
        try:
            cursor = db.cursor()
            cursor.execute("SELECT COUNT(*) FROM {};".format(self._quote(self.s_table_name)))
            row = cursor.fetchone()
            return row[0] if row else 0
        finally:
            db.close()

    def __insert(self, data):
        """Insert ``data`` as a new row through the shared connection.

        :param data: mapping of column name to value
        :return: number of inserted rows, or 0 when the database rejects the insert
        """
        columns = list(data)
        sql = "INSERT INTO {} ({}) VALUES ({})".format(
            self._quote(self.s_table_name),
            ", ".join(self._quote(c) for c in columns),
            ", ".join(["%s"] * len(columns)))
        try:
            result = self.db.cursor().execute(sql, [data[c] for c in columns])
            self.db.commit()
            return result
        except pymysql.MySQLError:
            # Keep the "0 means nothing was stored" contract, but leave the
            # shared connection in a clean state.
            self.db.rollback()
            return 0

    def __update(self, data):
        """Overwrite the row whose ``md5`` equals ``data["md5"]``.

        :param data: mapping of column name to value; must contain ``md5``
        :return: number of affected rows
        """
        columns = list(data)
        sql = "UPDATE {} SET {} WHERE `md5`=%s;".format(
            self._quote(self.s_table_name),
            " , ".join("{}=%s".format(self._quote(c)) for c in columns))
        result = self.db.cursor().execute(sql, [data[c] for c in columns] + [data["md5"]])
        self.db.commit()
        return result

    def __select(self, data, get_sum=1, filter_overtime=1):
        """Run the lookup on a connection of its own.

        :param data: mapping of column name to required value; an empty
                     mapping adds no column condition
        :param get_sum: row limit (random order); <= 0 means no limit
        :param filter_overtime: 1 restricts the result to unexpired rows
        :return: the rows returned by ``cursor.fetchall()``
        """
        conditions = ["{}=%s".format(self._quote(k)) for k in data]
        params = list(data.values())
        if 1 == filter_overtime:
            conditions.append("`end_time` > %s")
            params.append(int(time.time()))
        sql = "SELECT * FROM {}".format(self._quote(self.s_table_name))
        if conditions:
            sql += " WHERE " + " AND ".join(conditions)
        if get_sum > 0:
            sql += " ORDER BY RAND() LIMIT %s"
            params.append(get_sum)
        db = self._connect()
        try:
            cursor = db.cursor()
            cursor.execute(sql + ";", params)
            return cursor.fetchall()
        finally:
            db.close()

    def __acquire_read(self):
        """Block until no writer is waiting or active."""
        with self._read_ready:
            # Re-check after every wake-up: another writer may have arrived.
            while self._writers > 0:
                self._read_ready.wait()

    def __acquire_write(self):
        """Take the write lock, waiting while another writer owns it."""
        with self._write_ready:
            self._writers += 1
            while self._write_active:
                self._write_ready.wait()
            self._write_active = True

    def __release_write(self):
        """Give the write lock back and wake the next writer, or all readers."""
        with self._write_ready:
            self._write_active = False
            self._writers = max(self._writers - 1, 0)
            if not self._writers:
                self._read_ready.notify_all()
            self._write_ready.notify()
if __name__ == '__main__':
    # Demo run against a local MySQL instance.
    cookies = dict([
        ('_ga', 'GA1.2.368799281.1551860816'),
        ('__gads', 'ID'),
        ('Hm_lvt_159f5d3f9ae2bd304b8e99efa2e9ae34', '1551884072,155193744113'),
        ('Hm_lpvt_159f5d3f9ae2bd304b8e99efa2e9ae34', '1551937443'),
        ('sc_is_visitor_unique',
         'rx11644382.1553693305.EBA7A35830044F4743585FA345B2D54D.1.1.1.1.1.1.1.1.1'),
    ])
    sTax = "123456789612"

    # Identifier of the record, then the pool itself.
    j = CookieID(s_province="bj", s_tax=sTax)
    ck = CookiePool("localhost", 3306, "testx1", "root", "root", "cookies_pool4", j)

    # Store the cookies with a three-second lifetime.
    saved = ck.save(j, cookies, n_alive_time=3)
    print("插入:", saved)

    # Fetch every matching row for the province.
    found = ck.get_by_cookieid(CookieID(s_province="bj", s_tax=""), 0)
    print("\nget_by_cookieid:", found)

    # Remove expired rows, then report how many rows remain.
    removed = ck.clear_allovertime()
    print("\nclear_allovertime:", removed)
    total = ck.get_num()
    print("\nget_num:", total)
附上多線程測試代碼:
#!/usr/bin/python
# -*- coding: UTF-8 -*-
from CookiePool import CookieID, CookiePool
import requests, json, time, random, threading
import queue
# 設置用戶代理池
# Pool of user-agent strings, one entry per browser. Every entry ends with a
# comma: adjacent string literals without one are silently concatenated into
# a single bogus user agent.
UPPOOL = [
    "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.0) Gecko/20100101 Firefox/52.0",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36 Edge/14.14393",
    'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.8) Gecko Fedora/1.9.0.8-1.fc10 Kazehakase/0.5.6',
    'Mozilla/5.0 (X11; Linux i686; U;) Gecko/20070322 Kazehakase/0.4.5',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER',
    'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
]
class MyThread(threading.Thread):
    """Thread that runs the zero-argument callable it was constructed with."""

    def __init__(self, func):
        super().__init__()
        self.func = func

    def run(self):
        """Invoke the stored callable in this thread."""
        job = self.func
        job()
def worker():
    """Drain the shared queue ``q``, performing one pool operation per item.

    An item equal to ``0`` triggers a read of all "bj" rows and prints how
    many came back; any other item is a ``[cookie_id, cookies, alive_time]``
    triple that is saved to the pool, and the save result is printed.
    """
    while True:
        # empty() followed by a blocking get() is a race: another thread can
        # take the last item in between and this one would block forever.
        try:
            flag = q.get_nowait()
        except queue.Empty:
            return
        if flag == 0:
            print("get:", len(ck.get_by_cookieid(CookieID(s_province="bj", s_tax=""), 0)))
        else:
            print(ck.save(flag[0], flag[1], flag[2]))
def main():
    """Fill the queue with work items, then run ``threadNum`` worker threads to completion."""
    # NOTE(review): the source's indentation was lost; the two read markers
    # are assumed to be enqueued once per iteration - confirm.
    # NOTE(review): random.choices() returns a one-element list, so the list
    # (not a bare string) is what gets stored as the cookie value.
    for index in range(100000):
        write_job = [CookieID(s_province="bj", s_tax=str(index)), random.choices(UPPOOL), 10]
        q.put(write_job)
        q.put(0)
        q.put(0)
    threads = [MyThread(worker) for _ in range(threadNum)]
    for started in threads:
        started.start()
    for finished in threads:
        finished.join()
if __name__ == '__main__':
    # Module-level state shared by worker() and main().
    ck = CookiePool(
        "localhost", 3306, "testx1", "root", "root", "cookies_pool4",
        CookieID(s_province="test", s_tax=""),
    )
    q = queue.Queue()
    threadNum = 100  # number of worker threads
    main()
改了很多東西,包括讀寫鎖,一些細節的優化,接口的縮減等等(原來的代碼文件標識是4,這次直接到7.3了)
但是被看過之後,還是有一些地方需要優化,如:參數類型規範檢測(建議封裝),各種異常情況檢測,拋出異常時應該儘可能給出運行環境信息,數據庫連接可以進行封裝,字段命名要求(按公司要求文檔來)等等
於是繼續努力,在第四天中午前終於把接口0.1.0版本寫好了(因爲是新人新入公司,很多地方不熟悉,所以有些慢,哈),代碼如下:(參考文章:Python:函數參數類型檢查+Python裝飾器實例:調用參數合法性驗證)
第四天修改
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import time, os, copy, hashlib, threading
try:
import pymysql
except:
os.system("pip install pymysql")
from inspect import signature
from functools import wraps
def type_check(*type_args, **type_kwargs):
    """Decorator factory that validates argument types on every call.

    The expected types are given positionally or by keyword, mirroring the
    decorated function's own parameters; parameters without an expected
    type are not checked.

    :return: a decorator whose wrapper raises TypeError when a passed
             argument is not an instance of its expected type
    """
    def decorate(func):
        func_sig = signature(func)
        # Map parameter name -> expected type, for the parameters that were given one.
        expected = func_sig.bind_partial(*type_args, **type_kwargs).arguments

        @wraps(func)
        def wrapper(*args, **kwargs):
            passed = func_sig.bind(*args, **kwargs).arguments
            for arg_name in passed:
                if arg_name not in expected:
                    continue
                wanted = expected[arg_name]
                if isinstance(passed[arg_name], wanted):
                    continue
                raise TypeError('參數類型錯誤! 參數 {} 必須爲: {}'.format(arg_name, wanted))
            return func(*args, **kwargs)
        return wrapper
    return decorate
class CookieID:
    """Identifier of a cookie record; ``md5`` serves as the unique primary key."""

    @type_check(s_province=str, s_tax=str)
    def __init__(self, s_province, s_tax):
        if (s_province, s_tax) == ("", ""):
            raise TypeError('參數類型錯誤! 參數 s_province 和 s_tax 不能同時爲空')
        self.province = s_province
        self.tax = s_tax
        # Hash of the identifying attribute(s); __md5_deal() accepts any
        # number of values. ``md5`` is a mandatory attribute.
        self.md5 = self.__md5_deal(s_tax)

    def __md5_deal(self, *args):
        """Return the hex MD5 digest of the concatenated string forms of ``args``."""
        joined = "".join(str(part) for part in args)
        return hashlib.md5(joined.encode()).hexdigest()
class CookiePool:
    """Cookie pool stored in MySQL, with a writer lock around save().

    Writes go through one shared connection (``self.db``) and are serialized
    by the write lock. Reads and clean-up open a short-lived connection of
    their own after waiting until no writer is pending. All values are sent
    as query parameters; only table and column names are interpolated, and
    those are backtick-quoted.
    """

    @type_check(s_host=str, n_port=int, s_database=str, s_user=str, s_password=str, s_table_name=str, s_charset=str)
    def __init__(self, s_host, n_port, s_database, s_user, s_password, s_table_name, cookie_id, s_charset="utf8"):
        """Connect to the database and create the pool table if it is missing.

        :param s_host: database host name or IP address
        :param n_port: database port
        :param s_database: name of the database to use
        :param s_user: user name for the connection
        :param s_password: password for the connection
        :param s_table_name: table that stores the pool's data
        :param cookie_id: CookieID instance; its attributes (except ``md5``)
                          define the table's extra columns
        :param s_charset: connection character set
        :raises TypeError: if ``cookie_id`` has no ``__dict__`` or no ``md5``
        :raises Exception: if connecting or creating the table fails
        """
        # Validate cookie_id before connecting so that a bad argument does
        # not leave an open connection behind.
        try:
            data = copy.deepcopy(cookie_id.__dict__)
            data.pop("md5")
        except (AttributeError, KeyError):
            raise TypeError('CookiePool()對象初始化參數類型錯誤! -- cookie_id參數異常:{}'.format(cookie_id))
        self.s_host = s_host
        self.n_port = n_port
        self.s_database = s_database
        self.s_user = s_user
        self.s_password = s_password
        self.s_table_name = s_table_name
        self.s_charset = s_charset
        # Lock state: one mutex shared by a reader and a writer condition.
        self._lock = threading.Lock()
        self._read_ready = threading.Condition(self._lock)
        self._write_ready = threading.Condition(self._lock)
        self._writers = 0           # writers that are waiting or active
        self._write_active = False  # True while one writer owns the write lock
        self.db = self.__get_conn()
        sql_add = "".join("{} varchar(2000) DEFAULT NULL,".format(self._quote(k)) for k in data)
        try:
            self.db.cursor().execute('''CREATE TABLE IF NOT EXISTS {} (
                `md5` varchar(200) NOT NULL, {}
                `cookies` varchar(2000) DEFAULT NULL,
                `beg_time` int(20) DEFAULT NULL,
                `end_time` int(20) DEFAULT NULL,
                PRIMARY KEY (`md5`)
                ) DEFAULT CHARSET=utf8;'''.format(self._quote(self.s_table_name), sql_add))
        except Exception:
            # Do not hand out a pool whose connection is already closed.
            self.db.close()
            raise

    @staticmethod
    def _quote(identifier):
        """Return ``identifier`` backtick-quoted, doubling embedded backticks."""
        return "`{}`".format(str(identifier).replace("`", "``"))

    @type_check(n_alive_time=int)
    def save(self, cookie_id, s_cookies, n_alive_time=3600):
        """Insert the cookies, or refresh the row that has the same ``md5``.

        :param cookie_id: CookieID instance identifying the record
        :param s_cookies: cookie data; stored as ``str(s_cookies)``
        :param n_alive_time: validity period of the cookies, in seconds
        :return: number of rows affected by the update or, if none, by the insert
        :raises TypeError: if ``cookie_id`` has no ``__dict__``
        :raises Exception: if the database operation fails
        """
        try:
            dic_data = copy.deepcopy(cookie_id.__dict__)
        except AttributeError:
            raise TypeError('參數類型錯誤!cookie_id應該爲CookieID實例化對象.{}'.format(cookie_id))
        # Acquire outside the try block so that the finally clause only ever
        # releases a lock this call really holds.
        self.__acquire_write()
        try:
            n_beg_time = int(time.time())
            dic_data["beg_time"] = n_beg_time
            dic_data["end_time"] = n_beg_time + n_alive_time
            dic_data["cookies"] = str(s_cookies)
            n_result = self.__update(dic_data)
            if 0 == n_result:
                n_result = self.__insert(dic_data)
        except Exception as e:
            raise Exception("數據插入異常!cookie_id:{},s_cookies:{},n_alive_time:{},\nerror:{}".format(
                cookie_id, s_cookies, n_alive_time, e)) from e
        finally:
            self.__release_write()
        return n_result

    @type_check(n_get_sum=int, filter_overtime=int)
    def get_by_cookieid(self, cookie_id, n_get_sum=1, filter_overtime=1):
        """Return cookie rows matching the non-empty attributes of ``cookie_id``.

        Attributes whose value is empty are ignored, so
        ``CookieID(s_province="bj", s_tax="")`` selects by province only.
        The ``md5`` attribute is never used as a condition.

        :param cookie_id: CookieID instance used as the query condition
        :param n_get_sum: maximum number of rows, chosen at random; a value
                          <= 0 returns every matching row
        :param filter_overtime: 1 (default) skips expired rows
        :return: the rows returned by ``cursor.fetchall()``
        :raises TypeError: if ``cookie_id`` has no ``__dict__`` or no ``md5``
        """
        try:
            data = copy.deepcopy(cookie_id.__dict__)
            data.pop("md5")
        except (AttributeError, KeyError):
            raise TypeError('參數類型錯誤! -- cookie_id')
        data = {k: v for k, v in data.items() if v}
        self.__acquire_read()
        return self.__select(data, n_get_sum, filter_overtime)

    def clear_allovertime(self):
        """Delete every row whose ``end_time`` has passed.

        :return: number of deleted rows
        :raises Exception: if the database operation fails
        """
        self.__acquire_read()
        db = self.__get_conn()
        sql = ""
        try:
            sql = "DELETE FROM {} WHERE `end_time`<%s;".format(self._quote(self.s_table_name))
            result = db.cursor().execute(sql, (int(time.time()),))
            db.commit()
        except Exception as e:
            raise Exception("數據插入異常!sql:{},\nerror:{}".format(sql, e)) from e
        finally:
            db.close()  # this per-call connection was previously never closed
        return result

    def get_num(self):
        """Return the total number of rows in the pool table (0 if none is reported)."""
        self.__acquire_read()
        db = self.__get_conn()
        try:
            cursor = db.cursor()
            cursor.execute("SELECT COUNT(*) FROM {};".format(self._quote(self.s_table_name)))
            row = cursor.fetchone()
            return row[0] if row else 0
        finally:
            db.close()

    def __insert(self, data):
        """Insert ``data`` as a new row through the shared connection.

        :param data: mapping of column name to value
        :return: number of inserted rows
        :raises Exception: if the database operation fails
        """
        sql = ""
        try:
            columns = list(data)
            sql = "INSERT INTO {} ({}) VALUES ({})".format(
                self._quote(self.s_table_name),
                ", ".join(self._quote(c) for c in columns),
                ", ".join(["%s"] * len(columns)))
            result = self.db.cursor().execute(sql, [data[c] for c in columns])
            self.db.commit()
            return result
        except Exception as e:
            raise Exception("數據插入異常!data:{},\nsql:{},\nerror:{}".format(
                data, sql, e)) from e

    def __update(self, data):
        """Overwrite the row whose ``md5`` equals ``data["md5"]``.

        :param data: mapping of column name to value; must contain ``md5``
        :return: number of affected rows
        :raises Exception: if the database operation fails
        """
        sql = ""
        try:
            columns = list(data)
            sql = "UPDATE {} SET {} WHERE `md5`=%s;".format(
                self._quote(self.s_table_name),
                " , ".join("{}=%s".format(self._quote(c)) for c in columns))
            result = self.db.cursor().execute(sql, [data[c] for c in columns] + [data["md5"]])
            self.db.commit()
        except Exception as e:
            raise Exception("數據插入異常!data:{},\nsql:{},\nerror:{}".format(
                data, sql, e)) from e
        return result

    def __select(self, data, get_sum=1, filter_overtime=1):
        """Run the lookup on a connection of its own.

        :param data: mapping of column name to required value; an empty
                     mapping adds no column condition
        :param get_sum: row limit (random order); <= 0 means no limit
        :param filter_overtime: 1 restricts the result to unexpired rows
        :return: the rows returned by ``cursor.fetchall()``
        :raises Exception: if the database operation fails
        """
        db = self.__get_conn()
        sql = ""
        try:
            conditions = ["{}=%s".format(self._quote(k)) for k in data]
            params = list(data.values())
            if 1 == filter_overtime:
                conditions.append("`end_time` > %s")
                params.append(int(time.time()))
            sql = "SELECT * FROM {}".format(self._quote(self.s_table_name))
            if conditions:
                sql += " WHERE " + " AND ".join(conditions)
            if get_sum > 0:
                sql += " ORDER BY RAND() LIMIT %s"
                params.append(get_sum)
            sql += ";"
            cursor = db.cursor()
            cursor.execute(sql, params)
            res = cursor.fetchall()
        except Exception as e:
            raise Exception("數據插入異常!data:{},\nsql:{},\nerror:{}".format(
                data, sql, e)) from e
        finally:
            db.close()
        return res

    def __get_conn(self):
        """Open a database connection, retrying while the server is out of connections.

        A "Too many connections" error is retried up to 1000 times with a
        10 ms pause; any other error is raised immediately.

        :return: the open connection
        :raises Exception: if the connection cannot be established
        """
        error_tmc = 1000
        while True:
            try:
                return pymysql.connect(host=self.s_host, port=self.n_port, database=self.s_database,
                                       user=self.s_user, password=self.s_password, charset=self.s_charset)
            except Exception as e:
                if "Too many connections" in str(e) and error_tmc > 0:
                    time.sleep(0.01)
                    error_tmc -= 1
                else:
                    # Report the connection settings, but never the password.
                    settings = {k: ("***" if k == "s_password" else v)
                                for k, v in self.__dict__.items() if k.startswith(("s_", "n_"))}
                    raise Exception('數據庫連接異常! connect:{},\nerror:{}'.format(settings, e)) from e

    def __acquire_read(self):
        """Block until no writer is waiting or active."""
        with self._read_ready:
            # Re-check after every wake-up: another writer may have arrived.
            while self._writers > 0:
                self._read_ready.wait()

    def __acquire_write(self):
        """Take the write lock, waiting while another writer owns it."""
        with self._write_ready:
            self._writers += 1
            while self._write_active:
                self._write_ready.wait()
            self._write_active = True

    def __release_write(self):
        """Give the write lock back and wake the next writer, or all readers."""
        with self._write_ready:
            self._write_active = False
            self._writers = max(self._writers - 1, 0)
            if not self._writers:
                self._read_ready.notify_all()
            self._write_ready.notify()
if __name__ == '__main__':
    # Demo run against a local MySQL instance.
    cookies = dict([
        ('_ga', 'GA1.2.368799281.1551860816'),
        ('__gads', 'ID'),
        ('Hm_lvt_159f5d3f9ae2bd304b8e99efa2e9ae34', '1551884072,155193744113'),
    ])
    sTax = "123456789612"

    # Identifier of the record, then the pool itself.
    j = CookieID(s_province="bj", s_tax=sTax)
    ck = CookiePool("localhost", 3306, "testx1", "root", "root", "cookies_pool4", j)

    # Store the cookies (identifier, cookie data, lifetime in seconds).
    saved = ck.save(j, cookies, n_alive_time=3)
    print("插入Cookies對象信息:", saved)

    # Query by identifier: at most one row, expired rows included.
    found = ck.get_by_cookieid(CookieID(s_province="bj", s_tax=""), 1, 0)
    print("\n查詢Cookies對象信息:", found)

    # Remove expired rows, then report how many rows remain.
    removed = ck.clear_allovertime()
    print("\n刪除數據庫中過時的cookies數據:", removed)
    total = ck.get_num()
    print("\n獲取數據庫中cookies數據總數量:", total)
修改的地方有:裝飾器作爲參數檢測、加入 __get_conn(self)返回數據庫連接、類的屬性保護等等,最後的0.1.1版本就不上傳了,雖然問題不大,但公司也有保密要求,0.1.1修改的不多,整體思路和0.1.0一樣,最後附上測試代碼:
#!/usr/bin/python
# -*- coding: UTF-8 -*-
from .CookiePool.CookiePool import * # 導入被測試模塊中的函數
import unittest # 測試代碼需要使用的模塊
class TestMathFunc(unittest.TestCase):  # inherits from unittest.TestCase
    """Integration tests for ``CookieID`` and ``CookiePool``.

    The fixtures below are class attributes, so the ``CookiePool`` is
    constructed once when the class body is executed and is shared by every
    test method.  The tests talk to a real database on ``localhost:3306``
    (presumably MySQL -- confirm against ``CookiePool``), so they need that
    server to be reachable.

    Only methods whose names start with ``test`` are collected as test cases.
    """

    url = "https://www.cnblogs.com"
    cookies = {
        '_ga': 'GA1.2.368799281.1551860816',
        '__gads': 'ID',
        'Hm_lvt_159f5d3f9ae2bd304b8e99efa2e9ae34': '1551884072,155193744113',
        'Hm_lpvt_159f5d3f9ae2bd304b8e99efa2e9ae34': '1551937443',
        'sc_is_visitor_unique': 'rx11644382.1553693305.EBA7A35830044F4743585FA345B2D54D.1.1.1.1.1.1.1.1.1',
    }
    sTax = "123456781"
    # Object that identifies the cookie record used throughout the tests.
    j = CookieID(s_province="bj", s_tax=sTax)
    # Shared pool under test.
    # NOTE(review): arguments look like host, port, database, user, password,
    # table name and a CookieID -- confirm against CookiePool.__init__.
    ck = CookiePool("localhost", 3306, "testx1", "root", "root", "cookies_pool4", j)

    def test_CookieID1(self):
        """CookieID accepts string fields and rejects invalid arguments.

        The three negative cases expect a ``TypeError`` whose message matches
        the "wrong parameter type" text.
        """
        print("test:CookieID()-1")
        print(CookieID(s_province="bj", s_tax="123456781"))
        self.assertRaisesRegex(
            TypeError, "參數類型錯誤", CookieID, "bj", 123123)
        self.assertRaisesRegex(
            TypeError, "參數類型錯誤", CookieID, 123, 123123)
        self.assertRaisesRegex(
            TypeError, "參數類型錯誤", CookieID, "", "")

    # -----------------------------------------CookiePool----------------------------------------------------
    def test_CookiePool1(self):
        """CookiePool constructor validates argument types and connectivity.

        First each positional argument in turn is replaced by a value of the
        wrong type and a ``TypeError`` is expected.  Then well-typed but wrong
        connection values are passed and an ``Exception`` matching the
        "database connection error" text is expected.
        """
        print("test:CookiePool()-2")
        print(CookiePool("localhost", 3306, "testx1", "root", "root", "cookies_pool4", self.j))

        # --- wrong argument types -> TypeError ---
        print('測試 :CookiePool, 3306, 3306, "testx1", "root", "root", "cookies_pool4", self.j')
        self.assertRaisesRegex(
            TypeError, "參數類型錯誤",
            CookiePool, 3306, 3306, "testx1", "root", "root", "cookies_pool4", self.j)
        print('測試 : CookiePool, "localhost", "123", "testx1", "root", "root", "cookies_pool4", self.j')
        self.assertRaisesRegex(
            TypeError, "參數類型錯誤",
            CookiePool, "localhost", "123", "testx1", "root", "root", "cookies_pool4", self.j)
        print('測試 : CookiePool, "localhost", 3306, 3306, "root", "root", "cookies_pool4", self.j')
        self.assertRaisesRegex(
            TypeError, "參數類型錯誤",
            CookiePool, "localhost", 3306, 3306, "root", "root", "cookies_pool4", self.j)
        print('測試 :CookiePool, "localhost", 3306, "testx1", 3306, "root", "cookies_pool4", self.j')
        self.assertRaisesRegex(
            TypeError, "參數類型錯誤",
            CookiePool, "localhost", 3306, "testx1", 3306, "root", "cookies_pool4", self.j)
        print('測試 :CookiePool, "localhost", 3306, "testx1", "root", 3306, "cookies_pool4", self.j')
        self.assertRaisesRegex(
            TypeError, "參數類型錯誤",
            CookiePool, "localhost", 3306, "testx1", "root", 3306, "cookies_pool4", self.j)
        print('測試 :CookiePool, "localhost", 3306, "testx1", "root", "root", 3306, self.j')
        self.assertRaisesRegex(
            TypeError, "參數類型錯誤",
            CookiePool, "localhost", 3306, "testx1", "root", "root", 3306, self.j)
        print('測試 :CookiePool, "localhost", 3306, "testx1", "root", "root", "cookies_pool4", 3306')
        self.assertRaisesRegex(
            TypeError, "參數類型錯誤",
            CookiePool, "localhost", 3306, "testx1", "root", "root", "cookies_pool4", 3306)
        print('測試 :CookiePool, "localhost", 3306, "testx1", "root", "root", "cookies_pool4", "3306"')
        self.assertRaisesRegex(
            TypeError, "參數類型錯誤",
            CookiePool, "localhost", 3306, "testx1", "root", "root", "cookies_pool4", "3306")

        # --- well-typed but wrong connection values -> connection error ---
        print('測試 : CookiePool, "localhost1", 3306, "testx1", "root", "root", "cookies_pool4", self.j')
        self.assertRaisesRegex(
            Exception, "數據庫連接異常",
            CookiePool, "localhost1", 3306, "testx1", "root", "root", "cookies_pool4", self.j)
        print('測試 : CookiePool, "localhost", 3306, "3306", "root", "root", "cookies_pool4", self.j')
        self.assertRaisesRegex(
            Exception, "數據庫連接異常",
            CookiePool, "localhost", 3306, "3306", "root", "root", "cookies_pool4", self.j)
        print('測試 :CookiePool, "localhost", 3306, "testx1", "3306", "root", "cookies_pool4", self.j')
        self.assertRaisesRegex(
            Exception, "數據庫連接異常",
            CookiePool, "localhost", 3306, "testx1", "3306", "root", "cookies_pool4", self.j)
        print('測試 :CookiePool, "localhost", 3306, "testx1", "root", "3306", "cookies_pool4", self.j')
        self.assertRaisesRegex(
            Exception, "數據庫連接異常",
            CookiePool, "localhost", 3306, "testx1", "root", "3306", "cookies_pool4", self.j)

    def test_save1(self):
        """save() returns 1 for valid input and rejects a non-CookieID key."""
        print("test:save()-1")
        self.assertEqual(
            1, self.ck.save(CookieID(s_province="bj", s_tax="123456781"), self.cookies))
        print("test:save()-2")
        self.assertEqual(
            1,
            self.ck.save(CookieID(s_province="bj", s_tax="123456781"), self.cookies, n_alive_time=1))
        # print("result:",self.ck.save(CookieID(s_province="bj", s_tax="123456782"), self.cookies, n_alive_time=1))
        print("test:save()-3")
        print('測試 : self.ck.save, "1234", self.cookies')
        self.assertRaisesRegex(
            TypeError, "參數類型錯誤", self.ck.save, "1234", self.cookies)
        print('測試 :self.ck.save, "1234", self.cookies, "1"')
        self.assertRaisesRegex(
            TypeError, "參數類型錯誤", self.ck.save, "1234", self.cookies, "1")

    def test_get_by_cookieid1(self):
        """get_by_cookieid() runs for valid input and rejects bad argument types.

        The first three calls only print their results (no assertion on the
        returned value); the remaining cases expect a ``TypeError``.
        """
        print("test:get_by_cookieid()-1")
        print("result:", self.ck.get_by_cookieid(CookieID(s_province="bj", s_tax=""), 0))
        print("test:get_by_cookieid()-2")
        print("result:", self.ck.get_by_cookieid(CookieID(s_province="bj", s_tax=""), 1))
        print("test:get_by_cookieid()-3")
        print("result:", self.ck.get_by_cookieid(CookieID(s_province="bj", s_tax=""), 1))
        print("test:get_by_cookieid()-4")
        print('測試 :self.ck.get_by_cookieid, "123", 0')
        self.assertRaisesRegex(
            TypeError, "參數類型錯誤", self.ck.get_by_cookieid, "123", 0)
        print('測試 : self.ck.get_by_cookieid, CookieID(s_province="bj", s_tax=""), "123", 1')
        self.assertRaisesRegex(
            TypeError, "參數類型錯誤",
            self.ck.get_by_cookieid, CookieID(s_province="bj", s_tax=""), "123", 1)
        print('測試 :self.ck.get_by_cookieid, CookieID(s_province="bj", s_tax=""), "0", "1"')
        self.assertRaisesRegex(
            TypeError, "參數類型錯誤",
            self.ck.get_by_cookieid, CookieID(s_province="bj", s_tax=""), "0", "1")

    def test_get_num(self):
        """get_num() works normally and reports connection/query errors.

        The pool's ``_s_database`` and ``_s_table_name`` attributes are
        temporarily overwritten with a bogus value.  Because ``ck`` is shared
        by all tests, each override is undone in a ``finally`` clause so a
        failing assertion cannot leak the bogus value into other tests.
        """
        print("test:get_num()")
        print("result:", self.ck.get_num())

        saved_database = self.ck._s_database
        print('測試 :self.ck.get_num ck._s_database = "1234"')
        self.ck._s_database = "1234"
        try:
            self.assertRaisesRegex(
                Exception, "數據庫連接異常", self.ck.get_num)
        finally:
            self.ck._s_database = saved_database

        print('測試 : self.ck.get_num ck._s_table_name = "1234"')
        saved_table_name = self.ck._s_table_name
        self.ck._s_table_name = "1234"
        try:
            self.assertRaisesRegex(
                Exception, "數據庫查詢異常", self.ck.get_num)
        finally:
            self.ck._s_table_name = saved_table_name

    # Disabled test for clearing expired cookies, kept for reference.
    # NOTE(review): it calls clear_all_overtime(); confirm the method name
    # against CookiePool before re-enabling.
    # def test_clear_all_overtime(self):
    #     print("test:clear_allovertime()")
    #     print("result:",self.ck.clear_all_overtime())
    #     flag = self.ck._s_database
    #     self.ck._s_database = "1234"
    #     print("檢測self.ck._s_database:",self.ck._s_database)
    #     self.assertRaisesRegex(
    #         Exception, "數據庫連接異常", self.ck.clear_all_overtime)
    #     self.ck._s_database = flag
# Run the test cases only when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()  # discover and run the test cases in this module
測試結果: