讀寫文件，多進程和多線程的一些總結

原創

2018-09-06 04:52

# coding:utf-8
"""
Split a large file into chunks.

big_file.txt is a 500 MB file of shuffled 5-digit numbers.
Multithreading did not improve the speed.
"""

import time
# String handed to the reader functions below as their starting text.
txtfile = ''
import threading
def txtmap(txtup):
    """Read big_file.txt one character at a time until 100000 commas are seen.

    Every character read is appended to ``txtup``.  ``+=`` on a string
    rebinds the local name only, so a ``str`` passed in by the caller is not
    modified and the accumulated text is dropped when the function returns.

    Args:
        txtup: Starting text the characters are appended to.

    Returns:
        None.
    """
    with open('big_file.txt', 'r') as f:
        i = 0
        while i < 100000:
            txt = f.read(1)
            if not txt:
                # read() returns '' at end of file.  Without this check a
                # file holding fewer than 100000 commas would loop forever.
                break
            if txt == ',':
                i += 1
            txtup += txt
# Threaded run: 100 workers, each calling txtmap(txtfile).
start = time.time()
# Pass the callable and its arguments separately.  Writing
# ``target=txtmap(txtfile)`` runs txtmap immediately in the main thread and
# hands its return value (None) to Thread, so the threads would do nothing.
workers = [threading.Thread(target=txtmap, args=(txtfile,)) for _ in range(100)]
for txti in workers:
    txti.start()
# Join only after every thread has been started; joining right after each
# start() would run the workers strictly one after another.
for txti in workers:
    txti.join()
print(time.time() - start)
def txtmap2(txtup):
    """Read big_file.txt one character at a time until 1000000 commas are seen.

    Every character read is appended to ``txtup``.  ``+=`` on a string
    rebinds the local name only, so a ``str`` passed in by the caller is not
    modified and the accumulated text is dropped when the function returns.

    Args:
        txtup: Starting text the characters are appended to.

    Returns:
        None.
    """
    with open('big_file.txt', 'r') as f:
        i = 0
        while i < 1000000:
            txt = f.read(1)
            if not txt:
                # read() returns '' at end of file.  Without this check a
                # file holding fewer than 1000000 commas would loop forever.
                break
            if txt == ',':
                i += 1
            txtup += txt
# Sequential baseline: call txtmap2 ten times in the main thread and print
# the elapsed wall-clock time in seconds.
start = time.time()
for i in range(10):
    txtmap2(txtfile)
elapsed = time.time() - start
print(elapsed)

多進程和隊列初試
import time
from multiprocessing import Process
from multiprocessing import Queue
num = 0
qnum = Queue()
qnum.put(num)
def testnum(num):
num += 1
qnum.put(num)
for i in range(10):
p = Process(target = testnum,args = (qnum.get(),))
p.start()
p.join()
# testnum(num)
print(qnum.get(),qnum.empty())
在這裏，qnum 是一個已實例化的對象，函數內只是調用它的方法（qnum.put），並沒有對 qnum 這個名字重新賦值，所以不需要用 global 標記；只有在函數內對全局名字重新賦值時才需要 global。

多次測試後發現：多進程加隊列時，必須把文件指針位置（f.tell()）也一併放進隊列，讓下一個進程先 f.seek() 到該位置再讀；不然下一個進程的讀取位置就會亂跳（各子進程共用同一個底層文件偏移量，而帶緩衝的讀取一次會預讀一大塊，偏移量因此前進得比實際用掉的字符多）。
with open('big_file.txt', 'r') as f:
    # Seed the queue with the starting text and the initial file position.
    q.put((txtfile3, f.tell()))

    def txtmap(qget):
        """Read 10 comma-terminated fields starting at a saved position.

        The extended text and the new file position are put back on ``q``
        as a ``(text, position)`` tuple.

        Args:
            qget: ``(text_so_far, file_position)`` tuple taken from ``q``.
        """
        txtup = qget[0]
        i = 0
        # Continue from where the previous worker stopped.
        f.seek(qget[1])
        while i < 10:
            txt = f.read(1)
            if not txt:
                # read() returns '' at end of file; stop instead of
                # looping forever.
                break
            if txt == ',':
                i += 1
            txtup += txt
        q.put((txtup, f.tell()))

    # NOTE(review): the workers read through the parent's ``f``; this
    # presumably relies on the fork start method -- confirm.
    start = time.time()
    for i in range(10):
        # ``args`` is the tuple of positional arguments, hence the 1-tuple
        # wrapped around the tuple returned by q.get().
        txtp2i = Process(target=txtmap, args=(q.get(),))
        txtp2i.start()
        txtp2i.join()
    print('多進程加隊列', time.time() - start, '\n', q.get())

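以及這個 args 的賦值真是煩人：明明從 q.get() 出來的就是元組，它卻非要你在 = 後面再包一層元組才行。原因是 args 本身是「位置參數的元組」，其中每個元素對應目標函數的一個參數；txtmap 只接收一個參數，所以要寫成 args=(q.get(),)，否則 q.get() 返回的元組會被拆開，當成兩個參數傳入。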

# coding:utf-8
"""
Split a large file into chunks.

big_file.txt is a 500 MB file of shuffled 5-digit numbers.
Speed, as observed by the author: multiprocessing + queue
< multiprocessing + global variable (which does not achieve what the
program is meant to do) < multithreading + queue
< multithreading + global variable < plain global variable.
Processes have to communicate through a Queue or a Pipe; they cannot
change a global variable.
"""

import os
import time
from multiprocessing import Process
from multiprocessing import Queue
import threading
# One accumulator string per experiment below.
txtfile = ''   # multiprocessing + global variable
txtfile2 = ''  # multithreading + global variable
txtfile3 = ''  # starting text placed on the queue
txtfile4 = ''  # plain global variable
# Queue used by the queue-based experiments.
q = Queue()

# My laptop has 4 cores, so at most 4 processes can execute at the same
# time; any further processes have to wait for a free core.  (The GIL applies
# inside a single interpreter and does not cap the number of processes.)
with open('big_file.txt', 'r') as f:

    def txtmap():
        """Append the next 25 comma-terminated fields of ``f`` to ``txtfile``.

        Prints the last character read, the process id and the accumulated
        text when done.
        """
        i = 0
        global txtfile
        while i < 25:
            txt = f.read(1)
            if not txt:
                # read() returns '' at end of file; stop instead of
                # looping forever.
                break
            if txt == ',':
                i += 1
            txtfile += txt
        # NOTE(review): the original indentation was lost; these debug
        # prints are assumed to run once, after the loop -- confirm.
        print(txt)
        print(os.getpid())
        print(txtfile)

    start = time.time()
    for i in range(4):
        txtpi = Process(target=txtmap)
        txtpi.start()
        # Joining right after start() runs the 4 processes one at a time.
        txtpi.join()
    # Each child changes its own copy of ``txtfile``; the value printed here
    # is the parent's.
    print('多進程全局變量', time.time() - start, '\n', txtfile)
    # Prints True when the parent's ``txtfile`` is non-empty, else False.
    print(bool(txtfile))

with open('big_file.txt', 'r') as f:

    def txtmap():
        """Append the next 10 comma-terminated fields of ``f`` to ``txtfile2``."""
        i = 0
        global txtfile2
        while i < 10:
            txt = f.read(1)
            if not txt:
                # read() returns '' at end of file; stop instead of
                # looping forever.
                break
            if txt == ',':
                i += 1
            txtfile2 += txt

    start = time.time()
    for i in range(10):
        txtti = threading.Thread(target=txtmap)
        txtti.start()
        # The threads share ``f`` and ``txtfile2``; joining each one before
        # starting the next keeps the reads in file order.
        txtti.join()
    print('多線程全局變量', time.time() - start, '\n', txtfile2)

with open('big_file.txt', 'r') as f:
    # Seed the queue with the starting text and the initial file position.
    q.put((txtfile3, f.tell()))

    def txtmap(qget):
        """Read 25 comma-terminated fields starting at a saved position.

        Prints the process id, then puts the extended text and the new file
        position back on ``q`` as a ``(text, position)`` tuple.

        Args:
            qget: ``(text_so_far, file_position)`` tuple taken from ``q``.
        """
        txtup = qget[0]
        i = 0
        # Continue from where the previous worker stopped.
        f.seek(qget[1])
        while i < 25:
            txt = f.read(1)
            if not txt:
                # read() returns '' at end of file; stop instead of
                # looping forever.
                break
            if txt == ',':
                i += 1
            txtup += txt
        print(os.getpid())
        q.put((txtup, f.tell()))

    # NOTE(review): the workers read through the parent's ``f``; this
    # presumably relies on the fork start method -- confirm.
    start = time.time()
    for i in range(4):
        # ``args`` is the tuple of positional arguments, hence the 1-tuple
        # wrapped around the tuple returned by q.get().
        txtp2i = Process(target=txtmap, args=(q.get(),))
        txtp2i.start()
        txtp2i.join()
    # Only the text part of the final (text, position) tuple is printed.
    print('多進程加隊列', time.time() - start, '\n', q.get()[0])

# The items in queue q have all been taken out by now, so put one in again;
# otherwise q.get() below would block forever waiting for an item.
q.put(txtfile3)
with open('big_file.txt', 'r') as f:

    def txtmap(txtup):
        """Append the next 10 comma-terminated fields of ``f`` to ``txtup``.

        The extended text is put on ``q`` for the next worker.

        Args:
            txtup: Text accumulated so far, taken from ``q``.
        """
        i = 0
        while i < 10:
            txt = f.read(1)
            if not txt:
                # read() returns '' at end of file; stop instead of
                # looping forever.
                break
            if txt == ',':
                i += 1
            txtup += txt
        q.put(txtup)

    start = time.time()
    for i in range(10):
        # ``args`` is the tuple of positional arguments, so the single value
        # from q.get() is wrapped in a 1-tuple.
        txtt2i = threading.Thread(target=txtmap, args=(q.get(),))
        txtt2i.start()
        txtt2i.join()
    print('多線程加隊列', time.time() - start, '\n', q.get())

with open('big_file.txt', 'r') as f:

    def txtmap2():
        """Append the next 10 comma-terminated fields of ``f`` to ``txtfile4``."""
        i = 0
        global txtfile4
        while i < 10:
            txt = f.read(1)
            if not txt:
                # read() returns '' at end of file; stop instead of
                # looping forever.
                break
            if txt == ',':
                i += 1
            txtfile4 += txt

    # Plain sequential baseline: no threads, no processes.
    start = time.time()
    for i in range(10):
        txtmap2()
    print('普通全局變量', time.time() - start, '\n', txtfile4)

# The file size returned by os.path.getsize is the same as the position
# returned by seeking to the end of the file.
size_on_disk = os.path.getsize('big_file.txt')
print(size_on_disk)

with open('big_file.txt', 'r') as f:
    # whence=os.SEEK_END (2) seeks relative to the end of the file.
    end_position = f.seek(0, os.SEEK_END)
    print(end_position)

# Finally saw several processes running at the same time -- delightful!
# And they do not clash over the file pointer.
from multiprocessing import Process
import time

with open('test.txt', 'r') as f:

    def readseek(num):
        """Seek ``f`` to ``num``, print the time and the next 10 characters, then sleep 10 s."""
        f.seek(num)
        stamp = time.time()
        chunk = f.read(10)
        print(stamp, chunk)
        time.sleep(10)

    # Create all four workers first, start them all, then wait for them all.
    workers = [
        Process(target=readseek, args=(offset,))
        for offset in (20, 60, 120, 160)
    ]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
    print(time.time())

發表評論

所有評論

還沒有人評論，想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.

讀寫文件，多進程和多線程的一些總結

MySQL 核心模塊揭祕 | 18 期 | 鎖在內存里長什麼樣*

使用perf工具生成火焰圖

大齡程序員思考

響應式界面控件DevExtreme * 更強的數據分析和可視化功能

HttpSecurity 是如何組裝過濾器鏈的

數說海南——近6年海南各市縣人口簡單看

長序列中Transformers的高級注意力機制總結

WebStorm 創建 Vue 項目

讀寫文件，多進程和多線程的一些總結

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結