0. python 讀取excel
__author__ = 'HM'
# Walk-through of reading an Excel workbook with the third-party xlrd package.
# xlrd is pure Python, so it also works on Linux.
# NOTE: xlrd 2.0 and later only read the legacy .xls format.

# Open the workbook
import xlrd
wb = xlrd.open_workbook('myworkbook.xls')

# Check the sheet names (the list is only echoed in an interactive session)
wb.sheet_names()

# Get the first sheet either by index or by name
sh = wb.sheet_by_index(0)
sh = wb.sheet_by_name(u'Sheet1')

# Iterate through rows, returning each as a list that you can index:
for rownum in range(sh.nrows):
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(sh.row_values(rownum))

# If you just want the first column:
first_column = sh.col_values(0)

# Index individual cells:
cell_A1 = sh.cell(0, 0).value
cell_C4 = sh.cell(rowx=3, colx=2).value
# (Note Python indices start at zero but Excel starts at one)

# Turns out the put_cell() method isn't supported, so ignore the following
# section (Thanks for the heads up, John!)
# Put something in the cell:
row = 0
col = 0
ctype = 1  # see the list of ctypes below
value = 'asdf'
xf = 0  # extended formatting (use 0 to use default)
sh.put_cell(row, col, ctype, value, xf)
sh.cell(0, 0)  # text:u'asdf'
sh.cell(0, 0).value  # 'asdf'
# Possible ctypes: 0 = empty, 1 = string, 2 = number, 3 = date, 4 = boolean, 5 = error
1. Read from Excel, process, and write to CSV
__author__ = 'HM'
# Copy selected rows of the first worksheet of an Excel workbook into a CSV
# file. A row is kept only when both of its first two cells are numbers
# inside [-LIMIT, LIMIT].
# NOTE: xlrd 2.0 and later only read the legacy .xls format, so opening an
# .xlsx workbook as done below needs an older xlrd release.
import csv

import xlrd
import xlwt  # not referenced by this snippet; kept from the original listing

LIMIT = 11.547


def _within_limit(cell):
    """Return True when *cell* is a number inside [-LIMIT, LIMIT].

    Non-numeric cells (e.g. header text) are rejected explicitly. Python 2
    skipped them implicitly because any string compares greater than any
    number; Python 3 would raise TypeError on the same comparison.
    """
    return isinstance(cell, (int, float)) and -LIMIT <= cell <= LIMIT


# Open the workbook and get the first sheet by index
wb_in = xlrd.open_workbook('123lie.xlsx')
sh_in = wb_in.sheet_by_index(0)

# `with` closes the output file even if a row fails half-way through.
with open("new123lie.csv", 'w') as fout:
    # csv.writer quotes cells that contain commas, quotes or newlines, which
    # a plain ','.join() would silently turn into extra columns.
    # lineterminator='\n' keeps the line ending the original script wrote.
    writer = csv.writer(fout, lineterminator='\n')
    # Iterate through rows, each returned as a list that can be indexed:
    for rownum in range(sh_in.nrows):
        data = sh_in.row_values(rownum)
        if _within_limit(data[0]) and _within_limit(data[1]):
            writer.writerow([str(d) for d in data])
3. 讀寫unicode文件
import codecs

# Path of the demo file. The original snippet used `fn` without defining it,
# so it failed with NameError; replace this placeholder with your own path.
fn = 'unicode_demo.txt'

# Write unicode text; codecs.open encodes it as UTF-8 on the way out.
# `with` closes the handle even if the write raises.
with codecs.open(fn, 'wb', 'utf-8') as f:
    f.write(u'1.python' + u'2.how to pythonic')

# Read it back; every line is decoded from UTF-8 into a unicode string.
with codecs.open(fn, 'r', 'utf-8') as f:
    lines = f.readlines()
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(lines)
4. pickle讀寫
(1)簡單的讀寫:
__author__ = 'HM'
# Minimal pickle round trip: dump one dict to a file, then load it back.
try:
    import cPickle as pickle  # Python 2: C implementation
except ImportError:
    import pickle  # Python 3: cPickle no longer exists as a separate module

vtuple = {1: 'a', 2: 'b'}
print(vtuple)

# Pickle data is a byte stream, so the file is opened in binary mode; text
# mode can corrupt it on Windows and is rejected outright by Python 3.
with open('1.txt', 'wb') as f:
    pickle.dump(vtuple, f)

with open('1.txt', 'rb') as f:
    r = pickle.load(f)
print(r)
(2)複雜的讀寫(自定義類+實例個數不確定):
寫:
import cPickle as pickle
class Tuple(object):
    """Record built from selected positions of one row-like sequence.

    Only positions 2, 3, 4, 5, 7 and 8 of the input are kept; positions
    0, 1 and 6 are ignored.
    """

    def __init__(self, dlist):
        """Copy the selected items of *dlist* onto the instance.

        dlist: an indexable sequence with at least 9 items. A shorter
        sequence raises IndexError.
        """
        # NOTE(review): the attribute names suggest a vehicle id, a timestamp
        # and GPS readings -- confirm against the schema of the data source.
        self.vid = dlist[2]
        self.gtime = dlist[3]
        self.gpsx = dlist[4]
        self.gpsy = dlist[5]
        self.gpsd = dlist[7]
        self.pstate = dlist[8]
...
...(中間省略若干行)
# Pickle one Tuple per fetched row, one after another, into the open file `f`.
# Because every record is dumped separately, the reader has to call
# pickle.load() repeatedly until the end of the file.
# NOTE(review): `mcursor` and `f` come from lines omitted from this listing;
# presumably `f` is the output file -- it should be opened in binary mode
# ('wb') for pickle data. Confirm against the full script.
for dlist in mcursor.fetchall():
    vtuple = Tuple(dlist)
    pickle.dump(vtuple, f)
讀:
import cPickle as pickle
class Tuple(object):
    """Record holding six values taken from one row-like sequence.

    Pickle stores only a reference to the class, so the code that loads the
    records needs a `Tuple` class like this one available at load time.
    """

    def __init__(self, dlist):
        """Store items 2, 3, 4, 5, 7 and 8 of *dlist* as attributes.

        dlist: an indexable sequence with at least 9 items; items 0, 1 and 6
        are not used. A shorter sequence raises IndexError.
        """
        self.vid = dlist[2]
        self.gtime = dlist[3]
        self.gpsx = dlist[4]
        self.gpsy = dlist[5]
        self.gpsd = dlist[7]
        self.pstate = dlist[8]
# Load every pickled record from the file, then print the fields of each one.
# NOTE: unpickling can execute arbitrary code -- only load files you created
# yourself.
rlist = []
# Pickle data is a byte stream, so the file is read in binary mode (it must
# have been written in binary mode as well). `with` closes the file even if
# loading fails.
with open("F:\\0.txt", 'rb') as f:
    while True:
        try:
            # The number of records is not known in advance: keep loading
            # until pickle reports the end of the file.
            rlist.append(pickle.load(f))
        except (EOFError, pickle.UnpicklingError):
            break

for result in rlist:
    # One %-formatted argument prints the six space-separated values
    # identically on Python 2 and Python 3.
    print('%s %s %s %s %s %s' % (result.vid, result.gtime, result.gpsx,
                                 result.gpsy, result.gpsd, result.pstate))
print("over")
5. 用.csv還是.xlsx保存文件
今天分析組合網課的作業情況,本身的數據就比較亂,用.csv處理比較麻煩(.csv本身用逗號分隔,但如果單元格裏也有逗號……),後來轉成.xlsx就ok了。
雖然.csv 文件比 .xlsx好處理,但由於它是以逗號分隔的,如果數據有問題(比如單元內容裏包含逗號等),處理起來更加麻煩,用xlsx反而更好。
總之:
如果數據比較整齊,用.csv
如果數據不整齊,用.xlsx