### Compute per-gene exon sizes, exon counts and intron sizes from a GFF3 file
# Flatten TSG.gff3 into one space-separated record per gene:
#     name seqid gene_start gene_end strand exon_start exon_end [exon_start exon_end ...]
# Redirect stdout to 1.txt, which the exon/intron statistics below read.
# No leading blank line is written, so 1.txt needs no manual clean-up.
def gene_exon_records(lines):
    """Group exon coordinates under the gene row that precedes them.

    Args:
        lines: iterable of GFF3 text lines (tab-separated, 9 columns).

    Yields:
        One list of strings per ``gene`` row:
        ``[name, seqid, start, end, strand, exon_start, exon_end, ...]``
        where the exon pairs come from every ``exon`` row found before the
        next ``gene`` row, in file order.

    ``name`` is the value of the second ``key=value`` entry of the
    attributes column; if a gene row has only one attribute, that one is
    used instead.  Directive/comment lines (``#...``), blank lines and rows
    with fewer than 9 columns are skipped, as are exon rows that appear
    before the first gene row.
    """
    record = None
    for line in lines:
        line = line.strip()
        # '##gff-version 3' and friends carry no feature columns.
        if not line or line.startswith('#'):
            continue
        cols = line.split('\t')
        if len(cols) < 9:
            continue
        feature = cols[2]
        if feature == 'gene':
            if record is not None:
                yield record
            # Parse the name only on gene rows: other rows (e.g. an exon
            # with just 'Parent=...') may not have a second attribute.
            attrs = cols[8].split(';')
            chosen = attrs[1] if len(attrs) > 1 else attrs[0]
            name = chosen.split('=')[-1]
            record = [name, cols[0], cols[3], cols[4], cols[6]]
        elif feature == 'exon' and record is not None:
            record.extend([cols[3], cols[4]])
    if record is not None:
        yield record


if __name__ == '__main__':
    with open('TSG.gff3', 'r') as f:
        for record in gene_exon_records(f):
            # Single-argument print() behaves the same on Python 2 and 3.
            print(' '.join(record))
# Exon sizes: for every gene record in 1.txt print "name exon_length",
# one line per exon.  Each record is whitespace-separated:
#     name seqid gene_start gene_end strand exon_start exon_end ...
def exon_sizes(lines):
    """Yield ``(gene_name, exon_length)`` for every exon of every record.

    Args:
        lines: iterable of record lines in the 1.txt layout (five leading
            fields, then exon start/end pairs).

    Yields:
        Tuples ``(name, end - start + 1)`` in file order; coordinates are
        treated as inclusive.  Blank lines produce nothing, and a trailing
        start coordinate without a matching end is ignored.
    """
    for line in lines:
        fields = line.split()
        if not fields:
            continue
        name = fields[0]
        coords = fields[5:]
        # Even positions are exon starts, odd positions the matching ends.
        for start, end in zip(coords[0::2], coords[1::2]):
            yield name, int(end) - int(start) + 1


if __name__ == '__main__':
    with open('1.txt', 'r') as f:
        for name, size in exon_sizes(f):
            # Single-argument print() behaves the same on Python 2 and 3.
            print('%s %d' % (name, size))
# Intron sizes: for every gene record in 1.txt print "name intron_length",
# one line per intron, or "name 0" for a single-exon gene.  Each record is
# whitespace-separated:
#     name seqid gene_start gene_end strand exon_start exon_end ...
def intron_sizes(lines):
    """Yield ``(gene_name, intron_length)`` for every record.

    Args:
        lines: iterable of record lines in the 1.txt layout (five leading
            fields, then exon start/end pairs).

    Yields:
        * ``(name, 0)`` for a record with exactly one exon (7 fields);
        * for a longer record on strand ``+`` or ``-``, one tuple per pair
          of consecutive exons, in file order, holding the number of bases
          between them.

    Records with fewer than 7 fields, and multi-exon records whose strand
    is neither ``+`` nor ``-``, produce nothing.

    The gap is computed as ``max(starts) - min(ends) - 1``, so it does not
    matter whether the exons of a gene are listed in ascending or
    descending genomic order.
    NOTE(review): consecutive exons are assumed to belong to the same
    transcript; records mixing several isoforms will not give meaningful
    intron lengths - confirm against how 1.txt was produced.
    """
    for line in lines:
        fields = line.split()
        n_fields = len(fields)
        if n_fields < 7:
            continue
        name = fields[0]
        if n_fields == 7:
            yield name, 0
            continue
        if fields[4] not in ('+', '-'):
            continue
        coords = [int(value) for value in fields[5:]]
        exons = list(zip(coords[0::2], coords[1::2]))
        for (start_a, end_a), (start_b, end_b) in zip(exons, exons[1:]):
            # abs() keeps the result non-negative when two listed exons
            # overlap instead of reporting a negative gap.
            yield name, abs(max(start_a, start_b) - min(end_a, end_b) - 1)


if __name__ == '__main__':
    with open('1.txt', 'r') as f:
        for name, size in intron_sizes(f):
            # Single-argument print() behaves the same on Python 2 and 3.
            print('%s %d' % (name, size))
# Exon counts: for every gene record in 1.txt print "name number_of_exons".
# Each record is whitespace-separated:
#     name seqid gene_start gene_end strand exon_start exon_end ...
def exon_counts(lines):
    """Yield ``(gene_name, exon_count)`` for every non-blank record.

    Args:
        lines: iterable of record lines in the 1.txt layout (five leading
            fields, then exon start/end pairs).

    Yields:
        Tuples ``(name, (field_count - 5) // 2)``: the five leading fields
        are discarded and the rest is counted in start/end pairs.  Blank
        lines are skipped rather than raising IndexError.
    """
    for line in lines:
        fields = line.split()
        if not fields:
            continue
        # Floor division keeps the count an integer on Python 2 and 3.
        yield fields[0], (len(fields) - 5) // 2


if __name__ == '__main__':
    with open('1.txt', 'r') as f:
        for name, count in exon_counts(f):
            # Single-argument print() behaves the same on Python 2 and 3.
            print('%s %d' % (name, count))