C/C++:使用 kseq.h 讀取 FASTA/FASTQ(支持 gzip 壓縮),依賴 zlib,鏈接時加 -lz
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <zlib.h>
#include "kseq.h"
/* Instantiate the kseq.h reader for gzFile handles, using gzread as the
 * read function. NOTE(review): this presumably generates the kseq_t type and
 * the kseq_init/kseq_read/kseq_destroy functions used below; confirm against
 * the bundled kseq.h. */
KSEQ_INIT(gzFile, gzread)
/*
 * Read every record from the (optionally gzip-compressed) FASTA/FASTQ file
 * named by argv[1] and print each sequence to stderr as "seq: <sequence>".
 *
 * Returns EXIT_SUCCESS when the whole file was read, EXIT_FAILURE when the
 * path argument is missing, the file cannot be opened, the parser cannot be
 * allocated, or kseq_read() reports an error.
 */
int main(int argc, char *argv[])
{
    if (argc < 2) {
        fprintf(stderr, "Usage: %s <in.fasta|in.fastq[.gz]>\n",
                argc > 0 ? argv[0] : "readfile");
        return EXIT_FAILURE;
    }

    gzFile fp = gzopen(argv[1], "r");
    if (fp == NULL) {
        fprintf(stderr,"Fail to open file: %s\n", argv[1]);
        return EXIT_FAILURE;
    }

    kseq_t *ks = kseq_init(fp);
    if (ks == NULL) {
        fprintf(stderr, "Fail to allocate parser for file: %s\n", argv[1]);
        gzclose(fp);
        return EXIT_FAILURE;
    }

    /* kseq_read() returns the sequence length (>= 0) for each record and a
     * negative value when it stops; keep that value so a parse/stream error
     * can be told apart from a normal end of file (-1). */
    int rc = 0;
    while ((rc = kseq_read(ks)) >= 0) {
        fprintf(stderr,"seq: %s\n", ks->seq.s);
    }

    kseq_destroy(ks);
    gzclose(fp);

    if (rc < -1) {
        fprintf(stderr, "Error while parsing %s (kseq_read returned %d)\n",
                argv[1], rc);
        return EXIT_FAILURE;
    }
    return EXIT_SUCCESS;
}
python使用SeqIO
# Requires Biopython: pip install biopython
from Bio import SeqIO

# Parse the FASTA file record by record and print each record's sequence.
# The loop must stay inside the `with` block so the handle is still open
# while SeqIO.parse() is being iterated.
with open("./clean_total_genomic.fna",'r') as fq:
    for record in SeqIO.parse(fq,'fasta'):
        print(record.seq)
自己寫文件處理
使用 fopen(C)或 open(Python)打開文件:FASTA 以 ">" 開頭的行作為每條記錄的標題行,其後直到下一個 ">" 之前都是序列;FASTQ 按行處理,通常每 4 行為一條記錄(標題、序列、"+"、質量值)