# The code is as follows:
import sys
import re
# Captures the quoted value of an ``FPKM "<value>"`` attribute.  Any quoted
# token is accepted (not only ``digits.digits``) so integer or scientific
# notation values do not abort the run.
_FPKM_PATTERN = re.compile(r'FPKM\s+"([^"]+)"')


def _transcript_gene_id(line):
    """Return the gene ID of a qualifying transcript record, or None.

    A line qualifies when it has at least nine tab-separated fields, its
    third field is ``transcript``, its ninth (attributes) field contains
    ``FPKM``, and the first ``;``-separated attribute contains both
    ``gene_id`` and ``Br``.  Comment lines, blank lines and truncated lines
    therefore yield None instead of raising IndexError.
    """
    fields = line.strip().split('\t')
    if len(fields) < 9 or fields[2] != 'transcript':
        return None
    attributes = fields[8]
    first_attribute = attributes.split(';')[0]
    # 'Br' is presumably a species-specific gene-ID prefix -- TODO confirm.
    if ('FPKM' not in attributes
            or 'gene_id' not in first_attribute
            or 'Br' not in first_attribute):
        return None
    tokens = first_attribute.split()
    if len(tokens) < 2:
        return None
    # Drop the first and last character, i.e. the surrounding quotes.
    return tokens[1][1:-1]


def _fpkm_value(line):
    """Return the FPKM value of *line* as a string, or None if absent."""
    match = _FPKM_PATTERN.search(line)
    return match.group(1) if match else None


def main(sample_list_path='filename',
         reference_gtf_path='V1-1_FRAS192030797-1a.gtf',
         output_path='333.txt'):
    """Write a tab-separated gene-by-sample FPKM table.

    Args:
        sample_list_path: Text file listing one sample GTF path per line.
            Blank lines are ignored.
        reference_gtf_path: GTF file whose qualifying transcript records
            define the initial set (and order) of gene rows.
        output_path: Destination of the table.  It is only opened after all
            inputs have been read, so a failure while reading does not leave
            a truncated output file behind.

    The header row is ``geneID`` followed by each sample path up to its
    first underscore.  Each following row is a gene ID followed by the FPKM
    values collected for it, in sample-list order.  A gene found in a sample
    but not in the reference gets its own row appended after the reference
    genes instead of raising KeyError.

    NOTE(review): one value is appended per qualifying transcript line, so a
    gene with several transcripts in one sample, or with none, produces a row
    whose columns do not line up with the header.  This matches the original
    behaviour; confirm whether per-sample aggregation or padding is wanted.
    """
    with open(sample_list_path, 'r') as sample_list:
        sample_paths = [entry.strip() for entry in sample_list if entry.strip()]

    # First row of the table: the header.
    header = ['geneID'] + [path.split('_')[0] for path in sample_paths]

    fpkm_by_gene = {}
    with open(reference_gtf_path, 'r') as reference:
        for line in reference:
            gene_id = _transcript_gene_id(line)
            if gene_id is not None:
                fpkm_by_gene.setdefault(gene_id, [])

    for path in sample_paths:
        with open(path, 'r') as sample:
            for line in sample:
                gene_id = _transcript_gene_id(line)
                if gene_id is None:
                    continue
                value = _fpkm_value(line)
                if value is None:
                    continue
                fpkm_by_gene.setdefault(gene_id, []).append(value)

    with open(output_path, 'w') as outfile:
        outfile.write('\t'.join(header) + '\n')
        for gene_id, values in fpkm_by_gene.items():
            outfile.write(gene_id + '\t' + '\t'.join(values) + '\n')


if __name__ == '__main__':
    main()