100天生信-Day11
最近一直在做濕實驗,需要大量設計引物,發現primer3有python版本,可以批量設計,簡直神器。
import primer3
import pandas as pd
## primer_condition
# Global primer3 settings shared by every design call in this script.
# Tag meanings and units follow the primer3 manual.
global_args = {
    # how many primer pairs to report per template
    'PRIMER_NUM_RETURN': 10,

    # primer length: optimum / minimum / maximum
    'PRIMER_OPT_SIZE': 23,
    'PRIMER_MIN_SIZE': 20,
    'PRIMER_MAX_SIZE': 25,

    # melting temperature: optimum / minimum / maximum
    'PRIMER_OPT_TM': 59.0,
    'PRIMER_MIN_TM': 57.0,
    'PRIMER_MAX_TM': 61.0,

    # GC content bounds (percent)
    'PRIMER_MIN_GC': 40.0,
    'PRIMER_MAX_GC': 60.0,

    # NOTE(review): with thermodynamic alignment switched on, primer3 is
    # documented to apply the *_TH variants of the self/pair complementarity
    # limits -- confirm that the non-TH limits further down still take effect.
    'PRIMER_THERMODYNAMIC_OLIGO_ALIGNMENT': 1,

    # 100 exceeds PRIMER_MAX_SIZE (25), so single-base runs are effectively
    # unrestricted
    'PRIMER_MAX_POLY_X': 100,
    'PRIMER_INTERNAL_MAX_POLY_X': 100,

    # reaction conditions used for the Tm calculation
    'PRIMER_SALT_MONOVALENT': 50.0,
    'PRIMER_DNA_CONC': 50.0,

    # reject primers containing any ambiguous base (N)
    'PRIMER_MAX_NS_ACCEPTED': 0,

    # self- and pair-complementarity limits (whole primer / 3' end)
    'PRIMER_MAX_SELF_ANY': 12,
    'PRIMER_MAX_SELF_END': 8,
    'PRIMER_PAIR_MAX_COMPL_ANY': 12,
    'PRIMER_PAIR_MAX_COMPL_END': 8,

    # amplicon length window
    'PRIMER_PRODUCT_SIZE_RANGE': [140, 160],

    # number of G/C bases required at the 3' end
    'PRIMER_GC_CLAMP': 1,
}
## function of read fasta
def readfasta(lines):
    """Parse FASTA-formatted text lines into record names and sequences.

    A record starts at a line whose first non-blank character is ">"; the
    rest of that line (without the leading ">") is the record name. All
    following lines up to the next header are joined into one sequence.
    Surrounding whitespace, including "\\r\\n" line endings, is stripped
    from every line. Text that appears before the first header is ignored.

    Args:
        lines: Iterable of text lines, e.g. the result of ``f.readlines()``
            or an open file object.

    Returns:
        A tuple ``(index, seq)`` of two equally long lists: record names and
        the matching sequences. A header with no sequence lines yields "".
    """
    index = []
    seq = []
    # Pieces of the record currently being read; None until the first header
    # so that leading non-FASTA text is dropped rather than kept as a record.
    pieces = None
    for line in lines:
        text = line.strip()
        if text.startswith(">"):
            if pieces is not None:
                seq.append("".join(pieces))
            index.append(text[1:])
            pieces = []
        elif pieces is not None:
            pieces.append(text)
    # Flush the final record, which has no following header to trigger it.
    if pieces is not None:
        seq.append("".join(pieces))
    return index, seq
## function of split table in txt
def str_split(lines):
    """Break one text row into its whitespace-separated fields.

    Args:
        lines: A single string (despite the plural name), e.g. one row of a
            tab- or space-delimited table.

    Returns:
        List of the non-empty fields, in order; empty list for a blank row.
    """
    return lines.split()
## read fasta
# The context manager closes the input file even if reading raises.
with open('/Users/lichuanshun/Desktop/Ta_NaCl_cds_name.txt', 'r') as f:
    lines = f.readlines()
# index: record names, seq: matching template sequences (parallel lists)
(index, seq) = readfasta(lines)
## build table
# One DataFrame per template is collected here and concatenated once after
# the loop. DataFrame.append was removed in pandas 2.0, and a single concat
# also avoids re-copying the growing table for every template.
gene_frames = []
## primer finder, dic -> datafrme
for GeneID, template in zip(index, seq):
    GeneID = str(GeneID)
    template = str(template)
    seq_args = {
        'SEQUENCE_ID': GeneID,
        'SEQUENCE_TEMPLATE': template,
        # primer3 reads this as [start, length], so the full template is
        # [0, len(template)]; passing len - 1 would exclude the last base.
        'SEQUENCE_INCLUDED_REGION': [0, len(template)],
    }
    primer3_result = primer3.bindings.designPrimers(seq_args, global_args)
    pair_count = primer3_result["PRIMER_PAIR_NUM_RETURNED"]
    if not pair_count:
        # no primer pair found for this template: nothing to tabulate
        continue
    ## change dic
    # Turn the flat result (keys such as PRIMER_LEFT_0_SEQUENCE) into
    # {tag without the pair number: [value for pair 0, pair 1, ...]}.
    primer3_result_table_dict = {}
    for j in range(pair_count):
        primer_id = str(j)
        for key in primer3_result:
            # Match the pair number as a whole "_"-separated token; a plain
            # substring test would also match "1" inside "10", "11", ...
            key_parts = key.split("_")
            if primer_id not in key_parts:
                continue
            # drop the pair number (and its underscore) from the key
            key_parts.remove(primer_id)
            info_tag = "_".join(key_parts)
            # group the values of the different primer pairs under one tag
            primer3_result_table_dict.setdefault(info_tag, []).append(primer3_result[key])
    ## append dataframe
    # row labels: <record name>_<1-based pair number>
    df_index = [GeneID + "_" + str(m + 1) for m in range(pair_count)]
    gene_frames.append(pd.DataFrame(primer3_result_table_dict, index=df_index))
# pd.concat rejects an empty list, so fall back to an empty table
primer_df = pd.concat(gene_frames) if gene_frames else pd.DataFrame()
## writing csv & txt
# Export the combined primer table twice: once comma-separated and once
# tab-separated. Row labels are written as the first column in both files.
result_csv_path = "/Users/lichuanshun/Desktop/primer3_result.csv"
result_txt_path = "/Users/lichuanshun/Desktop/primer3_result.txt"
primer_df.to_csv(result_csv_path)
primer_df.to_csv(result_txt_path, sep='\t')
## writing fasta of primer
# The sequences are looked up in primer_df by column name. Parsing the
# exported text table by position and splitting its rows on whitespace would
# shift the columns whenever a record name contains a space or the column
# order changes.
left_col = 'PRIMER_LEFT_SEQUENCE'
right_col = 'PRIMER_RIGHT_SEQUENCE'
# The context manager closes the output file even if a write fails.
with open('/Users/lichuanshun/Desktop/qpcr_primer.txt', 'w') as fo:
    # An empty table (no primer pair found at all) has no such columns;
    # the output file is then created empty.
    if left_col in primer_df.columns and right_col in primer_df.columns:
        for primer_name, left_seq, right_seq in zip(
                primer_df.index, primer_df[left_col], primer_df[right_col]):
            # two FASTA records per pair: <row label>_F and <row label>_R
            fo.write('>' + str(primer_name) + '_F' + '\n'
                     + str(left_seq) + '\n'
                     + '>' + str(primer_name) + '_R' + '\n'
                     + str(right_seq) + '\n')
參考教程:
https://mp.weixin.qq.com/s/MA7Tw7KOwB1phZmUoEy02g
http://www.chenlianfu.com/?tag=primer