# Compute protein molecular weight (MW), isoelectric point (pI), and extinction coefficient (EC).
# -*- coding: UTF-8 -*-
__author__='zsc'
import sys
def ExPASY(P):
    """Summarise a protein sequence as a tab-separated MW / pI / EC string.

    Args:
        P: Protein sequence as a string of one-letter residue codes. Only the
            20 upper-case letters in ``ACDEFGHIKLMNPQRSTVWY`` are counted;
            any other character is ignored.

    Returns:
        A newline-terminated string of the form
        ``"MW=<kD>kD\\tpI=<pI>\\tEC(...)=<value>\\n"`` with every number
        rounded to two decimals.
    """
    AA = "ACDEFGHIKLMNPQRSTVWY"
    # Average residue masses (amino acid minus one water), in Da.
    AA_residue = {'A': 71.0788, 'C': 103.1388, 'D': 115.0886, 'E': 129.1155,
                  'F': 147.1766, 'G': 57.0519, 'H': 137.1411, 'I': 113.1594,
                  'K': 128.1741, 'L': 113.1594, 'M': 131.1926, 'N': 114.1038,
                  'P': 97.1167, 'Q': 128.1307, 'R': 156.1875, 'S': 87.0782,
                  'T': 101.1051, 'V': 99.1326, 'W': 186.2132, 'Y': 163.176}
    # Side-chain pKa values used by the charge model below.
    pI_e = {'C': 9.0, 'D': 4.0, 'E': 4.5, 'H': 6.4, 'K': 10.4, 'R': 12.0,
            'Y': 10.0}
    COOH = "CDEY"  # residues that become negatively charged when deprotonated
    NH2 = "HKR"    # residues that are positively charged when protonated

    number = {k: P.count(k) for k in AA}

    # Summed residue mass; one water (18.01524 Da) is added when reporting MW.
    a = sum(number[k] * AA_residue[k] for k in AA)

    # 1490 / 5500 / 125 match the per-Tyr, per-Trp and per-cystine
    # coefficients documented by ExPASy ProtParam. A cystine is a *pair* of
    # cysteines, so the pair count must be a floor division; plain "/" would
    # credit half a cystine for an odd cysteine count on Python 3.
    absorbance = (number['Y'] * 1490
                  + number['W'] * 5500
                  + (number['C'] // 2) * 125)
    # The sum is normalised by the residue mass (water not included).
    # NOTE(review): ProtParam's "Abs 0.1%" divides by the full molecular
    # weight instead; confirm which normalisation is intended.
    # A sequence with no recognised residues has zero mass; report 0.0
    # rather than raising ZeroDivisionError.
    E = absorbance / a if a else 0.0

    def net_negative_charge(x):
        """Return (negative charges - positive charges) at pH ``x``."""
        negative = 0
        positive = 0
        for m in COOH:
            negative += (number[m] * (10 ** x)) / (10 ** x + 10 ** pI_e[m])
        for n in NH2:
            positive += (number[n] * 10 ** pI_e[n]) / (10 ** x + 10 ** pI_e[n])
        # The two extra terms model the C-terminus (pKa 3.2, negative) and
        # the N-terminus (pKa 8.2, positive).
        return (negative
                + 10 ** x / (10 ** x + 10 ** 3.2)
                - 10 ** 8.2 / (10 ** x + 10 ** 8.2)
                - positive)

    # Bisection for the pH where the net charge is zero, searched on
    # [3.2, 12.0]. Eleven halvings narrow the bracket to 8.8 / 2**11, about
    # 0.004 pH units, before rounding to two decimals.
    r = 3.2
    s = 12.0
    x = (r + s) / 2
    for _ in range(11):
        charge = net_negative_charge(x)
        if charge > 0:
            # Net negative: the pH is above the pI, so lower the upper bound.
            s = x
        elif charge < 0:
            # Net positive: the pH is below the pI, so raise the lower bound.
            r = x
        else:
            # Exact root; x would not move any further.
            break
        x = (r + s) / 2

    # NOTE(review): the EC label text is emitted verbatim from the original
    # script; it is runtime output and is intentionally left unchanged.
    str_out = "MW=" + str(round((a + 18.01524) / 1000, 2)) + "kD\tpI=" + str(round(x, 2)) + "\t"
    str_out += "EC(消光系數(shù))=" + str(round(E, 2)) + "\n"
    return str_out
def _write_record(f_out, header, seq_parts):
    """Write one result line for a FASTA record if it has any sequence."""
    if seq_parts:
        # header[1:] drops the leading '>' (and is '' when no header was seen).
        f_out.write(header[1:] + "\t" + ExPASY(''.join(seq_parts)))


def calc(in_file, out_file):
    """Read FASTA records from ``in_file`` and write one line per record.

    Each output line is the header text without its leading ``>``, a tab,
    and the string returned by ``ExPASY`` for the record's sequence.

    Sequence lines that follow a header are concatenated before analysis,
    so sequences wrapped over several lines produce a single result instead
    of one result per fragment. Blank lines are skipped, and a header with
    no sequence lines produces no output.

    Args:
        in_file: Path of the FASTA file to read.
        out_file: Path of the tab-separated report to write (overwritten).
    """
    header = ''
    seq_parts = []
    with open(in_file, 'r') as f_in, open(out_file, 'w') as f_out:
        for line in f_in:
            text = line.strip()
            if not text:
                continue
            if text.startswith('>'):
                # A new header closes the previous record.
                _write_record(f_out, header, seq_parts)
                header = text
                seq_parts = []
            else:
                seq_parts.append(text)
        # Flush the last record, which has no following header to close it.
        _write_record(f_out, header, seq_parts)
if __name__ == "__main__":
    # Command-line entry point: <script> <input FASTA> <output file>.
    # Fail with a usage message instead of a bare IndexError when an
    # argument is missing; extra arguments are ignored as before.
    if len(sys.argv) < 3:
        sys.exit("Usage: " + sys.argv[0] + " <input_fasta> <output_file>")
    input_file = sys.argv[1]
    output_file = sys.argv[2]
    calc(input_file, output_file)