今天是2020-02-09
作者:沙雕學習小組
這裡有視頻教程:
https://www.bilibili.com/video/av87724182
今天想實現這個功能:
差異分析得到了200多個基因(甚至更多)
我要一個一個把基因的summary信息得到,要手動一個一個查可能要查到下個星期,周五就要匯報了啊……!
有python怕啥?!不要慌
動手之前先動腦
step1:獲取這個基因在NCBI上的summary信息——輸入gene.txt得到genesummary.txt
step2:檢查輸出文件是否有空行,若有刪掉。輸入genesummary.txt得到newsummary.txt
step3:批量翻譯——輸入newsummary.txt,得到genetrans.txt
step1:獲取這個基因在NCBI上的summary信息——輸入gene.txt得到genesummary.txt
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Author:Abao
from Bio import Entrez # pip install biopython
#from translate_api.translate_api import api # pip install translate_api
#from Pytrans import *
import re
# Contact e-mail handed to Biopython's Entrez module for the NCBI requests below.
Entrez.email = "shinningbzw@foxmail.com"

# Adjust the file paths/names here (absolute paths are recommended).
output_file = 'genesummary.txt'  # output file; mind the path (absolute path recommended)
# Input file: de-duplicated gene list (save the gene column as a txt file,
# e.g. `uniq *.txt > gene_list.txt`).
input_file = 'gene.txt'

gene_list = []
line_c = []

# Number of lines in the input file. A context manager is used so the file
# handle is closed right away instead of being left to the garbage collector.
with open(input_file, 'r') as count_fh:
    count = sum(1 for _ in count_fh)

print("Waiting...")
#from Pytrans import *
import requests
from Pytrans import *
def google_translate(content):
    """Translate ``content`` through the Google Translate web endpoint.

    The request asks for English -> Simplified Chinese (``sl=en``,
    ``tl=zh-CN``) and is signed with the ``tk`` value computed by
    ``Pytrans.getTk``.

    Args:
        content: Text to translate.

    Returns:
        The concatenated translated segments; ``''`` for empty or
        whitespace-only input; ``None`` when ``content`` is longer than
        4891 characters (a notice is printed and no request is sent).
    """
    # Validate first so no token is computed and no request is made for
    # input that will be rejected anyway.
    if len(content) > 4891:
        print("too long!!!")
        return None
    if not content.strip():
        return ''

    tk = Pytrans().getTk(content)

    # Adjacent string literals keep the URL on one logical line. The previous
    # triple-quoted literal spanned several source lines, which embedded line
    # breaks in the query string.
    url = (
        "http://translate.google.cn/translate_a/single?client=t&sl=en"
        "&tl=zh-CN&hl=zh-CN&dt=at&dt=bd&dt=ex&dt=ld&dt=md&dt=qca&dt=rw&dt=rm&dt=ss"
        "&dt=t&ie=UTF-8&oe=UTF-8&clearbtn=1&otf=1&pc=1&srcrom=0&ssel=0&tsel=0&kc=2"
    )
    result = requests.get(url, params={'tk': tk, 'q': content})

    # The first element of the JSON payload is a list of segments; the first
    # item of each segment is the translated text and may be null.
    segments = result.json()[0]
    return ''.join(seg[0] for seg in segments if seg[0] is not None)
#a = google_translate("hello,Input file will be translated, please be patient")
#print(a)
# get gene list
# Read one gene symbol per line. The first line is treated as the column
# header and dropped (as before). Blank lines and any stray "基因" header
# line are skipped as well, and an empty file no longer raises IndexError.
with open(input_file) as gene_fh:
    stripped_rows = [row.strip() for row in gene_fh]
gene_list = [gene for gene in stripped_rows[1:] if gene and gene != "基因"]

# Removes every square-bracketed span from a summary (raw string so that
# "\[" is not treated as an invalid string escape).
rm_pattern = re.compile(r'\[.*?\]')

Entrez.email = "shinningbzw@foxmail.com"
total = len(gene_list)

with open(output_file, 'a', encoding='utf-8') as f:
    for done, gene in enumerate(gene_list, start=1):
        gene_term = "(" + gene + "[Gene Name]) AND Homo sapiens[Organism]"

        handle = Entrez.esearch(db="gene", term=gene_term)
        id_list = Entrez.read(handle)['IdList']
        handle.close()

        if id_list:
            # Use the first hit, as before.
            sum_handle = Entrez.esummary(db="gene", id=id_list[0])
            sum_record = Entrez.read(sum_handle)
            sum_handle.close()
            r_gene_sum = sum_record['DocumentSummarySet']['DocumentSummary'][0]['Summary']
            gene_sum = rm_pattern.sub('', r_gene_sum)
        else:
            # A symbol with no match used to abort the whole run with an
            # IndexError; write a placeholder so the output keeps one
            # name line plus one summary line for every gene.
            print("No NCBI Gene record found for " + gene)
            gene_sum = "NA"

        # Translation is not done here; it is handled by the separate
        # batch-translation script.
        f.write(gene + "\n" + gene_sum + "\n")

        # Report progress against the number of genes actually processed,
        # so the last gene prints 100%.
        perc = (done / total) * 100
        print("Completed " + str(int(perc)) + "%")
step2:檢查輸出文件是否有空行,若有刪掉。輸入genesummary.txt得到newsummary.txt
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Author:cici
# Adjust the file paths here, and double-check the file names.
# Copies genesummary.txt to newsummary.txt, dropping every line that holds
# nothing but whitespace.
with open('genesummary.txt', 'r', encoding='utf-8') as fr:
    with open('newsummary.txt', 'w', encoding='utf-8') as fd:
        # str.split() returns an empty (falsy) list for whitespace-only lines.
        fd.writelines(row for row in fr.readlines() if row.split())
print('輸出成功....')
step3:批量翻譯
這裡先寫個函數
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Author:Topshi
import execjs
class Pytrans:
    """Wrapper around a JavaScript routine, run through ``execjs``.

    The JavaScript defines ``TL(a)`` (the entry point) and its helper
    ``RL(a, b)``; ``getTk`` forwards a text to ``TL`` and returns the result.
    """

    # JavaScript source compiled by __init__. It is runtime text: keep it
    # unchanged.
    _TK_SOURCE = """
function TL(a) {
var k = "";
var b = 406644;
var b1 = 3293161072;
var jd = ".";
var $b = "+-a^+6";
var Zb = "+-3^+b+-f";
for (var e = [], f = 0, g = 0; g < a.length; g++) {
var m = a.charCodeAt(g);
128 > m ? e[f++] = m : (2048 > m ? e[f++] = m >> 6 | 192 : (55296 == (m & 64512) && g + 1 < a.length && 56320 == (a.charCodeAt(g + 1) & 64512) ? (m = 65536 + ((m & 1023) << 10) + (a.charCodeAt(++g) & 1023),
e[f++] = m >> 18 | 240,
e[f++] = m >> 12 & 63 | 128) : e[f++] = m >> 12 | 224,
e[f++] = m >> 6 & 63 | 128),
e[f++] = m & 63 | 128)
}
a = b;
for (f = 0; f < e.length; f++) a += e[f],
a = RL(a, $b);
a = RL(a, Zb);
a ^= b1 || 0;
0 > a && (a = (a & 2147483647) + 2147483648);
a %= 1E6;
return a.toString() + jd + (a ^ b)
};
function RL(a, b) {
var t = "a";
var Yb = "+";
for (var c = 0; c < b.length - 2; c += 3) {
var d = b.charAt(c + 2),
d = d >= t ? d.charCodeAt(0) - 87 : Number(d),
d = b.charAt(c + 1) == Yb ? a >>> d: a << d;
a = b.charAt(c) == Yb ? a + d & 4294967295 : a ^ d
}
return a
}
"""

    def __init__(self):
        # Compile the script once per instance; getTk() calls into it.
        self.ctx = execjs.compile(self._TK_SOURCE)

    def getTk(self, text):
        """Return the value of the JavaScript function ``TL`` for ``text``."""
        token = self.ctx.call("TL", text)
        return token
調用這個函數——輸入newsummary.txt,得到genetrans.txt
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Author:Topshi
from Pytrans import *
import requests
def google_translate(content):
    """Translate ``content`` through the Google Translate web endpoint.

    The request asks for English -> Simplified Chinese (``sl=en``,
    ``tl=zh-CN``) and is signed with the ``tk`` value computed by
    ``Pytrans.getTk``.

    Args:
        content: Text to translate.

    Returns:
        The concatenated translated segments; ``''`` for empty or
        whitespace-only input; ``None`` when ``content`` is longer than
        4891 characters (a notice is printed and no request is sent).
    """
    # Validate first so no token is computed and no request is made for
    # input that will be rejected anyway.
    if len(content) > 4891:
        print("too long!!!")
        return None
    if not content.strip():
        return ''

    tk = Pytrans().getTk(content)

    # Adjacent string literals keep the URL on one logical line. The previous
    # triple-quoted literal spanned several source lines, which embedded line
    # breaks in the query string.
    url = (
        "http://translate.google.cn/translate_a/single?client=t&sl=en"
        "&tl=zh-CN&hl=zh-CN&dt=at&dt=bd&dt=ex&dt=ld&dt=md&dt=qca&dt=rw&dt=rm&dt=ss"
        "&dt=t&ie=UTF-8&oe=UTF-8&clearbtn=1&otf=1&pc=1&srcrom=0&ssel=0&tsel=0&kc=2"
    )
    result = requests.get(url, params={'tk': tk, 'q': content})

    # The first element of the JSON payload is a list of segments; the first
    # item of each segment is the translated text and may be null.
    segments = result.json()[0]
    return ''.join(seg[0] for seg in segments if seg[0] is not None)
# Smoke-test the translator once before processing the whole file.
a = google_translate("hello,Input file will be translated, please be patient")
print(a)

# newsummary.txt is written as UTF-8 by the blank-line removal script, so it
# is read back as UTF-8 instead of the platform default encoding. Blank lines
# are skipped: the original note here warned that they make the run fail.
with open('newsummary.txt', 'r', encoding='utf-8') as f:
    genotype_annotation_list = [
        text for text in (element.strip() for element in f) if text
    ]

count = 0
# The context manager guarantees genetrans.txt is flushed and closed even if
# a request fails part-way through.
with open('genetrans.txt', "a+", encoding='utf-8') as translate_file:
    for ga in genotype_annotation_list:
        translation = google_translate(ga)
        if translation is None:
            # google_translate returns None for over-long input; keep the
            # untranslated text so the output stays line-aligned with the
            # input instead of crashing on `None + '\n'`.
            translation = ga
        translate_file.write(translation + '\n')
        count += 1
        print('complete', '%.1f%%' % ((count / len(genotype_annotation_list)) * 100))
歡迎關注我的公眾號: