Exercise 1: convert a given gene set to GMT format
Jimmy's answer is already very efficient:
library(clusterProfiler)
data(gcSample)
names(gcSample)

file = "sink-examp.txt"
gs = gcSample

write.gmt <- function(gs, file){
  sink(file)
  lapply(names(gs), function(i){
    # one gene set per line: set name, placeholder description, then the genes, tab-separated
    cat(paste(c(i, 'tmp', gs[[i]]), collapse = '\t'))
    cat('\n')
  })
  sink()
}

write.gmt(gs, file)
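As a quick sanity check, here is a minimal sketch (assuming the file was just written as above): clusterProfiler also ships a read.gmt() parser, so the exported file can be read straight back in and compared against gcSample.

# Sketch: read the GMT file back to confirm it parses
# (assumes "sink-examp.txt" was produced by write.gmt above)
gmt <- read.gmt("sink-examp.txt")
head(gmt)                 # one row per gene-set / gene pair
length(unique(gmt[[1]]))  # should equal length(gcSample)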
My version just adds a header line and switches to a for loop:
library(clusterProfiler)
data(gcSample)
gcSample          # inspect the example gene sets
names(gcSample)

write.gmt <- function(gs, file){
  sink(file)
  # header line (note: a strict GMT file has no header; it is added here on purpose)
  cat(paste(c("GeneSet", "Description", "Genes"), collapse = "\t"), "\n", sep = "")
  for (i in names(gs)){
    # sep = "" avoids a stray space between the last gene and the newline
    cat(paste(c(i, "Description", gs[[i]]), collapse = "\t"), "\n", sep = "")
  }
  sink()
}

file = "sink-examp.txt"
write.gmt(gcSample, file)
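For comparison, a base-R sketch that produces the same layout without sink(): build each line with paste() and write them all in a single writeLines() call (write.gmt2 is just an illustrative name, not part of the exercise).

# Sketch: same header-plus-one-set-per-line output, no sink()
write.gmt2 <- function(gs, file){
  header <- paste(c("GeneSet", "Description", "Genes"), collapse = "\t")
  lines  <- vapply(names(gs),
                   function(i) paste(c(i, "Description", gs[[i]]), collapse = "\t"),
                   character(1))
  writeLines(c(header, lines), con = file)
}
write.gmt2(gcSample, "sink-examp2.txt")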
Exercise 2: word cloud
I wrapped the whole workflow into a function for making word clouds:
library(rvest)

WordMiner = function(keyword = "miRNA", n_pages = 20){  # keyword to search on PubMed, e.g. "immunotherapy"
  # Scrape article titles from the first n_pages of PubMed results
  titles = list()
  for (i in 1:n_pages){
    url <- paste0("https://pubmed.ncbi.nlm.nih.gov/?term=", keyword, "&page=", i)
    webpage <- read_html(url)
    title_data_html <- html_nodes(webpage, "a.docsum-title") %>% html_text(trim = T)
    titles[[i]] = title_data_html
  }
  titles = unlist(titles)  # the titles we will mine

  # Install
  # install.packages("tm")           # for text mining
  # install.packages("SnowballC")    # for text stemming
  # install.packages("wordcloud")    # word-cloud generator
  # install.packages("RColorBrewer") # color palettes
  # Load
  library("tm")
  library("SnowballC")
  library("wordcloud")
  library("RColorBrewer")

  text = titles
  # Load the data as a corpus
  docs <- Corpus(VectorSource(text))
  # Inspect the content of the documents
  inspect(docs)

  # Text transformation: replace special characters with spaces
  toSpace <- content_transformer(function(x, pattern) gsub(pattern, " ", x))
  docs <- tm_map(docs, toSpace, "/")
  docs <- tm_map(docs, toSpace, "@")
  docs <- tm_map(docs, toSpace, "\\|")
  inspect(docs)

  # Cleaning the text
  # Convert the text to lower case
  docs <- tm_map(docs, content_transformer(tolower))
  # Remove numbers
  docs <- tm_map(docs, removeNumbers)
  # Remove English common stopwords
  docs <- tm_map(docs, removeWords, stopwords("english"))
  # Remove your own stop words
  # (specify your stopwords as a character vector)
  docs <- tm_map(docs, removeWords, c("blabla1", "blabla2"))
  # Remove punctuation
  docs <- tm_map(docs, removePunctuation)
  # Eliminate extra white spaces
  docs <- tm_map(docs, stripWhitespace)
  # Text stemming (optional)
  # docs <- tm_map(docs, stemDocument)

  # Build a term-document matrix and sort terms by frequency
  dtm <- TermDocumentMatrix(docs)
  m <- as.matrix(dtm)
  v <- sort(rowSums(m), decreasing = TRUE)
  d <- data.frame(word = names(v), freq = v)
  head(d, 10)

  # Generate the word cloud
  set.seed(1234)
  wordcloud(words = d$word, freq = d$freq, min.freq = 2,
            max.words = 200, random.order = FALSE, rot.per = 0.35,
            colors = brewer.pal(8, "Dark2"))
}
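One practical note on the scraping step: when pulling many result pages it is friendlier to pause between requests. Below is a minimal sketch of just the fetch loop, assuming the same URL pattern as above (fetch_titles is a hypothetical helper, not part of WordMiner).

# Hypothetical helper, sketch only: same PubMed URL pattern as in WordMiner,
# with a one-second pause between requests to avoid hammering the server
library(rvest)
fetch_titles <- function(keyword, n_pages){
  titles <- character(0)
  for (i in seq_len(n_pages)){
    url  <- paste0("https://pubmed.ncbi.nlm.nih.gov/?term=", keyword, "&page=", i)
    page <- read_html(url)
    titles <- c(titles, html_text(html_nodes(page, "a.docsum-title"), trim = TRUE))
    Sys.sleep(1)  # be polite to the PubMed server
  }
  titles
}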
Call the function:
WordMiner(keyword = "TP53", n_pages = 20)  # choose the keyword and how many result pages to mine
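WordMiner draws on the active graphics device and returns nothing, so to keep the plot you can wrap the call in a graphics device (a sketch; the output file name is arbitrary):

# Sketch: write the word cloud to a PNG file instead of the screen
png("TP53_wordcloud.png", width = 800, height = 800)
WordMiner(keyword = "TP53", n_pages = 20)
dev.off()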
In addition, here are some word clouds I made earlier with Power BI:
http://www.reibang.com/p/d65bc194797f
http://www.reibang.com/p/245f0c34691b
and an app I built:
https://app.powerbi.com/view?r=eyJrIjoiODMxNmY0MDAtZDg3YS00YWUwLWJlYjktMDA2YWM4MDY5YTdhIiwidCI6ImUyZmFkYTNhLWZiNjktNGJkZS1hZmE4LWNlM2M2YWU2YjkyYiIsImMiOjZ9