# Query the global graphics parameters that can be set (read-only ones are
# excluded by no.readonly = TRUE).
par(no.readonly = TRUE)
# RISmed package: journal mining and word-cloud plotting ----
library(RISmed)

# PubMed query: "Heart" in the title/abstract or as a MeSH term.
search_topic <- '("Heart"[Title/Abstract] OR "Heart"[MeSH Terms])'

# retmax caps the number of records retrieved; mindate and maxdate bound the
# search period, applied to the date field selected by datetype.
search_query <- EUtilsSummary(
  search_topic,
  db = "pubmed",
  retmax = 100,
  datetype = "pdat",
  mindate = 2019,
  maxdate = 2020
)

# Inspect the search and the PMIDs of the matched articles.
summary(search_query)
QueryId(search_query)

# Fetch the article records for the query with EUtilsGet.
records <- EUtilsGet(search_query)
# Information mining: as an example, find the journals that published the
# most articles among the retrieved records.
library(tidyverse)

# Abstract text of the first article (via the RISmed accessor rather than
# reaching into the S4 slot directly).
AbstractText(records)[1]
# Publication type of the first article.
PublicationType(records)[1]

# Collect the fields of interest from the search result into a tibble.
pubmed <- tibble(
  Title = ArticleTitle(records),
  Year = YearPubmed(records),
  journal = ISOAbbreviation(records)
)

# Bar chart of article counts per journal.
library(ggplot2)
ggplot(pubmed, aes(fct_infreq(journal))) + # order journals by frequency
  geom_bar() +
  coord_flip() +
  theme_classic()
# MeSH heading extraction and word-cloud plotting ----

# Extract the MeSH terms (one list element per article).
word <- Mesh(records)
# Drop the elements that are NA (presumably articles without MeSH terms).
word <- word[!is.na(word)]

# Remove duplicated MeSH headings.
# First article only, as a quick check. The argument must be `.keep_all`
# (with the leading dot); a bare `keep_all` would be treated as a new column.
distinct(word[[1]], Heading, .keep_all = TRUE)
# All articles.
word <- lapply(word, distinct, Heading, .keep_all = TRUE)

# Pull the first column (the heading terms) out of every article's table.
wordtable <- lapply(word, function(mesh) mesh[[1]])

# Term frequencies across all articles.
wordcd <- table(unlist(wordtable))

# Word-cloud visualisation.
library(wordcloud2)
wordcloud2(wordcd) # interactive word cloud

library(wordcloud)
library(RColorBrewer)
# as.data.frame() on the frequency table yields the columns Var1 and Freq.
wordcd <- as.data.frame(wordcd)
wordcloud(wordcd$Var1, wordcd$Freq, colors = rev(brewer.pal(7, "Set2")))
# pubmed.mineR package: mine abstracts that were downloaded beforehand ----
# Step 1: download the abstracts from PubMed (the file read below is assumed
# to be that export -- TODO confirm its location relative to the working
# directory).
library(pubmed.mineR)

# Import the downloaded abstracts.
pubmed_abstracts <- readabs("abstract-Dimethylfu-set.txt")

# Journal information of the first ten records.
pubmed_abstracts@Journal[1:10]
# Abstract of the first record.
pubmed_abstracts@Abstract[1]

# Switch to the "C" locale before tokenising. NOTE(review): this changes the
# locale for the rest of the session; presumably needed so the word splitting
# does not fail on multibyte text -- confirm.
Sys.setlocale("LC_ALL", "C")

# Split the abstract text into words with their frequencies.
abswords <- word_atomizations(pubmed_abstracts)

# Dot chart of the 50 most frequent words. head() is used instead of
# abswords[1:50, ] so that fewer than 50 words does not add all-NA rows.
library(ggpubr)
ggdotchart(
  head(abswords, 50),
  x = "words",
  y = "Freq",
  sorting = "descending",
  add = "segments",
  ggtheme = theme_pubr(),
  rotate = TRUE
)