首先,爬取了兩萬多的成語。
然后,根據這些成語實現成語接龍。
# 爬蟲
library(rvest)
# Disabled single-page demo: the body never runs as written. Flip the
# condition to TRUE to try the scraper on one page before the full crawl.
# The literal FALSE is used instead of `F`, which is an ordinary variable
# that can be reassigned.
if (FALSE) {
  # Modify this URL to test another page.
  # https://chengyu.911cha.com/pinyin_d.html
  # Page-number form: https://chengyu.911cha.com/pinyin_a_p1.html
  url <- "https://chengyu.911cha.com/pinyin_a_p4.html"
  # Read the HTML source of the web page.
  webpage <- read_html(url, encoding = "UTF-8")
  # Extract the text of every list item inside the main content panel.
  idiom <- webpage %>%
    html_nodes("body div.mainbox div.panel div.mcon ul li") %>%
    html_text()
  # Drop the temporaries so only `idiom` is left behind.
  rm(url, webpage)
}
# Start here: scrape the idiom data ----------------------------------------
# For every letter a-z, walk the paginated listing (at most 20 pages per
# letter) and store the list-item texts of each page as one element of
# `idiom`. A letter is finished as soon as a page yields no items or cannot
# be read.
idiom <- list()
n <- 1
for (i in letters) {
  print(i)
  for (j in seq_len(20)) {
    url <- paste0("https://chengyu.911cha.com/pinyin_", i, "_p", j, ".html")
    # A failed request (HTTP error, missing page, network hiccup) must not
    # abort the whole crawl: report it and move on to the next letter.
    webpage <- tryCatch(
      read_html(url, encoding = "UTF-8"),
      error = function(e) {
        warning("Failed to read ", url, ": ", conditionMessage(e),
                call. = FALSE)
        NULL
      }
    )
    if (is.null(webpage)) {
      break
    }
    page_idioms <- webpage %>%
      html_nodes("body div.mainbox div.panel div.mcon ul li") %>%
      html_text()
    # An empty page means we ran past the last page for this letter.
    if (length(page_idioms) == 0) {
      break
    }
    idiom[[n]] <- page_idioms
    n <- n + 1
  }
}
# All idiom data has been scraped at this point; what remains is to write
# the idiom-chain game: given any idiom, return an idiom that can follow it.
# Flatten the per-page results into a single vector of idioms.
idiom_arr <- unlist(idiom)
#' Pick a random idiom that can follow `word` in an idiom chain
#'
#' An idiom can follow `word` when its first character equals the last
#' character of `word`. Candidates are taken from the global vector
#' `idiom_arr`.
#'
#' @param word A single string (the current idiom).
#' @return One randomly chosen element of `idiom_arr` whose first character
#'   equals the last character of `word`, or `NA_character_` when `word` is
#'   not a single non-empty, non-missing string or when no idiom matches.
Idioms_solitaire <- function(word) {
  # A missing or empty word has no last character to chain from.
  if (length(word) != 1L || is.na(word) || !nzchar(word)) {
    return(NA_character_)
  }
  word_length <- nchar(word)
  last_character <- substr(word, word_length, word_length)
  star_character <- substr(idiom_arr, 1L, 1L)
  # Compare literally rather than as a regular expression, so characters
  # such as "." or "(" cannot match unrelated idioms or raise an error.
  # which() also drops NA entries of idiom_arr.
  match_data <- idiom_arr[which(star_character == last_character)]
  # With no candidates, 1:length(x) would be c(1, 0) and sampling from it
  # would return NA or character(0) at random; report a dead end explicitly.
  if (length(match_data) == 0L) {
    return(NA_character_)
  }
  match_data[sample.int(length(match_data), 1L)]
}
# Idioms_solitaire("阿薩大噶")
#' Play an idiom chain starting from `word`
#'
#' Uses `Idioms_solitaire()` internally: each idiom is printed, recorded,
#' and then replaced by a randomly chosen idiom that can follow it. The
#' chain ends when no follow-up exists (the successor is `NA` or empty) or
#' after `max_steps` idioms have been recorded.
#'
#' @param word A single string, the idiom to start from.
#' @param max_steps Maximum number of idioms to record. The default `Inf`
#'   keeps the original unbounded behaviour; pass a finite value to guard
#'   against very long or cyclic chains.
#' @return A character vector of the idioms in the chain, in order, starting
#'   with `word`; `character(0)` when `word` is missing or empty.
per_fun <- function(word, max_steps = Inf) {
  result <- character(0)
  # Ask for the successor exactly once per step. The length check covers a
  # successor of character(0); the NA check covers a successor of NA.
  while (length(word) == 1L && !is.na(word) && length(result) < max_steps) {
    print(word)
    result[length(result) + 1L] <- word
    word <- Idioms_solitaire(word)
    # Sys.sleep(1)
  }
  result
}
# Example run starting from one idiom. Each following idiom is picked at
# random, so the printed chain below is just one possible output.
per_fun("放虎歸山")
#[1] "放虎歸山"
#[1] "山高皇帝遠"
#[1] "遠涉重洋"
#[1] "洋洋灑灑"
#[1] "灑灑瀟瀟"
#[1] "瀟灑風流"
#[1] "流水高山"
#[1] "山棲谷隱"
#[1] "隱晦曲折"
#[1] "折戟沉沙"
#[1] "沙鷗翔集"
#[1] "集矢之的"
#[1] "的一確二"
#[1] "二話沒說"
#[1] "說東談西"
#[1] "西方凈土"
#[1] "土階茅屋"
#[1] "屋上烏"
#[1] "烏焦巴弓"
#[1] "弓折刀盡"
#[1] "盡善盡美"
#[1] "美人計"
#[1] "計較錙銖"
#[1] "銖積寸累"
#[1] "累牘連篇"