import time

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
# Scrape the Weibo mobile "hot search" board with Selenium, print every entry
# that carries a recognised badge icon, and save the same lines to a text file.

# Destination of the collected entries (raw string: the backslash is literal).
OUTPUT_PATH = r"d:\weibo.txt"

# (substring of the badge icon URL, label stored with the entry).
# Checked in this order; the first marker found in the URL wins.
ICON_TAGS = (
    ("hot", '熱'),
    ("recom", '薦'),
    ("new", '新'),
    ("fei", '沸'),
)


def hot_tag_for_icon(src):
    """Return the label matching a badge icon URL.

    Args:
        src: The ``src`` attribute of the badge ``<img>``; may be ``None``
            or empty when the attribute is missing.

    Returns:
        The label of the first ``ICON_TAGS`` marker contained in ``src``,
        or ``None`` when ``src`` is empty or matches no marker.
    """
    if not src:
        return None
    for marker, tag in ICON_TAGS:
        if marker in src:
            return tag
    return None


def format_hot_entry(summary, click_amount, tag):
    """Render one entry as the text form of a ``(summary, clicks, tag)`` tuple."""
    return f'{summary, click_amount, tag}'


def read_hot_entry(hot):
    """Build the output line for one hot-search card element.

    Args:
        hot: The Selenium element of a single card (class ``card4``).

    Returns:
        The formatted line, or ``None`` when the card has no badge icon or
        the icon is not one of the recognised kinds (such cards are skipped).

    Raises:
        selenium.common.exceptions.NoSuchElementException: If the title or
            the click-count node cannot be found inside the card.
    """
    # Title of the hot-search entry.
    summary = hot.find_element(By.CLASS_NAME, 'm-text-cut').text
    # Click count of this entry. The path starts with "." so it is resolved
    # relative to this card; a path starting with "//" is evaluated from the
    # document root and yields the same node for every card.
    # NOTE(review): "span[2]/span[2]" is the tail of the previous absolute
    # path - confirm it still matches the page's current markup.
    click_amount = hot.find_element(By.XPATH, './/span[2]/span[2]').text

    icons = hot.find_elements(By.CLASS_NAME, "m-link-icon")
    if not icons:
        return None
    src = icons[0].find_element(By.TAG_NAME, 'img').get_attribute('src')
    tag = hot_tag_for_icon(src)
    if tag is None:
        return None
    return format_hot_entry(summary, click_amount, tag)


def scrape_hot_searches(driver):
    """Navigate to the hot-search board and collect its entries.

    Args:
        driver: A started Selenium WebDriver.

    Returns:
        A list of formatted lines, one per card with a recognised badge,
        in page order. Each line is also printed as it is collected.
    """
    driver.get('https://m.weibo.cn/')
    # Open the search page by clicking the search box.
    driver.find_element(By.CLASS_NAME, 'm-search').click()
    time.sleep(1)
    # Explicit wait for the container that holds the hot-search shortcuts.
    hotlist_all = WebDriverWait(driver, 5, 0.5).until(
        EC.visibility_of_element_located((By.CLASS_NAME, "m-col-2")))
    boxes = hotlist_all.find_elements(By.CLASS_NAME, 'm-item-box')
    # The last box is taken to be the link to the full hot-search board.
    # NOTE(review): positional assumption - verify against the live page.
    boxes[-1].click()
    # Explicit wait for the cards of the board to become visible.
    all_hots = WebDriverWait(driver, 5, 0.5).until(
        EC.visibility_of_all_elements_located((By.CLASS_NAME, "card4")))

    lines = []
    for hot in all_hots:
        line = read_hot_entry(hot)
        if line is not None:
            print(line)
            lines.append(line)
    return lines


def main():
    """Run the scrape in Chrome and write the collected lines to OUTPUT_PATH."""
    driver = webdriver.Chrome()
    try:
        lines = scrape_hot_searches(driver)
    finally:
        # Always close the browser, even when the scrape fails part-way.
        driver.quit()
    # Write only after a successful scrape so a failure does not leave a
    # truncated file; UTF-8 so the Chinese text does not depend on the
    # platform's default encoding.
    with open(OUTPUT_PATH, 'w', encoding='utf-8') as f:
        f.writelines(f'{line}\n' for line in lines)


if __name__ == "__main__":
    main()
# 最后打印結果:
# ('兩會2020', '1205575', '熱')
# ('建議將HPV疫苗納入國家免疫規劃', '1205575', '沸')
# ('中國不存在隱性軍費問題', '1205575', '新')
# ('清華學霸放棄保研成為女特種兵', '1205575', '沸')
# ('建議取消生育三孩以上處罰', '1205575', '沸')
# ('計劃今明兩年制定修改法律17部', '1205575', '新')
# ('建議離婚過錯方少分或不分財產', '1205575', '沸')
# ('鄭愷苗苗結婚', '1205575', '沸')
# ('張寶艷建議保護離異家庭兒童親情權', '1205575', '新')
# ('民法典草案累計收到42.5萬人102萬條意見', '1205575', '新')
# ('張馳 溫婉', '1205575', '熱')
# ('孫耀威給老婆轉5201314元', '1205575', '熱')
# ('張藝興騎馬舞', '1205575', '新')
# ('程曉玥', '1205575', '熱')
# ('GAI方言RAP', '1205575', '新')
# ('俄女排名教將申請吉尼斯紀錄', '1205575', '新')
# ('老師改編量子力學版消愁', '1205575', '新')
# ('建議將欺詐發行罪最高刑提至無期', '1205575', '新')
# ('KPL常規賽收官大戰', '1205575', '新')