啟動webdriver
try+catch+except+finally的用法順序,可以在error拋出時候繼續運行腳本。
支持多種選擇器,推薦xpath或css選擇器
從xpath里傳遞變量的方法:http://stackoverflow.com/questions/32874539/using-a-variable-in-xpath-in-python-selenium
不完善的爬cnki期刊數據的腳本:
主要作用是從xls文件中讀取期刊名字搜索,點擊后進入期刊具體信息頁面
利用xpath尋找2017年所到刊數,返回寫入到新的xls文件中去。
<pre>
# coding=utf-8
import xlrd
import xlwt
import sys
import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
def DragInfo(keyword):
    """Search the loaded CNKI page for a journal and count its 2017 links.

    Types ``keyword`` into the search box (element id ``txt_1_value1``) of the
    page held by the module-level ``driver``, clicks the first search result,
    switches to the browser window that is not the search window, waits for
    the ``2017_Year_Issue`` list, and counts the ``<a>`` elements whose ``id``
    contains ``"2017"``.

    Args:
        keyword: Journal name to type into the search box.

    Returns:
        The number of matching ``<a>`` elements, or ``None`` when any step
        fails (the error is printed and swallowed so the caller can carry on
        with the next journal).
    """
    # The wildcard matters: '//[@id=...]' is not a valid XPath expression.
    first_result_xpath = '//*[@id="searchResult"]/dl/dd[1]/div[2]/h1/a'
    year_list_xpath = '//*[@id="2017_Year_Issue"]/dd'
    issue_link_xpath = '//a[contains(@id,"2017")]'

    elem = driver.find_element(By.ID, "txt_1_value1")
    # Remember the search window before anything can fail, so the cleanup
    # below never depends on a name that was only bound halfway through.
    default_window = driver.current_window_handle

    elem.send_keys(keyword)
    elem.send_keys(Keys.RETURN)
    try:
        wait = WebDriverWait(driver, 20)
        # presence_of_element_located returns the element it found.
        result = wait.until(
            EC.presence_of_element_located((By.XPATH, first_result_xpath)))
        result.click()

        # Move to the window that is not the search window.
        for handle in driver.window_handles:
            if handle != default_window:
                driver.switch_to.window(handle)

        wait.until(
            EC.presence_of_element_located((By.XPATH, year_list_xpath)))
        return len(driver.find_elements(By.XPATH, issue_link_xpath))
    except Exception as e:
        # Deliberately broad: one failing journal must not stop the batch.
        print("%s : %s" % (type(e).__name__, e))
    finally:
        # Close every window except the search window (also on failure, so
        # stray windows do not pile up), then reset the search box.
        for handle in driver.window_handles:
            if handle != default_window:
                driver.switch_to.window(handle)
                driver.close()
        driver.switch_to.window(default_window)
        elem.clear()
def SolveXls():
    """Look up every journal named in ``search_list.xls`` and save the counts.

    Reads the first column of the first sheet of ``search_list.xls``, calls
    ``DragInfo`` on each cell value, and writes one row per journal to
    ``result_test.xls`` (sheet ``A Test Sheet``): column 0 holds the journal
    name, column 1 holds whatever ``DragInfo`` returned.
    """
    data = xlrd.open_workbook('search_list.xls')
    table = data.sheets()[0]

    new_data = xlwt.Workbook(encoding='utf-8')
    new_table = new_data.add_sheet('A Test Sheet')

    try:
        for i, menu in enumerate(table.col_values(0)):
            print(menu)
            count = DragInfo(menu)
            new_table.write(i, 0, menu)
            new_table.write(i, 1, count)
            print(count)
    finally:
        # Save even if the run is interrupted, so the rows collected so far
        # are not thrown away.
        new_data.save('result_test.xls')
# Script entry point. ``driver`` is bound at module level on purpose:
# DragInfo reads it as a global.
if __name__ == '__main__':
    driver = webdriver.Chrome()
    try:
        driver.get("http://navi.cnki.net/")
        SolveXls()
    finally:
        # Always shut the browser session down, even when the run fails.
        driver.quit()
</pre>