Selenium類封裝
#!/usr/bin/env python
#--coding:utf-8-*-
import random
#導入selenium模塊
from selenium import webdriver
#selenium鍵盤事件
#引入 keys 包
from selenium.webdriver.common.keys import Keys
'''
其他常用到的鼠標事件,只需在后面加上,demo: obj.getElementByLinkText('愛情').click()
clear()
context_click() 右擊
double_click() 雙擊
drag_and_drop() 拖動
move_to_element() 鼠標懸停在一個元素上
click_and_hold() 按下鼠標左鍵在一個元素上
'''
'''
繼承Abstract類
'''
from Abstract import Downloader_Abstract
class Downloader_Selenium(Downloader_Abstract):
    """Convenience wrapper around a Selenium WebDriver instance.

    The wrapper starts one browser in ``__init__`` and exposes short helper
    methods for navigation, element lookup, typing, scrolling and shutdown.
    The underlying driver is available as ``self.driver``.

    NOTE(review): the lookups use the legacy ``find_element_by_*`` driver
    methods and the PhantomJS driver; confirm that the Selenium version this
    project pins still provides them.
    """

    def __init__(self, data='firefox'):
        """Start the browser selected by *data*.

        :param data: browser name, matched case-insensitively against
            'firefox', 'chrome' and 'phantomjs'.

        For any other name an error line is printed and ``self.driver`` is
        set to ``None`` so the attribute always exists.
        """
        browser = data.lower()
        if browser == "firefox":
            self.driver = webdriver.Firefox()
        elif browser == "chrome":
            self.driver = webdriver.Chrome()
        elif browser == "phantomjs":
            self.driver = self._startPhantomJs()
        else:
            # Keep construction non-fatal, but never leave the attribute
            # undefined and say what actually went wrong.
            self.driver = None
            print("Error Message: Unsupported browser '%s' , select : 'firefox','chrome','phantomjs'" % data)

    def _startPhantomJs(self):
        """Create a PhantomJS driver that sends browser-like request headers."""
        from selenium.webdriver import DesiredCapabilities
        capabilities = DesiredCapabilities.PHANTOMJS.copy()
        # Custom headers sent with every page request. The User-Agent below
        # is the value that was effectively in force before (an earlier
        # assignment of a different User-Agent was always overwritten).
        headers = {
            'Accept': '*/*',
            'Accept-Language': 'en-US,en;q=0.8',
            'Cache-Control': 'max-age=0',
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36',
            'Connection': 'keep-alive',
            'Referer': 'http://movie.douban.com/'
        }
        # items() works on both Python 2 and Python 3 (iteritems() does not).
        for key, value in headers.items():
            capabilities['phantomjs.page.customHeaders.{}'.format(key)] = value
        return webdriver.PhantomJS(desired_capabilities=capabilities)

    def getHtml(self, url):
        """Open *url* and return the resulting page source."""
        self.open(url)
        return self.getPageSource()

    def sendKeysByElement(self, tag, element, data):
        """Locate one element and type *data* into it.

        :param tag: locator strategy: 'id', 'class', 'name', 'tag_name',
            'link_text', 'partial_link_text' or 'xpath'.
        :param element: locator value for the chosen strategy.
        :param data: text (or key constant) to send to the element.
        :return: whatever ``send_keys`` returns, or ``None`` when *tag* is
            not a known strategy (an error line is printed in that case).
        """
        locators = {
            "id": self.getElementById,
            "class": self.getElementByClass,
            "name": self.getElementByName,
            "tag_name": self.getElementByTagName,
            "link_text": self.getElementByLinkText,
            "partial_link_text": self.getElementByPartialLinkText,
            "xpath": self.getElementByXpath,
        }
        locate = locators.get(tag)
        if locate is None:
            print("Error Message: function 'sendKeysByElement' First parameters wrong , select : 'id','class','name','tag_name','link_text','partial_link_text','xpath'")
            return None
        return locate(element).send_keys(data)

    def open(self, url):
        """Navigate the browser to *url*."""
        return self.driver.get(url)

    def getPageSource(self):
        """Return the source of the current page."""
        return self.driver.page_source

    def getPageTitle(self):
        """Return the title of the current page."""
        return self.driver.title

    def quit(self):
        """Shut the browser down; does nothing if no driver was started."""
        if self.driver is None:
            return None
        return self.driver.quit()

    def getElementById(self, element):
        """Return the first element whose id equals *element*."""
        return self.driver.find_element_by_id(str(element))

    def getElementByClass(self, element):
        """Return the first element whose class name equals *element*."""
        return self.driver.find_element_by_class_name(str(element))

    def getElementByName(self, element):
        """Return the first element whose ``name`` attribute equals *element*.

        Example: ``<input type="submit" name="btnK">`` is found with
        ``getElementByName("btnK")``.
        """
        return self.driver.find_element_by_name(str(element))

    def getElementByTagName(self, element):
        """Return the first element with tag name *element*, e.g. ``"div"``."""
        return self.driver.find_element_by_tag_name(str(element))

    def getElementByXpath(self, element):
        """Return the first element matching the XPath expression *element*."""
        return self.driver.find_element_by_xpath(str(element))

    def getElementByLinkText(self, element):
        """Return the first link whose visible text equals *element*."""
        return self.driver.find_element_by_link_text(str(element))

    def getElementByPartialLinkText(self, element):
        """Return the first link whose visible text contains *element*."""
        return self.driver.find_element_by_partial_link_text(str(element))

    def scroll(self, top=None):
        """Move the page's vertical scroll position to *top*.

        :param top: value assigned to ``document.documentElement.scrollTop``.
            When omitted, a new random integer in [100, 999] is drawn on
            every call (a default computed in the signature would be fixed
            once at definition time).
        """
        if top is None:
            top = random.randint(100, 999)
        js = "var q=document.documentElement.scrollTop=" + str(top)
        return self.driver.execute_script(js)

    def getCurrentUrl(self):
        """Return the URL of the current page."""
        return self.driver.current_url

    def setTimeOut(self, times="60"):
        """Set the driver's implicit wait.

        :param times: wait in seconds; a number or numeric string.
        """
        # Convert up front so a numeric string such as the default "60" is
        # handed to the driver as a real number.
        return self.driver.implicitly_wait(float(times))

    def setMaxWindow(self):
        """Maximize the browser window."""
        return self.driver.maximize_window()
實例化調用類
from selenium.webdriver.common.keys import Keys
from Downloader.Selenium import Downloader_Selenium
import time
import os
import datetime
import sys
# Python 2 only: make UTF-8 the interpreter's default string encoding.
# reload(sys) is what makes sys.setdefaultencoding reachable again there.
# Neither name exists on Python 3, so the hack is skipped on that version.
if sys.version_info[0] == 2:
    reload(sys)
    sys.setdefaultencoding('utf-8')

# Timestamp captured once, when this module is loaded.
now = datetime.datetime.now()
# Colon-free rendering of that timestamp (presumably so it can be used in
# file names; it is not used in the code visible here).
now_time = now.strftime("%Y-%m-%d %H_%M_%S")
class KeywordTool(object):
    """Drive a keyword-search page through a Downloader_Selenium browser.

    The page is expected to contain two drop-downs (elements with ids
    ``select2-edit-domain-container`` and ``select2-edit-language-container``)
    and a keyword input with id ``edit-keyword``.
    """

    def __init__(self, driver, url):
        """Start the browser, open *url* and apply the default implicit wait.

        :param driver: browser name passed on to ``Downloader_Selenium``.
        :param url: address of the page to open.
        """
        self.obj = Downloader_Selenium(driver)
        self.obj.open(url)
        self.obj.setTimeOut()

    def _chooseOption(self, container_xpath, text):
        """Open one drop-down, type *text* into its search box, press ENTER."""
        # Search box that is looked up after a drop-down has been clicked.
        search_box_xpath = '/html/body/span/span/span[1]/input'
        self.obj.getElementByXpath(container_xpath).click()
        # The box is looked up again before each action rather than reused,
        # so every call works on a freshly located element.
        self.obj.getElementByXpath(search_box_xpath).send_keys(text)
        self.obj.getElementByXpath(search_box_xpath).send_keys(Keys.ENTER)

    def getKeyword(self, domain="United States", language="English", keyword=""):
        """Submit a keyword search and return the page source afterwards.

        :param domain: text typed into the domain (country) drop-down.
        :param language: text typed into the language drop-down.
        :param keyword: text typed into the keyword input.
        :return: page source after ENTER is sent to the keyword input, or
            ``None`` if any step raised (the exception is printed).
        """
        keywords_input_xpath = '//*[@id="edit-keyword"]'
        try:
            # Country drop-down, then language drop-down.
            self._chooseOption('//*[@id="select2-edit-domain-container"]', domain)
            self._chooseOption('//*[@id="select2-edit-language-container"]', language)
            # Keyword input: clear old content, type the keyword, submit.
            self.obj.getElementByXpath(keywords_input_xpath).clear()
            self.obj.getElementByXpath(keywords_input_xpath).send_keys(keyword)
            self.obj.getElementByXpath(keywords_input_xpath).send_keys(Keys.ENTER)
            return self.obj.getPageSource()
        except Exception as e:
            # Best-effort boundary: report the failure and signal it with
            # None instead of letting it propagate to the caller.
            print(e)
            return None

    def closeWindow(self):
        """Shut down the browser owned by this tool."""
        return self.obj.quit()