selenium在應對需要登錄驗證的網頁爬取上是個很好的工具,但是在登錄過後拿到cookie,就可以直接使用cookie訪問。
以下是一個cookie登錄的案例;目標網站:https://aso100.com/
導入相關庫
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
實現登錄
# Log in through a real Chrome session and capture the resulting cookies in
# `cookie_list` so later steps can reuse them without logging in again.
# NOTE(review): credentials are hard-coded here; consider loading them from
# the environment before using this outside a demo.
userName = 'magic111'
passWord = '121314'
driver = webdriver.Chrome()
try:
    driver.get('https://aso100.com/account/signin')
    # Wait up to 20 seconds for the username field to be present.
    wait = WebDriverWait(driver, 20)
    wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="username"]')))
    # find_element(By.XPATH, ...) replaces find_element_by_xpath, which newer
    # Selenium releases no longer provide.
    user_name = driver.find_element(By.XPATH, '//*[@id="username"]')
    user_name.send_keys(userName)
    pass_word = driver.find_element(By.XPATH, '//*[@id="password"]')
    pass_word.send_keys(passWord)
    submit = driver.find_element(By.XPATH, '//*[@id="submit"]')
    submit.click()
    # Wait up to 20 seconds for the element with id "search-kw"; the script
    # treats its presence as the sign that the login went through.
    wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="search-kw"]')))
    # Grab the cookies of the logged-in session.
    cookie_list = driver.get_cookies()
    print(cookie_list)
finally:
    # Always shut the browser down, even when a wait times out or a lookup
    # fails; quit() ends the whole session, so a separate close() is not needed.
    driver.quit()
1、使用selenium設置cookie登錄
# Replay the cookies captured in `cookie_list` into a fresh Chrome session and
# open a page that requires a logged-in account.
driver = webdriver.Chrome()
try:
    # The domain has to be visited once before cookies can be added for it.
    driver.get('https://aso100.com')
    for item in cookie_list:
        # Only name and value are taken from the captured cookie; domain and
        # path are forced to site-wide values.
        # NOTE(review): 'expires' is kept as in the original, but the cookie
        # field documented by Selenium is 'expiry' - confirm whether this key
        # has any effect.
        driver.add_cookie({
            'domain': '.aso100.com',
            'name': item['name'],
            'value': item['value'],
            'path': '/',
            'expires': None
        })
    driver.get('https://aso100.com/account/setting/type/dataCenter')
    # Pause so the operator can check in the browser whether the login held.
    input('是否有效')
finally:
    # Always shut the browser down, even if a step above raised; quit() ends
    # the whole session, so a separate close() is not needed.
    driver.quit()
2、使用requests設置cookie登錄
import requests
# Build a single Cookie header value ("name=value;name=value...") from the
# cookies captured in `cookie_list`.
cookies = ";".join(item["name"] + "=" + item["value"] for item in cookie_list)
print(cookies)
# The cookie string is sent through the request headers.
headers = {
    'Cookie': cookies
}
# The context manager closes the session's connections when done; the timeout
# keeps the script from hanging forever if the server never answers.
with requests.Session() as session:
    response = session.get(
        url='https://aso100.com/account/setting/type/dataCenter',
        headers=headers,
        timeout=30,
    )
    # decode() with no argument interprets the body as UTF-8.
    html = response.content.decode()
print(html)