今天在用requests爬取網站數據時,遇到了一個問題,代碼和報錯信息如下:
import re
import requests
class Handle_Lagou(object):
    """Small Lagou crawler that sends every request through one session.

    Certificate verification is left at the ``requests`` default here, so an
    HTTPS request whose certificate chain cannot be validated raises
    ``requests.exceptions.SSLError``.
    """

    # Compiled once at class creation instead of on every handle_city() call.
    # NOTE(review): presumably meant to capture city names from the all-cities
    # page; nothing applies it yet -- handle_city() only prints the raw page.
    _CITY_SEARCH = re.compile(r'zhaopin/">(.*?)</a>')

    def __init__(self):
        # A single session is reused so cookies are kept between requests.
        self.lagou_session = requests.Session()
        self.header = {
            'Connection': 'close',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36'
        }
        # Placeholder; no method in this class fills it in yet.
        self.city_list = ""

    def handle_city(self):
        """Fetch the all-cities page and print its raw text."""
        city_url = "https://www.lagou.com/jobs/allCity.html"
        city_result = self.handle_request(method="GET", url=city_url)
        print(city_result)

    def handle_request(self, method, url, data=None, info=None):
        """Send a request through the shared session and return the body text.

        Args:
            method: HTTP method name; only ``"GET"`` is supported.
            url: Address to request.
            data: Accepted for interface compatibility; currently unused.
            info: Accepted for interface compatibility; currently unused.

        Returns:
            The response body decoded as text.

        Raises:
            ValueError: If ``method`` is anything other than ``"GET"``.
        """
        # Fail loudly rather than fall through and hand the caller None.
        if method != "GET":
            raise ValueError("unsupported HTTP method: {!r}".format(method))
        response = self.lagou_session.get(url=url, headers=self.header)
        return response.text
# Script entry point: build the crawler and request the all-cities page.
if __name__ == "__main__":
    lagou = Handle_Lagou()
    lagou.handle_city()
運行后發現出現了兩個錯誤:
urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host='www.lagou.com', port=443): Max retries exceeded with url: /jobs/allCity.html (Caused by SSLError(SSLError("bad handshake: Error([('SSL routines', 'tls_process_server_certificate', 'certificate verify failed')])")))
requests.exceptions.SSLError: HTTPSConnectionPool(host='www.lagou.com', port=443): Max retries exceeded with url: /jobs/allCity.html (Caused by SSLError(SSLError("bad handshake: Error([('SSL routines', 'tls_process_server_certificate', 'certificate verify failed')])")))
然后就上網找了找,看了大部分相關問題的博客后,找到了兩個解決辦法(其實是一個):
import re
import requests
# Turn off the warning messages here: silence urllib3's InsecureRequestWarning,
# which is otherwise emitted for HTTPS requests made without certificate
# verification. This only hides the warning; it does not make the request safe.
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
# 這是另一個大佬的辦法,可惜過于復雜,我沒看懂,簡簡單單才是真
# requests.packages.urllib3.disable_warnings()
# requests.packages.urllib3.util.ssl_.DEFAULT_CIPHERS += 'HIGH:!DH:!aNULL'
# try:
# requests.packages.urllib3.contrib.pyopenssl.DEFAULT_SSL_CIPHER_LIST += 'HIGH:!DH:!aNULL'
# except AttributeError:
# # no pyopenssl support used / needed / available
# pass
class Handle_Lagou(object):
    """Small Lagou crawler that sends every request through one session.

    Requests are sent with ``verify=False`` as a workaround for a
    certificate-verification failure during the TLS handshake.
    """

    # Compiled once at class creation instead of on every handle_city() call.
    # NOTE(review): presumably meant to capture city names from the all-cities
    # page; nothing applies it yet -- handle_city() only prints the raw page.
    _CITY_SEARCH = re.compile(r'zhaopin/">(.*?)</a>')

    def __init__(self):
        # A single session is reused so cookies are kept between requests.
        self.lagou_session = requests.Session()
        self.header = {
            'Connection': 'close',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36'
        }
        # Placeholder; no method in this class fills it in yet.
        self.city_list = ""

    def handle_city(self):
        """Fetch the all-cities page and print its raw text."""
        city_url = "https://www.lagou.com/jobs/allCity.html"
        city_result = self.handle_request(method="GET", url=city_url)
        print(city_result)

    def handle_request(self, method, url, data=None, info=None):
        """Send a request through the shared session and return the body text.

        Args:
            method: HTTP method name; only ``"GET"`` is supported.
            url: Address to request.
            data: Accepted for interface compatibility; currently unused.
            info: Accepted for interface compatibility; currently unused.

        Returns:
            The response body decoded as text.

        Raises:
            ValueError: If ``method`` is anything other than ``"GET"``.
        """
        # Fail loudly rather than fall through and hand the caller None.
        if method != "GET":
            raise ValueError("unsupported HTTP method: {!r}".format(method))
        # SECURITY: verify=False skips TLS certificate verification, so the
        # server's identity is not checked and the connection can be
        # intercepted. Acceptable for a throwaway scrape of public pages;
        # otherwise pass a trusted CA bundle path via ``verify`` instead.
        response = self.lagou_session.get(
            url=url, headers=self.header, verify=False
        )
        return response.text
# Script entry point: build the crawler and request the all-cities page.
if __name__ == "__main__":
    lagou = Handle_Lagou()
    lagou.handle_city()