# -*- coding: utf-8 -*-
import requests
import lxml
import sys
from bs4 import BeautifulSoup
import xlwt
import time
import urllib
def craw(url,key_word,x):
? ? User_Agent = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:56.0) Gecko/20100101 Firefox/56.0'
#? ? if x == 0:
#? ? ? ? re = 'http://www.qichacha.com/search?key='+key_word
#? ? else:
#? ? ? ? re = 'https://www.qichacha.com/search?key={}#p:{}&'.format(key_word,x-1)
? ? re = r'https://www.qichacha.com/search?key='+key_word
? ? headers = {
? ? ? ? ? ? 'Host':'www.qichacha.com',
? ? ? ? ? ? 'Connection': 'keep-alive',
? ? ? ? ? ? 'Accept':r'text/html, */*; q=0.01',
? ? ? ? ? ? 'X-Requested-With': 'XMLHttpRequest',
? ? ? ? ? ? 'User-Agent':r'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36',
? ? ? ? ? ? 'Referer': re,
? ? ? ? ? ? 'Accept-Encoding':'gzip, deflate, br',
? ? ? ? ? ? 'Accept-Language':'zh-CN,zh;q=0.9',
? ? ? ? ? ? 'Cookie':r'xxxxxxxxx這里換成你的cookiexxxxxxxx這里換成你的cooki