python版本:3.5
爬蟲源代碼:
#coding=utf-8
'''urllib默認不支持https'''
import urllib.parse
import urllib.request

from common.common_file import write_to_file,write_list_to_file
# Search keyword sent to Baidu.
keywd = "壁紙"
# Percent-encode the keyword: it is non-ASCII and is spliced into the query
# string below. urllib.parse.quote is the documented home of this helper.
keywd = urllib.parse.quote(keywd)
url = "http://www.baidu.com/s?ie=utf-8&f=8&rsv_bp=1&tn=baidu&wd=" + keywd + "&oq=python&rsv_pq=ef318e6b00021ada&rsv_t=70893fgCSlBbfewSWWVzczCYMPleih3zBm3PycB856h%2B6EvnaFEyOAFxKxk&rqlang=cn&rsv_enter=0&rsv_jmp=slow"
req = urllib.request.Request(url)
# Read the body inside a context manager so the HTTP response (and its
# socket) is closed deterministically, even if read() raises.
with urllib.request.urlopen(req) as resp:
    data = resp.read()
# Debug output: shows the type of the downloaded payload.
print(type(data))
# Destination of the saved page; the payload is written in binary mode ("wb")
# through the helper imported from common.common_file.
file = "e:\\test\\2.html"
write_to_file(file, "wb", data)
common_file.py
#coding=utf-8
def write_to_file(file, lst):
    """Write every item of a list to a text file, one item per line.

    NOTE(review): this module defines ``write_to_file`` a second time further
    down with the signature ``(file, mtd, txt)``. That later definition
    rebinds the name, so this two-argument version cannot be reached by
    callers once the module has been imported. ``write_list_to_file`` in this
    module provides list writing under a distinct name.

    :param file: full path of the file to write; it is opened in ``'w'``
        mode, so existing content is truncated.
    :param lst: list of strings; each item is written followed by a newline.
    :return: None. When ``lst`` is not a list, a message is printed and the
        file is not opened.
    """
    if not isinstance(lst, list):
        print("不是list類型")
        return
    # The context manager closes the handle even when a write fails, e.g. a
    # non-string item raising TypeError on the concatenation below.
    with open(file, 'w') as file_name:
        for item in lst:
            file_name.write(item + "\n")
def write_to_file(file, mtd, txt, encoding=None):
    """Open a file in the given mode and write ``txt`` to it in one call.

    :param file: full path of the file to write.
    :param mtd: mode string passed to ``open``. It must allow writing
        (for example ``"w"``, ``"a"`` or ``"wb"``); with a read-only mode
        such as ``"r"`` the write fails.
    :param txt: content to write: ``str`` for text modes, a bytes-like
        object for binary modes.
    :param encoding: optional text encoding passed to ``open``. The default
        ``None`` keeps the platform default and is the only value accepted
        together with a binary mode.
    :return: None
    """
    # The context manager guarantees the handle is flushed and closed.
    with open(file, mtd, encoding=encoding) as file_name:
        file_name.write(txt)
def write_list_to_file(file, str, lst, encoding=None):
    """Write every item of a list to a file, one item per line.

    :param file: full path of the file to write.
    :param str: mode string passed to ``open`` (for example ``"w"`` or
        ``"a"``). The name shadows the builtin ``str`` inside this function;
        it is kept unchanged so existing keyword callers keep working.
    :param lst: list of strings; each item is written followed by a newline.
    :param encoding: optional text encoding passed to ``open``. The default
        ``None`` keeps the platform default.
    :return: None. When ``lst`` is not a list, a message is printed and the
        file is not opened.
    """
    if not isinstance(lst, list):
        print("不是list類型")
        return
    # The context manager closes the handle even when a write fails, e.g. a
    # non-string item raising TypeError on the concatenation below.
    with open(file, str, encoding=encoding) as file_name:
        for item in lst:
            file_name.write(item + "\n")