需要引入的模塊
#!/usr/bin/env python3
#-*-encoding:utf-8-*-
from urllib import request,parse
import re
import os
import time
url請求
def open_url(url, timeout=30, encoding='gbk'):
    """Fetch ``url`` and return the response body decoded as text.

    Args:
        url: Address to request.
        timeout: Seconds to wait on the connection before giving up, so a
            stalled server cannot hang the whole crawl.
        encoding: Codec used to decode the response bytes. Defaults to
            ``'gbk'``, the codec this function has always used.

    Returns:
        The decoded response body as ``str``.

    Raises:
        OSError: The request failed or timed out (``urllib.error.URLError``
            is a subclass of ``OSError``).
        UnicodeDecodeError: The body is not valid in ``encoding``.
    """
    req = request.Request(url)
    # The context manager closes the underlying connection even when
    # read() or decode() raises.
    with request.urlopen(req, timeout=timeout) as response:
        return response.read().decode(encoding)
獲取詳情頁url
def get_urls():
    """Return the gallery paths linked from the site's front page.

    Returns:
        A list of site-relative paths, one per matching link, in page order.
        Each path is later appended to ``'http://www.mm131.com/'``.
    """
    url = 'http://www.mm131.com/'
    html = open_url(url)
    # NOTE(review): the original pattern was truncated to
    # r'<a target="_blank" ' with no closing quote, which is a syntax error.
    # This reconstruction assumes the links are absolute URLs on this site
    # and captures only the part after the host, because save() prepends the
    # host again and strips a trailing '.html' -- confirm against the live
    # markup.
    url_pattern = re.compile(
        r'<a target="_blank" href="http://www\.mm131\.com/(.*?\.html)"'
    )
    return url_pattern.findall(html)
保存
def save(urls, base_dir='/home/yzw/mm131/'):
    """Download the image of every page of every gallery in ``urls``.

    For each gallery the detail page is fetched, its title (``<h5>``) and
    page count are read, a directory named after the title is created under
    ``base_dir``, and the first ``<img alt=... src=...>`` of each page is
    saved there under its ``alt`` text. The list of matches found on each
    page is printed as progress output.

    Galleries or pages that cannot be fetched or parsed are reported and
    skipped so that one bad page does not abort the whole run.

    Args:
        urls: Site-relative gallery paths ending in ``.html``.
        base_dir: Directory that receives one sub-directory per gallery.
            Defaults to the path that used to be hard-coded here.

    Returns:
        None.
    """
    site = 'http://www.mm131.com/'
    img_pattern = re.compile(r'<img alt="(.*?)" src="(.*?)"')
    # Accept both the simplified and the traditional character for "page",
    # and capture digits only so int() cannot fail on the result.
    page_pattern = re.compile(r'<span class="page-ch">共(\d+)[页頁]</span>')
    # The closing tag used to be written '<\h5>'; '\h' is an invalid regex
    # escape and makes re.compile() raise re.error.
    title_pattern = re.compile(r'<h5>(.*?)</h5>')

    for path in urls:
        url = site + path
        try:
            detail_html = open_url(url)
        except (OSError, ValueError) as exc:
            # ValueError covers UnicodeDecodeError from the decode step.
            print('skip %s: %s' % (url, exc))
            continue
        time.sleep(5)  # throttle requests to the site

        page_total = page_pattern.findall(detail_html)
        titles = title_pattern.findall(detail_html)
        if not page_total or not titles:
            print('skip %s: title or page count not found' % url)
            continue

        # Write through an absolute path instead of os.chdir(): the old
        # relative chdir failed once the working directory had moved into a
        # previous gallery's folder.
        gallery_dir = os.path.join(base_dir, titles[0].replace(os.sep, '_'))
        try:
            os.makedirs(gallery_dir, exist_ok=True)
        except OSError as exc:
            print('skip %s: cannot create %s: %s' % (url, gallery_dir, exc))
            continue

        # Page N (N >= 2) of 'dir/123.html' lives at 'dir/123_N.html'.
        stem = path[:-5] if path.endswith('.html') else path
        for page_no in range(1, int(page_total[0]) + 1):
            if page_no == 1:
                # The detail page is page 1; reuse it rather than refetch.
                page_html = detail_html
            else:
                page_url = '%s%s_%d.html' % (site, stem, page_no)
                try:
                    page_html = open_url(page_url)
                except (OSError, ValueError) as exc:
                    print('skip %s: %s' % (page_url, exc))
                    continue

            img_title = img_pattern.findall(page_html)
            print(img_title)
            if not img_title:
                continue

            title, img = img_title[0]
            target = os.path.join(gallery_dir, title.replace(os.sep, '_'))
            try:
                request.urlretrieve(img, target)
            except (OSError, ValueError) as exc:
                print('skip %s: %s' % (img, exc))
                continue
            time.sleep(5)  # throttle between image downloads
開始抓取
# Collect the gallery paths from the front page, then download them.
urls = get_urls()
title = 'mm131'
# exist_ok lets the script be re-run; a plain os.mkdir() raises
# FileExistsError when the directory is left over from an earlier run.
os.makedirs(title, exist_ok=True)
os.chdir(title)
save(urls)
抓取結果
.
.
.
[('紋身小妹夏美醬酥胸半露誘惑十足(圖45)', 'http://img1.mm131.com/pic/2274/45.jpg')]
[('紋身小妹夏美醬酥胸半露誘惑十足(圖46)', 'http://img1.mm131.com/pic/2274/46.jpg')]
[('紋身小妹夏美醬酥胸半露誘惑十足(圖47)', 'http://img1.mm131.com/pic/2274/47.jpg')]
[('紋身小妹夏美醬酥胸半露誘惑十足(圖48)', 'http://img1.mm131.com/pic/2274/48.jpg')]
[('紋身小妹夏美醬酥胸半露誘惑十足(圖49)', 'http://img1.mm131.com/pic/2274/49.jpg')]
[('紋身小妹夏美醬酥胸半露誘惑十足(圖50)', 'http://img1.mm131.com/pic/2274/50.jpg')]
[('紋身小妹夏美醬酥胸半露誘惑十足(圖51)', 'http://img1.mm131.com/pic/2274/51.jpg')]
[('美護士沈夢瑤制服寫真大膽張腿很誘惑(圖1)', 'http://img1.mm131.com/pic/2746/1.jpg')]
[('美護士沈夢瑤制服寫真大膽張腿很誘惑(圖2)', 'http://img1.mm131.com/pic/2746/2.jpg')]
[('美護士沈夢瑤制服寫真大膽張腿很誘惑(圖3)', 'http://img1.mm131.com/pic/2746/3.jpg')]
[('美護士沈夢瑤制服寫真大膽張腿很誘惑(圖4)', 'http://img1.mm131.com/pic/2746/4.jpg')]
.
.
.