#-*- coding: UTF-8 -*-
import requests
import os
from bs4 import BeautifulSoup
from selenium import webdriver
import time
# Helper class that requests album images and saves them to disk
class BeautifulUtil:
    """Scrape album cover images from a rendered web page and save them.

    The page at ``web_url`` is rendered with PhantomJS through Selenium, the
    ``<li>`` items below the element with id ``m-song-module`` (inside the
    ``g_iframe`` frame) are parsed with BeautifulSoup, and each cover image
    not already present in ``path`` is downloaded there.
    """

    # Default location of the PhantomJS executable. Override it per instance
    # with the ``phantomjs_path`` constructor argument.
    phantomjs_path = '/Users/syf/Store/python/phantomjs-2.1.1-macosx/bin/phantomjs'

    # Seconds to wait for an HTTP response; without a timeout a stalled
    # server would block the scraper forever.
    request_timeout = 30

    def __init__(self, url, path, phantomjs_path=None):
        """Store the page URL and the output directory.

        Args:
            url: Address of the page to scrape.
            path: Directory the images are written to.
            phantomjs_path: Optional path to the PhantomJS executable; when
                omitted the class-level default is used.
        """
        self.web_url = url
        self.path = path
        if phantomjs_path is not None:
            self.phantomjs_path = phantomjs_path

    def mkdir(self):
        """Create the output directory if nothing exists at that path.

        Missing parent directories are created as well.

        Returns:
            True if the directory was created, False if the path already
            existed.
        """
        path = self.path.strip()
        if os.path.exists(path):
            print("文件本地已存在")
            return False
        print("文件不存在瓶蝴,需要?jiǎng)?chuàng)建")
        os.makedirs(path)
        return True

    def request(self, url):
        """Issue an HTTP GET for ``url`` and return the response object."""
        return requests.get(url, timeout=self.request_timeout)

    def get_files(self):
        """Return the names of all entries in the output directory."""
        # Strip the path the same way mkdir() does so both methods always
        # refer to the same directory.
        return os.listdir(self.path.strip())

    def save_img(self, url, name):
        """Download ``url`` and write the response body to the file ``name``.

        Args:
            url: Address of the image to download.
            name: Destination file path; an existing file is overwritten.
        """
        print("開始請(qǐng)求數(shù)據(jù)....")
        img = self.request(url)
        print("開始保存圖片")
        # 'wb' (not 'ab'): appending to an existing file would corrupt the
        # image. The context manager closes the file even if write() fails.
        with open(name, 'wb') as f:
            f.write(img.content)
        print("圖片保存成功")
        # Short pause between downloads.
        time.sleep(0.2)

    def deal_url(self, str):
        """Return the URL with everything from the first ``?`` removed.

        A URL without a query string is returned unchanged.
        """
        # The parameter name shadows the builtin but is kept so existing
        # keyword callers keep working.
        return str.partition('?')[0]

    def spider(self):
        """Download every album cover that is not yet in the output directory.

        Files are written to ``self.path`` through explicit joined paths, so
        the process working directory is left untouched.
        """
        # NOTE(review): webdriver.PhantomJS is only available in older
        # Selenium releases - confirm the pinned Selenium version.
        driver = webdriver.PhantomJS(self.phantomjs_path)
        try:
            driver.get(self.web_url)
            # The album list lives inside this iframe.
            driver.switch_to.frame('g_iframe')
            html = driver.page_source
        finally:
            # Always shut the browser process down, even if loading fails.
            driver.quit()

        self.mkdir()
        target_dir = self.path.strip()
        # A set gives constant-time membership tests inside the loop.
        existing = set(self.get_files())

        all_li = BeautifulSoup(html, 'lxml').find(id='m-song-module').find_all('li')
        for li in all_li:
            album_img = self.deal_url(li.find('img')['src'])
            # Cap the album name at 50 characters to keep file names short.
            album_name = li.find('p', class_='dec')['title'][:50]
            album_date = li.find('span', class_='s-fc3').text
            # '/' and ':' are removed from the album name before it is used
            # in the file name.
            photo_name = album_date + ' - ' + album_name.replace('/', '').replace(':', '') + '.jpg'
            print(album_img, photo_name)
            if photo_name in existing:
                print("已經(jīng)存在圖片两残,無需下載")
            else:
                self.save_img(album_img, os.path.join(target_dir, photo_name))
# Run the scraper when the file is executed as a script
if __name__ == '__main__':
    # Page to scrape and directory the images are written to.
    album_page = 'http://music.163.com/#/artist/album?id=11127&limit=120'
    output_dir = '/Users/syf/Desktop/photo_img'
    BeautifulUtil(album_page, output_dir).spider()
# demo
# Run result:
# png