# Straight to the code (written in PyCharm):
import os
import re
import urllib.request

import requests
from lxml import etree
# Fetch a web page.
def get_page(url, timeout=10):
    """Download ``url`` and return the response body as text.

    The request is sent with a desktop Chrome ``User-Agent`` header.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before ``requests`` gives
            up. Without a timeout a stalled server would block the script
            indefinitely.

    Returns:
        The decoded response body (``response.text``).

    Raises:
        Any exception raised by ``requests.get`` (including its timeout
        error) propagates to the caller; nothing is caught here.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36'
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    return response.text
# Extract the video page links.
def get_video_urls(page):
    """Return the link targets found in the page's title blocks.

    Args:
        page: HTML source of the page, as text.

    Returns:
        A list with the ``href`` value of every ``<a>`` that is a direct
        child of a ``<div class="tit">``. The list is empty when ``page``
        is empty or no tree could be built from it.
    """
    if not page:
        # Nothing to parse; skip lxml entirely instead of failing on
        # empty input.
        return []
    html = etree.HTML(page)
    if html is None:
        # NOTE(review): etree.HTML appears to return None for input with
        # no parsable content; guard so .xpath is never called on None.
        return []
    return html.xpath('//div[@class="tit"]/a/@href')
# Download a video.
def download_video(real_url, video_name, save_dir='E:/py_workspace/video/v1'):
    """Save the video at ``real_url`` as ``<save_dir>/<video_name>.mp4``.

    If the target file already exists nothing is downloaded. Otherwise the
    data is first written to ``<target>.part`` and only renamed to the
    final name once the transfer has finished, so an interrupted download
    is never mistaken for a complete file on the next run.

    Args:
        real_url: Direct URL of the video file.
        video_name: Title used as the file name (without extension).
            Characters that are not allowed in Windows file names, or that
            would act as path separators, are replaced with ``_``.
        save_dir: Directory the file is stored in; created if missing.

    Raises:
        Any error raised by ``urllib.request.urlretrieve`` or by the file
        system operations propagates to the caller.
    """
    # video_name ends up in a file path, so neutralise separators and the
    # characters Windows rejects in file names.
    safe_name = re.sub(r'[\\/:*?"<>|\r\n\t]', '_', video_name).strip()
    os.makedirs(save_dir, exist_ok=True)
    path = os.path.join(save_dir, '{}.mp4'.format(safe_name))

    if os.path.exists(path):
        print('已經(jīng)存在')
        return

    print('正在下載:{}'.format(video_name))
    partial_path = path + '.part'
    try:
        urllib.request.urlretrieve(real_url, partial_path)
        # Publish the file under its final name only after a full download.
        os.replace(partial_path, path)
    finally:
        # After a successful rename the partial file is gone; this only
        # cleans up what a failed transfer left behind.
        if os.path.exists(partial_path):
            os.remove(partial_path)
    print('{}下載完畢'.format(video_name))
# Patterns used to scrape a single video page; compiled once instead of
# once per link.
_REAL_URL_RE = re.compile(' <param name="FlashVars" value=".*?videoUrl=(.*?)">')
_VIDEO_NAME_RE = re.compile('<h2>(.*?)</h2>')


def main(url='http://www.v1.cn'):
    """Download every video linked from the page at ``url``.

    The start page is fetched, its title links are collected, and each
    link ending in ``.shtml`` is treated as a video page: the page is
    fetched, the real video address and the title are extracted with the
    two module-level patterns, and the video is downloaded.

    Args:
        url: Site root. Links found on the page are appended to it
            unchanged to form the video page address.
    """
    page = get_page(url)
    for video_url in get_video_urls(page):
        if not str(video_url).endswith('.shtml'):
            continue
        video_page_url = url + video_url
        video_page = get_page(video_page_url)

        # Real address of the video file.
        real_url_match = _REAL_URL_RE.search(video_page)
        # Title of the video.
        name_match = _VIDEO_NAME_RE.search(video_page)
        if real_url_match is None or name_match is None:
            # A page without the expected markup must not abort the whole
            # run; report it and move on to the next link.
            print('skipped (no video found): {}'.format(video_page_url))
            continue

        download_video(real_url_match.group(1), name_match.group(1))


if __name__ == '__main__':
    main()