使用urllib,urllib2,beautifulsoup.
執行下面代碼,輸入要查找的美劇名即可:
查詢到所有結果并遍歷每一個結果中不同視頻格式的所有下載鏈接
# coding: utf-8
import urllib
import urllib2
from bs4 import BeautifulSoup
import sys
# Python 2 only: sys.setdefaultencoding is removed from the sys namespace at
# interpreter start-up, so the module is reloaded to make it available again.
reload(sys)
# Switch the process-wide default used for implicit str <-> unicode
# conversions to UTF-8.
sys.setdefaultencoding('utf8')
# Base URL of the site being scraped; the search path and the relative hrefs
# taken from result pages are appended to it.
host= "http://www.meijutt.com"
def getUrlRespHtml(url,data=None):
    """Fetch ``url`` and return the raw response body.

    The request goes through a cookie-aware opener and carries browser-like
    headers. When ``data`` is truthy it is form-encoded with
    ``urllib.urlencode`` and sent as the request body (urllib2 then issues a
    POST); otherwise the request has no body.

    Args:
        url: Absolute URL to request. It is also sent as the ``Referer``.
        data: Optional mapping (or sequence of pairs) of form fields.

    Returns:
        The response body exactly as read from the connection (not decoded).

    Raises:
        urllib2.URLError: Propagated from urllib2 when the request fails.
    """
    req = urllib2.Request(url)
    heads = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Charset': 'GB2312,utf-8;q=0.7,*;q=0.7',
        'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
        'Cache-Control': 'max-age=0',
        # A Host header is a bare host[:port]. The module-level ``host``
        # constant includes the "http://" scheme, so take the value from the
        # request that is actually being sent instead.
        'Host': req.get_host(),
        'Connection': 'keep-alive',
        'Referer': url,
        'User-Agent': 'Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.14) Gecko/20110221 Ubuntu/10.10 (maverick) Firefox/3.6.14',
    }
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor())
    opener.addheaders = heads.items()

    # Passing None as the body is the same as omitting it, so a single
    # open() call covers both the with-data and without-data cases.
    payload = urllib.urlencode(data) if data else None
    response = opener.open(req, payload)
    try:
        return response.read()
    finally:
        # Release the connection even if read() raises.
        response.close()
def get_bt(url):
? ?html = getUrlRespHtml(url)
? ?bs_html = BeautifulSoup(html)
? ?download_list = bs_html.find_all('div','down_list')
? ?for index,down_list in enumerate(download_list):
? ? ? ?if len(download_list)>1:
? ? ? ? ? ?print '<<<<<視頻格式%s>>>>>'%(index+1)
? ? ? ?for li in down_list.find_all('li'):
? ? ? ? ? ?f_attrs = li.find('input','down_url').attrs
? ? ? ? ? ?print f_attrs.get('value')
def get_bts(searchword):
? ?data ={'searchword':searchword}
? ?url = host+'/search.asp'
? ?html=getUrlRespHtml(url,data)
? ?bs = BeautifulSoup(html)
? ?cn_box2 = bs.find_all('div','cn_box2')
? ?print '\n搜索結(jié)果:%s\n'%len(cn_box2)
? ?for index,div in enumerate(cn_box2):
? ? ? ?print '------搜索結(jié)果%s------'%(index+1)
? ? ? ?attrs = div.a.attrs
? ? ? ?title = attrs.get('title')
? ? ? ?href = attrs.get('href')
? ? ? ?print title,(host+href)
? ? ? ?get_bt(host+href)
? ? ? ?print '------end--------\n'
# Prompt for a show title and print the download links of every search hit.
searchword=raw_input("請輸入要搜索的美劇:")
# raw_input returns bytes in the console's encoding. Decode them explicitly
# (falling back to UTF-8 when stdin reports no encoding, e.g. when piped)
# instead of letting .encode() implicitly decode with the process default,
# which fails on consoles that are not UTF-8. get_bts still receives GB2312
# bytes, as before.
input_encoding = sys.stdin.encoding or 'utf8'
get_bts(searchword.decode(input_encoding).encode('gb2312'))