- 單線程,多線程下載某云音樂
import re
import urllib.request
import requests
from bs4 import BeautifulSoup
import os
import time
from Threads import BaseThread
def PATH(p):
    """Return the absolute form of ``p`` joined onto this file's directory."""
    base_dir = os.path.dirname(__file__)
    return os.path.abspath(os.path.join(base_dir, p))
'''
https://music.163.com/playlist?id= 得到播放列表
http://music.163.com/song/media/outer/url?id= 得到下載鏈接
urllib.request.urlretrieve 把遠程的mp3文件下載到本地
'''
class Music163:
    """Scrape a music.163.com playlist page and download its tracks as mp3 files.

    Files are written to an ``mp3`` directory resolved through the
    module-level ``PATH`` helper, named ``<title>.mp3``.
    """

    def __init__(self):
        pass

    def get_music_163(self, id):
        """Return the tracks listed on a playlist page.

        Args:
            id: Playlist id as a string; it is appended to the playlist URL.

        Returns:
            A list of ``[song_id, title]`` lists, one per ``<a>`` element
            found inside the page's ``<ul class="f-hide">`` element. The
            list is empty when that ``<ul>`` is not present in the response.
        """
        user_agent = 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.82 ' \
                     'Safari/537.36 '
        headers = {'User-Agent': user_agent}
        # Must be a keyword argument: the second positional parameter of
        # requests.get is ``params``, which would put the User-Agent into
        # the query string instead of the request headers.
        data = requests.get("https://music.163.com/playlist?id=" + id, headers=headers).text
        soup = BeautifulSoup(data, 'lxml')
        song_list = soup.find("ul", {"class": "f-hide"})
        if song_list is None:
            return []
        # Group 1: text after "id=" up to the closing quote of the tag.
        # Group 2: the anchor's inner content (the title).
        pattern = re.compile('<a .*?id=(.*?)">(.*?)</a>', re.S)
        temp = []
        for anchor in song_list.find_all("a"):
            items = re.findall(pattern, str(anchor))
            if items:  # skip anchors that do not carry an id
                temp.append([items[0][0], items[0][1]])
        return temp

    # Batch download
    def download(self, value):
        """Download every track in ``value`` sequentially.

        Args:
            value: Iterable of ``[song_id, title]`` pairs as returned by
                ``get_music_163``.
        """
        for song in value:
            self.get(song)

    # Single download
    def get(self, value):
        """Download one track unless its mp3 file already exists.

        Args:
            value: A ``[song_id, title]`` pair; the title is used as the
                file name and the id is placed into the download URL.
        """
        target = PATH("mp3/" + value[1] + ".mp3")
        if os.path.isfile(target):
            print("%s已經(jīng)被下載了" % value[1])
            return
        # urlretrieve does not create missing directories.
        os.makedirs(os.path.dirname(target), exist_ok=True)
        url = 'http://music.163.com/song/media/outer/url?id=' + value[0] + '.mp3'
        urllib.request.urlretrieve(url, target)
        print("%s下載成功" % value[1])
# 多線程
def multi_thread():
    """Download the hard-coded playlist with one thread per track.

    Fetches the track list, starts a thread for every track, waits for all
    of them to finish and prints the elapsed wall-clock time.
    """
    # Local import keeps this function independent of the file's import header.
    import threading

    playlist_id = "2786226719"  # id of the playlist to download
    start_time = time.time()
    mc = Music163()
    data = mc.get_music_163(playlist_id)
    # Hand the thread the callable and its argument separately. Writing
    # mc.get(song) here would perform the download immediately in the
    # calling thread and give the thread object only the None it returns.
    # threading.Thread is used because BaseThread's constructor contract is
    # not visible from this file.
    threads = [threading.Thread(target=mc.get, args=(song,)) for song in data]
    for worker in threads:
        worker.start()
    for worker in threads:
        worker.join()
    end_time = time.time()
    print("共耗時(shí)%.2f" % (end_time - start_time) + "秒")
# 多線程47秒
# 運行單線程
def run():
    """Download the hard-coded playlist one track at a time and print the elapsed time."""
    playlist_id = "2786226719"  # id of the playlist to download
    started = time.time()
    client = Music163()
    tracks = client.get_music_163(playlist_id)
    client.download(tracks)
    elapsed = time.time() - started
    print("共耗時(shí)%.2f" % elapsed + "秒")
# 單線程43秒
if __name__ == '__main__':
    # Script entry point: runs the threaded variant.
    # Call run() here instead to time the sequential variant.
    multi_thread()
- 單線程共下載100首歌,耗時9.09秒
- 多線程共下載100首歌,耗時9.60秒
- 協程下載的代碼
# Apply gevent's monkey patches before importing anything that loads socket/ssl.
from gevent import monkey
monkey.patch_all()

import os
import time
import urllib.request
from multiprocessing import Process

import gevent

import BaseMusic163
def PATH(p):
    """Return the absolute form of ``p`` joined onto this file's directory."""
    base_dir = os.path.dirname(__file__)
    return os.path.abspath(os.path.join(base_dir, p))
'''
協程發請求,
'''
class Producer(object):
    """Download the hard-coded playlist using one gevent greenlet per track.

    Constructing an instance runs the whole job, so the class itself can be
    passed as a ``multiprocessing.Process`` target.
    """

    def __init__(self):
        self._rungevent()

    def _rungevent(self):
        """Fetch the track list, download all tracks concurrently, print the elapsed time."""
        playlist_id = "2786226719"  # id of the playlist to download
        start_time = time.time()
        mc = BaseMusic163.Music163()
        data = mc.get_music_163(playlist_id)
        # Pass the callable and its argument separately; writing
        # self.produce(song) would download synchronously right here and
        # spawn a greenlet around the None it returns.
        # Original author's note: Windows has a 1024-port limit.
        jobs = [gevent.spawn(self.produce, song) for song in data]
        gevent.joinall(jobs)
        end_time = time.time()
        print("共耗時(shí)%.2f" % (end_time - start_time) + "秒")

    def produce(self, value):
        """Download one track unless its mp3 file already exists.

        Args:
            value: A ``[song_id, title]`` pair; the title is used as the
                file name and the id is placed into the download URL.
        """
        target = PATH("mp3/" + value[1] + ".mp3")
        if os.path.isfile(target):
            print("%s已經(jīng)被下載了" % value[1])
            return
        # urlretrieve does not create missing directories.
        os.makedirs(os.path.dirname(target), exist_ok=True)
        url = 'http://music.163.com/song/media/outer/url?id=' + value[0] + '.mp3'
        urllib.request.urlretrieve(url, target)
        print("%s下載成功" % value[1])
def main():
    """Start a child process that constructs ``Producer`` and thereby runs the download job."""
    worker = Process(target=Producer)
    worker.start()
if __name__ == "__main__":
    # Script entry point for the gevent variant.
    main()
-
下載時間
結論
- 昨天測試,發現是協程>多線程>單線程
- 今天測試卻是:多線程>協程>單線程
- 當然也會出現單線程耗時反而比多線程耗時短的情況
- 一直流傳多進程+協程,可以解決python的GIL問題,因為本次測試的數據不多,使用的也是單進程+協程的方式,后續對協程的測試,有機會進行大量數據的測試,采用多進程+協程的方式進行測試
- 源碼獲取