思路
- 先從http head中獲取文件的大小
- 將大小分隔成若干份(一個線程下載一份)
- 通過seek將下載的塊的內容寫到文件的對應的位置，對每一個線程下載的數據塊進行拼接
代碼(下載百度首頁的圖片為例)
import requests
import threading
class downloader:
    """Download one file in parallel using HTTP Range requests.

    The file size is read from the ``Content-Length`` header of a HEAD
    request, split into ``num`` byte ranges, and every range is fetched by
    its own thread and written at its own offset in the output file.

    The code only uses constructs that are valid on both Python 2 and
    Python 3 (single-argument ``print(...)``, ``//`` division).
    """

    def __init__(self):
        """Set the download target and query the remote file size.

        Performs a HEAD request immediately; the ``Content-Length`` lookup
        fails if the server does not send that header.
        """
        self.url = "https://ss0.bdstatic.com/5aV1bjqh_Q23odCf/static/superman/img/logo/bd_logo1_31bdc765.png"
        self.num = 8
        self.name = "baidu.png"
        # Serialises seek()+write() on the file object shared by all workers.
        self._lock = threading.Lock()
        r = requests.head(self.url)
        # Total size of the remote file in bytes.
        self.total = int(r.headers['Content-Length'])
        print(self.total)

    def get_range(self):
        """Return the byte range each thread should download.

        Returns:
            A list of ``(start, end)`` tuples. ``end`` is an inclusive byte
            offset, except for the last tuple where it is ``''`` meaning
            "until the end of the file", e.g. ``[(0, 99), (100, 199),
            (200, '')]``.
        """
        chunk = self.total // self.num
        if chunk == 0:
            # Fewer bytes than threads: one open-ended range covers it all.
            return [(0, '')]
        # HTTP Range end offsets are inclusive, so each chunk stops one byte
        # before the next one starts to avoid fetching a byte twice.
        ranges = [(i * chunk, (i + 1) * chunk - 1) for i in range(self.num - 1)]
        # The last range is open-ended so it also picks up the remainder.
        ranges.append(((self.num - 1) * chunk, ''))
        return ranges

    def download(self, start, end):
        """Fetch bytes ``start``-``end`` and write them at offset ``start``.

        Args:
            start: first byte offset of the chunk.
            end: last byte offset (inclusive), or ``''`` for "to the end".

        ``self.fd`` must already be open for binary writing (see ``run``).
        """
        headers = {'Range': 'Bytes=%s-%s' % (start, end), 'Accept-Encoding': '*'}
        res = requests.get(self.url, headers=headers)
        print("%s-%s download success" % (start, end))
        # seek() and write() must happen as one atomic step: all threads
        # share the same file object, so without the lock another thread
        # could move the file position between the two calls.
        with self._lock:
            self.fd.seek(start)
            self.fd.write(res.content)

    def run(self):
        """Download every range in its own thread and assemble the file."""
        self.fd = open(self.name, "wb")
        try:
            workers = []
            for start, end in self.get_range():
                worker = threading.Thread(target=self.download, args=(start, end))
                worker.start()
                workers.append(worker)
            # Wait for every chunk to be written before closing the file.
            for worker in workers:
                worker.join()
        finally:
            self.fd.close()
if __name__ == "__main__":
    # Run the download only when this file is executed as a script.
    job = downloader()
    job.run()
最后編輯于 :
©著作權歸作者所有，轉載或內容合作請聯繫作者