最近剛接觸 Python,解決了一個以前無法解決的問題,就是下載蘋果開源項目的代碼問題。話不多說,送上我寫的腳本,希望對你有用:
#!/usr/bin/python
# coding=utf-8
import requests
import os
import re
import sys, getopt
def download_file(url, home_path):
    """Recursively mirror an HTML directory listing into a local directory.

    Fetches ``url`` with ``requests``, scans the returned HTML line by line
    for table rows that link to an entry, downloads every linked file into
    ``home_path`` and calls itself for every linked sub-directory.

    Args:
        url: Address of the directory listing. A trailing "/" is appended
            when it is missing.
        home_path: Local directory that receives the files. A trailing "/"
            is appended when it is missing. The directory itself is not
            created here, so it must already exist.

    Note:
        Entry names are recognised as word characters with at most one "."
        in between, so names containing other characters (for example "-")
        or more than one "." are not picked up.
    """
    # Local import: only needed here, keeps this function self-contained.
    import shutil

    if not home_path.endswith("/"):
        home_path += "/"
    if not url.endswith("/"):
        url += "/"
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("url: " + url)
    print("home_path: " + home_path)

    # Compile the patterns once instead of once per HTML line.
    row_re = re.compile(r'^(<tr>){1}(<td.*</td>){3}(</tr>){1}$')
    link_re = re.compile(r'<td valign="top"><a href=".*"><img')
    file_token_re = re.compile(r'"\w+\.?\w*"')
    dir_token_re = re.compile(r'"\w+\.?\w*/"')

    html = requests.get(url).text

    # Filter the listing: only complete three-cell table rows that contain
    # an entry link are of interest.
    for line in html.split("\n"):
        links = link_re.findall(line)
        if not (row_re.match(line) and links):
            continue
        link = links[0]

        # First handle entries that can be downloaded directly. A file link
        # yields exactly two quoted tokens: the valign value and the href.
        tokens = file_token_re.findall(link)
        if len(tokens) == 2:
            file_name = tokens[1].replace('"', "")
            file_path = home_path + file_name
            file_r = requests.get(url + file_name)
            if file_r.status_code != 200:
                # Do not store an HTTP error page as if it were the file.
                print("skip file " + file_name + ": HTTP " + str(file_r.status_code))
            else:
                if os.path.exists(file_path):
                    os.remove(file_path)
                with open(file_path, "wb") as code:
                    code.write(file_r.content)
                print("write file " + file_name + " at:" + file_path)

        # Then handle sub-directories (href ends with "/").
        dir_tokens = dir_token_re.findall(link)
        if dir_tokens:
            dir_name = dir_tokens[0].replace('"', "")
            sub_path = home_path + dir_name
            if os.path.isdir(sub_path):
                # shutil.rmtree runs in-process and finishes before mkdir;
                # a shell "rm -rf" built from a scraped name would be both
                # injectable and racy.
                shutil.rmtree(sub_path)
            os.mkdir(sub_path)
            download_file(url + dir_name, sub_path)
def main(argv):
    """Parse command-line options and start the download.

    Recognised options:
        -h, --help      print the usage text and exit with status 0
        -u, --url       address of the directory listing to download
        -o, --output    local directory that receives the files

    Args:
        argv: Argument list without the program name (``sys.argv[1:]``).

    Raises:
        SystemExit: after printing help (status 0), or with status 2 when
            an option is invalid or ``--url`` / ``--output`` is missing.
    """
    usage = """
-u --url: 輸入的url
-o --output: 保存的路徑
"""
    url = ""
    home_path = ""

    # Keep only the parsing call inside the try block so that unrelated
    # errors are not mistaken for option errors.
    try:
        options, _args = getopt.getopt(argv, "hu:o:", ["help", "url=", "output="])
    except getopt.GetoptError as err:
        print("error: " + str(err))
        print(usage)
        sys.exit(2)

    for option, value in options:
        if option in ("-h", "--help"):
            # Exit right after printing help instead of falling through to
            # the other option checks.
            print(usage)
            sys.exit()
        elif option in ("-u", "--url"):
            url = value
        elif option in ("-o", "--output"):
            home_path = value

    # Without both values download_file would be called with empty strings,
    # which turns the output path into "/".
    if not url or not home_path:
        print("error: both -u/--url and -o/--output are required")
        print(usage)
        sys.exit(2)

    download_file(url, home_path)
# Script entry point: when this file is executed directly, hand the
# command-line arguments (without the program name) to main().
if __name__ == "__main__":
    cli_args = sys.argv[1:]
    main(cli_args)
使用方法:
python Test.py -u "https://opensource.apple.com/source/libdispatch/libdispatch-187.10/" -o "你要保存代碼的地方"