import requests
import time
def getHtmlText(url):
    """Fetch a page and return the first 1000 characters of its text.

    Args:
        url: Address passed to ``requests.get``.

    Returns:
        The first 1000 characters of the decoded response body, or the
        fixed error string below when fetching or decoding fails.
    """
    try:
        r = requests.get(url, timeout=30)
        # Turn HTTP error statuses into exceptions so they reach the handler.
        r.raise_for_status()
        # Decode with the encoding detected from the body instead of the
        # one derived from the response headers.
        r.encoding = r.apparent_encoding
        return r.text[:1000]
    except Exception:
        # This used to be a bare ``except:``, which also swallowed
        # KeyboardInterrupt and SystemExit (e.g. Ctrl-C while waiting on
        # the 30 second timeout).
        # NOTE(review): the message appears to contain a stray pinyin
        # annotation "(chǎn)" -- possibly a copy/paste artifact; confirm
        # the intended text before changing it.
        return "產(chǎn)生異常"
if __name__ == "__main__":
    # Script entry point: note the start time, then print the beginning of
    # the fetched product page.
    # NOTE(review): s_time is not read again in the visible code.
    s_time = time.time()
    url = "https://item.jd.com/100003717483.html"
    print(getHtmlText(url))
import requests
import os
import traceback
def getPicture(url, root, path):
    """Download *url* and save the response body to *path*.

    Progress and failures are reported with ``print``; request and
    file-system errors are not raised to the caller.

    Args:
        url: Address of the resource to download.
        root: Directory that is created (including missing parents) when
            it does not exist yet.
        path: Destination file. An existing file is left untouched.

    Returns:
        None.
    """
    try:
        # makedirs handles nested directories, which os.mkdir could not.
        os.makedirs(root, exist_ok=True)
        if os.path.exists(path):
            print("文件已存在")
            return
        try:
            r = requests.get(url, timeout=30)
            r.raise_for_status()
        except requests.RequestException:
            # NOTE(review): the message appears to contain a stray pinyin
            # annotation "(chǎn)" -- possibly a copy/paste artifact;
            # confirm the intended text before changing it.
            print("產(chǎn)生異常")
            return
        # The body is written as raw bytes, so no text encoding is set on
        # the response; the ``with`` block closes the file on exit.
        with open(path, 'wb') as f:
            f.write(r.content)
        print("保存成功")
    except Exception as e:
        print(str(e))
        print("文件存取錯誤")
if __name__ == "__main__":
    # Script entry point: save one picture into the target directory, named
    # after the last path segment of its URL.
    url = "http://img13.360buyimg.com//n0/jfs/t1/60838/7/2192/143412/5d074d65E15353d21/12dd3bb5a9658f3c.jpg"
    root = "C://users//minghua//documents//get//"
    path = root + url.rsplit('/', 1)[-1]
    getPicture(url, root, path)
import requests
from bs4 import BeautifulSoup
import bs4
def getHtmlText(url):
    """Return the decoded text of the page at *url*.

    Any exception raised while fetching or decoding is printed and an
    empty string is returned instead.
    """
    page = ""
    try:
        response = requests.get(url, timeout=30)
        # HTTP error statuses become exceptions and take the failure path.
        response.raise_for_status()
        # Decode with the encoding detected from the body.
        response.encoding = response.apparent_encoding
        page = response.text
    except Exception as err:
        print(err)
    return page
def makeUniList(text, ulist):
    """Append the first three cell values of each table-body row to *ulist*.

    Args:
        text: HTML markup to parse.
        ulist: List extended in place with ``[cell0, cell1, cell2]``
            entries (the ``.string`` of the first three ``<td>`` cells),
            one entry per qualifying row.

    Returns:
        None. When *text* has no ``<tbody>`` (for example an empty string),
        *ulist* is left unchanged.
    """
    soup = BeautifulSoup(text, 'html.parser')
    tbody = soup.find('tbody')
    if tbody is None:
        # find() returns None when nothing matches; previously this
        # crashed with AttributeError on ``.children``.
        return
    for tr in tbody.children:
        # Direct children also include plain text nodes; only tags can
        # hold cells.
        if not isinstance(tr, bs4.element.Tag):
            continue
        td = tr.find_all('td')
        if len(td) < 3:
            # Rows with fewer than three cells cannot supply all three
            # values; skip them instead of raising IndexError.
            continue
        ulist.append([td[0].string, td[1].string, td[2].string])
def printUniList(ulist, num):
    """Print a header line followed by at most *num* rows of *ulist*.

    Args:
        ulist: Sequence of rows; the first three items of each row are
            printed as tab-separated, centred columns.
        num: Maximum number of rows to print. When *ulist* holds fewer
            rows, only the available ones are printed. Values below 1
            print the header only.

    Returns:
        None.
    """
    tplt = "{0:^10}\t{1:{3}^20}\t{2:^10}"
    # U+3000 (ideographic space) is the fill character for the middle
    # column -- presumably so that CJK names line up.
    fill = chr(12288)
    print(tplt.format("排名", "名稱", "位置", fill))
    # Slicing avoids the IndexError that indexing range(num) raised on
    # short lists; max() keeps a negative num from slicing from the end.
    for row in ulist[:max(num, 0)]:
        print(tplt.format(row[0], row[1], row[2], fill))
def main():
    """Fetch the hard-coded page, collect its table rows and print 30 of them."""
    page_url = "http://www.zuihaodaxue.com/zuihaodaxuepaiming2018.html"
    rows = []
    makeUniList(getHtmlText(page_url), rows)
    printUniList(rows, 30)
# Run the scrape as soon as this code is executed.
# NOTE(review): unlike the two earlier script sections, this call is not
# behind an ``if __name__ == "__main__":`` guard, so importing the module
# also triggers it -- confirm whether that is intended.
main()
# Last edited on:
# © Copyright belongs to the author. For reprints or content collaboration, please contact the author.