進階:獲取豆瓣 Top250 的電影名稱、評分、簡評
運行結果
/Library/Frameworks/Python.framework/Versions/3.5/bin/python3.5 /Users/wjw/PycharmProjects/class9/豆瓣.py
[
<Movie:
score = (9.6)
name = (肖申克的救贖)
quote = (希望讓人自由。)
>,
<Movie:
score = (9.4)
name = (這個殺手不太冷)
quote = (怪蜀黍和小蘿莉不得不說的故事。)
>,
<Movie:
score = (9.4)
name = (阿甘正傳)
quote = (一部美國近現代史。)
>,
<Movie:
score = (9.4)
name = (霸王別姬)
quote = (風華絕代。)
>,
<Movie:
score = (9.5)
name = (美麗人生)
quote = (最美的謊言。)
>,
<Movie:
score = (9.2)
name = (千與千尋)
quote = (最好的宮崎駿,最好的久石讓。 )
>,
<Movie:
score = (9.4)
name = (辛德勒的名單)
quote = (拯救一個人,就是拯救整個世界。)
>,
<Movie:
score = (9.2)
name = (海上鋼琴師)
quote = (每個人都要走一條自己堅定了的路,就算是粉身碎骨。 )
>,
<Movie:
score = (9.3)
name = (機器人總動員)
quote = (小瓦力,大人生。)
>,
<Movie:
score = (9.2)
name = (盜夢空間)
quote = (諾蘭給了我們一場無法盜取的夢。)
>,
<Movie:
score = (9.1)
name = (泰坦尼克號)
quote = (失去的才是永恒的。 )
>,
<Movie:
score = (9.1)
name = (三傻大鬧寶萊塢)
quote = (英俊版憨豆,高情商版謝耳朵。)
>,
<Movie:
score = (9.2)
name = (放牛班的春天)
quote = (天籟一般的童聲,是最接近上帝的存在。 )
>,
<Movie:
score = (9.2)
name = (忠犬八公的故事)
quote = (永遠都不能忘記你所愛的人。)
>,
<Movie:
score = (9.1)
name = (大話西游之大圣娶親)
quote = (一生所愛。)
>,
<Movie:
score = (9.1)
name = (龍貓)
quote = (人人心中都有個龍貓,童年就永遠不會消失。)
>,
<Movie:
score = (9.2)
name = (教父)
quote = (千萬不要記恨你的對手,這樣會讓你失去理智。)
>,
<Movie:
score = (9.2)
name = (亂世佳人)
quote = (Tomorrow is another day.)
>,
<Movie:
score = (9.1)
name = (天堂電影院)
quote = (那些吻戲,那些青春,都在影院的黑暗里被淚水沖刷得無比清晰。)
>,
<Movie:
score = (8.9)
name = (當幸福來敲門)
quote = (平民勵志片。 )
>,
<Movie:
score = (9.0)
name = (搏擊俱樂部)
quote = (邪惡與平庸蟄伏于同一個母體,在特定的時間互相對峙。)
>,
<Movie:
score = (9.0)
name = (楚門的世界)
quote = (如果再也不能見到你,祝你早安,午安,晚安。)
>,
<Movie:
score = (9.1)
name = (觸不可及)
quote = (滿滿溫情的高雅喜劇。)
>,
<Movie:
score = (9.1)
name = (指環王3:王者無敵)
quote = (史詩的終章。)
>,
<Movie:
score = (8.9)
name = (羅馬假日)
quote = (愛情哪怕只有一天。)
>]
Process finished with exit code 0
源代碼
import requests
from lxml import html
class Model(object):
    """Base class that gives subclasses a multi-line, per-attribute ``repr``."""

    def __repr__(self):
        """Return the class name followed by one ``key = (value)`` entry
        for every instance attribute, in ``__dict__`` order."""
        entries = []
        for attr, value in self.__dict__.items():
            entries.append('{0} = ({1})'.format(attr, value))
        joined = '\n '.join(entries)
        return '\n<{0}:\n {1}\n>'.format(self.__class__.__name__, joined)
class Movie(Model):
    """A single film entry: title, rating, one-line quote and cover image URL."""

    def __init__(self):
        """Create a movie with empty/zero placeholder values for every field."""
        super().__init__()
        # Assignment order is kept as-is: Model.__repr__ walks __dict__,
        # so it determines the order in which fields are printed.
        self.name = ''
        self.score = 0
        self.quote = ''
        self.cover_url = ''
def _first_match(node, path):
    """Return the first XPath result for ``path`` under ``node``, or None if nothing matches."""
    matches = node.xpath(path)
    return matches[0] if matches else None


def movie_from_div(div):
    """Build a Movie from one ``<div class="item">`` element.

    Args:
        div: an lxml element supporting ``.xpath()``.

    Returns:
        A Movie whose ``name``, ``score`` and ``quote`` hold the text of the
        first matching ``<span>`` and whose ``cover_url`` holds the poster
        ``src``. A field whose element is absent keeps the default assigned
        by ``Movie.__init__`` rather than raising IndexError, so one entry
        with a missing element (for example no quote) no longer aborts the
        whole scrape.
    """
    movie = Movie()

    text_fields = (
        ('name', './/span[@class="title"]'),
        ('score', './/span[@class="rating_num"]'),
        ('quote', './/span[@class="inq"]'),
    )
    for attr, path in text_fields:
        node = _first_match(div, path)
        if node is not None:
            setattr(movie, attr, node.text)

    img_url = _first_match(div, './/div[@class="pic"]/a/img/@src')
    if img_url is not None:
        # Progress output kept from the original implementation.
        print(img_url)
        movie.cover_url = img_url
    return movie
def movies_from_url(url):
    """Download ``url`` and return one Movie per ``<div class="item">`` on the page.

    Args:
        url: address of the list page to fetch.

    Returns:
        A list of Movie objects, in document order.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            non-success HTTP status. Without the status check an error page
            would be parsed and silently yield an empty list.
    """
    # A timeout keeps an unresponsive server from hanging the script forever.
    page = requests.get(url, timeout=10)
    page.raise_for_status()
    root = html.fromstring(page.content)
    # Each movie on the page lives in its own <div class="item">.
    return [movie_from_div(div) for div in root.xpath('//div[@class="item"]')]
def download_img(url, name):
    """Download ``url`` and write the response body to the file ``name``.

    Args:
        url: address of the resource to download.
        name: path of the file to create; it is opened in binary write mode.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            non-success HTTP status. The status is checked before the file is
            opened so that an error page is never saved as an image.
    """
    r = requests.get(url, timeout=10)
    r.raise_for_status()
    with open(name, 'wb') as f:
        f.write(r.content)
def save_covers(movies):
    """Download the cover of every movie to ``<name>.jpg`` in the working directory.

    Args:
        movies: iterable of objects exposing ``name`` and ``cover_url``.

    Movies with an empty ``cover_url`` (the Movie default) are skipped, since
    there is nothing to download. Path separators in the title are replaced
    with ``_`` so that the title is always used as a single file name.
    """
    for m in movies:
        if not m.cover_url:
            continue
        safe_name = m.name.replace('/', '_').replace('\\', '_')
        download_img(m.cover_url, safe_name + '.jpg')
def main():
    """Scrape the list page, print the parsed movies, then save their covers."""
    top_url = 'https://movie.douban.com/top250'
    parsed = movies_from_url(top_url)
    print(parsed)
    save_covers(parsed)


# Run the scraper only when executed as a script, not when imported.
if __name__ == '__main__':
    main()