由于需要看通知內容,但是每次從手機上看需要輸入賬號密碼之后才能看,于是便萌生了用 Python 獲取通知內容并定時發送到自己郵箱的想法。
實現并不算復雜,用 BeautifulSoup 抓取內容,Redis 記錄文章是否閱讀過,Jinja2 是郵件內容的模板引擎。用和風天氣
的 API 在郵件正文前加了個天氣預報。
只是有一個點要注意,啟動程序前要先留意 locale (Linux 命令)輸出的內容是否為 zh_CN.UTF-8。最后我是寫了個 shell 腳本啟動并在運行前 export LC_ALL=zh_CN.UTF-8。
Redis
Welcome to Jinja2 — Jinja2 Documentation (2.9)
yagmail 0.10.190 : Python Package Index
API說明文檔 | 和風天氣
Beautiful Soup 4.4.0 文檔 — beautifulsoup 4.4.0 文檔
程序在啟動的時候加 -t
的參數只會給自己的郵箱發郵件,用作測試(當然需要提前配置好)
實現如下:
主文件
#!/usr/bin/python3
# -*- coding:utf-8 -*-
'''
【留意!】
啟動程序前要先留意 locale (Linux 命令)輸出的內容是否為zh_CN.UTF-8
建議寫 shell 腳本啟動并在運行前 export LC_ALL=zh_CN.UTF-8
'''
from conf import *
from sys import argv
from urllib.parse import unquote
from bs4 import BeautifulSoup
from jinja2 import Environment,FileSystemLoader,select_autoescape
import re,os,json,time,redis,yagmail,requests
# One shared HTTP session: get_index() logs in through it and parse_html()
# reuses it for the article detail requests.
session = requests.Session()
# The header name has to be spelled 'User-Agent'. The former 'UserAgent' key
# was sent as an unrelated header, so the browser UA string never took effect.
session.headers.update({
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:57.0) Gecko/20100101 Firefox/57.0',
})

# Templates are loaded from ./template relative to the current working
# directory, so the script must be started from the directory containing it.
jinja2_env = Environment(
    loader=FileSystemLoader(os.path.join(os.getcwd(), 'template')),
    autoescape=select_autoescape(['html']),
)
def printf(string):
    """Print ``string`` prefixed with the current local date and time.

    Falsy values (for example an empty string or ``None``) print nothing.
    """
    if not string:
        return
    stamp = time.strftime("%Y-%m-%d %H:%M:%S : ", time.localtime())
    print(stamp + string)
def article_id_exist(id):
    """Report whether an article id was seen before, recording it if new.

    Args:
        id: Article id as a string of digits. (The name shadows the builtin
            but is kept so existing callers are unaffected.)

    Returns:
        ``True`` when the id is already stored in Redis. ``False`` when the
        id is empty, is not purely numeric, or was not stored yet; in the
        last case the id is stored as a side effect, so the next call with
        the same id returns ``True``.
    """
    if not id:
        printf('empty article id')
        return False
    if not id.isdigit():
        printf('need number instead of other value type')
        return False
    r = redis.StrictRedis(host='localhost', port=6379, db=0)
    # SET with nx=True writes the marker only when the key is absent and
    # reports whether it did, so "check and mark" is one atomic round trip
    # instead of a separate GET followed by SET.
    newly_recorded = r.set(id, "True", nx=True)
    return not newly_recorded
def get_weather_data():
    """Fetch the weather report for the configured city from the weather API.

    Returns:
        The decoded JSON document on success. An empty list when the request
        fails, the server does not answer with status 200, or the body
        cannot be decoded as UTF-8 JSON.
    """
    query = {'city': WEATHER_API_CITY, 'key': WEATHER_API_KEY}
    try:
        # Passing ``params`` lets requests build and percent-encode the query
        # string instead of concatenating it by hand; the timeout keeps an
        # unresponsive API from blocking the whole run.
        weather_request = requests.get(WEATHER_API_URL, params=query, timeout=10)
    except requests.RequestException as err:
        printf('get weather data error %s' % err)
        return []
    if weather_request.status_code != 200:
        printf('get weather data error %s' % weather_request.status_code)
        return []
    try:
        return json.loads(weather_request.content.decode(encoding='utf-8'))
    except ValueError as err:
        # Covers both an undecodable body and malformed JSON.
        printf('get weather data error %s' % err)
        return []
def get_index():
    """Log in to the notice site and download the article list page.

    Returns:
        The raw bytes of the ``category=4`` article list page, or ``None``
        when the login or the list request does not succeed.
    """
    login_url = ROOT_URL + '/UserLogin.aspx'
    # The login page is requested once before posting the form
    # (presumably to obtain the session cookies; confirm against the site).
    session.get(url=login_url)
    result = session.post(login_url, data=LOGIN_DATA)
    # Login counts as successful only when the final URL is the site root.
    if result.status_code != 200 or result.url != ROOT_URL + '/':
        printf('login failed ' + str(result.status_code))
        return None
    category = session.get(ROOT_URL + '/ArticleList.aspx?category=4')
    if category.status_code != 200:
        # Previously this path returned None without any trace in the log.
        printf('get article list failed ' + str(category.status_code))
        return None
    return category.content
# Attachment links hosted on the news site; group 2 captures the file name.
# Compiled once at import time instead of once per article.
_ATTACHMENT_URL_PATTERN = re.compile(
    r'http://news\.gdut\.edu\.cn/DepartmentUploadFiles/(.+)/files/(.+)'
)


def _collect_attachments(article_content):
    """Return ``{'attach_name', 'attach_url'}`` dicts for attachment links."""
    attachments = []
    # href=True skips anchors without an href attribute, which used to
    # raise KeyError.
    for link in article_content.find_all('a', href=True):
        href = link['href']
        match = _ATTACHMENT_URL_PATTERN.match(href)
        if not match:
            printf('%s do not have file attachment' % (href))
            continue
        attachments.append({
            # unquote() is a no-op for names without percent escapes.
            'attach_name': unquote(match.group(2)),
            'attach_url': href,
        })
    return attachments


def _make_excerpt(article_content, title, author):
    """Return the first 150 characters of the body text, minus title/author."""
    text = ''.join(article_content.getText().split())
    text = text.replace(title, '')
    text = text.replace('單位:' + author, '')
    return text[:150]


def parse_html(html):
    """Extract the unread articles from the article list page.

    Each ``<p>`` inside ``div.articles`` is treated as one article. Articles
    already known to ``article_id_exist`` are skipped; for the others the
    detail page is downloaded to build an excerpt and an attachment list.

    Args:
        html: Markup of the article list page.

    Returns:
        A list of dicts with the keys ``url``, ``date``, ``title``,
        ``author``, ``excerpt`` and ``attachment``, or ``None`` when ``html``
        is empty or contains no ``div.articles`` element.
    """
    if not html:
        printf('empty html')
        return
    html_soup = BeautifulSoup(html, 'lxml')
    articles = html_soup.find('div', attrs={'class': 'articles'})
    if not articles:
        printf('article not found')
        return
    article_result = []
    for val in articles.find_all('p'):
        try:
            anchor = val.find('a')
            href = anchor['href']
            article_title = anchor['title']
            article_date = val.find_all('span')[1].getText()[:-1]
            article_author = val.span['title']
        except (TypeError, KeyError, IndexError):
            # A <p> without the expected link/span layout is not an article
            # entry; skip it instead of aborting the whole run.
            printf('skip malformed article entry')
            continue
        # NOTE(review): assumes the href ends with a six-character article id
        # and starts with one character to drop (e.g. '.'); verify against
        # the live markup.
        article_id = href[-6:]
        article_url = ROOT_URL + href[1:]
        # NOTE: article_id_exist() stores the id as a side effect, so an
        # article whose detail request fails below is not retried later.
        if article_id_exist(article_id):
            printf('article exist in database %s' % (article_title))
            continue
        article_detail = session.get(url=article_url)
        if article_detail.status_code != 200:
            printf('get article detail error %s' % (article_id))
            continue
        article_soup = BeautifulSoup(article_detail.content, 'lxml')
        article_content = article_soup.find('div', attrs={'id': 'articleBody'})
        if article_content is None:
            # The id is already recorded as seen, so still report the
            # article, just without excerpt and attachments.
            printf('article body not found %s' % (article_id))
            article_attachment = []
            article_excerpt = ''
        else:
            article_attachment = _collect_attachments(article_content)
            article_excerpt = _make_excerpt(
                article_content, article_title, article_author
            )
        article_result.append(
            {
                'url': article_url,
                'date': article_date,
                'title': article_title,
                'author': article_author,
                'excerpt': article_excerpt,
                'attachment': article_attachment,
            }
        )
    return article_result
def main():
    """Build today's notification mail and send it to the recipients.

    The mail consists of a weekday greeting, the weather section, the
    release note and the list of unread articles. When ``-t`` is given on
    the command line only the test recipients receive the mail.
    """
    # Indexed by strftime('%w'): 0 is Sunday, 6 is Saturday.
    welcome_string = [
        '周日:今天是周末的最后一天,好好珍惜時間\n',
        '周一:你從周末的作息里調整過來了嗎?把上周的通知郵件都刪了吧\n',
        '周二:吾日三省吾身\n',
        '周三:生活仍將繼續\n',
        '周四:未來近在咫尺\n',
        '周五:明天就是周末了,加油!\n',
        '周六:你今天打算做什么?別浪費時間\n',
    ]
    welcome_content = welcome_string[int(time.strftime('%w'))]
    update_content = '最近更新:' + VERSION + ':' + ANNOUNCEMENT + '\n'

    weather_data = get_weather_data()
    printf('get weather data finish')
    try:
        report = weather_data['HeWeather5'][0]
        now = report['now']
        forecast = report['hourly_forecast']
    except (TypeError, KeyError, IndexError):
        # get_weather_data() returns [] on failure, and a response may lack
        # these keys; send the mail without the weather section rather than
        # crash before the articles are delivered.
        printf('weather data unavailable, skip weather section')
        weather_content = ''
    else:
        weather_render = jinja2_env.get_template('weather.html')
        weather_content = weather_render.render(now=now, forecast=forecast)

    index = get_index()
    article_data = parse_html(index)
    article_render = jinja2_env.get_template('article.html')
    if article_data:
        article_content = article_render.render(articles=article_data)
    else:
        article_content = article_render.render()

    mail_client = yagmail.SMTP(
        user=SEND_MAIL_USER,
        password=SEND_MAIL_PWD,
        host=SEND_MAIL_HOST,
        port=SEND_MAIL_PORT,
    )
    mail_content = welcome_content + weather_content + update_content + article_content

    # Accept -t anywhere among the arguments, so that adding another
    # argument cannot silently switch a test run to the real recipients.
    if '-t' in argv[1:]:
        recipients, log_prefix = SEND_TO_LIST_TEST, 'sending[test user]: '
    else:
        recipients, log_prefix = SEND_TO_LIST, 'sending : '
    for addr in recipients:
        printf(log_prefix + addr)
        mail_client.send(addr, subject=SEND_MAIL_SUBJECT, contents=mail_content)
        time.sleep(1)


if __name__ == '__main__':
    main()
同級目錄下的 conf.py 的配置文件
#!/usr/bin/python3
# -*- coding:utf-8 -*-
import time
# Settings for the notification mailer; the main script imports every name
# defined here.

# Base address of the notice website.
ROOT_URL = 'http://test.com'

# Form fields posted to the login page.
LOGIN_DATA = {
    '__VIEWSTATE': '/wEPDwUKLTQwOTA4NzE2NmQYAQUeX19Db250cm9sc1JlcXVpcmVQb3N0QmFja0tleV9fFgEFI2N0bDAwJENvbnRlbnRQbGFjZUhvbGRlcjEkQ2hlY2tCb3gxBufpEJuDDaf6eTj0A4Cn2Erf8u98KcGrQqATTB3mEaQ=',
    '__EVENTVALIDATION': '/wEWBQKb37HjDwLgvLy9BQKi4MPwCQL+zqO2BAKA4sljg4IvzC7ksG01o7aN0RZUOKEC4lV0bTeXI4zrbaQsj0c=',
    # Ask school staff for a real account; the credentials below are
    # placeholders and do not work.
    'ctl00$ContentPlaceHolder1$userEmail': 'test',
    'ctl00$ContentPlaceHolder1$userPassWord': 'test',
    'ctl00$ContentPlaceHolder1$CheckBox1': 'on',
    # NOTE(review): this value is already percent-encoded; an HTTP client
    # that encodes form data will encode it a second time. Confirm the
    # server accepts it.
    'ctl00$ContentPlaceHolder1$Button1': '%E7%99%BB%E5%BD%95',
}

# Sender mailbox account.
SEND_MAIL_USER = 'account'
# Password of the sender mailbox.
SEND_MAIL_PWD = 'password'
# SMTP host (Tencent enterprise mail).
SEND_MAIL_HOST = 'smtp.exmail.qq.com'
# SMTP port.
SEND_MAIL_PORT = 465
# Mail subject; the date is fixed when this module is imported.
SEND_MAIL_SUBJECT = time.strftime("%Y-%m-%d", time.localtime()) + '@今日校內通知'

# Regular recipients.
SEND_TO_LIST = [
    'mail@mail.com',
]
# Recipients used when the program is started with the -t option.
SEND_TO_LIST_TEST = ['mail@mail.com']

# Weather API endpoint (HeWeather).
WEATHER_API_URL = 'https://free-api.heweather.com/v5/weather?'
# City for the weather query; pinyin or Chinese characters both work.
WEATHER_API_CITY = 'guangzhou'
# Free-tier key: 4000 calls per day, available after registration.
WEATHER_API_KEY = 'key'

# Release note shown in every mail. The stray characters that had crept in
# before the first comma are removed so they are not mailed out.
ANNOUNCEMENT = '重構,使用模板引擎取代字符串拼接生成郵件內容(https://github.com/ypingcn/)'
VERSION = '2017.09.26'
template文件夾的內容是郵件正文的模板
- article.html
{#- Article section of the mail. Reads `articles`: each item provides url,
    title, author, date, excerpt and attachment (a list of items with
    attach_url and attach_name). When `articles` is empty or undefined the
    fallback paragraph at the bottom is rendered instead. -#}
{%- if articles %}
<p> 今日的新聞通知如下 </p>
<ul>
{%- for article in articles %}
<li>
<a href='{{article.url}}'>
<font color="red"> {{ article.title }} </font>
</a>
{{ article.author }} - {{ article.date }}
{{ article.excerpt }}
{%- for link in article.attachment %}
<a href='{{link.attach_url}}'>{{ link.attach_name }}</a>
{%- endfor %}
</li>
{%- endfor %}
</ul>
{%- else %}
<p> 暫無未讀的新聞通知 </p>
{%- endif %}
- weather.html
{#- Weather section of the mail. Reads `now` (cond.txt, tmp, fl) for the
    current conditions and `forecast`, a list whose items provide date and
    cond.txt. Stray characters that had crept in before the two commas of
    the first line are removed so they no longer show up in the mail. -#}
<p>天氣:{{ now.cond.txt }},氣溫:{{ now.tmp }}℃,體感溫度:{{ now.fl }}攝氏度</p>
<br>未來幾個小時內的天氣預報為:
{%- for hour in forecast %}
<br>{{ hour.date }} : {{ hour.cond.txt }}
{%- endfor %}
寫的不是太好,還是有很多需要改正的地方。以后再作修改。
來自個人 Python 文集