之前在工作中需要用到自動查收郵件和下載附件，於是用 Python 做了一些自動化的嘗試。以下代碼主要實現了兩個功能：一個是下載並解碼郵件正文（print_info 函數），另一個是下載郵件附件（get_att 函數）。所有代碼都已在 Python 3.6 下運行通過，不過需要修改自己郵箱的賬號密碼、郵箱服務器、附件保存地址。
import email
import os
import poplib
from email.header import decode_header
from email.parser import BytesParser
from email.parser import Parser
from email.utils import parseaddr
from email.utils import parsedate
#python郵件讀取
def guess_charset(msg):
    """Return the character set declared for *msg*, or ``None`` if there is none.

    A charset attached to the message object itself (``msg.get_charset()``)
    takes precedence; otherwise the ``charset`` parameter of the
    Content-Type header is used.

    Args:
        msg: An ``email.message.Message`` or one of its sub-parts.

    Returns:
        The value of ``msg.get_charset()`` when it is not ``None``; otherwise
        the lower-cased charset name from the Content-Type header as ``str``,
        or ``None`` when the header or its ``charset`` parameter is missing.
    """
    charset = msg.get_charset()
    if charset is None:
        # Let the email package parse the header parameters. Slicing the raw
        # header after "charset=" would keep surrounding quotes and any
        # trailing parameters (e.g. '"utf-8"; format=flowed'), which is not
        # a valid codec name for bytes.decode().
        charset = msg.get_content_charset()
    return charset
def decode_str(s):
    """Decode an RFC 2047 encoded header value into readable text.

    Every fragment returned by ``decode_header()`` is decoded and the
    results are concatenated, so headers that mix plain text with encoded
    words, or that use several charsets, are returned in full.

    Args:
        s: Raw header value, e.g. ``'=?utf-8?b?5L2g5aW9?='``.

    Returns:
        str: The decoded header text. Bytes that cannot be decoded with the
        declared charset are replaced rather than raising.
    """
    fragments = []
    for value, charset in decode_header(s):
        if isinstance(value, bytes):
            value = _fragment_to_text(value, charset)
        fragments.append(value)
    return ''.join(fragments)


def _fragment_to_text(raw, charset):
    """Turn one bytes fragment from ``decode_header()`` into ``str``."""
    if charset is None:
        # NOTE(review): CPython's decode_header() converts unencoded str
        # fragments of a mixed header to bytes with raw-unicode-escape, so
        # that codec is tried first; latin-1 never fails and is the fallback.
        try:
            return raw.decode('raw-unicode-escape')
        except UnicodeDecodeError:
            return raw.decode('latin-1')
    try:
        return raw.decode(charset, errors='replace')
    except LookupError:
        # The header names a charset Python has no codec for.
        return raw.decode('utf-8', errors='replace')
def get_att(msg, fpath):
    """Save every attachment found in *msg* and return the saved file names.

    All parts of the message are visited with ``msg.walk()``; each part that
    carries a file name and has a decodable payload is written to disk.

    Args:
        msg: A parsed ``email.message.Message``.
        fpath: Prefix concatenated directly in front of each file name, so a
            target directory must end with a path separator.

    Returns:
        list[str]: The file names (without *fpath*) in the order written.
    """
    attachment_files = []
    for part in msg.walk():
        raw_name = part.get_filename()
        if not raw_name:
            continue  # this part is not an attachment
        data = part.get_payload(decode=True)
        if data is None:
            # get_payload(decode=True) returns None for multipart containers;
            # there is nothing to write for them.
            continue
        # The file name may be an RFC 2047 encoded word; make it readable.
        filename = decode_str(raw_name)
        # The name is chosen by the sender: keep only its last component so
        # that something like "../../x" cannot escape the target location.
        filename = os.path.basename(filename.replace('\\', '/'))
        if filename in ('', '.', '..'):
            continue
        print(filename)
        # Binary mode: the payload is raw bytes.
        with open(fpath + filename, 'wb') as att_file:
            att_file.write(data)
        attachment_files.append(filename)
    return attachment_files
def print_info(msg, indent=0):
    """Print a readable outline of *msg* and return its text body.

    At the top level (``indent == 0``) the From, To and Subject headers are
    printed first. Multipart messages are walked recursively with one more
    leading space per nesting level. ``text/plain`` and ``text/html`` leaves
    are decoded and printed; any other leaf is reported by content type only.

    Args:
        msg: A parsed ``email.message.Message`` or one of its sub-parts.
        indent: Nesting depth, used as the number of leading spaces.

    Returns:
        The decoded text of a text leaf; for a multipart message, the text of
        the first text leaf found in it; ``None`` when there is no text part.
    """
    pad = ' ' * indent
    if indent == 0:
        for header in ['From', 'To', 'Subject']:
            value = msg.get(header, '')
            if value:
                if header == 'Subject':
                    value = decode_str(value)
                else:
                    # parseaddr() splits the value into (realname, address).
                    hdr, addr = parseaddr(value)
                    name = decode_str(hdr)
                    value = u'%s <%s>' % (name, addr)
            print('%s%s: %s' % (pad, header, value))

    if msg.is_multipart():
        content = None
        for n, part in enumerate(msg.get_payload()):
            print('%spart %s' % (pad, n))
            print('%s--------------------' % pad)
            part_content = print_info(part, indent + 1)
            if content is None:
                content = part_content
        return content

    content_type = msg.get_content_type()
    if content_type not in ('text/plain', 'text/html'):
        print('%sAttachment: %s' % (pad, content_type))
        return None

    raw = msg.get_payload(decode=True)
    charset = guess_charset(msg)
    # Always end up with str: fall back to UTF-8 when no charset is declared
    # or the declared name is not a codec Python knows, and replace
    # undecodable bytes instead of aborting the whole listing.
    try:
        content = raw.decode(str(charset) if charset else 'utf-8',
                             errors='replace')
    except LookupError:
        content = raw.decode('utf-8', errors='replace')
    print('%sText: %s' % (pad, content + '...'))
    return content
# Account settings: replace with your own mailbox address, password and
# POP3 server.
# The address must not be stored in a variable called ``email``: that would
# rebind the ``email`` package imported at the top of the file.
email_address = '12345678@qq.com'
password = 'abcdefghigk'  # enter the mailbox password here
pop3_server = 'pop.qq.com'  # change to the server of your mail provider
max_messages = 30  # number of newest messages to fetch

server = poplib.POP3_SSL(pop3_server)
try:
    server.set_debuglevel(1)
    print(server.getwelcome().decode('utf-8'))
    server.user(email_address)
    server.pass_(password)
    print('Messages: %s. Size: %s' % server.stat())
    resp, mails, octets = server.list()
    index = len(mails)
    # Walk the mailbox newest-first. POP3 message numbers start at 1, so
    # stop at 1 when the mailbox holds fewer than max_messages messages.
    for i in range(index, max(index - max_messages, 0), -1):
        # retr() returns (response, list of raw lines, octets).
        resp, lines, octets = server.retr(i)
        raw_message = b'\r\n'.join(lines)
        # Parse the message.
        try:
            msg = Parser().parsestr(raw_message.decode('utf-8'))
        except UnicodeDecodeError:
            # Not valid UTF-8 (e.g. an 8-bit body in another charset):
            # let the email package parse the raw bytes instead.
            msg = BytesParser().parsebytes(raw_message)
        # Received date: date1 is the parsed time tuple (None when the Date
        # header is missing or unparseable), date2 the same date as YYYYMMDD.
        date1 = parsedate(str(msg.get('Date', '')))
        date2 = '%04d%02d%02d' % date1[:3] if date1 else ''
        print(msg.get("from"))  # sender
        print_info(msg)  # print the message content
finally:
    # Always close the session, even when a message fails to process.
    server.quit()