import requests
import json
import re
# HTTP headers sent with every request to the Weibo mobile endpoint.
#
# The long values are assembled from adjacent string literals inside
# parentheses so that each header stays a single line-break-free string
# (a raw newline inside a quoted literal is a syntax error, and a newline
# inside a header value would be rejected by the HTTP layer anyway).
#
# NOTE(security): the cookie is a hard-coded logged-in browser session.
# Session cookies are credentials; prefer loading this value from an
# environment variable or an untracked config file rather than keeping it
# in source control.
headers = {
    # Mobile Safari user agent.
    'User-Agent': (
        'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) '
        'AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 '
        'Mobile/15A372Safari/604.1'
    ),
    'cookie': (
        'SINAGLOBAL=4840874402537.198.1560220210050; '
        'SCF=Al45B-3Yy8PRwpBxWrPVwC4KWFAgGR-K8wmHVOdZAvHlPBrN37LJmYK5XUy-LpOxJIO9sKiJ1DrRFkfWWiJOp9g.; '
        'login=609423641c81693ee710ee69b0d0e34c; _s_tentry=login.sina.com.cn; '
        'Apache=941276552050.3962.1560254570611; '
        'ULV=1560254570632:5:5:5:941276552050.3962.1560254570611:1560254071344; '
        # URL-encoded JSON value; the fragments are joined without a space.
        'webim_unReadCount=%7B%22time%22%3A1560254573029%2C%22dm_pub_total%22%3A'
        '4%2C%22chat_group_pc%22%3A0%2C%22allcountNum%22%3A9%2C%22msgbox%22%3A0%7D; '
        'login_sid_t=5a2760576b886a56beefe6dba6fefe88; cross_origin_proto=SSL; '
        'UOR=ent.china.com.cn,widget.weibo.com,login.sina.com.cn; '
        'SUB=_2A25x--jNDeRhGedJ6VoX9SjKzzyIHXVTcV0FrDV8PUJbmtAKLWfRkW9NViqjNjVIAYg5Zqc3Q5pOtTJOyjna0kGM; '
        'SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9WhiYQ5eDzjWlO-mzTFlCvA55NHD95QpS0zRSo-cSoB7Ws4DqcjZgJH7IGLEBP8fqBtt; '
        'SUHB=0sqwkAiAGL641B; '
        # Single opaque token; the fragments are joined without a space.
        'SRT=D.QqHBJZPtINsgO!Mb4cYGSdM1ibS6dDbOT!kw5ebHNEYdPFidJmSpMERt4EP1RcsrA4kJP-'
        'SNTsVuObS9Vd9KTbSHAPbeiP9oRqiMiQBLWEM1O3bgUcmnTrWi*B.vAflW-P9Rc0lR-'
        'ykSDvnJqiQVbiRVPBtS!r3JZPQVqbgVdWiMZ4siOzu4DbmKPWfTPbMidPm5c9QUm0kUQ94McoDW!msi4uzMD!s4QBOJG9N4-'
        '0kRsi6AZWQVqMmVdigdcW1NDWHMroEAbvtSeECWGA7; '
        'SRF=1560254621; ALF=1591790621; SSOLoginState=1560254621'
    ),
}

# First page of the group feed to crawl (no max_id cursor yet).
url = 'https://m.weibo.cn/feed/group?gid=201109200354798847'
# Runs of ASCII letters, digits, whitespace and the punctuation listed in the
# class are removed from each status text. Raw string so that ``\s`` reaches
# the regex engine instead of being treated as a (deprecated) string escape.
_MARKUP_RE = re.compile(r'[a-zA-Z0-9\s<="_>:/.?%]+')


def _strip_markup(text):
    """Return *text* with every run matched by ``_MARKUP_RE`` removed."""
    # The previous call was ``re.sub(pattern, '', text, re.S)``: the fourth
    # positional argument of re.sub is ``count``, so re.S (== 16) silently
    # capped the number of replacements at 16. No flag is needed here because
    # the pattern contains no unescaped ``.`` outside a character class.
    return _MARKUP_RE.sub('', text)


def get_info(url, page, max_page=20):
    """Print the cleaned text of every status in a paginated feed.

    Fetches ``url``, prints each status text with ``_strip_markup`` applied,
    then follows the ``next_cursor`` value of each response (sent as the
    ``max_id`` query parameter) until the page counter passes ``max_page``
    or the response carries no cursor.

    Args:
        url: URL of the first page to fetch. Follow-up pages are requested
            from this same URL with ``max_id=<cursor>`` appended, so it
            should not already contain a ``max_id`` parameter.
        page: 1-based number of the page that ``url`` represents. At least
            one request is always made, even if ``page`` exceeds
            ``max_page``.
        max_page: Last page number to fetch (inclusive).

    Raises:
        requests.RequestException: on connection errors, timeouts, or a
            non-2xx HTTP status.
        ValueError: if the response body is not valid JSON.
        KeyError: if the payload lacks ``data``, ``statuses`` or ``text``.
    """
    first_url = url
    joiner = '&' if '?' in first_url else '?'

    # Iterative pagination: same request sequence as the former recursive
    # version, without growing the call stack.
    while True:
        # A timeout keeps a stalled connection from hanging the crawl forever.
        res = requests.get(url, headers=headers, timeout=10)
        res.raise_for_status()
        data = json.loads(res.text)['data']

        for status in data['statuses']:
            print(_strip_markup(status['text']))

        page += 1
        next_cursor = data.get('next_cursor')
        # A missing or empty/zero cursor is treated as "no further pages"
        # (NOTE(review): assumed end-of-feed convention; confirm against API).
        if page > max_page or not next_cursor:
            break
        url = f'{first_url}{joiner}max_id={next_cursor}'
# Script entry point: start crawling at the first page of the feed.
get_info(url, page=1)
# Attachment: 屏幕截圖.jpg (screenshot)