嗨嘍,大家好呀~這裡是愛看美女的茜茜吶
環境介紹:
Python 3.8 解釋器
PyCharm 編輯器
第三方模塊:
requests >>> pip install requests
protobuf >>> pip install protobuf
如何安裝 Python 第三方模塊:
win + R 輸入 cmd 點擊確定, 輸入安裝命令 pip install 模塊名 (pip install requests) 回車
在 PyCharm 中點擊 Terminal(終端) 輸入安裝命令
代碼展示
import requests
import dm_pb2
from google.protobuf import text_format
import re
from datetime import datetime
import csv
# Script 1: download a single danmaku segment, print every entry and store
# it in danmu.csv.

# Create (or truncate) the output file and write the header row.
with open("danmu.csv", mode='w', encoding='utf-8', newline='') as f:
    csv_writer = csv.writer(f)
    csv_writer.writerow(["彈幕所在位置", "彈幕內(nèi)容", "彈幕發(fā)布時(shí)間"])

# NOTE(review): the cookie below carries what appear to be account session
# values (SESSDATA, bili_jct). Hard-coding them in a published script exposes
# them; consider loading the cookie from the environment instead.
headers = {
    'cookie': "buvid3=355AA300-6A61-04E5-A05C-E891D886F69632716infoc; b_nut=1675085932; i-wanna-go-back=-1; _uuid=387EA3810-FBF5-E92C-827E-2510B578C5B9A33232infoc; buvid4=15C69C98-F6A7-EC6A-872F-E69C1840DD6D33724-023013021-1pW1w45e5fZS9RtebDiGZw%3D%3D; nostalgia_conf=-1; rpdid=|(kmJY|k))lY0J'uY~l|)lmY|; b_ut=5; is-2022-channel=1; buvid_fp_plain=undefined; CURRENT_BLACKGAP=0; LIVE_BUVID=AUTO3216755179681630; header_theme_version=CLOSE; CURRENT_PID=17897430-d93d-11ed-a1f4-675e4c96ff79; FEED_LIVE_VERSION=V8; CURRENT_QUALITY=80; fingerprint=58d6d808ef27a6225c943be7ca980284; buvid_fp=58d6d808ef27a6225c943be7ca980284; enable_web_push=DISABLE; CURRENT_FNVAL=4048; bili_ticket=eyJhbGciOiJIUzI1NiIsImtpZCI6InMwMyIsInR5cCI6IkpXVCJ9.eyJleHAiOjE3MDIzODAyNjYsImlhdCI6MTcwMjEyMTAwNiwicGx0IjotMX0.hHZgEl37y35RHgNUEbXnT3y_rtg_w3d1O46vW5TreIQ; bili_ticket_expires=1702380206; SESSDATA=0f019744%2C1717673066%2Ca41c0%2Ac2CjArLmPZFHNFg3B5H60pjRwiqJSLXDG8l2Pb_74Q11o8NmBWyKegdnFb6ivxUL255pwSVjRoaXFXVmFoRlFXY3VCRTAybEpud2ltaXFkRzZXQ25uZ3h0VGxrdGg3bWcxQ2hJN3d4VEZQRjRRTnd5cUx2TmJfUUdlWVZocVRfb281QnJHSklrTkJ3IIEC; bili_jct=f2a37b8a7351e9987d90f80d72dab593; DedeUserID=422789639; DedeUserID__ckMd5=fc4901c78719b545; b_lsid=125EDCFE_18C4E7B181A; home_feed_column=5; browser_resolution=1920-963; sid=6qcgbo4l; PVID=2",
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
}
url = 'https://api.bilibili.com/x/v2/dm/wbi/web/seg.so?type=1&oid=323723441&pid=715024588&segment_index=1&pull_mode=1&ps=0&pe=120000&web_location=1315873&w_rid=8138667fe7c9a9d9aa23f488f69e5c2d&wts=1702124018'

# 1. Send the request. A timeout keeps the script from hanging forever if
#    the server never answers.
response = requests.get(url=url, headers=headers, timeout=10)

# 2. Decode the protobuf payload.
my_seg = dm_pb2.DmSegMobileReply()
data = response.content
my_seg.ParseFromString(data)

# 3. Print every entry and append it to the CSV. The file is opened once for
#    the whole loop instead of once per row.
with open("danmu.csv", mode='a', encoding='utf-8', newline='') as f:
    csv_writer = csv.writer(f)
    for i in my_seg.elems:
        parse_data = text_format.MessageToString(i, as_utf8=True)
        try:
            progress = re.findall('progress: (.*)', parse_data)[0]
        except IndexError:
            # The text dump has no "progress:" line for this element; keep
            # the script's existing fallback of 1000 ms.
            progress = 1000
        # progress is in milliseconds; render it as mm:ss.
        minutes, seconds = divmod(int(progress) // 1000, 60)
        current_time = f'{minutes:02d}:{seconds:02d}'
        content = re.findall('content: (.*)', parse_data)[0]
        ctime = re.findall('ctime: (.*)', parse_data)[0]
        # ctime is a Unix timestamp; format it in local time.
        date_time = datetime.fromtimestamp(int(ctime)).strftime('%Y-%m-%d %H:%M:%S')
        print(current_time, content, date_time)
        csv_writer.writerow([current_time, content, date_time])
from datetime import datetime
import re
import requests
import dm_pb2
from google.protobuf import text_format
import csv
# Start a fresh danmu.csv that contains only the header row; data rows are
# appended to it later.
header_row = ['彈幕時(shí)間', '彈幕出現(xiàn)位置', '彈幕內(nèi)容']
with open('danmu.csv', mode='w', encoding='utf-8', newline='') as csv_file:
    csv.writer(csv_file).writerow(header_row)
def time_str_to_milliseconds(time_str):
    """Convert an ``H:M:S`` time string into milliseconds.

    Args:
        time_str: Three colon-separated integer fields (hours, minutes,
            seconds), e.g. ``"00:23:58"``.

    Returns:
        The total duration in milliseconds as an int.

    Raises:
        ValueError: If the string does not split into exactly three
            integer fields.
    """
    hours, minutes, seconds = (int(part) for part in time_str.split(':'))
    total_seconds = hours * 3600 + minutes * 60 + seconds
    return total_seconds * 1000
# start_time = "00:23:58"
# end_time = "00:26:03"
# # Convert to milliseconds
# start_ms = time_str_to_milliseconds(start_time)
# end_ms = time_str_to_milliseconds(end_time)
def get_data(url, timeout=10):
    """Fetch one danmaku segment with the browser-like headers below.

    Args:
        url: Full URL of the segment endpoint to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, so a stalled connection
            would hang the whole script.

    Returns:
        The ``requests.Response`` returned by ``requests.get``.

    Raises:
        requests.exceptions.Timeout: If the server does not respond within
            ``timeout`` seconds.
    """
    # NOTE(review): the Cookie below carries what appear to be account
    # session values (SESSDATA, bili_jct). Hard-coding them in a published
    # script exposes them; consider loading the cookie from the environment.
    headers = {
        'Cookie': "buvid3=5CB78B54-F1B3-FCE6-F1AD-C0831287EFD881020infoc; b_nut=1699856581; i-wanna-go-back=-1; b_ut=7; _uuid=F625CC83-C9D9-101035-7C36-D3BDFD6BE10CF80953infoc; enable_web_push=DISABLE; home_feed_column=5; DedeUserID=422789639; DedeUserID__ckMd5=fc4901c78719b545; header_theme_version=CLOSE; CURRENT_FNVAL=4048; buvid4=A6C069B5-4DB6-437A-1160-A2D1E031AFF772289-023083014-j%2BEVJ7V9TtLMVIMXjUkPKw%3D%3D; fingerprint=b3a2765a971ea2692a81ff8b1844fae5; buvid_fp_plain=undefined; buvid_fp=b3a2765a971ea2692a81ff8b1844fae5; rpdid=|(kmJYmkk~k)0J'uYmm)lY~k~; PVID=1; SESSDATA=1a664f71%2C1717565740%2C48bce%2Ac1CjCHJjBfBSiCSW6Dfm5CAL39PzQZEKS9eUW3s5GUBHFuBSQ-KUhgo1bPfAdpSv22A1oSVnhWOUkwbnprSnY4MEVnd1dkNXBFYTVQWk1fYkJkeUZjZmFsRjJSSDB0MndxRmFZRUJTQjRjd0xwMkY2ZWtZal9sTWV6azZZclRTQ0dVNmFzZW14N1FnIIEC; bili_jct=365ff75a8dd1510cb2cdd93895923f7e; sid=4ggq2j9r; bp_video_offset_422789639=872607904249675833; bili_ticket=eyJhbGciOiJIUzI1NiIsImtpZCI6InMwMyIsInR5cCI6IkpXVCJ9.eyJleHAiOjE3MDIyNzI5NzcsImlhdCI6MTcwMjAxMzcxNywicGx0IjotMX0.Mn0QVb_HBWG4wdx-IaVgx9UB4CkJW8P5QVS4LDqQGvA; bili_ticket_expires=1702272917; browser_resolution=1562-1010; innersign=0; b_lsid=A5D8EDDF_18C4D46CC84",
        'Referer': "https://www.bilibili.com/bangumi/play/ep327584",
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
    }
    return requests.get(url=url, headers=headers, timeout=timeout)
def parse_data(resp):
    """Decode one danmaku segment, print every entry and append it to danmu.csv.

    Args:
        resp: Response object whose ``content`` attribute holds the
            serialized ``DmSegMobileReply`` bytes.

    Each element produces one CSV row of
    ``[post time, video position as HH:MM:SS, comment text]``.
    """
    my_seg = dm_pb2.DmSegMobileReply()
    # resp.content is the raw binary payload; parsing it fills my_seg.elems.
    my_seg.ParseFromString(resp.content)

    # Open the CSV once per segment rather than once per row.
    with open('danmu.csv', mode='a', encoding='utf-8', newline='') as f:
        csv_writer = csv.writer(f)
        for elem in my_seg.elems:
            # Read the fields directly from the message instead of matching a
            # regex against its text dump. The previous pattern needed all ten
            # fields on consecutive lines in a fixed order and a non-empty,
            # quote-free content, so any element whose dump deviated from
            # that shape was silently dropped.
            # NOTE(review): assumes `content` is a text (not bytes) field and
            # `progress`/`ctime` are integer fields — confirm against dm.proto.
            total_seconds = elem.progress // 1000  # progress is in milliseconds

            # Split out hours explicitly. The old code prepended a literal
            # "00:" to mm:ss, which produced values such as "00:75:30" for
            # positions past the first hour.
            hours, remainder = divmod(total_seconds, 3600)
            minutes, seconds = divmod(remainder, 60)
            video_position = f"{hours:02d}:{minutes:02d}:{seconds:02d}"

            # ctime is a Unix timestamp; format it in local time.
            date_time = datetime.fromtimestamp(elem.ctime).strftime('%Y-%m-%d %H:%M:%S')
            content = elem.content

            print("彈幕出現(xiàn)位置:", video_position)
            print("彈幕時(shí)間:", date_time)
            print("彈幕內(nèi)容:", content)
            print("----------")
            csv_writer.writerow([date_time, video_position, content])
# Segment endpoints to download. Each URL carries its own w_rid/wts query
# values, so the list is kept as literal strings.
url_list = [
    'https://api.bilibili.com/x/v2/dm/wbi/web/seg.so?type=1&oid=197711172&pid=328492664&segment_index=1&pull_mode=1&ps=0&pe=120000&web_location=1315873&w_rid=3078e56400ad93df33859b09b8464f6b&wts=1702103538',
    'https://api.bilibili.com/x/v2/dm/wbi/web/seg.so?type=1&oid=197711172&pid=328492664&segment_index=1&pull_mode=1&ps=120000&pe=360000&web_location=1315873&w_rid=db9e8a1b66eacfb77d7e92762ac3fc4b&wts=1702103541',
    'https://api.bilibili.com/x/v2/dm/wbi/web/seg.so?type=1&oid=197711172&pid=328492664&segment_index=2&web_location=1315873&w_rid=9fe6b7defe3bcd611f6ec7bbd8a57553&wts=1702103541',
    'https://api.bilibili.com/x/v2/dm/wbi/web/seg.so?type=1&oid=197711172&pid=328492664&segment_index=3&web_location=1315873&w_rid=59a05c03d41c295ad57e0cd23db695eb&wts=1702103541',
    'https://api.bilibili.com/x/v2/dm/wbi/web/seg.so?type=1&oid=197711172&pid=328492664&segment_index=4&web_location=1315873&w_rid=48a794c85798922aac2ce4a5ad779544&wts=1702103541',
    'https://api.bilibili.com/x/v2/dm/wbi/web/seg.so?type=1&oid=197711172&pid=328492664&segment_index=5&web_location=1315873&w_rid=62fa8d41489f2b58f2a8577e3e654ef0&wts=1702103541',
    'https://api.bilibili.com/x/v2/dm/wbi/web/seg.so?type=1&oid=197711172&pid=328492664&segment_index=6&web_location=1315873&w_rid=0d9313ee507d135bce658616e694fb39&wts=1702103541',
    'https://api.bilibili.com/x/v2/dm/wbi/web/seg.so?type=1&oid=197711172&pid=328492664&segment_index=7&web_location=1315873&w_rid=151cf518a34b72ceeb35fec82b30cd43&wts=1702103541',
    'https://api.bilibili.com/x/v2/dm/wbi/web/seg.so?type=1&oid=197711172&pid=328492664&segment_index=8&web_location=1315873&w_rid=394bda938a8a775152f1ee7641d0d4bb&wts=1702103541',
]

# Download and process the segments one after another, in list order.
for url in url_list:
    parse_data(get_data(url))
詞云圖
import pandas as pd
from wordcloud import WordCloud
import matplotlib.pyplot as plt
# Load the collected danmaku.
df = pd.read_csv('danmu.csv')
# str.join accepts only strings, but read_csv yields NaN (a float) for empty
# cells and for tokens such as "NA"/"null", and may parse purely numeric
# comments as numbers. Drop the missing values and cast the rest to str so
# that a single such row no longer raises TypeError.
text = " ".join(df['彈幕內(nèi)容'].dropna().astype(str))

# Build the word cloud.
# NOTE(review): font_path points at a Windows system font; on other
# platforms supply a font file that covers CJK glyphs.
wordcloud = WordCloud(width=800, height=800,
                      font_path=r'C:/Windows/Fonts/simhei.ttf',
                      background_color='white',
                      min_font_size=10).generate(text)

# Show the word cloud without axes or padding.
plt.figure(figsize=(8, 8), facecolor=None)
plt.imshow(wordcloud)
plt.axis("off")
plt.tight_layout(pad=0)
plt.show()
尾語
感謝你觀看我的文章吶~本次航班到這裡就結束啦
希望本篇文章有對你帶來幫助,有學習到一點知識~
躲起來的星星也在努力發光,你也要努力加油(讓我們一起努力叭)。