原創文章,歡迎轉載。轉載請注明:轉載自IT人故事會,謝謝!
原文鏈接地址:「docker實戰篇」python的docker-抖音視頻抓取(中)(25)
本次主要針對python對上次抖音分享的頁面中的_signature進行解析并完成抖音視頻的下載。源碼:https://github.com/limingios/dockerpython.git (源碼/「docker實戰篇」python的docker-抖音視頻抓取(下)(24))
https://github.com/limingios/dockerpython.git (谷歌插件)
找到方法,完成本地的html的生成
其實就是復制出來分享頁面的函數,然后通過函數調用的方式完成_signature的生成。
html_head.txt
<!DOCTYPE html>
<!-- Header fragment of the generated signature page. The Python script writes
     this fragment first, then a "var tac=...;" line, then the footer script. -->
<html lang="en">
<head>
<meta charset="UTF-8">
<!-- The footer script overwrites the title with the computed signature; the
     Python script reads it back from the browser's page title. -->
<title>Title</title>
</head>
<body>
</body>
</html>
<!-- Left open on purpose: the matching closing script tag is the last line of
     the footer fragment. -->
<script type="text/javascript">
html_foot.txt
// Module loader (its error message identifies it as ModJS). Installs `define`
// and `require` on the `__M` namespace of the object passed in as `t`; the
// function is invoked with `this` on the last line.
!function(t) {
// Create the namespace when missing and skip installation entirely when a
// `require` is already present on it.
if (t.__M = t.__M || {},
!t.__M.require) {
// e: require, n: define, r: the first <head> element (receives injected tags),
// i: pending callbacks per module id, o: registered factories,
// a: cache of instantiated modules, u: script URLs already requested,
// c: resource map entries (`res`), s: package map entries (`pkg`),
// l: script injector.
var e, n, r = document.getElementsByTagName("head")[0], i = {}, o = {}, a = {}, u = {}, c = {}, s = {}, l = function(t, n) {
// Inject a <script> for URL `t` at most once. `n` is an optional callback that
// is called on a load error or when the timeout elapses. Returns the created
// element, or undefined when the URL was requested before.
if (!(t in u)) {
u[t] = !0;
var i = document.createElement("script");
if (n) {
// Report a failure if the script has not loaded within e.timeout milliseconds.
var o = setTimeout(n, e.timeout);
i.onerror = function() {
clearTimeout(o),
n()
}
;
// A successful load only cancels the timeout.
var a = function() {
clearTimeout(o)
};
// Use onreadystatechange when the element has no onload property.
"onload"in i ? i.onload = a : i.onreadystatechange = function() {
("loaded" === this.readyState || "complete" === this.readyState) && a()
}
}
return i.type = "text/javascript",
i.src = t,
r.appendChild(i),
i
}
}, f = function(t, e, n) {
// Queue callback `e` for module `t` and start loading its script. `n` is an
// optional error callback that receives the module id.
var r = i[t] || (i[t] = []);
r.push(e);
// The URL comes from the module's package entry when it names one, otherwise
// from its own resource entry, and finally falls back to the id itself.
var o, a = c[t] || c[t + ".js"] || {}, u = a.pkg;
o = u ? s[u].url || s[u].uri : a.url || a.uri || t,
l(o, n && function() {
n(t)
}
)
};
// define(id, factory): store the factory under the id with a trailing ".js"
// removed, then run and discard the callbacks that were waiting for it.
n = function(t, e) {
// When the second argument is not a function, the third argument is used.
"function" != typeof e && (e = arguments[2]),
t = t.replace(/\.js$/i, ""),
o[t] = e;
var n = i[t];
if (n) {
for (var r = 0, a = n.length; a > r; r++)
n[r]();
delete i[t]
}
}
,
// require(id): return the exports of an already defined module, instantiating
// it on first use. Array-like arguments (anything with `splice`) are forwarded
// to require.async. Throws a string when no factory is registered for the id.
e = function(t) {
if (t && t.splice)
return e.async.apply(this, arguments);
t = e.alias(t);
var n = a[t];
if (n)
return n.exports;
var r = o[t];
if (!r)
throw "[ModJS] Cannot find module `" + t + "`";
n = a[t] = {
exports: {}
};
// Function factories are called with (require, exports, module) and `this`
// set to the module object; any other value is used as the result directly.
var i = "function" == typeof r ? r.apply(n, [e, n.exports, n]) : r;
// A truthy result replaces the exports object.
return i && (n.exports = i),
// Add a `default` property pointing at the exports when none exists and the
// exports object is extensible.
n.exports && !n.exports["default"] && Object.defineProperty && Object.isExtensible(n.exports) && Object.defineProperty(n.exports, "default", {
value: n.exports
}),
n.exports
}
,
// require.async(names, callback, onerror): load every named module that is not
// defined yet (following `deps` from the resource map), then call `callback`
// with the exports of the named modules.
e.async = function(n, r, i) {
// Walk the ids in `t`, scheduling a load for each undefined module that has
// not been visited during this call.
function a(t) {
for (var n, r = 0, h = t.length; h > r; r++) {
var p = e.alias(t[r]);
p in o ? (n = c[p] || c[p + ".js"],
n && "deps"in n && a(n.deps)) : p in s || (s[p] = !0,
l++,
f(p, u, i),
n = c[p] || c[p + ".js"],
n && "deps"in n && a(n.deps))
}
}
// Called once directly below and once per finished load; the call that finds
// the pending counter at zero requires the modules and invokes the callback
// with the outer `t` as `this`.
function u() {
if (0 === l--) {
for (var i = [], o = 0, a = n.length; a > o; o++)
i[o] = e(n[o]);
r && r.apply(t, i)
}
}
"string" == typeof n && (n = [n]);
// Local `s` (visited ids) and `l` (pending load counter) shadow the outer
// package map and script injector inside this function.
var s = {}
, l = 0;
a(n),
u()
}
,
// require.resourceMap(map): copy the own properties of map.res and map.pkg
// into the loader's resource and package tables.
e.resourceMap = function(t) {
var e, n;
n = t.res;
for (e in n)
n.hasOwnProperty(e) && (c[e] = n[e]);
n = t.pkg;
for (e in n)
n.hasOwnProperty(e) && (s[e] = n[e])
}
,
// require.loadJs(url): inject a script tag without any callback.
e.loadJs = function(t) {
l(t)
}
,
// require.loadCss({content} | {url}): inject inline CSS as a <style> element,
// or link an external stylesheet.
e.loadCss = function(t) {
if (t.content) {
var e = document.createElement("style");
e.type = "text/css",
// Use styleSheet.cssText when the element exposes it, innerHTML otherwise.
e.styleSheet ? e.styleSheet.cssText = t.content : e.innerHTML = t.content,
r.appendChild(e)
} else if (t.url) {
var n = document.createElement("link");
n.href = t.url,
n.rel = "stylesheet",
n.type = "text/css",
r.appendChild(n)
}
}
,
// require.alias(id): strip a trailing ".js" (case-insensitive) from the id.
e.alias = function(t) {
return t.replace(/\.js$/i, "")
}
,
// Script load timeout in milliseconds, passed to setTimeout by the injector.
e.timeout = 5e3,
// Publish the loader on the shared namespace.
t.__M.define = n,
t.__M.require = e
}
}(this)
// Registers the obfuscated "byted-acrawler" runtime as a module. The factory
// builds a function with the Function constructor: its source is the template
// string below with placeholder characters replaced by fragments from the
// split table (indexed by the low 4 bits of the placeholder's char code). That
// function is then called with an encoded program string and an array holding
// the result of marking the exports object with `__esModule`.
// NOTE(review): the placeholder characters appear here as "?" (see the regex
// `/[?-?]/` and `.split("?")`), which looks like character damage from copying;
// presumably the original uses distinct non-ASCII placeholders. Confirm against
// the upstream file before relying on this text.
// NOTE(review): the encoded program presumably installs the `sign` function
// that is called on the module's exports further down; confirm.
__M.define("douyin_falcon:node_modules/byted-acrawler/dist/runtime", function(l, e) {
Function(function(l) {
return '?e(e,a,r){?(b[e]||(b[e]=t("x,y","?x "+e+" y"?)(r,a)}?a(e,a,r){?(k[r]||(k[r]=t("x,y","?new x[y]("+Array(r+1).join(",x[?y]")?(1)+")"?)(e,a)}?r(e,a,r){?n,t,s={},b=s.d=r?r.d+1:0;for(s["$"+b]=s,t=0;t<b;t?)s[n="$"+t]=r[n];for(t=0,b=s?=a?;t<b;t?)s[t]=a[t];?c(e,0,s)}?c(t,b,k){?u(e){v[x?]=e}?f?{?g=?,t?ing(b?g)}?l?{try{y=c(t,b,k)}catch(e){h=e,y=l}}for(?h,y,d,g,v=[],x=0;;)switch(g=?){case 1:u(!?)?4:?f??5:u(?(e){?a=0,r=e?;???{?c=a<r;?c&&u(e[a?]),c}}(???6:y=?,u(?(y??8:if(g=?,l??g,g=?,y===c)b+=g;else if(y!==l)?y?9:?c?10:u(s(???11:y=?,u(?+y)?12:for(y=f?,d=[],g=0;g<y?;g?)d[g]=y.charCodeAt(g)^g+y?;u(String.fromCharCode.apply(null,d??13:y=?,h=delete ?[y]?14:???59:u((g=?)?(y=x,v.slice(x-=g,y?:[])?61:u(?[?])?62:g=?,k[0]=65599*k[0]+k[1].charCodeAt(g)>>>0?65:h=?,y=?,?[y]=h?66:u(e(t[b?],?,???67:y=?,d=?,u((g=?).x===c?r(g.y,y,k):g.apply(d,y??68:u(e((g=t[b?])<"<"?(b--,f?):g+g,?,???70:u(!1)?71:?n?72:?+f??73:u(parseInt(f?,36??75:if(?){b??case 74:g=?<<16>>16?g?76:u(k[?])?77:y=?,u(?[y])?78:g=?,u(a(v,x-=g+1,g??79:g=?,u(k["$"+g])?81:h=?,?[f?]=h?82:u(?[f?])?83:h=?,k[?]=h?84:?!0?85:?void 0?86:u(v[x-1])?88:h=?,y=?,?h,?y?89:u(??{?e?{?r(e.y,arguments,k)}?e.y=f?,e.x=c,e}?)?90:?null?91:?h?93:h=??0:??;default:u((g<<16>>16)-16)}}?n=this,t=n.Function,s=Object.keys||?(e){?a={},r=0;for(?c in e)a[r?]=c;?a?=r,a},b={},k={};?r'.replace(/[?-?]/g, function(e) {
return l[15 & e.charCodeAt(0)]
})
}("v[x++]=?v[--x]?t.charCodeAt(b++)-32?function ?return ?))?++?.substr?var ?.length?()?,b+=?;break;case ?;break}".split("?")))()('gr$Daten Иb/s!l y?y?g,(lfi~ah`{mv,-n|jqewVxp{rvmmx,&eff?kx[!cs"l".Pq%widthl"@q&heightl"vr*getContextx$"2d[!cs#l#,*;?|u.|uc{uq$fontl#vr(fillTextx$$龘???2<[#c}l#2q*shadowBlurl#1q-shadowOffsetXl#$$limeq+shadowColorl#vr#arcx88802[%c}l#vr&strokex[ c}l"v,)}eOmyoZB]mx[ cs!0s$l$Pb<k7l l!r&lengthb%^l$1+s$j?l s#i$1ek1s$gr#tack4)zgr#tac$! +0o![#cj?o ]!l$b%s"o ]!l"l$b*b^0d#>>>s!0s%yA0s"l"l!r&lengthb<k+l"^l"1+s"j?l s&l&z0l!$ +["cs\'(0l#i\'1ps9wxb&s() &{s)/s(gr&Stringr,fromCharCodes)0s*yWl ._b&s o!])l l Jb<k$.aj;l .Tb<k$.gj/l .^b<k&i"-4j!?+& s+yPo!]+s!l!l Hd>&l!l Bd>&+l!l <d>&+l!l 6d>&+l!l &+ s,y=o!o!]/q"13o!l q"10o!],l 2d>& s.{s-yMo!o!]0q"13o!]*Ld<l 4d#>>>b|s!o!l q"10o!],l!& s/yIo!o!].q"13o!],o!]*Jd<l 6d#>>>b|&o!]+l &+ s0l-l!&l-l!i\'1z141z4b/@d<l"b|&+l-l(l!b^&+l-l&zl\'g,)gk}ejo{?cm,)|yn~Lij~em["cl$b%@d<l&zl\'l $ +["cl$b%b|&+l-l%8d<@b|l!b^&+ q$sign ', [Object.defineProperty(e, "__esModule", {
value: !0
})])
});
dycs = __M.require("douyin_falcon:node_modules/byted-acrawler/dist/runtime")
signc = dycs.sign(&&&&)
document.title = signc
document.write(signc)
</script>
handle_douyin_movie.py 下載代碼
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2019/2/20 17:39
# @Author : Aries
# @Site :
# @File : handle_douyin_movie.py.py
# @Software: PyCharm
import json
import os
import re
import time
from pathlib import Path

import requests
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
# Share ID of the Douyin user whose videos are downloaded.
share_id = "89923219116"
share_url = "https://www.douyin.com/share/user/" + share_id
header = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36"
}
# Regular expressions that pull dytk and tac out of the share page.
dytk_search = re.compile(r"dytk: '(.*?)'")
tac_search = re.compile(r"<script>tac=(.*?)</script>")
# A timeout keeps the script from hanging forever on a stalled connection.
response = requests.get(url=share_url, headers=header, timeout=30)
# Extract dytk and tac; fail with a clear message instead of an AttributeError
# on None when the page no longer contains them.
dytk_match = dytk_search.search(response.text)
tac_match = tac_search.search(response.text)
if dytk_match is None or tac_match is None:
    raise RuntimeError("could not find dytk/tac in share page: " + share_url)
dytk = dytk_match.group(1)
tac = tac_match.group(1)
# Wrap tac as a JavaScript statement so it can be embedded in the page.
tac = "var tac=" + tac + ";"
# Assemble the HTML page: header + tac + footer.
# The page declares <meta charset="UTF-8">, so read and write it as UTF-8
# instead of relying on the platform default encoding.
with open("html_head.txt", encoding="utf-8") as f1:
    f1_read = f1.read()
with open("html_foot.txt", encoding="utf-8") as f2:
    # Fill the placeholder with the configured user id rather than a second
    # hard-coded copy of it.
    f2_read = f2.read().replace("&&&&", share_id)
with open("test.html", "w", encoding="utf-8") as f_w:
    f_w.write(f1_read + "\n" + tac + "\n" + f2_read)
chrome_options = Options()
chrome_options.add_argument("--headless")
abspath = os.path.abspath(r"D:\Program Files\chromedriver\chromedriver.exe")
douyin_driver = webdriver.Chrome(executable_path=abspath, chrome_options=chrome_options)
try:
    # Open the file that was just written, wherever the script is run from.
    douyin_driver.get(Path("test.html").resolve().as_uri())
    # The page script stores the computed signature in the document title.
    signature = douyin_driver.title
finally:
    # Always shut the browser down, even when loading the page fails.
    douyin_driver.quit()
movie_url = "https://www.douyin.com/aweme/v1/aweme/post/?user_id="+share_id+"&count=21&max_cursor=0&aid=1128&_signature="+signature+"&dytk="+dytk
# The endpoint is unreliable and often answers with an empty list, so keep
# polling until it returns videos.
while True:
    movie_response = requests.get(url=movie_url, headers=header, timeout=30)
    aweme_list = json.loads(movie_response.text)["aweme_list"]
    if not aweme_list:
        # Pause between attempts instead of hammering the server in a busy loop.
        time.sleep(1)
        continue
    print(movie_response.text)
    for index, item in enumerate(aweme_list):
        video_url = item["video"]["play_addr"]["url_list"][0]
        video_response = requests.get(url=video_url, headers=header, timeout=30)
        # One file per video; a single fixed name would keep only the last one.
        with open("douyin_%d.mp4" % index, "wb") as v:
            # The file is opened in binary mode, so write the raw bytes
            # (.content) rather than the decoded .text.
            v.write(video_response.content)
    break
最終結果
里面關于chromedriver的配置,直接引入它的路徑最穩了,我比較喜歡這種方式;網上很多搞環境變量的,導致電腦很慢,不建議。
PS:基本上抖音視頻下載的都已經完成了,下次對于需要注意的做下總結。