瀏覽器抓取真實直播源地址(純前端JS解析)
網上搜索各種平臺的直播源地址都是滿天飛,但是經常會有失效的時候,因爲官方也會定期升級系統、修改各種參數或鏈接,讓直播源不能永久有效。所以敝人一直崇尚的是「授人以魚不如授人以漁」:與其把直播源給別人,不如教大家如何去爬取直播源,就算失效了也不怕。
0. 前言
本人業餘時間喜歡用虎牙看直播,所以第一個便是想到如何抓取虎牙的直播源。
在抓取之前,需要了解視頻直播源的分類和區別,可以自行了解 hls、flv、m3u8 等知識。
Tips: 本教程只是教大家如何利用前端調試技巧和爬蟲基本操作,不作爲商業用途,各位童鞋耗子尾汁。
1. 瀏覽器抓取流程
首先打開虎牙官網,隨便找個直播間:https://m.huya.com/949527 ,這裏使用的是手機端的網頁(因爲手機端的比較簡單)。
隨便看了下,沒有 ajax 請求,那麼地址一定是隨頁面帶進來的。現在大部分直播網頁都是 SSR(服務器端渲染),所以只能去頁面源代碼找找:
好傢伙!直接就找到了一個很像地址的東西 liveLineUrl,是一個 m3u8 的地址:
在線m3u8播放測試網站:https://www.m3u8play.com/
這個網站可以測試播放源是不是好的,來!試一下!
就很完美!
但是就這麼簡單的嗎?
我又試了一下我經常看的【一起看】的直播間,來看看電影啥的,結果:
這是咋回事。。。然後對比下前後兩個鏈接發現了問題,下面是【一起看】的鏈接:
然後想到 liveLineUrl 這個參數不是全局變量嗎?在控制檯打印看一下,再仔細對比,發現參數變了:原來的 fm 參數已經變成了 seqid:
先試下控制檯打印的能不能播放:
行,司馬懿出來了,現在只用分析如何破解參數即可。
2. 參數解析
按 Ctrl + Shift + F 搜索 liveLineUrl,然後找到處理 url 的 js,打個斷點調試一下,看看是怎麼處理的:
斷點進入 Object(m.default)(window.liveLineUrl)
可以看到這裏就是處理參數的地方,最後返回的就是解析後的參數字符串:
我整理了下解析函數,重新實現了一下:
/**
 * Re-signs a Huya stream URL that still carries the `fm` seed parameter.
 *
 * The new signature is md5(`${prefix}_0_${streamName}_${seqid}_${wsTime}`),
 * where `prefix` is the part of the Base64-decoded `fm` value before the
 * first "_". The returned URL replaces `wsSecret`, drops `fm`, adds
 * `u=0` and `seqid`, and forwards every other query parameter unchanged.
 *
 * Relies on two names from the surrounding script: `Base64.decode` and
 * `md5.hex` (js-md5).
 *
 * @param {string} url - Stream URL (.flv or .m3u8) including its query string.
 * @returns {string} The re-signed URL, or `url` unchanged when it has no
 *   `fm` parameter (nothing to derive a signature from).
 * @throws {URIError} If the `fm` value is not valid percent-encoding.
 */
function parseUrl(url) {
  const [mainUrl, query = ""] = url.split("?");

  // Split each pair on its FIRST "=" only, so values that themselves contain
  // "=" (e.g. un-escaped Base64 padding) are kept instead of being dropped.
  const paramsObj = {};
  for (const pair of query.split("&")) {
    const eq = pair.indexOf("=");
    if (eq === -1) {
      continue; // empty segment produced by "&&", or a bare flag
    }
    paramsObj[pair.slice(0, eq)] = pair.slice(eq + 1);
  }

  // `wsSecret` is pulled out only so that the stale value is not forwarded.
  const { fm, wsTime, wsSecret: _staleSecret, ...others } = paramsObj;
  if (fm === undefined) {
    // No seed to sign with; hand the URL back as-is rather than signing the
    // literal text "undefined".
    return url;
  }

  // Stream name = last path segment without its extension. The dot is escaped
  // and the match anchored so only a real trailing ".flv"/".m3u8" is removed.
  const segments = mainUrl.split("/");
  const streamName = segments[segments.length - 1].replace(/\.(flv|m3u8)$/, "");

  const prefix = Base64.decode(decodeURIComponent(fm)).split("_")[0];
  const seqid = Math.floor(1e4 * Date.now() + 1e4 * Math.random());
  const newWsSecret = md5.hex(`${prefix}_0_${streamName}_${seqid}_${wsTime}`);

  const passthrough = Object.keys(others)
    .map((key) => `&${key}=${others[key]}`)
    .join("");

  return `${mainUrl}?wsSecret=${newWsSecret}&wsTime=${wsTime}&u=0&seqid=${seqid}${passthrough}`;
}
其中用到了 Base64 和 MD5 相關函數:
// md5下載:https://raw.githubusercontent.com/emn178/js-md5/master/src/md5.js
/**
 * Small Base64 codec that treats text as UTF-8.
 *
 * `encode` turns a JavaScript string into Base64 over its UTF-8 bytes;
 * `decode` reverses it. The `_utf8_*` helpers work on "byte strings", i.e.
 * strings in which every char code is one byte (0-255).
 */
let Base64 = {
  // Base64 alphabet; index 64 ("=") is the padding marker.
  _keyStr: "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=",

  /**
   * Encodes a string as padded Base64 over its UTF-8 bytes.
   * CRLF line endings are normalised to LF before encoding.
   * @param {string} input - Text to encode.
   * @returns {string} Base64 text.
   */
  encode: function (input) {
    const alphabet = Base64._keyStr;
    const bytes = Base64._utf8_encode(input);
    let output = "";
    for (let pos = 0; pos < bytes.length; pos += 3) {
      const b1 = bytes.charCodeAt(pos);
      const b2 = bytes.charCodeAt(pos + 1); // NaN when past the end
      const b3 = bytes.charCodeAt(pos + 2); // NaN when past the end
      const s1 = b1 >> 2;
      const s2 = ((b1 & 3) << 4) | (b2 >> 4);
      let s3 = ((b2 & 15) << 2) | (b3 >> 6);
      let s4 = b3 & 63;
      // A short final group is completed with "=" padding (index 64).
      if (Number.isNaN(b2)) {
        s3 = 64;
        s4 = 64;
      } else if (Number.isNaN(b3)) {
        s4 = 64;
      }
      output +=
        alphabet.charAt(s1) +
        alphabet.charAt(s2) +
        alphabet.charAt(s3) +
        alphabet.charAt(s4);
    }
    return output;
  },

  /**
   * Decodes Base64 text into a string, interpreting the bytes as UTF-8.
   * Characters outside the Base64 alphabet are ignored, and input whose
   * trailing "=" padding was stripped is accepted.
   * @param {string} input - Base64 text.
   * @returns {string} Decoded text.
   */
  decode: function (input) {
    const alphabet = Base64._keyStr;
    let cleaned = input.replace(/[^A-Za-z0-9+\/=]/g, "");
    // Restore stripped padding so a short final group is not read as zeros.
    while (cleaned.length % 4 !== 0) {
      cleaned += "=";
    }
    let bytes = "";
    for (let pos = 0; pos < cleaned.length; pos += 4) {
      const s1 = alphabet.indexOf(cleaned.charAt(pos));
      const s2 = alphabet.indexOf(cleaned.charAt(pos + 1));
      const s3 = alphabet.indexOf(cleaned.charAt(pos + 2));
      const s4 = alphabet.indexOf(cleaned.charAt(pos + 3));
      bytes += String.fromCharCode((s1 << 2) | (s2 >> 4));
      if (s3 !== 64) {
        bytes += String.fromCharCode(((s2 & 15) << 4) | (s3 >> 2));
      }
      if (s4 !== 64) {
        bytes += String.fromCharCode(((s3 & 3) << 6) | s4);
      }
    }
    return Base64._utf8_decode(bytes);
  },

  /**
   * Converts a string into a UTF-8 byte string.
   * "\r\n" is collapsed to "\n" first. Characters outside the BMP are
   * written as standard 4-byte sequences; a lone surrogate is written as a
   * 3-byte sequence.
   * @param {string} text - Text to convert.
   * @returns {string} Byte string (one char per byte).
   */
  _utf8_encode: function (text) {
    const normalized = text.replace(/\r\n/g, "\n");
    let bytes = "";
    for (const ch of normalized) {
      const cp = ch.codePointAt(0);
      if (cp < 0x80) {
        bytes += String.fromCharCode(cp);
      } else if (cp < 0x800) {
        bytes += String.fromCharCode((cp >> 6) | 192, (cp & 63) | 128);
      } else if (cp < 0x10000) {
        bytes += String.fromCharCode(
          (cp >> 12) | 224,
          ((cp >> 6) & 63) | 128,
          (cp & 63) | 128
        );
      } else {
        bytes += String.fromCharCode(
          (cp >> 18) | 240,
          ((cp >> 12) & 63) | 128,
          ((cp >> 6) & 63) | 128,
          (cp & 63) | 128
        );
      }
    }
    return bytes;
  },

  /**
   * Converts a UTF-8 byte string back into a regular string.
   * Handles 1- to 4-byte sequences; surrogate halves stored as two 3-byte
   * sequences also decode to the intended character. Input is not validated:
   * any other lead byte is read as the start of a 3-byte sequence.
   * @param {string} bytes - Byte string (one char per byte).
   * @returns {string} Decoded text.
   */
  _utf8_decode: function (bytes) {
    let text = "";
    let pos = 0;
    while (pos < bytes.length) {
      const b1 = bytes.charCodeAt(pos);
      if (b1 < 128) {
        text += String.fromCharCode(b1);
        pos += 1;
      } else if (b1 > 191 && b1 < 224) {
        const b2 = bytes.charCodeAt(pos + 1);
        text += String.fromCharCode(((b1 & 31) << 6) | (b2 & 63));
        pos += 2;
      } else if (b1 >= 240 && b1 < 248) {
        const b2 = bytes.charCodeAt(pos + 1);
        const b3 = bytes.charCodeAt(pos + 2);
        const b4 = bytes.charCodeAt(pos + 3);
        const cp =
          ((b1 & 7) << 18) | ((b2 & 63) << 12) | ((b3 & 63) << 6) | (b4 & 63);
        // String.fromCodePoint throws above U+10FFFF; substitute U+FFFD.
        text += cp <= 0x10ffff ? String.fromCodePoint(cp) : "\uFFFD";
        pos += 4;
      } else {
        const b2 = bytes.charCodeAt(pos + 1);
        const b3 = bytes.charCodeAt(pos + 2);
        text += String.fromCharCode(
          ((b1 & 15) << 12) | ((b2 & 63) << 6) | (b3 & 63)
        );
        pos += 3;
      }
    }
    return text;
  }
}
3. 源碼及播放器實現
播放器官網: videojs
來吧,直接上全部代碼:
<!doctype html>
<html lang="zh-CN">
<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
  <meta name="applicable-device" content="pc,mobile">
  <!-- video.js stylesheet. NOTE(review): the exact version pin is unknown; the 7.x line is assumed - confirm. -->
  <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/video.js@7/dist/video-js.min.css">
  <title>虎牙直播</title>
</head>
<body>
  <!-- data-setup="{}" lets video.js initialise this element automatically; the <source> is only a placeholder stream. -->
  <video id="player" class="video-js vjs-16-9 vjs-big-play-centered" controls preload="auto" data-setup="{}">
    <source src="https://bitdash-a.akamaihd.net/content/sintel/hls/playlist.m3u8" type="application/x-mpegURL">
    <p class="vjs-no-js">
      To view this video please enable JavaScript, and consider upgrading to a web browser that
      <a href="https://videojs.com/html5-video-support/" target="_blank" rel="noopener noreferrer">supports HTML5 video</a>
    </p>
  </video>
  <!-- Player core and HLS playback support. NOTE(review): major versions 7 / 2 are assumed - confirm. -->
  <script src="https://cdn.jsdelivr.net/npm/video.js@7/dist/video.min.js"></script>
  <script src="https://cdn.jsdelivr.net/npm/@videojs/http-streaming@2/dist/videojs-http-streaming.min.js"></script>
  <!-- Local copy of js-md5; exposes the global `md5` used by parseUrl. -->
  <script src="js/md5.js"></script>
<script>
/**
 * Small Base64 codec that treats text as UTF-8.
 *
 * `encode` turns a JavaScript string into Base64 over its UTF-8 bytes;
 * `decode` reverses it. The `_utf8_*` helpers work on "byte strings", i.e.
 * strings in which every char code is one byte (0-255).
 */
let Base64 = {
  // Base64 alphabet; index 64 ("=") is the padding marker.
  _keyStr: "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=",

  /**
   * Encodes a string as padded Base64 over its UTF-8 bytes.
   * CRLF line endings are normalised to LF before encoding.
   * @param {string} input - Text to encode.
   * @returns {string} Base64 text.
   */
  encode: function (input) {
    const alphabet = Base64._keyStr;
    const bytes = Base64._utf8_encode(input);
    let output = "";
    for (let pos = 0; pos < bytes.length; pos += 3) {
      const b1 = bytes.charCodeAt(pos);
      const b2 = bytes.charCodeAt(pos + 1); // NaN when past the end
      const b3 = bytes.charCodeAt(pos + 2); // NaN when past the end
      const s1 = b1 >> 2;
      const s2 = ((b1 & 3) << 4) | (b2 >> 4);
      let s3 = ((b2 & 15) << 2) | (b3 >> 6);
      let s4 = b3 & 63;
      // A short final group is completed with "=" padding (index 64).
      if (Number.isNaN(b2)) {
        s3 = 64;
        s4 = 64;
      } else if (Number.isNaN(b3)) {
        s4 = 64;
      }
      output +=
        alphabet.charAt(s1) +
        alphabet.charAt(s2) +
        alphabet.charAt(s3) +
        alphabet.charAt(s4);
    }
    return output;
  },

  /**
   * Decodes Base64 text into a string, interpreting the bytes as UTF-8.
   * Characters outside the Base64 alphabet are ignored, and input whose
   * trailing "=" padding was stripped is accepted.
   * @param {string} input - Base64 text.
   * @returns {string} Decoded text.
   */
  decode: function (input) {
    const alphabet = Base64._keyStr;
    let cleaned = input.replace(/[^A-Za-z0-9+\/=]/g, "");
    // Restore stripped padding so a short final group is not read as zeros.
    while (cleaned.length % 4 !== 0) {
      cleaned += "=";
    }
    let bytes = "";
    for (let pos = 0; pos < cleaned.length; pos += 4) {
      const s1 = alphabet.indexOf(cleaned.charAt(pos));
      const s2 = alphabet.indexOf(cleaned.charAt(pos + 1));
      const s3 = alphabet.indexOf(cleaned.charAt(pos + 2));
      const s4 = alphabet.indexOf(cleaned.charAt(pos + 3));
      bytes += String.fromCharCode((s1 << 2) | (s2 >> 4));
      if (s3 !== 64) {
        bytes += String.fromCharCode(((s2 & 15) << 4) | (s3 >> 2));
      }
      if (s4 !== 64) {
        bytes += String.fromCharCode(((s3 & 3) << 6) | s4);
      }
    }
    return Base64._utf8_decode(bytes);
  },

  /**
   * Converts a string into a UTF-8 byte string.
   * "\r\n" is collapsed to "\n" first. Characters outside the BMP are
   * written as standard 4-byte sequences; a lone surrogate is written as a
   * 3-byte sequence.
   * @param {string} text - Text to convert.
   * @returns {string} Byte string (one char per byte).
   */
  _utf8_encode: function (text) {
    const normalized = text.replace(/\r\n/g, "\n");
    let bytes = "";
    for (const ch of normalized) {
      const cp = ch.codePointAt(0);
      if (cp < 0x80) {
        bytes += String.fromCharCode(cp);
      } else if (cp < 0x800) {
        bytes += String.fromCharCode((cp >> 6) | 192, (cp & 63) | 128);
      } else if (cp < 0x10000) {
        bytes += String.fromCharCode(
          (cp >> 12) | 224,
          ((cp >> 6) & 63) | 128,
          (cp & 63) | 128
        );
      } else {
        bytes += String.fromCharCode(
          (cp >> 18) | 240,
          ((cp >> 12) & 63) | 128,
          ((cp >> 6) & 63) | 128,
          (cp & 63) | 128
        );
      }
    }
    return bytes;
  },

  /**
   * Converts a UTF-8 byte string back into a regular string.
   * Handles 1- to 4-byte sequences; surrogate halves stored as two 3-byte
   * sequences also decode to the intended character. Input is not validated:
   * any other lead byte is read as the start of a 3-byte sequence.
   * @param {string} bytes - Byte string (one char per byte).
   * @returns {string} Decoded text.
   */
  _utf8_decode: function (bytes) {
    let text = "";
    let pos = 0;
    while (pos < bytes.length) {
      const b1 = bytes.charCodeAt(pos);
      if (b1 < 128) {
        text += String.fromCharCode(b1);
        pos += 1;
      } else if (b1 > 191 && b1 < 224) {
        const b2 = bytes.charCodeAt(pos + 1);
        text += String.fromCharCode(((b1 & 31) << 6) | (b2 & 63));
        pos += 2;
      } else if (b1 >= 240 && b1 < 248) {
        const b2 = bytes.charCodeAt(pos + 1);
        const b3 = bytes.charCodeAt(pos + 2);
        const b4 = bytes.charCodeAt(pos + 3);
        const cp =
          ((b1 & 7) << 18) | ((b2 & 63) << 12) | ((b3 & 63) << 6) | (b4 & 63);
        // String.fromCodePoint throws above U+10FFFF; substitute U+FFFD.
        text += cp <= 0x10ffff ? String.fromCodePoint(cp) : "\uFFFD";
        pos += 4;
      } else {
        const b2 = bytes.charCodeAt(pos + 1);
        const b3 = bytes.charCodeAt(pos + 2);
        text += String.fromCharCode(
          ((b1 & 15) << 12) | ((b2 & 63) << 6) | (b3 & 63)
        );
        pos += 3;
      }
    }
    return text;
  }
}
/**
 * Re-signs a Huya stream URL that still carries the `fm` seed parameter.
 *
 * The new signature is md5(`${prefix}_0_${streamName}_${seqid}_${wsTime}`),
 * where `prefix` is the part of the Base64-decoded `fm` value before the
 * first "_". The returned URL replaces `wsSecret`, drops `fm`, adds
 * `u=0` and `seqid`, and forwards every other query parameter unchanged.
 *
 * Relies on two names from the surrounding script: `Base64.decode` and
 * `md5.hex` (js-md5).
 *
 * @param {string} url - Stream URL (.flv or .m3u8) including its query string.
 * @returns {string} The re-signed URL, or `url` unchanged when it has no
 *   `fm` parameter (nothing to derive a signature from).
 * @throws {URIError} If the `fm` value is not valid percent-encoding.
 */
function parseUrl(url) {
  const [mainUrl, query = ""] = url.split("?");

  // Split each pair on its FIRST "=" only, so values that themselves contain
  // "=" (e.g. un-escaped Base64 padding) are kept instead of being dropped.
  const paramsObj = {};
  for (const pair of query.split("&")) {
    const eq = pair.indexOf("=");
    if (eq === -1) {
      continue; // empty segment produced by "&&", or a bare flag
    }
    paramsObj[pair.slice(0, eq)] = pair.slice(eq + 1);
  }

  // `wsSecret` is pulled out only so that the stale value is not forwarded.
  const { fm, wsTime, wsSecret: _staleSecret, ...others } = paramsObj;
  if (fm === undefined) {
    // No seed to sign with; hand the URL back as-is rather than signing the
    // literal text "undefined".
    return url;
  }

  // Stream name = last path segment without its extension. The dot is escaped
  // and the match anchored so only a real trailing ".flv"/".m3u8" is removed.
  const segments = mainUrl.split("/");
  const streamName = segments[segments.length - 1].replace(/\.(flv|m3u8)$/, "");

  const prefix = Base64.decode(decodeURIComponent(fm)).split("_")[0];
  const seqid = Math.floor(1e4 * Date.now() + 1e4 * Math.random());
  const newWsSecret = md5.hex(`${prefix}_0_${streamName}_${seqid}_${wsTime}`);

  const passthrough = Object.keys(others)
    .map((key) => `&${key}=${others[key]}`)
    .join("");

  return `${mainUrl}?wsSecret=${newWsSecret}&wsTime=${wsTime}&u=0&seqid=${seqid}${passthrough}`;
}
// Re-sign the sample stream URL, then hand it to the video.js player that
// data-setup already created for #player.
const streamUrl = parseUrl("//al.hls.huya.com/src/1423787831-1423787831-6115122170587774976-2847699118-10057-A-0-1-imgplus_2000.m3u8?wsSecret=f9aaf4fcbe42e724d152c265cf1837fb&wsTime=5ff71b32&fm=RFdxOEJjSjNoNkRKdDZUWV8kMF8kMV8kMl8kMw%3D%3D&ctype=tars_mobile&txyp=o%3Aj10%3B&fs=bgct&&sphdcdn=al_7-tx_3-js_3-ws_7-bd_2-hw_2&sphdDC=huya&sphd=264_*-265_*&t=103");
const player = videojs("#player");
// State the MIME type explicitly instead of leaving it to be guessed from a
// URL that carries a long query string.
player.src({ src: streamUrl, type: "application/x-mpegURL" });
// play() may return a promise that rejects when the browser blocks autoplay;
// report that instead of leaving an unhandled rejection. The user can still
// start playback from the controls.
const playAttempt = player.play();
if (playAttempt !== undefined) {
  playAttempt.catch((err) => {
    console.warn("Playback did not start automatically:", err);
  });
}
</script>
</body>
</html>
看看諸葛亮彈琴退仲達
:
4. 總結
- 目前發現虎牙【一起看】欄目下的需要把 url 做第二次解析,普通直播間可以直接拿來播放;
- 有人可能說爲啥要用移動端的頁面解析,而不用PC端的,其實也可以的,分析源碼可以找到一個 config 對象,裏面就包含了所需的信息,看到這些參數熟悉不?(同樣的【一起看】欄目的需要二次解析):