原文地址
1、B站視頻真實地址分析
我一直覺得編程語言只是一種工具,重要的是思想🐶。下面先來分析下B站視頻的真實地址。
1.1 獲取視頻的信息數據
使用PC通過瀏覽器隨便打開一個B站的視頻,右鍵檢查
或者是按F12
,查看網頁源代碼。我們會發現有一個script
標籤內的內容是這樣的。
嗯,彷彿這就是視頻的信息了。下面我們將其複製出來,格式化一下。
window.__playinfo__ = {
"data": {
"accept_format": "flv720,flv480,mp4",
"accept_description": ["高清 720P", "清晰 480P", "流暢 360P"],
"accept_quality": [64, 32, 16],
"dash": {
"video": [{
"id": 64,
"baseUrl": "http://cn-sdbz-cu-v-05.bilivideo.com/upgcxcode/18/80/187918018/187918018-1-30064.m4s?expires=1589032200&platform=pc&ssig=7TagzkxicmXQCX-eJG1rWw&oi=1894210281&trid=4b3d732f515544e49c843d5f2c87f64bu&nfc=1&nfb=maPYqpoel5MI3qOUX6YpRA==&mid=388810686&logo=80000000",
"base_url": "http://cn-sdbz-cu-v-05.bilivideo.com/upgcxcode/18/80/187918018/187918018-1-30064.m4s?expires=1589032200&platform=pc&ssig=7TagzkxicmXQCX-eJG1rWw&oi=1894210281&trid=4b3d732f515544e49c843d5f2c87f64bu&nfc=1&nfb=maPYqpoel5MI3qOUX6YpRA==&mid=388810686&logo=80000000",
"backupUrl": ["http://cn-sdyt-cu-v-11.bilivideo.com/upgcxcode/18/80/187918018/187918018-1-30064.m4s?expires=1589032200&platform=pc&ssig=7TagzkxicmXQCX-eJG1rWw&oi=1894210281&trid=4b3d732f515544e49c843d5f2c87f64bu&nfc=1&nfb=maPYqpoel5MI3qOUX6YpRA==&mid=388810686&logo=40000000", "http://cn-hbcd2-cu-v-07.bilivideo.com/upgcxcode/18/80/187918018/187918018-1-30064.m4s?expires=1589032200&platform=pc&ssig=7TagzkxicmXQCX-eJG1rWw&oi=1894210281&trid=4b3d732f515544e49c843d5f2c87f64bu&nfc=1&nfb=maPYqpoel5MI3qOUX6YpRA==&mid=388810686&logo=40000000"],
"backup_url": ["http://cn-sdyt-cu-v-11.bilivideo.com/upgcxcode/18/80/187918018/187918018-1-30064.m4s?expires=1589032200&platform=pc&ssig=7TagzkxicmXQCX-eJG1rWw&oi=1894210281&trid=4b3d732f515544e49c843d5f2c87f64bu&nfc=1&nfb=maPYqpoel5MI3qOUX6YpRA==&mid=388810686&logo=40000000", "http://cn-hbcd2-cu-v-07.bilivideo.com/upgcxcode/18/80/187918018/187918018-1-30064.m4s?expires=1589032200&platform=pc&ssig=7TagzkxicmXQCX-eJG1rWw&oi=1894210281&trid=4b3d732f515544e49c843d5f2c87f64bu&nfc=1&nfb=maPYqpoel5MI3qOUX6YpRA==&mid=388810686&logo=40000000"],
"bandwidth": 1883922,
"mimeType": "video/mp4",
"mime_type": "video/mp4",
"width": 720,
"height": 1280,
"SegmentBase": {"Initialization": "0-974", "indexRange": "975-1162"},
"segment_base": {"initialization": "0-974", "index_range": "975-1162"},
"codecid": 7
},],
"audio": [{
"id": 30280,
"baseUrl": "http://cn-sdyt-cu-v-05.bilivideo.com/upgcxcode/18/80/187918018/187918018-1-30280.m4s?expires=1589032200&platform=pc&ssig=ud9zkd5aAUp7mB4yPjI_LA&oi=1894210281&trid=4b3d732f515544e49c843d5f2c87f64bu&nfc=1&nfb=maPYqpoel5MI3qOUX6YpRA==&mid=388810686&logo=80000000",
"base_url": "http://cn-sdyt-cu-v-05.bilivideo.com/upgcxcode/18/80/187918018/187918018-1-30280.m4s?expires=1589032200&platform=pc&ssig=ud9zkd5aAUp7mB4yPjI_LA&oi=1894210281&trid=4b3d732f515544e49c843d5f2c87f64bu&nfc=1&nfb=maPYqpoel5MI3qOUX6YpRA==&mid=388810686&logo=80000000",
"backupUrl": ["http://cn-hbcd2-cu-v-14.bilivideo.com/upgcxcode/18/80/187918018/187918018-1-30280.m4s?expires=1589032200&platform=pc&ssig=ud9zkd5aAUp7mB4yPjI_LA&oi=1894210281&trid=4b3d732f515544e49c843d5f2c87f64bu&nfc=1&nfb=maPYqpoel5MI3qOUX6YpRA==&mid=388810686&logo=40000000", "http://cn-sdjn2-cu-v-05.bilivideo.com/upgcxcode/18/80/187918018/187918018-1-30280.m4s?expires=1589032200&platform=pc&ssig=ud9zkd5aAUp7mB4yPjI_LA&oi=1894210281&trid=4b3d732f515544e49c843d5f2c87f64bu&nfc=1&nfb=maPYqpoel5MI3qOUX6YpRA==&mid=388810686&logo=40000000"],
"backup_url": ["http://cn-hbcd2-cu-v-14.bilivideo.com/upgcxcode/18/80/187918018/187918018-1-30280.m4s?expires=1589032200&platform=pc&ssig=ud9zkd5aAUp7mB4yPjI_LA&oi=1894210281&trid=4b3d732f515544e49c843d5f2c87f64bu&nfc=1&nfb=maPYqpoel5MI3qOUX6YpRA==&mid=388810686&logo=40000000", "http://cn-sdjn2-cu-v-05.bilivideo.com/upgcxcode/18/80/187918018/187918018-1-30280.m4s?expires=1589032200&platform=pc&ssig=ud9zkd5aAUp7mB4yPjI_LA&oi=1894210281&trid=4b3d732f515544e49c843d5f2c87f64bu&nfc=1&nfb=maPYqpoel5MI3qOUX6YpRA==&mid=388810686&logo=40000000"],
"bandwidth": 319474,
"mimeType": "audio/mp4",
"mime_type": "audio/mp4",
"SegmentBase": {"Initialization": "0-919", "indexRange": "920-1107"},
"segment_base": {"initialization": "0-919", "index_range": "920-1107"},
},]
}
}, "session": "996ecc0413599104d175e5c254e70fb7", "videoFrame": {}
}
我刪除了一些沒有用的信息,通過上面的信息我們可以得到以下幾點信息:
- B站的視頻是音視頻分離的。
- 我們可以從js中獲取真實音視頻地址。
- 提供了
"高清 720P", "清晰 480P", "流暢 360P"
方式供我們選擇。不要問我爲什麼沒有1080P
的,俺也不知道😢。
下面我們再來看一下B站自己發送請求的信息。
我們發現每次請求時都攜帶了此次請求文件的字節位置信息(即請求頭中的 Range)。
1.2 抓取B站視頻的思路
- 請求想要下載視頻的地址,獲取頁面html。
- 從頁面中解析出視頻的基本信息,音視頻的url地址等信息。
- 下載音視頻文件,發送請求時帶上請求的範圍(
range
)。(注:
本文的實現中,沒有使用多線程,直接請求的整個文件)
- 將下載完成的音視頻文件合併成完整的視頻文件。
1.3 用到的第三方庫和軟件
ffmpeg
:用於合併音視頻文件。官方網址:http://ffmpeg.org/。
ffmpeg
:是處理音視頻的利器,感興趣可以搜索相關資料瞭解下。本文只要你下載下來,將ffmpeg
位置寫到代碼變量裏就可以了。
requests
:用於發送Http請求
ffmpeg-python
:方便操作ffmpeg
HTMLParser
:python自帶的HTML解析工具
hutool
:一個國人開源的Java工具包。強烈推薦。
jsoup
:Java解析HTML的利器
2、代碼實現
注:
本文中代碼下載音視頻均採用的單線程的方式,如果使用多線程,一定要計算好每個請求的請求範圍,以及下載完成後,合併文件時的順序。
2.1 Python的實現
解析html
class BiliHTMLParser(HTMLParser):
    """
    HTMLParser subclass that pulls the video title and the play-info JSON
    out of a video page.

    After ``feed()``:

    * ``videoName`` is the whitespace-trimmed text of the first non-empty
      ``<title>`` element, or ``""`` if none was seen.
    * ``videoInfo`` is the dict decoded from the
      ``window.__playinfo__ = {...}`` script, or ``{}`` if none was seen.
    """

    # JavaScript variable the page assigns the play-info JSON to.
    PLAYINFO_VAR = 'window.__playinfo__'

    def __init__(self):
        super().__init__()
        self.isTitle = 0  # number of currently open <title> tags
        self.videoName = ""  # video name
        self.videoInfo = {}  # video info
        # Set once a non-empty title has been captured, so that later <title>
        # elements (e.g. inside inline SVG) cannot overwrite the page title.
        self._titleDone = False

    def handle_starttag(self, tag, attrs):
        """Track entry into a ``<title>`` element."""
        if tag == 'title':
            self.isTitle += 1

    def handle_endtag(self, tag):
        """Track exit from a ``<title>`` element and finalise the title."""
        if tag != 'title' or not self.isTitle:
            return
        self.isTitle -= 1
        if self.isTitle == 0 and not self._titleDone:
            self.videoName = self.videoName.strip()
            self._titleDone = bool(self.videoName)

    def handle_data(self, data):
        """
        Collect the title text and decode the play-info script.

        :param data: text content found between tags
        :return: None
        :raises json.JSONDecodeError: if the play-info payload is not valid JSON
        """
        if self.isTitle and not self._titleDone:
            # Text may arrive in several pieces; concatenate them.
            self.videoName += data

        text = data.strip()
        if not text.startswith(self.PLAYINFO_VAR):
            return
        # Accept optional whitespace around '=' and a trailing ';'.
        rest = text[len(self.PLAYINFO_VAR):].lstrip()
        if not rest.startswith('='):
            return
        payload = rest[1:].strip().rstrip(';').rstrip()
        self.videoInfo = json.loads(payload)  # JSON text -> dict
獲取視頻信息
def getVideo(videoInfo, videoName):
    """
    Download the first video stream and the first audio stream listed in the
    play info, then merge them into ``./<videoName>/<videoName>.mp4``.

    :param videoInfo: play-info dict
    :param videoName: video name; used for the output directory and file names
    :return: None
    """
    dash = videoInfo['data']['dash']

    # Resolve every URL / initialization range up front so a malformed
    # play-info dict fails before anything is downloaded.
    firstVideo = dash['video'][0]
    videoUrlAndRange = (firstVideo['baseUrl'],
                        firstVideo['SegmentBase']['Initialization'])
    firstAudio = dash['audio'][0]
    audioUrlAndRange = (firstAudio['baseUrl'],
                        firstAudio['SegmentBase']['Initialization'])

    # Probe the total size, then download; video first, audio second.
    downloaded = []
    for kind, (mediaUrl, initRange) in (("video", videoUrlAndRange),
                                        ("audio", audioUrlAndRange)):
        totalSize = getVideoInfo(mediaUrl, initRange)
        downloaded.append(downloadFile(mediaUrl, totalSize, kind, videoName))

    # Merge the two files.
    videoFileName, audioFileName = downloaded
    mergeFiles(videoFileName, audioFileName,
               "./%s/%s.mp4" % (videoName, videoName))
def getVideoInfo(baseUrl, range):
    """
    Return the total size of a video or audio resource.

    Requests only the byte range ``range`` of ``baseUrl`` and takes the part
    after the last ``/`` of the ``Content-Range`` response header.

    :param baseUrl: URL of the media resource
    :param range: byte range to request, e.g. ``"0-974"`` (the name shadows
        the builtin but is kept for caller compatibility)
    :return: total size as a string, as it appears in ``Content-Range``
    :raises requests.HTTPError: if the server answers with an error status
    :raises KeyError: if the response has no ``Content-Range`` header
    """
    headers = {
        'Referer': videoUrl,  # module-level name defined outside this function
        'Range': 'bytes=%s' % (range),
    }
    # A timeout keeps a stalled connection from hanging the script forever.
    videoRes = requests.get(url=baseUrl, headers=headers, timeout=10)
    try:
        # Report HTTP failures directly instead of as a KeyError below.
        videoRes.raise_for_status()
        total = videoRes.headers['Content-Range'].split('/')[-1]
    finally:
        videoRes.close()
    print('資源的總字節數:%s' % total)
    return total
下載音視頻
def downloadFile(url, totalSize, type, videoName):
    """
    Download a resource to ``./<videoName>/<videoName>-<type>.mp4``.

    If the target file already exists, nothing is downloaded. The body is
    streamed to a ``.part`` file and renamed only after the download
    finished, so an interrupted or failed download is never mistaken for a
    complete file on the next run.

    :param url: resource URL
    :param totalSize: total size of the resource (used as the range end)
    :param type: ``"video"`` or ``"audio"``; becomes part of the file name
        (the name shadows the builtin but is kept for caller compatibility)
    :param videoName: video name; used for the directory and file names
    :return: path of the downloaded (or already present) file
    :raises requests.HTTPError: if the server answers with an error status
    """
    headers = {
        'Referer': videoUrl,  # module-level name defined outside this function
        # NOTE(review): byte ranges are inclusive, so the end is one past the
        # last byte; RFC 7233 has servers clamp such a range to the end of
        # the resource, so the header is left unchanged.
        'Range': "bytes=%s-%s" % (str(0), str(totalSize))
    }
    fileDir = "./%s" % videoName
    os.makedirs(fileDir, exist_ok=True)  # no exists()/mkdir race

    fileName = "./%s/%s-%s.mp4" % (videoName, videoName, type)
    if os.path.exists(fileName):
        return fileName

    partName = fileName + ".part"
    with requests.get(url=url, headers=headers, stream=True, timeout=30) as res:
        # Do not save an error page as if it were media data.
        res.raise_for_status()
        print("開始下載:%s" % type)
        with open(partName, 'wb') as file_obj:
            # Write chunk by chunk instead of holding the whole file in memory.
            for chunk in res.iter_content(chunk_size=1024 * 1024):
                if chunk:
                    file_obj.write(chunk)
    os.replace(partName, fileName)
    print("完成%s的下載" % type)
    return fileName
合併音視頻
def mergeFiles(videoFilePath, audioFilePath, outFilePath):
    """
    Merge a video file and an audio file into ``outFilePath`` with ffmpeg,
    using ``copy`` for both the video and the audio codec.

    :param videoFilePath: path of the video-only file
    :param audioFilePath: path of the audio-only file
    :param outFilePath: path of the merged output file
    :return: None
    """
    print("開始合併音視頻")
    inputs = [ffmpeg.input(path) for path in (videoFilePath, audioFilePath)]
    mergeJob = ffmpeg.output(*inputs, outFilePath, vcodec='copy', acodec='copy')
    # ffmpegPath is a module-level name defined outside this function.
    ffmpeg.run(mergeJob, cmd=ffmpegPath)
    print("合併音視頻完成")
2.2 Java實現
解析Html
/**
 * Fetches the page at VIDEO_URL, extracts the video title and the
 * {@code window.__playinfo__} JSON into VIDEO_INFO, then continues with
 * {@link #getVideoInfo()}.
 *
 * Prints an error and returns without continuing when the page has no
 * {@code <title>} element or no play-info script.
 */
private static void htmlParser(){
    HttpResponse res = HttpRequest.get(VIDEO_URL).timeout(2000).execute();
    String html = res.body();
    Document document = Jsoup.parse(html);
    Element title = document.getElementsByTag("title").first();
    // first() yields null when the page has no <title>; avoid the NPE.
    if (title == null) {
        System.err.println("未獲取到視頻標題,退出程序!");
        return;
    }
    // Video name
    VIDEO_INFO.videoName = title.text();
    // Extract the play-info JSON. Dots are escaped so they match literally,
    // and optional whitespace around '=' is tolerated.
    Pattern pattern = Pattern.compile("<script>\\s*window\\.__playinfo__\\s*=\\s*(.*?)</script>");
    Matcher matcher = pattern.matcher(html);
    if (matcher.find()) {
        VIDEO_INFO.videoInfo = new JSONObject(matcher.group(1));
    } else {
        System.err.println("未匹配到視頻信息,退出程序!");
        return;
    }
    getVideoInfo();
}
獲取視頻信息
/**
 * Reads URL and initialization range of the first video and the first audio
 * stream from VIDEO_INFO.videoInfo, probes the total size of each, stores
 * everything in VIDEO_INFO and then calls {@link #downloadFile()}.
 *
 * @throws IllegalStateException if a probe response carries no usable
 *         Content-Range header
 */
private static void getVideoInfo(){
    JSONObject dash = VIDEO_INFO.videoInfo.getJSONObject("data").getJSONObject("dash");

    // Video stream
    JSONObject video = dash.getJSONArray("video").getJSONObject(0);
    VIDEO_INFO.videoBaseUrl = video.getStr("baseUrl");
    VIDEO_INFO.videoBaseRange = video.getJSONObject("SegmentBase").getStr("Initialization");
    VIDEO_INFO.videoSize = fetchTotalSize(VIDEO_INFO.videoBaseUrl, VIDEO_INFO.videoBaseRange);

    // Audio stream
    JSONObject audio = dash.getJSONArray("audio").getJSONObject(0);
    VIDEO_INFO.audioBaseUrl = audio.getStr("baseUrl");
    VIDEO_INFO.audioBaseRange = audio.getJSONObject("SegmentBase").getStr("Initialization");
    VIDEO_INFO.audioSize = fetchTotalSize(VIDEO_INFO.audioBaseUrl, VIDEO_INFO.audioBaseRange);

    downloadFile();
}

/**
 * Requests the given byte range of a resource and returns the total size
 * reported after the "/" in the Content-Range response header.
 */
private static String fetchTotalSize(String url, String range){
    HttpResponse res = HttpRequest.get(url)
            .header("Referer", VIDEO_URL)
            .header("Range", "bytes=" + range)
            .header("User-Agent", USER_AGENT)
            .timeout(2000)
            .execute();
    String contentRange = res.header("Content-Range");
    // Fail with a clear message instead of a NullPointerException.
    if (contentRange == null || contentRange.indexOf('/') < 0) {
        throw new IllegalStateException("Missing or malformed Content-Range header for " + url);
    }
    return contentRange.substring(contentRange.lastIndexOf('/') + 1);
}
下載音視頻
/**
 * Downloads the video and audio files into "./<videoName>/" (skipping files
 * that already exist) and then merges them with
 * {@link #mergeFiles(File, File)}.
 *
 * If either download fails, an error is printed and no merge is attempted.
 */
private static void downloadFile(){
    // Directory the audio/video files are saved to
    SAVE_PATH = "." + File.separator + VIDEO_INFO.videoName;
    File fileDir = new File(SAVE_PATH);
    if (!fileDir.exists()){
        fileDir.mkdirs();
    }
    File videoFile = new File(SAVE_PATH + File.separator + VIDEO_INFO.videoName + "_video.mp4");
    File audioFile = new File(SAVE_PATH + File.separator + VIDEO_INFO.videoName + "_audio.mp4");

    // Video first, then audio; stop at the first failure.
    if (!fetchToFile(VIDEO_INFO.videoBaseUrl, VIDEO_INFO.videoSize, videoFile, "視頻")) {
        return;
    }
    if (!fetchToFile(VIDEO_INFO.audioBaseUrl, VIDEO_INFO.audioSize, audioFile, "音頻")) {
        return;
    }
    mergeFiles(videoFile,audioFile);
}

/**
 * Downloads one resource to target unless target already exists.
 * Returns true when target is present afterwards, false on an HTTP error.
 */
private static boolean fetchToFile(String url, String size, File target, String label){
    if (target.exists()){
        return true;
    }
    System.out.println("--------------開始下載" + label + "文件--------------");
    // NOTE(review): byte ranges are inclusive, so "0-<size>" ends one past
    // the last byte; RFC 7233 has servers clamp that to the end of the
    // resource, so the header is left unchanged.
    HttpResponse res = HttpRequest.get(url)
            .header("Referer", VIDEO_URL)
            .header("Range", "bytes=0-" + size)
            .header("User-Agent", USER_AGENT)
            .execute();
    // Do not save an error body as .mp4: the file would then exist and be
    // skipped on every later run.
    int status = res.getStatus();
    if (status < 200 || status >= 300) {
        System.err.println(label + "文件下載失敗,HTTP狀態碼:" + status);
        return false;
    }
    res.writeBody(target);
    System.out.println("--------------" + label + "文件下載完成--------------");
    return true;
}
合併視頻
/**
 * Merges the downloaded video and audio files into
 * "<SAVE_PATH>/<videoName>.mp4" by running the ffmpeg executable at
 * FFMPEG_PATH with "-vcodec copy -acodec copy".
 *
 * Success is reported only when ffmpeg exits with code 0.
 *
 * @param videoFile the video-only file
 * @param audioFile the audio-only file
 */
private static void mergeFiles(File videoFile,File audioFile){
    System.out.println("--------------開始合併音視頻--------------");
    String outFile = SAVE_PATH + File.separator + VIDEO_INFO.videoName + ".mp4";
    List<String> command = new ArrayList<>();
    command.add(FFMPEG_PATH);
    command.add("-i");
    command.add(videoFile.getAbsolutePath());
    command.add("-i");
    command.add(audioFile.getAbsolutePath());
    command.add("-vcodec");
    command.add("copy");
    command.add("-acodec");
    command.add("copy");
    command.add(outFile);
    ProcessBuilder builder = new ProcessBuilder();
    builder.command(command);
    try {
        // inheritIO() sends ffmpeg's own output to this process's console.
        int exitCode = builder.inheritIO().start().waitFor();
        if (exitCode == 0) {
            System.out.println("--------------音視頻合併完成--------------");
        } else {
            // A non-zero exit code means ffmpeg did not finish successfully.
            System.err.println("音視頻合併失敗!");
        }
    } catch (IOException e) {
        System.err.println("音視頻合併失敗!");
        e.printStackTrace();
    } catch (InterruptedException e) {
        // Restore the interrupt flag so callers can still observe it.
        Thread.currentThread().interrupt();
        System.err.println("音視頻合併失敗!");
        e.printStackTrace();
    }
}