目录
0. 项目介绍
1. 构造浏览器实例
2. 爬取账户页中所有帖子的链接
3. 爬取并下载帖子页中的视频
4. 完整代码

0. 项目介绍

本项目的目的是输入指定TikTok账户页的链接，输出该账户每一个帖子的视频。本项目的基本结构参考这篇博文。

本项目需要先导入如下库：

from selenium import webdriver
import json, time, os

本项目的全局变量如下（其中 sslPort 以及各个路径变量留空，需根据本机环境自行填写后才能运行）：

# User configuration. Values left blank here must be filled in before running.

# Port of the local proxy on 127.0.0.1; used for Firefox's
# 'network.proxy.ssl_port' preference and for httpsProxy below.
sslPort = 
# Passed to the WebDriver constructor as firefox_binary.
fxBinaryPath = ''
# Passed to the WebDriver constructor as executable_path.
geckodriverPath = ''
# JavaScript run via execute_script to scroll the profile page down.
pageDownJS = 'document.documentElement.scrollTop=100000000'
# Root folder for downloads; one sub-folder per account is created below it.
outputPath = ''
# wget executable invoked to download each video.
wgetPath = ''
# Proxy URL handed to wget as its https_proxy setting.
httpsProxy = 'https://127.0.0.1:{}/'.format(str(sslPort))

本项目的基本结构如下：

def Main():
	"""Prompt for a TikTok profile link and download the video of every post.

	The profile page is scraped for post links with PROFILE().Main, then each
	link is handed to POST().Main for download.
	"""
	profileUrl = input('Please input the tiktok profile link: ')
	
	# PROFILE().Main prints the error and yields no list when scraping fails;
	# fall back to an empty list so the loop below does not raise TypeError.
	urlList = PROFILE().Main(profileUrl) or []
	
	for url in urlList:
		POST().Main(url)

1. 构造浏览器实例

# Firefox about:config preferences, applied to the profile in this order:
# HTTPS traffic goes through the proxy at 127.0.0.1:sslPort, followed by the
# DNS/TRR, image-permission and accepted-language settings.
_fxPreferences = (
	('network.proxy.type', 1),
	('network.proxy.ssl', '127.0.0.1'),
	('network.proxy.ssl_port', sslPort),
	('network.proxy.socks_remote_dns', True),
	('network.trr.mode', 2),
	('permissions.default.image', 2),
	('intl.accept_languages', 'zh-CN, zh, zh-TW, zh-HK, en-US, en'),
)

fxProfile = webdriver.firefox.firefox_profile.FirefoxProfile()
for _prefName, _prefValue in _fxPreferences:
	fxProfile.set_preference(_prefName, _prefValue)

# Single browser instance shared by the PROFILE and POST classes.
fxDriver = webdriver.firefox.webdriver.WebDriver(
	firefox_profile=fxProfile,
	firefox_binary=fxBinaryPath,
	executable_path=geckodriverPath,
)

2. 爬取账户页中所有帖子的链接

class PROFILE(object):
	"""Collect the link of every post listed on a TikTok profile page.

	All methods drive the module-level ``fxDriver`` browser instance.
	"""
	
	# XPath of the anchor elements whose href is read as the post link.
	_postXPath = '//a[contains(@class, "video-feed-item-wrapper")]'
	
	def GetLocY(self):
		"""Return the y coordinate of the last post link on the page.

		Returns 0 when the page contains no post link, instead of failing on
		an unbound local as the loop-based version did.
		"""
		elements = fxDriver.find_elements_by_xpath(self._postXPath)
		
		if not elements:
			return 0
		
		# Only the last element matters; querying every element's location
		# costs one WebDriver round-trip each.
		return elements[-1].location['y']
	
	def JudgeLoading(self, locY):
		"""Wait 0.5 s, then report whether the last post link moved further down.

		Returns the new y coordinate when it is greater than ``locY``,
		otherwise None.
		"""
		time.sleep(0.5)
		
		locYNew = self.GetLocY()
		
		return locYNew if locY < locYNew else None
	
	def GetPostUrl(self):
		"""Return the href of every post link currently present on the page."""
		return [e.get_attribute('href') for e in fxDriver.find_elements_by_xpath(self._postXPath)]
	
	def GetWholePage(self):
		"""Scroll down until no new posts appear, then return all post links.

		After each scroll the page is polled with JudgeLoading; when 21
		consecutive polls show no growth the page is treated as fully loaded.
		"""
		locY = self.GetLocY()
		
		while True:
			fxDriver.execute_script(pageDownJS)
			
			# 21 polls matches the original "loadFailCount > 20" cut-off.
			for _ in range(21):
				locYNew = self.JudgeLoading(locY)
				
				if locYNew is not None:
					locY = locYNew
					break
			else:
				return self.GetPostUrl()
	
	def Main(self, profileUrl):
		"""Open ``profileUrl`` and return the list of post links found on it.

		Any error is printed and an empty list is returned, so callers can
		always iterate over the result.
		"""
		try:
			fxDriver.get(profileUrl)
			return self.GetWholePage()
		except Exception as e:
			print(e)
			return []

3. 爬取并下载帖子页中的视频

class POST(object):
	"""Download the video of one TikTok post page.

	Page access goes through the module-level ``fxDriver`` browser instance.
	"""
	
	@staticmethod
	def _ParseInfo(jsonText):
		"""Extract (video URL, creation time, author unique id) from JSON text.

		The creation time is formatted as 'YYYY-mm-dd HH:MM:SS' in local time.
		Raises KeyError/IndexError/ValueError when the JSON lacks the expected
		fields.
		"""
		videoData = json.loads(jsonText)['props']['pageProps']['videoData']
		itemInfos = videoData['itemInfos']
		
		vidUrl = itemInfos['video']['urls'][0]
		createTimeStamp = int(itemInfos['createTime'])
		createTime = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(createTimeStamp))
		uniqueID = videoData['authorInfos']['uniqueId']
		
		return vidUrl, createTime, uniqueID
	
	def GetInfo(self, html=None):
		"""Read the post data from the page currently loaded in ``fxDriver``.

		``html`` is unused and kept only so existing callers that pass it keep
		working. Returns the tuple (vidUrl, createTime, uniqueID).
		"""
		jsonText = fxDriver.find_element_by_xpath('//script[@type="application/json"]').get_attribute('textContent')
		
		return self._ParseInfo(jsonText)
	
	def DownloadFile(self, info):
		"""Download the video described by ``info`` with wget.

		``info`` is the (vidUrl, createTime, uniqueID) tuple from GetInfo. The
		file is saved as <outputPath>\\<uniqueID>\\<YYYYmmddHHMMSS>.mp4.
		"""
		# Imported here so this class does not rely on the file-level imports.
		import subprocess
		
		vidUrl, createTime, uniqueID = info[:3]
		fileStem = createTime.replace('-', '').replace(':', '').replace(' ', '')
		
		folder = '{}\\{}'.format(outputPath, uniqueID)
		
		try:
			# exist_ok avoids an error message for every post after the first.
			os.makedirs(folder, exist_ok=True)
		except OSError as e:
			print(e)
			return
		
		# The URL comes from the scraped page, so pass an argument list rather
		# than a shell string: nothing in it is interpreted by a shell.
		subprocess.run([
			wgetPath,
			'--output-document={}\\{}.mp4'.format(folder, fileStem),
			'--no-check-certificate',
			'--execute', 'https_proxy={}'.format(httpsProxy),
			'--execute', 'robots=off',
			'--continue',
			vidUrl,
		])
	
	def Main(self, url):
		"""Open the post at ``url`` and download its video; errors are printed."""
		try:
			fxDriver.get(url)
			# The original passed an undefined name `html` here, which raised
			# NameError on every call and prevented any download.
			info = self.GetInfo()
			self.DownloadFile(info)
		except Exception as e:
			print(e)

4. 完整代码

from selenium import webdriver
import json, time, os

# User configuration. Values left blank here must be filled in before running.

# Port of the local proxy on 127.0.0.1; used for Firefox's
# 'network.proxy.ssl_port' preference and for httpsProxy below.
sslPort = 
# Passed to the WebDriver constructor as firefox_binary.
fxBinaryPath = ''
# Passed to the WebDriver constructor as executable_path.
geckodriverPath = ''
# JavaScript run via execute_script to scroll the profile page down.
pageDownJS = 'document.documentElement.scrollTop=100000000'
# Root folder for downloads; one sub-folder per account is created below it.
outputPath = ''
# wget executable invoked to download each video.
wgetPath = ''
# Proxy URL handed to wget as its https_proxy setting.
httpsProxy = 'https://127.0.0.1:{}/'.format(str(sslPort))

# Firefox about:config preferences, applied to the profile in this order:
# HTTPS traffic goes through the proxy at 127.0.0.1:sslPort, followed by the
# DNS/TRR, image-permission and accepted-language settings.
_fxPreferences = (
	('network.proxy.type', 1),
	('network.proxy.ssl', '127.0.0.1'),
	('network.proxy.ssl_port', sslPort),
	('network.proxy.socks_remote_dns', True),
	('network.trr.mode', 2),
	('permissions.default.image', 2),
	('intl.accept_languages', 'zh-CN, zh, zh-TW, zh-HK, en-US, en'),
)

fxProfile = webdriver.firefox.firefox_profile.FirefoxProfile()
for _prefName, _prefValue in _fxPreferences:
	fxProfile.set_preference(_prefName, _prefValue)

# Single browser instance shared by the PROFILE and POST classes.
fxDriver = webdriver.firefox.webdriver.WebDriver(
	firefox_profile=fxProfile,
	firefox_binary=fxBinaryPath,
	executable_path=geckodriverPath,
)

class POST(object):
	"""Download the video of one TikTok post page.

	Page access goes through the module-level ``fxDriver`` browser instance.
	"""
	
	@staticmethod
	def _ParseInfo(jsonText):
		"""Extract (video URL, creation time, author unique id) from JSON text.

		The creation time is formatted as 'YYYY-mm-dd HH:MM:SS' in local time.
		Raises KeyError/IndexError/ValueError when the JSON lacks the expected
		fields.
		"""
		videoData = json.loads(jsonText)['props']['pageProps']['videoData']
		itemInfos = videoData['itemInfos']
		
		vidUrl = itemInfos['video']['urls'][0]
		createTimeStamp = int(itemInfos['createTime'])
		createTime = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(createTimeStamp))
		uniqueID = videoData['authorInfos']['uniqueId']
		
		return vidUrl, createTime, uniqueID
	
	def GetInfo(self, html=None):
		"""Read the post data from the page currently loaded in ``fxDriver``.

		``html`` is unused and kept only so existing callers that pass it keep
		working. Returns the tuple (vidUrl, createTime, uniqueID).
		"""
		jsonText = fxDriver.find_element_by_xpath('//script[@type="application/json"]').get_attribute('textContent')
		
		return self._ParseInfo(jsonText)
	
	def DownloadFile(self, info):
		"""Download the video described by ``info`` with wget.

		``info`` is the (vidUrl, createTime, uniqueID) tuple from GetInfo. The
		file is saved as <outputPath>\\<uniqueID>\\<YYYYmmddHHMMSS>.mp4.
		"""
		# Imported here so this class does not rely on the file-level imports.
		import subprocess
		
		vidUrl, createTime, uniqueID = info[:3]
		fileStem = createTime.replace('-', '').replace(':', '').replace(' ', '')
		
		folder = '{}\\{}'.format(outputPath, uniqueID)
		
		try:
			# exist_ok avoids an error message for every post after the first.
			os.makedirs(folder, exist_ok=True)
		except OSError as e:
			print(e)
			return
		
		# The URL comes from the scraped page, so pass an argument list rather
		# than a shell string: nothing in it is interpreted by a shell.
		subprocess.run([
			wgetPath,
			'--output-document={}\\{}.mp4'.format(folder, fileStem),
			'--no-check-certificate',
			'--execute', 'https_proxy={}'.format(httpsProxy),
			'--execute', 'robots=off',
			'--continue',
			vidUrl,
		])
	
	def Main(self, url):
		"""Open the post at ``url`` and download its video; errors are printed."""
		try:
			fxDriver.get(url)
			# The original passed an undefined name `html` here, which raised
			# NameError on every call and prevented any download.
			info = self.GetInfo()
			self.DownloadFile(info)
		except Exception as e:
			print(e)

class PROFILE(object):
	"""Collect the link of every post listed on a TikTok profile page.

	All methods drive the module-level ``fxDriver`` browser instance.
	"""
	
	# XPath of the anchor elements whose href is read as the post link.
	_postXPath = '//a[contains(@class, "video-feed-item-wrapper")]'
	
	def GetLocY(self):
		"""Return the y coordinate of the last post link on the page.

		Returns 0 when the page contains no post link, instead of failing on
		an unbound local as the loop-based version did.
		"""
		elements = fxDriver.find_elements_by_xpath(self._postXPath)
		
		if not elements:
			return 0
		
		# Only the last element matters; querying every element's location
		# costs one WebDriver round-trip each.
		return elements[-1].location['y']
	
	def JudgeLoading(self, locY):
		"""Wait 0.5 s, then report whether the last post link moved further down.

		Returns the new y coordinate when it is greater than ``locY``,
		otherwise None.
		"""
		time.sleep(0.5)
		
		locYNew = self.GetLocY()
		
		return locYNew if locY < locYNew else None
	
	def GetPostUrl(self):
		"""Return the href of every post link currently present on the page."""
		return [e.get_attribute('href') for e in fxDriver.find_elements_by_xpath(self._postXPath)]
	
	def GetWholePage(self):
		"""Scroll down until no new posts appear, then return all post links.

		After each scroll the page is polled with JudgeLoading; when 21
		consecutive polls show no growth the page is treated as fully loaded.
		"""
		locY = self.GetLocY()
		
		while True:
			fxDriver.execute_script(pageDownJS)
			
			# 21 polls matches the original "loadFailCount > 20" cut-off.
			for _ in range(21):
				locYNew = self.JudgeLoading(locY)
				
				if locYNew is not None:
					locY = locYNew
					break
			else:
				return self.GetPostUrl()
	
	def Main(self, profileUrl):
		"""Open ``profileUrl`` and return the list of post links found on it.

		Any error is printed and an empty list is returned, so callers can
		always iterate over the result.
		"""
		try:
			fxDriver.get(profileUrl)
			return self.GetWholePage()
		except Exception as e:
			print(e)
			return []

def Main():
	"""Repeatedly prompt for a TikTok profile link and download its videos.

	Each profile is scraped for post links with PROFILE().Main, and every link
	is handed to POST().Main. The prompt is shown again after each profile;
	entering a blank line ends the loop.
	"""
	# A loop replaces the original self-recursion, which added one stack frame
	# per processed profile.
	while True:
		profileUrl = input('Please input the tiktok profile link: ').strip()
		
		if not profileUrl:
			break
		
		# PROFILE().Main prints the error and yields no list when scraping
		# fails; fall back to an empty list so the loop below does not raise.
		urlList = PROFILE().Main(profileUrl) or []
		
		for url in urlList:
			POST().Main(url)

# Start the downloader only when this file is run as a script, not on import.
if __name__ == '__main__':
	Main()

【Python】爬取并下载TikTok账户中所有帖子的视频

0. 项目介绍

1. 构造浏览器实例

2. 爬取账户页中所有帖子的链接

3. 爬取并下载帖子页中的视频

4. 完整代码

linux安装cuda和cudnn

模拟手机设备：使用 Playwright 实现移动端自动化测试

Mellanox网卡开启SR-IOV

全面系统的AI学习路径，帮助普通人也能玩转AI

HTML 00 Tutorial

uni-app实现上拉加载

vue3编译优化之“静态提升”

又是一个月-20240513

flask 如何保证返回json有序

linux服务器设置ssh免密

【Python】爬取並下載Instagram帖子的信息、圖片和視頻

【Python】使用 Selenium + Firefox 獲取 HTML

【Windows】命令行下載工具

【Python】基於哈希生成密碼

【Windows】軟件推薦

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結