目录 |
---|
0. 项目介绍 |
1. 构造浏览器实例 |
2. 登录Instagram账户 |
3. 爬取账户页中所有帖子的链接 |
4. 爬取并下载帖子页中的信息、图片和视频 |
5. 完整代码 |
0. 项目介绍
本项目的目的是输入指定Instagram账户页的链接,输出该账户每一个帖子的:① 包含当前时间、用户上传帖子的时间(当前时区)、用户名称(Username)、用户全称(Full name)、帖子文字、点赞数、评论数、图片描述(当帖子中有图片时)、图片链接(当帖子中有图片时)、视频观看数(当帖子中有视频时)、视频链接(当帖子中有视频时)的文本文档;② 图片(当帖子中有图片时)、视频(当帖子中有视频时)。
本项目需要先导入如下库:
from selenium import webdriver
from bs4 import BeautifulSoup
import json, time, os
本项目的基本结构如下:
def Main():
    """Entry point: ask for a profile URL, collect its post links, then download every post."""
    targetProfile = input('Please input the instagram profile link: ')
    postLinks = PROFILE().Main(targetProfile)
    for postLink in postLinks:
        POST().Main(postLink)
解释:在构造浏览器实例和登录Instagram账户之后输入Instagram账户页的链接profileUrl
;PROFILE().Main(profileUrl)
负责爬取账户页中所有帖子的链接,生成链接列表urlList
;POST().Main(url)
负责下载每一个帖子的信息、图片和视频。
1. 构造浏览器实例
from selenium import webdriver

# Local proxy ports. The original left these assignments empty, which is a
# syntax error; fill in the real ports of your proxy tool before running.
socksPort = 0  # TODO: SOCKS proxy port
httpPort = 0   # TODO: HTTP proxy port
sslPort = 0    # TODO: SSL/HTTPS proxy port

fxProfile = webdriver.firefox.firefox_profile.FirefoxProfile()
# type 1 = manual proxy configuration; route all traffic through 127.0.0.1.
fxProfile.set_preference('network.proxy.type', 1)
fxProfile.set_preference('network.proxy.socks', '127.0.0.1')
fxProfile.set_preference('network.proxy.socks_port', socksPort)
fxProfile.set_preference('network.proxy.http', '127.0.0.1')
fxProfile.set_preference('network.proxy.http_port', httpPort)
fxProfile.set_preference('network.proxy.ssl', '127.0.0.1')
fxProfile.set_preference('network.proxy.ssl_port', sslPort)
# Resolve DNS through the SOCKS proxy so lookups are not leaked locally.
fxProfile.set_preference('network.proxy.socks_remote_dns', True)
fxProfile.set_preference('network.trr.mode', 2)
# 2 = block image loading, which speeds up scrolling through the profile page.
fxProfile.set_preference('permissions.default.image', 2)
fxProfile.set_preference('intl.accept_languages', 'zh-CN, zh, zh-TW, zh-HK, en-US, en')

fxBinaryPath = ''      # absolute path of firefox.exe — fill in before running
geckodriverPath = ''   # absolute path of geckodriver.exe — fill in before running
fxDriver = webdriver.firefox.webdriver.WebDriver(firefox_profile=fxProfile, firefox_binary=fxBinaryPath, executable_path=geckodriverPath)
解释:可以参考这篇博文。应根据手头上的工具分别设置SOCKS、HTTP和SSL代理的端口socksPort
、httpPort
和sslPort
。fxBinaryPath
是Firefox浏览器的firefox.exe
的绝对路径,geckodriverPath
是geckodriver.exe
的绝对路径。
2. 登录Instagram账户
# Log in to the Instagram account before scraping.
account = ''  # Instagram username — fill in before running
password = ''  # Instagram password — fill in before running
fxDriver.get('https://www.instagram.com/accounts/login/')
# Wait up to 100 s for the login form to render before typing into it.
webdriver.support.ui.WebDriverWait(fxDriver, 100).until(lambda x: x.find_element_by_name('username'))
fxDriver.find_element_by_name('username').send_keys(account)
fxDriver.find_element_by_name('password').send_keys(password)
fxDriver.find_element_by_xpath('//button[@type="submit"]').click()
解释:account
是你的Instagram用户名,password
是你的Instagram密码。
3. 爬取账户页中所有帖子的链接
这一步的基本结构如下:
def Main(self, profileUrl):
    """Open the profile page and return the list of post URLs found on it.

    Returns an empty list (instead of None, as the original did) when scraping
    fails, so callers can iterate the result unconditionally.
    """
    try:
        fxDriver.get(profileUrl)
        return self.GetWholePage()
    except Exception as e:
        print(e)
        return []
解释:浏览器先访问账户页,然后PROFILE().GetWholePage()
负责爬取账户页中所有帖子的链接,生成链接列表urlList
。
PROFILE().GetWholePage()
如下:
def GetWholePage(self):
    """Scroll to the bottom of the profile page until no new posts load.

    Returns the accumulated list of post URLs after 20 consecutive checks
    show no page growth.
    """
    locY, urlList = self.GetLocY()
    loadFailCount = 0
    # Loop-invariant JS snippet, hoisted out of the loop.
    pageDownJS = 'document.documentElement.scrollTop=100000000'
    while True:
        fxDriver.execute_script(pageDownJS)
        while True:
            locYNew, urlListNew = self.JudgeLoading(locY, urlList)
            if locYNew is None:
                # Page did not grow since the last check; give up after 20 misses.
                loadFailCount += 1
                if loadFailCount > 20:
                    return urlList
            else:
                loadFailCount = 0
                locY, urlList = locYNew, urlListNew
                break
解释:
PROFILE().GetLocY()
可以获得账户页HTML中每个帖子链接所在tag的Y座标locY
和当前加载的所有帖子的链接列表urlList
。fxDriver.execute_script(pageDownJS)
可以通过执行JS代码document.documentElement.scrollTop=100000000
把页面拉到最下面。PROFILE().JudgeLoading(locY, urlList)
可以对比输入的Y座标和0.5秒之后的Y座标来判断fxDriver.execute_script(pageDownJS)
有没有执行完毕。如果没有执行完毕则返回None
,如果执行完毕则返回新的Y座标locYNew
和新的链接列表urlListNew
。
PROFILE().GetLocY()
如下:
def GetLocY(self):
    """Scan all <a> tags on the current page for post links.

    Returns (locY, urlList): the Y coordinate of the last post link seen and
    the list of post URLs. locY is 0 when no post link is present — the
    original raised NameError in that case.
    """
    locY = 0
    urlList = []
    for e in fxDriver.find_elements_by_tag_name('a'):
        try:
            url = e.get_attribute('href')
            if '/p/' in url:  # post permalinks contain '/p/'
                locY = e.location['y']
                urlList.append(url)
        except Exception:
            continue  # stale element or href=None — skip it
    return locY, urlList
解释:通过循环判断'/p/'
有没有在a
标签的'href'
属性中来获得帖子链接及其所在tag的Y座标。
PROFILE().JudgeLoading(locY, urlList)
如下:
def JudgeLoading(self, locY, urlList):
    """Wait 0.5 s, re-scan the page and report whether it grew.

    Returns (locYNew, urlListNew) with the merged, de-duplicated link list
    when new content loaded, or (None, None) when it did not.
    """
    time.sleep(0.5)
    # Use self instead of constructing a throwaway PROFILE() as the original did.
    locYNew, urlListNew = self.GetLocY()
    if locY < locYNew:
        return locYNew, list(set(urlListNew + urlList))
    return None, None
把上述模块如下整合到类中:
class PROFILE(object):
    """Scrapes all post links from an Instagram profile page via the global fxDriver."""

    def GetLocY(self):
        """Scan all <a> tags for post links.

        Returns (locY, urlList); locY is 0 when no post link is present
        (the original raised NameError in that case).
        """
        locY = 0
        urlList = []
        for e in fxDriver.find_elements_by_tag_name('a'):
            try:
                url = e.get_attribute('href')
                if '/p/' in url:  # post permalinks contain '/p/'
                    locY = e.location['y']
                    urlList.append(url)
            except Exception:
                continue  # stale element or href=None — skip it
        return locY, urlList

    def JudgeLoading(self, locY, urlList):
        """Wait 0.5 s, re-scan and return the new (locY, urlList) if the page grew, else (None, None)."""
        time.sleep(0.5)
        locYNew, urlListNew = self.GetLocY()
        if locY < locYNew:
            # Merge with the links collected so far and drop duplicates.
            return locYNew, list(set(urlListNew + urlList))
        return None, None

    def GetWholePage(self):
        """Scroll to the bottom repeatedly until 20 consecutive checks show no growth."""
        locY, urlList = self.GetLocY()
        loadFailCount = 0
        pageDownJS = 'document.documentElement.scrollTop=100000000'
        while True:
            fxDriver.execute_script(pageDownJS)
            while True:
                locYNew, urlListNew = self.JudgeLoading(locY, urlList)
                if locYNew is None:
                    loadFailCount += 1
                    if loadFailCount > 20:
                        return urlList
                else:
                    loadFailCount = 0
                    locY, urlList = locYNew, urlListNew
                    break

    def Main(self, profileUrl):
        """Open the profile page and return its post links; [] on failure (was None)."""
        try:
            fxDriver.get(profileUrl)
            return self.GetWholePage()
        except Exception as e:
            print(e)
            return []
解释:可以通过调用PROFILE().Main(profileUrl)
获得账户页中所有帖子的链接。
4. 爬取并下载帖子页中的信息、图片和视频
这一步的基本结构如下:
def Main(self, url):
    """Visit the post page, parse it, write info.txt and download its media.

    Exceptions are caught and printed so one bad post does not abort the run.
    """
    try:
        fxDriver.get(url)
        html = fxDriver.page_source
        # Use self instead of constructing throwaway POST() instances.
        info = self.GetInfo(html)
        self.DownloadInfo(info)
        self.DownloadFile(info)
    except Exception as e:
        print(e)
解释:浏览器先访问帖子页;然后通过fxDriver.page_source
获取帖子页的HTML;POST().GetInfo(html)
可以通过分析HTML获得用户上传帖子的时间(当前时区)、用户名称(Username)、用户全称(Full name)、帖子文字、点赞数、评论数、图片描述(当帖子中有图片时)、图片链接(当帖子中有图片时)、视频观看数(当帖子中有视频时)、视频链接(当帖子中有视频时)等信息;POST().DownloadInfo(info)
把信息写入文本文档;POST().DownloadFile(info)
根据获取的信息下载帖子页中的图片和视频。
POST().GetInfo(html)
如下:
def GetInfo(self, html):
    """Extract the embedded GraphQL JSON from a post page's HTML.

    Returns a tuple whose last element is a type tag:
    'ps' (multi-picture post), 'v' (video) or 'p' (single picture).
    Raises ValueError when no GraphQL data is found (the original raised
    an opaque NameError instead).
    """
    soup = BeautifulSoup(html, 'html.parser')
    jsonData = None
    for s in soup.find_all('script', {'type': 'text/javascript'}):
        if s.string is not None and 'graphql' in s.string:
            # The JSON blob is the text between the first '{' and the last '}'.
            jsonData = json.loads(s.string[s.string.find('{'): s.string.rfind('}') + 1])
            break
    if jsonData is None:
        raise ValueError('graphql data not found in post HTML')
    media = jsonData['graphql']['shortcode_media']  # hoist the repeated lookup
    uploadTime = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(media['taken_at_timestamp']))
    username = media['owner']['username']
    fullName = media['owner']['full_name']
    likes = media['edge_media_preview_like']['count']
    comments = media['edge_media_preview_comment']['count']
    try:
        text = media['edge_media_to_caption']['edges'][0]['node']['text']
    except Exception:
        text = 'None'  # post has no caption
    try:
        # Multi-picture post ("sidecar"): map display url -> description.
        displayDict = {}
        for obj in media['edge_sidecar_to_children']['edges']:
            displayDict[obj['node']['display_url']] = obj['node']['accessibility_caption']
        return uploadTime, username, fullName, likes, comments, text, displayDict, 'ps'
    except Exception:
        try:
            return (uploadTime, username, fullName, likes, comments, text,
                    media['video_url'], media['video_view_count'], 'v')
        except Exception:
            return (uploadTime, username, fullName, likes, comments, text,
                    media['display_url'], media['accessibility_caption'], 'p')
解释:帖子中我们需要的所有信息都在jsonData
中。我们通过判断'graphql'
是否在'type'
属性为'text/javascript'
的script
标签中来获取jsonData
。
POST().DownloadInfo(info)
如下:
def DownloadInfo(self, info):
    """Write the post's metadata to <outputPath>\\<username>\\<timestamp>\\info.txt.

    `info` is the tuple produced by GetInfo; its last element is the type tag.
    """
    now = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(int(time.time())))
    uploadTime, username, fullName, likes, comments, text = info[:6]
    folder = '{}\\{}\\{}'.format(outputPath, username, uploadTime.replace('-', '').replace(':', '').replace(' ', ''))
    # exist_ok avoids the spurious error print the original produced on re-runs.
    os.makedirs(folder, exist_ok=True)
    with open('{}\\info.txt'.format(folder), 'w', encoding='utf-8') as f:
        f.write('Now: {}'.format(now))
        f.write('\nUpload time: {}'.format(uploadTime))
        f.write('\nUsername: {}'.format(username))
        f.write('\nFull name: {}'.format(fullName))
        f.write('\nText: {}'.format(text))
        f.write('\nLikes: {}'.format(likes))
        f.write('\nComments: {}'.format(comments))
        if info[-1] == 'ps':
            for picIdx, (displayUrl, picDescription) in enumerate(info[6].items(), start=1):
                f.write('\nPicture {} description: {}'.format(str(picIdx), picDescription))
                f.write('\nPicture {} url: {}'.format(str(picIdx), displayUrl))
        elif info[-1] == 'v':
            f.write('\nVideo view count: {}'.format(info[7]))
            f.write('\nVideo url: {}'.format(info[6]))
        elif info[-1] == 'p':
            f.write('\nPicture description: {}'.format(info[7]))
            f.write('\nPicture url: {}'.format(info[6]))
解释:outputPath
是全局变量,表示输出文件夹的绝对路径。
POST().DownloadFile(info)
如下:
def DownloadFile(self, info):
    """Download the post's media with an external wget into the post folder.

    NOTE(security): the command is built by string interpolation and passed to
    os.system; a crafted URL could inject shell commands. Acceptable only
    because the URLs come from Instagram's own JSON — do not reuse for
    arbitrary input.
    """
    uploadTime, username = info[0], info[1]
    folder = '{}\\{}\\{}'.format(outputPath, username, uploadTime.replace('-', '').replace(':', '').replace(' ', ''))

    def wget(url, filename):
        # Single shared wget invocation (the original repeated this command three times).
        os.system('{} --output-document={}\\{} --no-check-certificate --execute http_proxy={} --execute https_proxy={} --execute robots=off --continue "{}"'.format(wgetPath, folder, filename, httpProxy, httpsProxy, url))

    if info[-1] == 'ps':
        for i, displayUrl in enumerate(info[6].keys(), start=1):
            wget(displayUrl, '{}.png'.format(str(i)))
    elif info[-1] == 'v':
        wget(info[6], '1.mp4')
    elif info[-1] == 'p':
        wget(info[6], '1.png')
解释:可以参考这篇博文。wgetPath
是全局变量,表示wget.exe
的绝对路径;httpProxy
是全局变量,表示HTTP代理;httpsProxy
是全局变量,表示HTTPS代理。
把上述模块如下整合到类中:
class POST(object):
    """Downloads one post's metadata, pictures and videos via the global fxDriver."""

    def GetInfo(self, html):
        """Extract the embedded GraphQL JSON from a post page's HTML.

        Returns a tuple whose last element is a type tag: 'ps' (multi-picture
        post), 'v' (video) or 'p' (single picture). Raises ValueError when no
        GraphQL data is found (the original raised an opaque NameError).
        """
        soup = BeautifulSoup(html, 'html.parser')
        jsonData = None
        for s in soup.find_all('script', {'type': 'text/javascript'}):
            if s.string is not None and 'graphql' in s.string:
                # The JSON blob is the text between the first '{' and the last '}'.
                jsonData = json.loads(s.string[s.string.find('{'): s.string.rfind('}') + 1])
                break
        if jsonData is None:
            raise ValueError('graphql data not found in post HTML')
        media = jsonData['graphql']['shortcode_media']  # hoist the repeated lookup
        uploadTime = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(media['taken_at_timestamp']))
        username = media['owner']['username']
        fullName = media['owner']['full_name']
        likes = media['edge_media_preview_like']['count']
        comments = media['edge_media_preview_comment']['count']
        try:
            text = media['edge_media_to_caption']['edges'][0]['node']['text']
        except Exception:
            text = 'None'  # post has no caption
        try:
            # Multi-picture post ("sidecar"): map display url -> description.
            displayDict = {}
            for obj in media['edge_sidecar_to_children']['edges']:
                displayDict[obj['node']['display_url']] = obj['node']['accessibility_caption']
            return uploadTime, username, fullName, likes, comments, text, displayDict, 'ps'
        except Exception:
            try:
                return (uploadTime, username, fullName, likes, comments, text,
                        media['video_url'], media['video_view_count'], 'v')
            except Exception:
                return (uploadTime, username, fullName, likes, comments, text,
                        media['display_url'], media['accessibility_caption'], 'p')

    def DownloadInfo(self, info):
        """Write the post's metadata to <outputPath>\\<username>\\<timestamp>\\info.txt."""
        now = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(int(time.time())))
        uploadTime, username, fullName, likes, comments, text = info[:6]
        folder = '{}\\{}\\{}'.format(outputPath, username, uploadTime.replace('-', '').replace(':', '').replace(' ', ''))
        # exist_ok avoids the spurious error print the original produced on re-runs.
        os.makedirs(folder, exist_ok=True)
        with open('{}\\info.txt'.format(folder), 'w', encoding='utf-8') as f:
            f.write('Now: {}'.format(now))
            f.write('\nUpload time: {}'.format(uploadTime))
            f.write('\nUsername: {}'.format(username))
            f.write('\nFull name: {}'.format(fullName))
            f.write('\nText: {}'.format(text))
            f.write('\nLikes: {}'.format(likes))
            f.write('\nComments: {}'.format(comments))
            if info[-1] == 'ps':
                for picIdx, (displayUrl, picDescription) in enumerate(info[6].items(), start=1):
                    f.write('\nPicture {} description: {}'.format(str(picIdx), picDescription))
                    f.write('\nPicture {} url: {}'.format(str(picIdx), displayUrl))
            elif info[-1] == 'v':
                f.write('\nVideo view count: {}'.format(info[7]))
                f.write('\nVideo url: {}'.format(info[6]))
            elif info[-1] == 'p':
                f.write('\nPicture description: {}'.format(info[7]))
                f.write('\nPicture url: {}'.format(info[6]))

    def DownloadFile(self, info):
        """Download the post's media with an external wget into the post folder.

        NOTE(security): the command is built by string interpolation and passed
        to os.system; acceptable only because the URLs come from Instagram's
        own JSON — do not reuse for arbitrary input.
        """
        uploadTime, username = info[0], info[1]
        folder = '{}\\{}\\{}'.format(outputPath, username, uploadTime.replace('-', '').replace(':', '').replace(' ', ''))

        def wget(url, filename):
            # Single shared wget invocation (the original repeated this command three times).
            os.system('{} --output-document={}\\{} --no-check-certificate --execute http_proxy={} --execute https_proxy={} --execute robots=off --continue "{}"'.format(wgetPath, folder, filename, httpProxy, httpsProxy, url))

        if info[-1] == 'ps':
            for i, displayUrl in enumerate(info[6].keys(), start=1):
                wget(displayUrl, '{}.png'.format(str(i)))
        elif info[-1] == 'v':
            wget(info[6], '1.mp4')
        elif info[-1] == 'p':
            wget(info[6], '1.png')

    def Main(self, url):
        """Visit the post page, parse it, write info.txt and download its media."""
        try:
            fxDriver.get(url)
            html = fxDriver.page_source
            info = self.GetInfo(html)
            self.DownloadInfo(info)
            self.DownloadFile(info)
        except Exception as e:
            print(e)
class PROFILE(object):
    """Scrapes all post links from an Instagram profile page via the global fxDriver."""

    def GetLocY(self):
        """Scan all <a> tags for post links.

        Returns (locY, urlList); locY is 0 when no post link is present
        (the original raised NameError in that case).
        """
        locY = 0
        urlList = []
        for e in fxDriver.find_elements_by_tag_name('a'):
            try:
                url = e.get_attribute('href')
                if '/p/' in url:  # post permalinks contain '/p/'
                    locY = e.location['y']
                    urlList.append(url)
            except Exception:
                continue  # stale element or href=None — skip it
        return locY, urlList

    def JudgeLoading(self, locY, urlList):
        """Wait 0.5 s, re-scan and return the new (locY, urlList) if the page grew, else (None, None)."""
        time.sleep(0.5)
        locYNew, urlListNew = self.GetLocY()
        if locY < locYNew:
            # Merge with the links collected so far and drop duplicates.
            return locYNew, list(set(urlListNew + urlList))
        return None, None

    def GetWholePage(self):
        """Scroll to the bottom repeatedly until 20 consecutive checks show no growth."""
        locY, urlList = self.GetLocY()
        loadFailCount = 0
        pageDownJS = 'document.documentElement.scrollTop=100000000'
        while True:
            fxDriver.execute_script(pageDownJS)
            while True:
                locYNew, urlListNew = self.JudgeLoading(locY, urlList)
                if locYNew is None:
                    loadFailCount += 1
                    if loadFailCount > 20:
                        return urlList
                else:
                    loadFailCount = 0
                    locY, urlList = locYNew, urlListNew
                    break

    def Main(self, profileUrl):
        """Open the profile page and return its post links; [] on failure (was None)."""
        try:
            fxDriver.get(profileUrl)
            return self.GetWholePage()
        except Exception as e:
            print(e)
            return []
def Main():
    """Prompt for a profile link, collect its post links, and download every post."""
    profileUrl = input('Please input the instagram profile link: ')
    urlList = PROFILE().Main(profileUrl)
    # PROFILE().Main may return None when scraping fails; guard the loop.
    for url in urlList or []:
        POST().Main(url)
# Run only when executed as a script. The original also called Main()
# unconditionally one line before this guard, which started the whole
# scrape twice (and on import).
if __name__ == '__main__':
    Main()