一、前言
發現了一個網站 https://wall.alphacoders.com/ ,圖片質量挺高,正好最近在學習go,就用go下載圖片到本地
我記錄一下,以便以後查看,代碼肯定是有所不足,大家別笑話,看看就好。
[起始頁截圖 https://wall.alphacoders.com/finding_wallpapers.php ]
二、大概思路
1、https://wall.alphacoders.com/finding_wallpapers.php 作爲入口地址
2、根據入口地址獲取每個分類的title和href
3、處理每個分類:進入分類頁面,獲取到最後一頁的地址
4、知道最後一頁的地址,從第一頁循環到最後一頁,獲取每頁的數據
5、循環匹配每頁中原圖的地址(還是個頁面,並非圖片)
6、在原圖頁面中獲取待下載的圖片地址:$('img.main-content').attr('src')
7、保存圖片到本地,如果已保存,則不重複保存(注意:如果程序中途結束,有些圖片是不完整的,不完整的圖片是不會再次下載的)
【注意】:如果網頁的標籤有所變動,代碼肯定是會報錯的
【延伸】:
如果 goquery 模塊不存在,cmd中執行 [ go get github.com/opesun/goquery ] 即可
這個html解析器好像不是很強,可以用正則匹配地址或者用其他的html解析器
這個可以優化一下,用多線程執行
三、代碼
package main
import (
"fmt"
"io"
"log"
"net/http"
"os"
"strconv"
"strings"
//"github.com/PuerkitoBio/goquery" //這個模塊配置環境經常出錯就沒有選擇,我這裏顯示該模塊某些方法已棄用
"github.com/opesun/goquery"
)
// DOMAIN is the site root; it is prepended to relative hrefs scraped from the
// pages to build absolute URLs.
const DOMAIN string = "https://wall.alphacoders.com/"
// main is the program entry point. It downloads every category via start();
// the commented-out block below shows how to download only selected
// categories with customStart instead.
func main() {
start() // download everything
// Custom category download: the category data can be taken from log.txt
/*custom := [10][2]string{
{"41276Women", "https://wall.alphacoders.com/by_category.php?id=33&name=Women+Wallpapers"},
{"1545Game", "https://wall.alphacoders.com/by_category.php?id=14&name=Game+Wallpapers"}}
customStart(custom)*/
}
// customStart downloads only the categories listed in custom.
//
// Each row of custom is a {title, url} pair: the title becomes the name of the
// folder the images are saved in, and the url is the category's listing page.
// Rows without a url (the unused slots of the fixed-size array) are skipped.
func customStart(custom [10][2]string) {
	fmt.Println("開始下載...")
	for _, value := range custom {
		title, href := value[0], value[1]
		// len(value) is always 2 for a [2]string, so an unused row has to be
		// recognised by its empty url rather than by its length.
		if href == "" {
			continue
		}
		dir := createDir(title) // folder named after the category
		handleOneUrl(href, dir) // download every page of this category
	}
	fmt.Println("結束下載!!!")
}
// start downloads every category found on the entry page.
//
// It asks getUrls for the {title, url} pairs, creates one folder per title and
// hands each category url to handleOneUrl. Rows without a url (skipped or
// unused slots of the fixed-size array) are ignored.
func start() {
	fmt.Println("開始下載...")
	titleToUrls := getUrls() // {title, href} rows, possibly with empty slots
	for _, value := range titleToUrls {
		title, href := value[0], value[1]
		// len(value) is always 2 for a [2]string, so an unused row has to be
		// recognised by its empty url rather than by its length.
		if href == "" {
			continue
		}
		dir := createDir(title) // folder named after the category
		handleOneUrl(href, dir) // download every page of this category
	}
	fmt.Println("結束下載!!!")
}
// handleOneUrl downloads all pages of a single category.
//
// url is the category's listing page and dir is the folder its images are
// saved in. The function looks for the pagination link that follows the "..."
// entry, treats it as the link to the last page, splits it at its final "="
// into a URL prefix and a page number, and passes both to handleOnePage.
// Every failure is printed and ends the processing of this category.
//
// NOTE(review): a category whose pagination has no "..." entry is reported as
// "Not Found Last Page" and skipped — confirm whether short categories should
// be handled differently.
func handleOneUrl(url string, dir string) {
	doc, err := goquery.ParseUrl(url)
	if err != nil {
		fmt.Println("error to get url html:", err.Error())
		return
	}

	// Collect the anchors nested in <li> elements once, and iterate over the
	// anchors themselves: the number of <li> elements need not match the
	// number of anchors, so it is not a valid bound for indexing them.
	anchors := doc.Find("li").Find("a")
	count := anchors.Length()

	lastUrl := ""
	isLast := false // set once the "..." entry was seen; the next anchor is the last page
	for i := 0; i < count; i++ {
		link := anchors.Eq(i)
		if isLast {
			lastUrl = link.Attr("href")
			break
		}
		if strings.Contains(link.Text(), "...") {
			isLast = true
		}
	}
	if lastUrl == "" {
		fmt.Println("Not Found Last Page:", url)
		return
	}

	// Split e.g. ".../by_resolution.php?w=2560&h=1440&page=474" into the prefix
	// ".../by_resolution.php?w=2560&h=1440&page=" and the last page "474".
	lastEqIndex := strings.LastIndex(lastUrl, "=")
	if lastEqIndex == -1 {
		// Without "=" there is no page parameter to iterate over.
		fmt.Println("Not Found Last Page:", url)
		return
	}
	startUrl := lastUrl[:lastEqIndex+1]
	lastPage := lastUrl[lastEqIndex+1:]

	handleOnePage(startUrl, lastPage, dir)
}
// handleOnePage walks pages 1..lastPage of one category and downloads the
// images linked from each page.
//
// startUrl is the page URL without the page number (it ends in "="), lastPage
// is the number of the last page as a string, and dir is the folder the images
// are saved in. For every thumbnail box on a page the linked detail page is
// fetched, the src of its "img.main-content" element is taken as the image
// URL, and the image is stored under dir using the last path segment of that
// URL as the file name. Failures are printed and the affected page or image is
// skipped.
func handleOnePage(startUrl string, lastPage string, dir string) {
	endPage, err := strconv.Atoi(lastPage)
	if err != nil {
		// Without a numeric last page there is nothing to iterate over.
		fmt.Println("LastPage error:", err.Error())
		return
	}

	for page := 1; page <= endPage; page++ {
		pageUrl := startUrl + strconv.Itoa(page)
		pageDoc, err := goquery.ParseUrl(pageUrl)
		if err != nil {
			fmt.Println("PageUrl error:", err.Error())
			continue
		}

		// Each "div.boxgrid" holds one thumbnail; the anchors inside them link
		// to the detail pages. Look the anchors up once per page.
		boxes := pageDoc.Find("div.boxgrid")
		links := boxes.Find("a")
		count := boxes.Length()
		for i := 0; i < count; i++ {
			suffixUrl := links.Eq(i).Attr("href") // e.g. big.php?i=1079534
			if suffixUrl == "" {
				// An empty href would make the request below hit the site root.
				fmt.Println("PageDetailUrl error: no detail link on", pageUrl)
				continue
			}
			detailUrl := DOMAIN + suffixUrl

			pageDetailDoc, err := goquery.ParseUrl(detailUrl)
			if err != nil {
				fmt.Println("PageDetailUrl error:", err.Error())
				continue
			}

			// e.g. https://images8.alphacoders.com/107/thumb-1920-1079562.jpg
			finalUrl := pageDetailDoc.Find("img.main-content").Attr("src")
			// The text after the last "/" is used as the file name.
			fileName := finalUrl[strings.LastIndex(finalUrl, "/")+1:]
			if fileName == "" {
				// No usable image URL: saving would target the folder itself.
				fmt.Println("PageDetailUrl error: no image found on", detailUrl)
				continue
			}

			saveFile(finalUrl, dir+"/"+fileName)
		}
	}
}
// saveFile downloads finalUrl and stores the response body at savePath.
//
// If savePath already exists the download is skipped. The body is first
// written to savePath+".part" and only renamed to savePath once it was
// received and written completely, so an interrupted or failed download never
// leaves a file that a later run would mistake for a finished one. Responses
// with a status other than 200 OK are rejected. Every failure is printed; the
// function returns nothing.
func saveFile(finalUrl string, savePath string) {
	// An existing file means an earlier run already finished this download.
	if _, err := os.Stat(savePath); err == nil {
		fmt.Printf("圖片 [ %s ] 已下載\n", finalUrl)
		return
	}

	// Fetch before touching the disk so a failed request leaves no file behind.
	resp, err := http.Get(finalUrl)
	if err != nil {
		fmt.Println("imageGet error:", err.Error())
		return
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		fmt.Println("imageGet error: unexpected status", resp.Status)
		return
	}

	tmpPath := savePath + ".part"
	file, err := os.Create(tmpPath)
	if err != nil {
		fmt.Println("createFile error:", err.Error())
		return
	}

	_, err = io.Copy(file, resp.Body)
	// Close explicitly: a failed close can mean the data was not fully written.
	if closeErr := file.Close(); err == nil {
		err = closeErr
	}
	if err == nil {
		err = os.Rename(tmpPath, savePath)
	}
	if err != nil {
		fmt.Println("saveFile error:", err.Error())
		_ = os.Remove(tmpPath) // best-effort cleanup of the partial file
		return
	}

	fmt.Printf("圖片 [ %s ] 下載 [ %s ] 成功\n", finalUrl, savePath)
}
// createDir makes sure the folder "./images/<dirName>" exists and returns its
// path. The folder holds the images of one category.
//
// Missing parent folders are created as well. If the folder cannot be created
// the error is printed and the path is still returned, so the caller's later
// file operations will report the problem for each file.
func createDir(dirName string) string {
	dir := "./images/" + dirName
	// MkdirAll does nothing when the directory already exists, so no separate
	// existence check is needed.
	if err := os.MkdirAll(dir, os.ModePerm); err != nil {
		fmt.Println("createDir error:", err.Error())
	}
	return dir
}
// getUrls scrapes the entry page and returns the categories as {title, href}
// rows.
//
// Every "a.list-group-item" anchor whose href is not "#" yields one row: the
// title is the anchor text with spaces and newlines removed, and the href is
// made absolute by prefixing DOMAIN when it does not contain "http". Each row
// is also written to log.txt. Rows are stored contiguously from index 0; the
// remaining slots of the fixed-size array keep their zero value. If the entry
// page cannot be fetched the program exits with a non-zero status.
func getUrls() [100][2]string {
	url := "https://wall.alphacoders.com/finding_wallpapers.php" // search page used as entry point
	doc, err := goquery.ParseUrl(url)
	if err != nil {
		// Nothing can be downloaded without the entry page.
		fmt.Println("SearchPage error:", err)
		os.Exit(1)
	}

	result := [100][2]string{}
	cleaner := strings.NewReplacer(" ", "", "\n", "")

	urls := doc.Find("a.list-group-item")
	length := urls.Length()
	next := 0 // index of the next free row in result
	for i := 0; i < length; i++ {
		item := urls.Eq(i)
		href := item.Attr("href")
		if href == "#" { // placeholder link, not a category
			continue
		}
		if next == len(result) {
			// The array is fixed-size; writing past it would panic.
			fmt.Println("SearchPage error: more than", len(result), "categories found, ignoring the rest")
			break
		}

		text := cleaner.Replace(item.Text())
		if !strings.Contains(href, "http") {
			href = DOMAIN + href
		}

		logger(text+" - "+href, "log.txt")
		result[next][0] = text
		result[next][1] = href
		next++
	}
	return result
}
// logger appends content as one timestamped line to the file logFile.
//
// The file is created if it does not exist and is opened and closed on every
// call. If the file cannot be opened the program is terminated via
// log.Fatalln.
func logger(content string, logFile string) {
	// O_WRONLY is required for the write to succeed on POSIX systems, and the
	// mode has to be octal: decimal 666 yields unintended permission bits.
	file, err := os.OpenFile(logFile, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0666)
	if err != nil {
		log.Fatalln("fail to open log file", err)
	}
	defer file.Close()

	// LstdFlags prefixes each entry with the date and time.
	log.New(file, "", log.LstdFlags).Println(content)
}
四、打印
控制檯 輸出的內容如下
五、日誌
log.txt 文件保存的內容如下
2020/05/20 12:31:14 MobileVersion - https://mobile.alphacoders.com
2020/05/20 12:31:14 NewestWallpapers - https://wall.alphacoders.com/newest_wallpapers.php
2020/05/20 12:31:14 FeaturedWallpapers - https://wall.alphacoders.com/featured.php
2020/05/20 12:31:14 CreatorWallpapers - https://wall.alphacoders.com/by_creator.php
2020/05/20 12:31:14 AuthorsAtWallpaperAbyss - https://wall.alphacoders.com/authors.php
2020/05/20 12:31:14 HDWallpapers - https://wall.alphacoders.com/by_resolution.php?w=1920&h=1080
2020/05/20 12:31:14 UltraHD4kWallpapers - https://wall.alphacoders.com/by_resolution.php?w=3840&h=2160
2020/05/20 12:31:14 Retina5kWallpapers - https://wall.alphacoders.com/by_resolution.php?w=5120&h=2880
2020/05/20 12:31:14 UltraHD8kWallpapers - https://wall.alphacoders.com/by_resolution.php?w=7680&h=4320
2020/05/20 12:31:14 PopularWallpapers - https://wall.alphacoders.com/popular.php
2020/05/20 12:31:14 CCOWallpapers - https://wall.alphacoders.com/by_license.php?filter=4
2020/05/20 12:31:14 RandomWallpapers - https://wall.alphacoders.com/random.php
2020/05/20 12:31:14 RecentComments - https://wall.alphacoders.com/comments.php
2020/05/20 12:31:14 RecentCaptions - https://wall.alphacoders.com/captions.php
2020/05/20 12:31:14 HighestRated - https://wall.alphacoders.com/highest_rated.php
2020/05/20 12:31:14 MostViewed - https://wall.alphacoders.com/by_views.php
2020/05/20 12:31:14 MostFavorited - https://wall.alphacoders.com/by_favorites.php
2020/05/20 12:31:14 MostCommentedOn - https://wall.alphacoders.com/by_comments.php
2020/05/20 12:31:14 PopularCollections - https://alphacoders.com/collections
2020/05/20 12:31:14 ByTag - https://wall.alphacoders.com/all_tags.php
2020/05/20 12:31:14 ByColor - https://wall.alphacoders.com/by_color.php
2020/05/20 12:31:14 iPhone11 - https://mobile.alphacoders.com/by-device/540/iPhone-11-Wallpapers?ref=wa
2020/05/20 12:31:14 iPhoneX - https://mobile.alphacoders.com/by-device/450/iPhone-X-Wallpapers?ref=wa
2020/05/20 12:31:14 GalaxyNote10 - https://mobile.alphacoders.com/by-device/543/Galaxy-Note10-Wallpapers?ref=wa
2020/05/20 12:31:14 GooglePixel4 - https://mobile.alphacoders.com/by-device/551/Pixel-4-Wallpapers?ref=wa
2020/05/20 12:31:14 18300Abstract - https://wall.alphacoders.com/by_category.php?id=1&name=Abstract+Wallpapers
2020/05/20 12:31:14 53634Animal - https://wall.alphacoders.com/by_category.php?id=2&name=Animal+Wallpapers
2020/05/20 12:31:14 180969Anime - https://wall.alphacoders.com/by_category.php?id=3&name=Anime+Wallpapers
2020/05/20 12:31:14 20318Artistic - https://wall.alphacoders.com/by_category.php?id=4&name=Artistic+Wallpapers
2020/05/20 12:31:14 30116Celebrity - https://wall.alphacoders.com/by_category.php?id=7&name=Celebrity+Wallpapers
2020/05/20 12:31:14 24640Comics - https://wall.alphacoders.com/by_category.php?id=8&name=Comics+Wallpapers
2020/05/20 12:31:14 6087Dark - https://wall.alphacoders.com/by_category.php?id=9&name=Dark+Wallpapers
2020/05/20 12:31:14 61436Earth - https://wall.alphacoders.com/by_category.php?id=10&name=Earth+Wallpapers
2020/05/20 12:31:14 23407Fantasy - https://wall.alphacoders.com/by_category.php?id=11&name=Fantasy+Wallpapers
2020/05/20 12:31:14 17620Food - https://wall.alphacoders.com/by_category.php?id=12&name=Food+Wallpapers
2020/05/20 12:31:14 1545Game - https://wall.alphacoders.com/by_category.php?id=14&name=Game+Wallpapers
2020/05/20 12:31:14 7893Holiday - https://wall.alphacoders.com/by_category.php?id=15&name=Holiday+Wallpapers
2020/05/20 12:31:14 2398Humor - https://wall.alphacoders.com/by_category.php?id=13&name=Humor+Wallpapers
2020/05/20 12:31:14 33340ManMade - https://wall.alphacoders.com/by_category.php?id=16&name=Man+Made+Wallpapers
2020/05/20 12:31:14 523Men - https://wall.alphacoders.com/by_category.php?id=17&name=Men+Wallpapers
2020/05/20 12:31:14 10226Military - https://wall.alphacoders.com/by_category.php?id=18&name=Military+Wallpapers
2020/05/20 12:31:14 4026Misc - https://wall.alphacoders.com/by_category.php?id=19&name=Misc+Wallpapers
2020/05/20 12:31:14 45031Movie - https://wall.alphacoders.com/by_category.php?id=20&name=Movie+Wallpapers
2020/05/20 12:31:14 24587Music - https://wall.alphacoders.com/by_category.php?id=22&name=Music+Wallpapers
2020/05/20 12:31:14 17927Photography - https://wall.alphacoders.com/by_category.php?id=24&name=Photography+Wallpapers
2020/05/20 12:31:14 992Products - https://wall.alphacoders.com/by_category.php?id=25&name=Products+Wallpapers
2020/05/20 12:31:14 3168Religious - https://wall.alphacoders.com/by_category.php?id=26&name=Religious+Wallpapers
2020/05/20 12:31:14 17904SciFi - https://wall.alphacoders.com/by_category.php?id=27&name=Sci+Fi+Wallpapers
2020/05/20 12:31:14 14990Sports - https://wall.alphacoders.com/by_category.php?id=28&name=Sports+Wallpapers
2020/05/20 12:31:14 23041TVShow - https://wall.alphacoders.com/by_category.php?id=29&name=TV+Show+Wallpapers
2020/05/20 12:31:14 4204Technology - https://wall.alphacoders.com/by_category.php?id=30&name=Technology+Wallpapers
2020/05/20 12:31:14 67145Vehicles - https://wall.alphacoders.com/by_category.php?id=31&name=Vehicles+Wallpapers
2020/05/20 12:31:14 90168VideoGame - https://wall.alphacoders.com/by_category.php?id=32&name=Video+Game+Wallpapers
2020/05/20 12:31:14 1913Weapons - https://wall.alphacoders.com/by_category.php?id=34&name=Weapons+Wallpapers
2020/05/20 12:31:14 41289Women - https://wall.alphacoders.com/by_category.php?id=33&name=Women+Wallpapers
2020/05/20 12:31:14 68451280x960 - https://wall.alphacoders.com/by_resolution.php?w=1280&h=960
2020/05/20 12:31:14 130221280x1024 - https://wall.alphacoders.com/by_resolution.php?w=1280&h=1024
2020/05/20 12:31:14 41271281x961 - https://wall.alphacoders.com/by_resolution.php?w=1281&h=961
2020/05/20 12:31:14 42381366x768 - https://wall.alphacoders.com/by_resolution.php?w=1366&h=768
2020/05/20 12:31:14 60081440x900 - https://wall.alphacoders.com/by_resolution.php?w=1440&h=900
2020/05/20 12:31:14 55191600x900 - https://wall.alphacoders.com/by_resolution.php?w=1600&h=900
2020/05/20 12:31:14 20321600x1000 - https://wall.alphacoders.com/by_resolution.php?w=1600&h=1000
2020/05/20 12:31:14 268361600x1200 - https://wall.alphacoders.com/by_resolution.php?w=1600&h=1200
2020/05/20 12:31:14 150881680x1050 - https://wall.alphacoders.com/by_resolution.php?w=1680&h=1050
2020/05/20 12:31:14 1819081920x1080 - https://wall.alphacoders.com/by_resolution.php?w=1920&h=1080
2020/05/20 12:31:14 1109811920x1200 - https://wall.alphacoders.com/by_resolution.php?w=1920&h=1200
2020/05/20 12:31:14 76841920x1280 - https://wall.alphacoders.com/by_resolution.php?w=1920&h=1280
2020/05/20 12:31:14 77931920x1440 - https://wall.alphacoders.com/by_resolution.php?w=1920&h=1440
2020/05/20 12:31:14 28732000x1333 - https://wall.alphacoders.com/by_resolution.php?w=2000&h=1333
2020/05/20 12:31:14 37412048x1152 - https://wall.alphacoders.com/by_resolution.php?w=2048&h=1152
2020/05/20 12:31:14 131212048x1365 - https://wall.alphacoders.com/by_resolution.php?w=2048&h=1365
2020/05/20 12:31:14 21762048x1366 - https://wall.alphacoders.com/by_resolution.php?w=2048&h=1366
2020/05/20 12:31:14 29052048x1367 - https://wall.alphacoders.com/by_resolution.php?w=2048&h=1367
2020/05/20 12:31:14 52452048x1536 - https://wall.alphacoders.com/by_resolution.php?w=2048&h=1536
2020/05/20 12:31:14 142022560x1440 - https://wall.alphacoders.com/by_resolution.php?w=2560&h=1440
2020/05/20 12:31:14 399912560x1600 - https://wall.alphacoders.com/by_resolution.php?w=2560&h=1600
2020/05/20 12:31:14 20882560x1920 - https://wall.alphacoders.com/by_resolution.php?w=2560&h=1920
2020/05/20 12:31:14 58902880x1800 - https://wall.alphacoders.com/by_resolution.php?w=2880&h=1800
2020/05/20 12:31:14 52063000x2000 - https://wall.alphacoders.com/by_resolution.php?w=3000&h=2000
2020/05/20 12:31:14 199733840x2160 - https://wall.alphacoders.com/by_resolution.php?w=3840&h=2160
2020/05/20 12:31:14 50833840x2400 - https://wall.alphacoders.com/by_resolution.php?w=3840&h=2400
2020/05/20 12:31:14 18775184x3456 - https://wall.alphacoders.com/by_resolution.php?w=5184&h=3456
2020/05/20 12:31:14 19555616x3744 - https://wall.alphacoders.com/by_resolution.php?w=5616&h=3744
2020/05/20 12:31:14 21265760x3840 - https://wall.alphacoders.com/by_resolution.php?w=5760&h=3840
2020/05/20 12:31:14 26816000x4000 - https://wall.alphacoders.com/by_resolution.php?w=6000&h=4000