這是我的第一個爬蟲項目,也是我第一次接觸 C# 窗體(WinForms)程序。
我的需求:頁面中有音頻文件,但是它們只能單個下載,用戶需要一個一個地點擊下載按鈕進行下載。我的目的:根據用戶的需求篩選出相關的數據,拿到頁面上用戶篩選出的數據後實現批量下載,將下載的文件存放到用戶本地文件夾中,然後對下載下來的這些文件進行播放。
主要用到的庫有:CefSharp、HtmlAgilityPack
將瀏覽器頁面嵌入到winForm中
將web頁面嵌入到winForm的界面中
/// <summary>
/// Form load handler: initializes CEF, creates the embedded Chromium browser,
/// subscribes the frame-load-end handlers and docks the browser into <c>pnlTop</c>.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void Form1_Load(object sender, EventArgs e)
{
    CefSettings settings = new CefSettings();
    Cef.Initialize(settings);

    // The address must be an ordinary string literal delimited by straight quotes;
    // typographic quotes (“ ”) are not valid C# string delimiters and do not compile.
    webbrowser = new ChromiumWebBrowser("要嵌入的web地址");
    webbrowser.Dock = DockStyle.Fill;

    // Subscribe before the control is parented so no load notification can be missed.
    webbrowser.FrameLoadEnd += Webbrowser_FrameLoadEnd; // per-URL page handling
    webbrowser.FrameLoadEnd += SetCookie;               // cookie collection

    this.pnlTop.Controls.Add(webbrowser);
}
下面是獲取web頁面的url地址做相應的操作
/// <summary>
/// Frame-load-end handler that dispatches on the URL of the main frame:
/// one URL triggers a script-driven redirect, "url1" passes the page source to
/// <c>SavaProcess</c>, and "url2" parses the page source with HtmlAgilityPack to
/// locate the pager element and the span nodes beneath it.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data carrying the frame that finished loading.</param>
private void Webbrowser_FrameLoadEnd(object sender, FrameLoadEndEventArgs e)
{
    // Sub-frames are ignored; only the main frame is handled.
    if (!e.Frame.IsMain)
    {
        return;
    }

    if (e.Frame.Url == "頁面的url地址(不同的地址處理不同的事情)")
    {
        string listPage = "想要跳轉的頁面地址";
        string js = "window.location.href='" + listPage + "';";
        // Executing this script inside the page makes the browser navigate to listPage.
        this.webbrowser.ExecuteScriptAsync(js);
        return;
    }

    if (e.Frame.Url == "url1")
    {
        e.Frame.GetSourceAsync().ContinueWith(task =>
        {
            // The fetched page source; it is handed to SavaProcess for extraction.
            string html = task.Result;
            String filePath = SavaProcess(html);
        });
        return;
    }

    if (e.Frame.Url == "url2")
    {
        e.Frame.GetSourceAsync().ContinueWith(task =>
        {
            string htmlDom = task.Result;

            // Load the markup into an HtmlDocument so it can be queried with XPath.
            var doc = new HtmlDocument();
            doc.LoadHtml(htmlDom);

            request requoption = new request();
            requoption.Method = "POST";

            // The XPath below mirrors the structure of the scraped page; it selects
            // the pager container (used to obtain the total page count).
            var pageTr = doc.DocumentNode.SelectNodes(@"/html[1]/body[1]/div[3]/table[1]/tbody[1]/tr[@class='forPage']/td[1]/div[1]/div[1]");

            // HtmlAgilityPack's SelectNodes returns null (not an empty collection)
            // when nothing matches, so the null check must come before Count.
            if (pageTr != null && pageTr.Count > 0)
            {
                var p = pageTr[0];
                // All span nodes underneath the pager container.
                var spanNodes = p.SelectNodes(@".//span");
            }
        });
        return;
    }
}
設置cookie方法
/// <summary>
/// Frame-load-end handler that creates a <c>CookieVisitor</c>, subscribes
/// <c>Visitor_SendCookie</c> to its <c>SendCookie</c> event and passes it to the
/// global CEF cookie manager's <c>VisitAllCookies</c>.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void SetCookie(object sender, CefSharp.FrameLoadEndEventArgs e)
{
    var globalCookies = CefSharp.Cef.GetGlobalCookieManager();

    CookieVisitor cookieVisitor = new CookieVisitor();
    cookieVisitor.SendCookie += Visitor_SendCookie;

    globalCookies.VisitAllCookies(cookieVisitor);
}
/// <summary>
/// Stores a visited cookie in the <c>cookies</c> dictionary under the key
/// "domain^name" (leading dots trimmed from the domain), replacing any value
/// already stored under that key.
/// </summary>
/// <param name="obj">The cookie reported by the visitor.</param>
private void Visitor_SendCookie(CefSharp.Cookie obj)
{
    lock (lockObject)
    {
        string cookieKey = obj.Domain.TrimStart('.') + "^" + obj.Name;

        // Indexer assignment inserts the entry when the key is absent and overwrites
        // it otherwise (assumes `cookies` is a Dictionary-style map — confirm at its declaration).
        cookies[cookieKey] = obj.Value;
    }
}
/// <summary>
/// Converts the entries of the <c>cookies</c> dictionary into a
/// <see cref="System.Net.CookieCollection"/>.
/// </summary>
/// <returns>A new collection with one <see cref="System.Net.Cookie"/> per stored entry.</returns>
private CookieCollection GetCookieCollection()
{
    lock (lockObject)
    {
        CookieCollection cookieCollection = new CookieCollection();
        foreach (var keyValue in cookies)
        {
            // Keys are built as "domain^name" in Visitor_SendCookie. Split once and only
            // at the FIRST '^': '^' is a legal character in a cookie name, so an
            // unbounded split would truncate such names.
            string[] domainAndName = keyValue.Key.Split(new[] { '^' }, 2);

            System.Net.Cookie cookie = new System.Net.Cookie();
            cookie.Domain = domainAndName[0];
            cookie.Name = domainAndName[1];
            cookie.Value = keyValue.Value;
            cookieCollection.Add(cookie);
        }
        return cookieCollection;
    }
}
下面是已經拿到音頻文件的地址了,然後請求下載地址下載文件
/// <summary>
/// Downloads the resource at <paramref name="url"/> with an HTTP GET and saves it as
/// <paramref name="fileName"/> inside the <c>AudioList\AMR</c> folder under the
/// application's base directory (the folder is created when missing).
/// </summary>
/// <param name="url">Address of the file to download.</param>
/// <param name="fileName">Name of the local file to create (an existing file is overwritten).</param>
/// <param name="data">Record whose <c>LocalPath</c> is set to the local file path once the file has been created.</param>
/// <remarks>
/// Failures are not propagated to the caller; they are reported through
/// <see cref="System.Diagnostics.Trace"/> so that they are no longer lost silently.
/// </remarks>
public void HttpWebRequestGet(Uri url, string fileName, DataModel data)
{
    try
    {
        HttpWebRequest AudioReq = (HttpWebRequest)HttpWebRequest.Create(url);
        AudioReq.Method = "GET";
        AudioReq.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8";
        AudioReq.KeepAlive = true;
        AudioReq.Referer = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
        AudioReq.UserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36";
        // Advertising gzip/deflate by hand would leave the body compressed, and the
        // compressed bytes would be written to disk. AutomaticDecompression sends the
        // Accept-Encoding header itself and decodes the response stream transparently.
        AudioReq.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;
        AudioReq.Headers.Set("Accept-Language", "zh-CN,zh;q=0.9");
        AudioReq.Headers.Set("Upgrade-Insecure-Requests", "1");
        AudioReq.Headers.Set("Cookie", "JSESSIONID=" + JSESSIONID + ";rememberPass=1;userAccount=" + uid + ";#pwd=" + pwd + ";loginByTwoCode=0");
        AudioReq.ContentType = "application/x-www-form-urlencoded";

        string path = System.AppDomain.CurrentDomain.BaseDirectory + @"AudioList\AMR";
        // CreateDirectory is a no-op when the directory already exists.
        System.IO.Directory.CreateDirectory(path);

        var localAmrnb = path + "\\" + fileName;

        // `using` releases the response, its stream and the file handle even when the
        // transfer fails part-way through.
        using (HttpWebResponse rsp = (HttpWebResponse)AudioReq.GetResponse())
        using (var responseStream = rsp.GetResponseStream())
        using (FileStream fs = new FileStream(localAmrnb, FileMode.Create, FileAccess.Write, FileShare.ReadWrite))
        {
            data.LocalPath = localAmrnb;
            responseStream.CopyTo(fs);
        }
    }
    catch (Exception ex)
    {
        // Keep the original "never throw" contract, but record the failure.
        System.Diagnostics.Trace.TraceError("Failed to download {0} as {1}: {2}", url, fileName, ex);
    }
}
c#序列化數據並寫入文件
// Serialize the download records to JSON and overwrite the data file with the result.
// The element type is restored as DataModel, the record type used by the download
// and playback code in this file (the generic argument was lost in the original text).
List<DataModel> dataList = new List<DataModel>();
// `false` = overwrite instead of append. `using` flushes and closes the writer even
// if serialization throws.
using (System.IO.StreamWriter file1 = new System.IO.StreamWriter(DownloadDataPath, false))
{
    file1.Write(new JavaScriptSerializer().Serialize(dataList));
}
從文件中讀取數據並反序列化
// Read the previously saved JSON from the first line of the data file.
using (System.IO.StreamReader sr = new System.IO.StreamReader(DownloadDataPath, Encoding.UTF8))
{
    string line = sr.ReadLine();
    if (line != null)
    {
        oldData = line;
    }
}

// Deserialize BEFORE reopening the file for writing: opening the writer truncates the
// file, so a payload that fails to parse would otherwise destroy the saved data.
// Missing or empty content is treated as "no previous records".
List<DataModel> oldDataList = string.IsNullOrWhiteSpace(oldData)
    ? new List<DataModel>()
    : (new JavaScriptSerializer().Deserialize<List<DataModel>>(oldData) ?? new List<DataModel>());

// Append the previously saved records after the new ones.
dataList.AddRange(oldDataList);

// `using` flushes and closes the writer even if serialization throws.
using (System.IO.StreamWriter file2 = new System.IO.StreamWriter(DownloadDataPath, false))
{
    file2.Write(new JavaScriptSerializer().Serialize(dataList));
}
下面向窗體中添加 Windows Media Player 播放器組件
首先添加引用如下圖所示:
其次將mediaPlayer組件添加到工具箱中,菜單欄:工具—>選擇工具箱選項,添加如下組件
添加完之後就可以在工具箱中將組件直接拖到界面上了,
具體實現播放的代碼如下所示
/// <summary>
/// Clears the player's current playlist and appends one media item for the
/// <c>LocalPath</c> of every record in <c>oldDataList</c>.
/// </summary>
/// <returns>Always <c>true</c>.</returns>
public Boolean getMediaPlayData()
{
    this.playMedia.currentPlaylist.clear();

    // Queue every saved file, in list order.
    foreach (var record in oldDataList)
    {
        this.playMedia.currentPlaylist.appendItem(playMedia.newMedia(record.LocalPath));
    }

    return true;
}
/// <summary>
/// Click handler of the "query and play" button: rebuilds the playlist and, when that
/// succeeds, enables auto-start, turns shuffle off and starts playback.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void button1_Click(object sender, EventArgs e)
{
    bool playlistReady = getMediaPlayData();
    if (!playlistReady)
    {
        return;
    }

    var playerSettings = this.playMedia.settings;
    playerSettings.autoStart = true;
    playerSettings.setMode("shuffle", false);

    this.playMedia.Ctlcontrols.play();
}
/// <summary>
/// Play-state change handler: when the new state is <c>wmppsReady</c>, playback is
/// started again (the original author's intent: move on to the next file once the
/// current one has finished).
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data carrying the new play state.</param>
private void wmp_PlayStateChange(object sender, AxWMPLib._WMPOCXEvents_PlayStateChangeEvent e)
{
    var newState = (WMPLib.WMPPlayState)e.newState;
    if (newState != WMPLib.WMPPlayState.wmppsReady)
    {
        return;
    }

    playMedia.Ctlcontrols.play();
}
以上不是完整的代碼。
總體來說把大致的過程和用到的一些技術記錄下來,加深記憶。