/**
 * One-shot novel downloader for the browser console.
 *
 * Usage: open the novel's table-of-contents page, press F12, paste this script
 * into the console and call the function.
 *
 * Steps:
 *  1. Every <a> on the current page whose href starts with the catalogue URL
 *     (origin + pathname) is treated as a chapter link. Duplicated links keep
 *     the position of their LAST occurrence, and links that point back to the
 *     catalogue page itself are ignored.
 *  2. Chapters are fetched one after another and decoded with the charset of
 *     the current page; <a>, <script> and <style> elements are dropped and the
 *     remaining text is kept.
 *  3. The strings in removeString are deleted, then text shared by neighbouring
 *     chapters at the head/tail (site header/footer) is stripped.
 *  4. Everything is concatenated and offered as a "<page title>.txt" download.
 *
 * @param {string|string[]} [removeString=""] text, or list of texts, to delete
 *        from every chapter
 * @returns {Promise<void>} resolves after the download has been triggered
 */
function simpleDownloadStoryInCatalogueWeb(removeString=""){
    // Heuristic kept from the original script: an occurrence of the chapter
    // title only counts as the start of the chapter when more than this many
    // characters follow it.
    const MIN_BODY_AFTER_TITLE = 100;

    // URL of the catalogue page, without query string or hash
    const catalogueWebUrl = document.location.origin+document.location.pathname;
    // Novel name; a missing <title> falls back to the default file name below
    const storyTitle = document.querySelector('title')?.innerText ?? '';

    // Collect chapter links. delete+set moves a repeated href to the end, so
    // the link keeps the position of its last occurrence.
    const anchorByHref = new Map();
    for(const a of document.getElementsByTagName('a')){
        const href = a&&a.href;
        if(typeof href!=='string'||!href.startsWith(catalogueWebUrl))continue;
        // "#top"-style links resolve to the catalogue page itself, not a chapter
        if(href.split('#')[0]===catalogueWebUrl)continue;
        anchorByHref.delete(href);
        anchorByHref.set(href,a);
    }
    const catalogues = [...anchorByHref.values()];
    const catalogueLen = catalogues.length;
    const chapterTitles = catalogues.map(a=>(a.innerText||'').trim());
    const allStoryStrArr = new Array(catalogueLen).fill('');

    // Chapter pages are assumed to use the same encoding as the catalogue page
    const charset = document.querySelector('meta[charset]')?.getAttribute('charset')
        || document.characterSet || 'utf-8';
    let textDecoder;
    try{
        textDecoder = new TextDecoder(charset);
    }catch(err){
        console.warn('不支持的字符集:'+charset+',改用utf-8',err);
        textDecoder = new TextDecoder('utf-8');
    }

    // Normalise removeString to a list of non-empty entries
    let removeList = [];
    if(Array.isArray(removeString)){
        removeList = removeString.filter(Boolean);
    }else if(typeof removeString==='string'&&removeString.length>=1){
        removeList = [removeString];
    }

    // Number of leading characters two strings have in common
    const commonPrefixLength = (s1,s2)=>{
        const max = Math.min(s1.length,s2.length);
        let n = 0;
        while(n<max&&s1[n]===s2[n])++n;
        return n;
    };
    // Number of trailing characters two strings have in common
    const commonSuffixLength = (s1,s2)=>{
        const max = Math.min(s1.length,s2.length);
        let n = 0;
        while(n<max&&s1[s1.length-1-n]===s2[s2.length-1-n])++n;
        return n;
    };

    // Download one chapter page and return its text without links/scripts/styles
    const fetchChapterText = async (url)=>{
        const resp = await fetch(url);
        if(!resp.ok)throw new Error('HTTP '+resp.status);
        const htmlStr = textDecoder.decode(await resp.arrayBuffer());
        // DOMParser builds an inert document: no images are loaded and no
        // inline handlers run while thousands of pages are being parsed.
        const doc = new DOMParser().parseFromString(htmlStr,'text/html');
        for(const el of doc.querySelectorAll('a,script,style'))el.remove();
        return doc.documentElement.innerText;
    };

    // Strip header/footer text that a chapter shares with its neighbour
    const removeSurplus = ()=>{
        // Measure against untouched copies so trimming one chapter cannot
        // skew the comparison for its partner.
        const raw = allStoryStrArr.slice();
        for(let k=0;k<catalogueLen;++k){
            // Chapters are compared in pairs (0,1),(2,3),...; an unpaired last
            // chapter is compared with the chapter before it.
            let partner = k%2===0?k+1:k-1;
            if(partner>=catalogueLen)partner = k-1;
            const own = raw[k];
            const other = raw[partner];
            if(!own||!other)continue;
            const shortest = Math.min(own.length,other.length);
            const suffixLen = commonSuffixLength(own,other);
            const prefixLen = commonPrefixLength(own,other);
            let text = own;
            // Remove the shared tail unless it would swallow a whole chapter
            if(suffixLen>0&&suffixLen<shortest)
                text = text.substring(0,text.length-suffixLen);
            const title = chapterTitles[k];
            const titleIdx = title?text.indexOf(title):-1;
            if(titleIdx===0){
                // already starts with its title, nothing to cut at the head
            }else if(titleIdx>=1&&titleIdx+title.length<text.length-MIN_BODY_AFTER_TITLE){
                // cutting at the title removes everything in front of it
                text = text.substring(titleIdx);
            }else if(prefixLen>0&&prefixLen<text.length){
                // title not usable: fall back to removing the shared head
                text = text.substring(prefixLen);
            }
            allStoryStrArr[k] = text;
        }
    };

    // Post-process the chapters and trigger the download
    const detailStory = ()=>{
        console.log("完成小說讀取,正在準備處理內容");
        for(let i=0;i<catalogueLen;++i){
            for(const str of removeList)
                allStoryStrArr[i] = allStoryStrArr[i].replaceAll(str,"");
        }
        removeSurplus();
        console.log('完成部分無關內容移除,正在添加章節換行');
        const name = storyTitle||'小說';
        const parts = [name,"\r\n\r\n\r\n"];
        for(let i=0;i<catalogueLen;++i){
            const text = allStoryStrArr[i];
            // empty or failed chapters are left out of the file
            if(!text)continue;
            const ti = chapterTitles[i];
            const body = text.replaceAll("\n\n","\r\n\r\n\n\n")+"\r\n\r\n";
            // Put the title on its own line. Slicing instead of
            // String.replace keeps "$" in titles from being interpreted.
            parts.push(ti+"\r\n"+(body.startsWith(ti)?body.slice(ti.length):body));
        }
        console.log('小說內容處理完成,正在準備下載');
        const a = document.createElement('a');
        const blob = new Blob(parts,{type:'text/plain'});
        const url = window.URL.createObjectURL(blob);
        a.href = url;
        a.download = name+".txt";
        a.click();
        window.URL.revokeObjectURL(url);
    };

    // Fetch sequentially so the site is not flooded with requests
    const run = async ()=>{
        for(let idx=0;idx<catalogueLen;++idx){
            const title = chapterTitles[idx];
            try{
                allStoryStrArr[idx] = await fetchChapterText(catalogues[idx].href) || '';
                console.log("完成讀取章節:"+title);
            }catch(err){
                // One broken chapter must not abort the whole download
                console.error("章節讀取失敗,已跳過:"+title,err);
            }
            // If the site blocks rapid requests, await a short delay here
            // before continuing with the next chapter.
        }
        detailStory();
    };
    return run();
}
// Run immediately with the default (empty) removeString, so pasting this script into the console starts the download.
simpleDownloadStoryInCatalogueWeb();
/**
 * Configurable novel downloader.
 * Create an instance while on the chapter catalogue page (or any non-chapter
 * page), then call down(): it reads the catalogue, fetches every chapter in
 * order, cleans the text and offers the result as a .txt download.
 */
class DownloadStory{
    /**
     * @param {string} cataloguesHtmlUrl URL of the page that lists the chapters; defaults to the current location
     * @param {string} cataloguesCssSelector CSS selector of the element(s) holding the chapter links
     * @param {Array<string>} cataloguesStartWhihString a chapter title must start with one of these, default ['第']
     * @param {Array<string>} cataloguesMustContainString ...and must contain one of these, default ['章']
     * @param {Array<string>} cataloguesMableContainStringEventNotHaveFirstTwo a title containing one of these is accepted even when the two rules above do not match
     * @param {string} contentCssSelector CSS selector of the element(s) holding the chapter text
     * @param {Array<string>} contentStartWithString the chapter text starts at the first of these that is found; when empty the chapter title is used
     * @param {Array<string>} contentEndWithString the chapter text ends at the last occurrence of the first of these that is found
     * @param {Array<string>} contentRemoveString strings deleted from the chapter text (all of them)
     * @param {string} charset charset used to decode pages, default GBK
     * @param {string} title novel title; read from the catalogue page's <title> when omitted
     * @throws {Error} when no catalogue URL is given and no current location exists
     */
    constructor(cataloguesHtmlUrl,
        cataloguesCssSelector,
        cataloguesStartWhihString,
        cataloguesMustContainString,
        cataloguesMableContainStringEventNotHaveFirstTwo,
        contentCssSelector,
        contentStartWithString,
        contentEndWithString,
        contentRemoveString,
        charset='GBK',title){
        if(title){
            this.title = title;
        }
        if(!cataloguesHtmlUrl){
            if(globalThis.location&&globalThis.location.href){
                cataloguesHtmlUrl = globalThis.location.href;
            }
            else throw new Error('請傳入一個章節目錄');
        }
        // Accept a single value, an array, or nothing for every list option
        const toArray = (value,fallback)=>{
            if(Array.isArray(value))return value;
            return value?[value]:fallback;
        };
        this.cataloguesHtmlUrl = cataloguesHtmlUrl;
        this.cataloguesHtmlUrlOrigin = new URL(cataloguesHtmlUrl).origin;
        this.cataloguesCssSelector = cataloguesCssSelector;
        this.cataloguesStartWhihString = toArray(cataloguesStartWhihString,['第']);
        this.cataloguesMustContainString = toArray(cataloguesMustContainString,['章']);
        this.cataloguesMableContainStringEventNotHaveFirstTwo = toArray(cataloguesMableContainStringEventNotHaveFirstTwo,[]);
        this.contentCssSelector = contentCssSelector;
        this.contentStartWithString = toArray(contentStartWithString,[]);
        this.contentEndWithString = toArray(contentEndWithString,[]);
        this.contentRemoveString = toArray(contentRemoveString,[]);
        // The default parameter only covers undefined; also tolerate null/''
        const effectiveCharset = charset||'GBK';
        this.charset = effectiveCharset.toUpperCase();
        this.lastUsedCharset = this.charset;
        this.textDecoder = new TextDecoder(effectiveCharset);
        this.lastUsedTextDecoder = this.textDecoder;
    }
    /**
     * Download the whole novel as "<title>.txt".
     * @throws {Error} when no chapter URL or no chapter text is found
     */
    async down(){
        await this.findStoryCataloguesInHtml();
        const urls = this.allContentUrls;
        const titles = this.allContentTitles;
        if(urls==null||urls.length<=0) throw new Error('沒找到小說內容url');
        const str = await this.getContentText(urls,titles);
        if(!str) throw new Error('沒找到小說內容');
        const a = document.createElement('a');
        const name = this.title||'小說';
        const blob = new Blob([name+"\r\n\r\n\r\n"+str], {type: 'text/plain'});
        const url = window.URL.createObjectURL(blob);
        a.href = url;
        a.download = name+".txt";
        a.click();
        window.URL.revokeObjectURL(url);
    }
    /**
     * Read the catalogue page and collect the chapter links.
     * Fills allContentUrls and allContentTitles, which are index-aligned.
     */
    async findStoryCataloguesInHtml(){
        const html = await this.fetchHtmlContent(this.cataloguesHtmlUrl);
        if(!this.title){
            const titleEl = html.querySelector('title');
            if(titleEl&&titleEl.innerText){
                this.title = titleEl.innerText;
                this.allContextText = this.title+"\r\n\r\n";
            }
        }
        const roots = this.cataloguesCssSelector
            ? [...html.querySelectorAll(this.cataloguesCssSelector)]
            : [];
        // No selector, or nothing matched: search the whole page
        if(roots.length<=0)roots.push(html);
        // One Map keeps every URL paired with its own title. Two independent
        // Sets would drift apart as soon as two chapters share a title.
        const titleByUrl = new Map();
        const collect = (a)=>{
            if(!this.isCatalogue(a))return;
            const url = this.getContentUrl(a);
            if(!titleByUrl.has(url))titleByUrl.set(url,a.innerText);
        };
        for(const root of roots){
            if(root.tagName.toUpperCase()==='A'){
                collect(root);
            }else{
                for(const a of root.querySelectorAll('a'))collect(a);
            }
        }
        this.allContentUrls = [...titleByUrl.keys()];
        this.allContentTitles = [...titleByUrl.values()];
    }
    /**
     * Decide whether an <a> element is a chapter link.
     * @param {{href:string, innerText:string}} a
     * @returns {boolean}
     */
    isCatalogue(a){
        if(!a.href)return false;
        const txt = a.innerText;
        if(!txt)return false;
        for(const start of this.cataloguesStartWhihString){
            if(txt.startsWith(start)){
                for(const c of this.cataloguesMustContainString){
                    if(txt.includes(c)){
                        return true;
                    }
                }
            }
        }
        for(const s of this.cataloguesMableContainStringEventNotHaveFirstTwo){
            if(txt.includes(s))return true;
        }
        return false;
    }
    /**
     * Absolute URL of the chapter an <a> element points to.
     * The href attribute as written in the page is resolved against the
     * catalogue URL, so relative links work no matter which page the script
     * is run from.
     * @param {{href:string}} a
     * @returns {string}
     */
    getContentUrl(a){
        const rawHref = typeof a.getAttribute==='function' ? a.getAttribute('href') : null;
        try{
            return new URL(rawHref||a.href,this.cataloguesHtmlUrl).href;
        }catch(e){
            // not parseable as a URL: hand back what the element reports
            return a.href;
        }
    }
    /**
     * Fetch all chapters one after another and join their text.
     * @param {Array<string>} urls chapter URLs
     * @param {Array<string>} titles chapter titles, index-aligned with urls
     * @returns {Promise<string>}
     */
    async getContentText(urls,titles){
        const len = urls.length;
        const arr = new Array(len);
        // sequential on purpose: keeps chapter order and is gentle on the site
        for(let i=0;i<len;++i){
            arr[i]= await this.getContentTextSingle(urls[i],titles[i]);
        }
        let str = '';
        for(const s of arr){
            str += s+"\r\n";
        }
        return str;
    }
    /**
     * Fetch one chapter and return its cleaned text.
     * Start/end markers are applied to the first matched element only;
     * contentRemoveString is applied to every matched element.
     * @param {string} url chapter URL
     * @param {string} title chapter title
     * @returns {Promise<string>}
     */
    async getContentTextSingle(url,title){
        const html = await this.fetchHtmlContent(url);
        let htmls;
        if(this.contentCssSelector){
            htmls = html.querySelectorAll(this.contentCssSelector);
        }else htmls = [html];
        let str = '';
        for(let i=0,l=htmls.length;i<l;++i){
            let txt = htmls[i].textContent;
            if(i===0){
                let startArr;
                if(this.contentStartWithString&&this.contentStartWithString.length>=1){
                    startArr = this.contentStartWithString;
                    if(!title)title = startArr[0];
                }else if(title) startArr = [title];
                if(startArr){
                    let startNot = true;
                    for(const start of startArr){
                        const startI = txt.indexOf(start);
                        if(startI>=0){
                            // drop everything in front of the start marker
                            if(startI>=1)
                                txt = txt.substring(startI);
                            startNot = false;
                            break;
                        }
                    }
                    // no marker found: make sure the chapter still has a heading
                    if(startNot&&title)
                        txt = title+"\r\n"+txt;
                }
                if(this.contentEndWithString&&this.contentEndWithString.length>=1){
                    for(const end of this.contentEndWithString){
                        const endI = txt.lastIndexOf(end);
                        // the end marker must lie behind the title
                        let start = 1;
                        if(title) start = title.length;
                        if(endI>start){
                            txt = txt.substring(0,endI);
                            break;
                        }
                    }
                }
            }
            if(this.contentRemoveString&&this.contentRemoveString.length>=1){
                for(const rmStr of this.contentRemoveString){
                    if(rmStr)txt = txt.replaceAll(rmStr,'');
                }
            }
            str += " \r\n" + txt;
        }
        str = str+"\r\n";
        console.log(str);
        return str+"\r\n";
    }
    /**
     * Fetch a page with GET and parse it.
     * The page is decoded with the most recently used decoder; when it
     * declares a different charset in <meta charset>, it is decoded again with
     * that charset, which then becomes the decoder for following pages.
     * @param {string} htmlUrl
     * @returns {Promise<Element>} root <html> element of the parsed page
     * @throws {Error} when htmlUrl is empty or the response status is not ok
     */
    async fetchHtmlContent(htmlUrl){
        if(!htmlUrl)throw new Error("沒有傳入htmlUrl");
        const resp = await fetch(htmlUrl);
        if(!resp.ok){
            throw new Error('獲取頁面內容失敗,url:'+htmlUrl);
        }
        const buffer = await resp.arrayBuffer();
        // DOMParser copes with any <html ...> variant and builds an inert
        // document, so no images are loaded while parsing.
        const toDom = (decoder)=>
            new DOMParser().parseFromString(decoder.decode(buffer),'text/html').documentElement;
        let html = toDom(this.lastUsedTextDecoder);
        const charsetEl = html.querySelector('meta[charset]');
        const charset = charsetEl&&charsetEl.getAttribute('charset');
        if(charset){
            const newCharset = charset.toUpperCase();
            if(newCharset!==this.lastUsedCharset){
                const decoderName = 'textDecoder'+newCharset;
                try{
                    // decoders are cached per charset on the instance
                    if(!this[decoderName]){
                        this[decoderName] = new TextDecoder(newCharset);
                    }
                    this.lastUsedCharset = newCharset;
                    this.lastUsedTextDecoder = this[decoderName];
                    html = toDom(this[decoderName]);
                }catch(err){
                    // unknown charset label: keep the page as already decoded
                    console.warn('不支持的字符集:'+newCharset,err);
                }
            }
        }
        return html;
    }
}
// Demo: download the novel "Zhe Tian" from bbiquge.net
var a =new DownloadStory('https://www.bbiquge.net/book_967/',// catalogue URL; passing only this also works (chapter titles then default to: starts with 第 and contains 章)
'.zjlist',// CSS selector of the element holding the chapter list
null,null,null,// use the defaults: 第, 章, and no extra title keywords
'#readbox',// CSS selector of the element holding the chapter text
null,
["上一章","下一章"],// strings marking the end of the chapter text
['jx();','筆趣閣 www.bbiquge.net,最快更新你想幹什麼最新章節!','hf();']// strings to delete from the chapter text
);
a.down();