如何方便地下載 CSDN 博客正文

CSDN 博客正文中的圖片一般不能直接複製,需要先複製到微信等工具再粘貼出來,比較麻煩。

因此寫了個工具類,方便下載正文,並可以方便地生成 HTML 文件和 Word(.doc)文件。

需要依賴 jsoup 和 Apache POI(程式碼中還用到了 commons-io 的 FileUtils 和 IOUtils)。

 

package test.test2019;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;

/**
 * Downloads the body of a CSDN blog article and saves it either as a
 * standalone HTML file or as a Word (.doc) file.
 *
 * <p>Uses jsoup for fetching/parsing, commons-io for file and URL reading,
 * and Apache POI (POIFS) for writing the .doc container.
 */
public class JsoupTest {

   /**
    * Fetches the article page and builds a self-contained HTML document from it.
    *
    * <p>The result contains the page's inline {@code <style>} elements, the
    * downloaded content of every linked stylesheet, a heading showing the
    * source address, and the {@code .blog-content-box} element of the page.
    *
    * <p>This is best-effort: if fetching or parsing the page fails, the stack
    * trace is printed and whatever was built so far is returned.
    *
    * @param uriStr address of the article page
    * @return the assembled HTML (possibly incomplete if the page could not be read)
    */
   private static String readHtml(String uriStr){
      // No cross-thread sharing here, so the unsynchronized builder is sufficient.
      StringBuilder sb=new StringBuilder();
      sb.append("<html><head>");
      try {
         URI uri=new URI(uriStr);
         Document doc= Jsoup.parse(uri.toURL(),10000);
         sb.append(doc.select("style").outerHtml());
         Elements links=doc.select("link[rel=\"stylesheet\"]");
         for(Element link:links){
            // Resolve against the page's base URI: a relative or protocol-relative
            // href ("//host/x.css") cannot be converted to a URL on its own.
            String url=link.absUrl("href");
            if(url.isEmpty()){
               continue;
            }
            appendStylesheet(sb,url);
         }
         sb.append("<style type=\"text/css\">");
         sb.append("#article_content,.fontclass{font-family:\"Microsoft YaHei\"}");
         sb.append("</style>");
         sb.append("</head><body>");
         sb.append("<h1 class='title-article'>下載地址:<b>").append(uriStr).append("</b></h1>");
         doc.select("#article_content p").toggleClass("fontclass");
         sb.append(doc.select(".blog-content-box").outerHtml());
         sb.append("</body></html>");
      } catch (Exception e) {
         e.printStackTrace();
      }
      return sb.toString();
   }

   /**
    * Downloads one stylesheet and appends it to {@code sb} as an inline
    * {@code <style>} element. A stylesheet that cannot be downloaded is
    * skipped (after printing the stack trace) so that it does not abort
    * the whole article.
    *
    * @param sb  buffer receiving the generated markup
    * @param url absolute address of the stylesheet
    */
   private static void appendStylesheet(StringBuilder sb,String url){
      String css;
      try {
         css=IOUtils.toString(new URI(url),"UTF-8");
      } catch (IOException | URISyntaxException | IllegalArgumentException e) {
         e.printStackTrace();
         return;
      }
      sb.append("<style type=\"text/css\" url='");
      sb.append(url);
      sb.append("'>");
      sb.append(css);
      sb.append("</style>");
      sb.append("\r\n");
   }

   /**
    * Writes {@code html} into a POIFS container under the entry name
    * {@code "WordDocument"} and saves that container to {@code file}.
    * Missing parent directories are created first.
    *
    * @param file     destination file
    * @param html     HTML text to store
    * @param encoding charset name used to encode {@code html} into bytes
    * @return {@code true} if the file was written, {@code false} if an
    *         {@link IOException} occurred (its stack trace is printed)
    */
   public static boolean writeDocFile( File file, String html,String encoding) {
      File fileDir=file.getParentFile();
      // getParentFile() is null for a bare file name such as "a.doc".
      if (fileDir!=null && !fileDir.exists()) {
         fileDir.mkdirs();
      }
      try {
         byte[] content = html.getBytes(encoding);
         POIFSFileSystem poifs = new POIFSFileSystem();
         DirectoryEntry directory = poifs.getRoot();
         directory.createDocument("WordDocument", new ByteArrayInputStream(content));
         // try-with-resources closes the file even when writeFilesystem fails.
         try (FileOutputStream ostream = new FileOutputStream(file)) {
            poifs.writeFilesystem(ostream);
         }
         return true;
      }catch(IOException e){
         e.printStackTrace();
         return false;
      }
   }

   /**
    * Downloads the article at {@code uriStr} and saves it as a Word file.
    * Failures are reported on standard error only; nothing is thrown for them.
    *
    * @param uriStr   address of the article page
    * @param wordFile destination .doc file
    */
   public static void writeCSDNWordFile(String uriStr,File wordFile)  {
      writeDocFile(wordFile,readHtml(uriStr),"UTF-8");
   }

   /**
    * Downloads the article at {@code uriStr} and saves it as a UTF-8 HTML file.
    *
    * @param uriStr    address of the article page
    * @param localFile destination HTML file
    * @throws IOException if the file cannot be written
    */
   public static void writeCSDNHtmlFile(String uriStr,File localFile) throws IOException {
      FileUtils.writeStringToFile(localFile,readHtml(uriStr),"UTF-8");
   }

   /**
    * Example run: saves one article as both HTML and Word under D:/test/word.
    */
   public static void main(String[] args) throws IOException, URISyntaxException {
      String html="D:/test/word/jxl-excel.html";
      String doc="D:/test/word/jxl-excel.doc";
      String uri="https://blog.csdn.net/a1091662876/article/details/87722035";
      writeCSDNHtmlFile(uri,new File(html));
      writeCSDNWordFile(uri,new File(doc));
   }
}
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章