MySQL數據庫與數據庫之間的文章採集整理


package net.aykj.util;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.List;

public class JdbcUtil2 {
	// Source database: JDBC driver class and URL used by querySourceBySql.
	// NOTE(review): user name and password are embedded in the URL literals below;
	// consider moving them out of the source file.
	private static String sourceDirver = "com.mysql.jdbc.Driver";
	private static String sourceUrl = "jdbc:mysql://localhost:3306/slold20191013?user=root&password=123456";
	
	// Destination database: JDBC driver class and URL used by insertDesBySql.
	// The commented-out URLs are alternative targets kept by the original author.
	private static String destinationDirver = "com.mysql.jdbc.Driver";
	//private static String destinationUrl = "jdbc:mysql://112.117.211.66:3307/wwwshineryncom?user=wwwshineryncom&password=wwwshineryncom";
	private static String destinationUrl = "jdbc:mysql://localhost:3306/sl20191013?user=root&password=123456";
	//private static String destinationUrl = "jdbc:mysql://116.55.226.10:3306/ythcom?user=ythcom&password=ythcom";
	
	/**
	 * Program entry point: prints a start banner, runs the migration driven by
	 * the mapping file, then prints an end banner.
	 *
	 * @param args command-line arguments (not read)
	 * @throws Exception whatever {@code catchData()} lets escape
	 */
	public static void main(String[] args) throws Exception {
		final String startBanner = "--------------------------------------- 數據採集 start ---------------------------------------------";
		final String endBanner = "--------------------------------------- 數據採集 end ---------------------------------------------";

		System.out.println(startBanner);
		catchData();
		System.out.println(endBanner);
	}
	
	// Path of the column-mapping file read by catchData(). Each line is split on '|'
	// into a source part and a destination part, and the text after '=' in each part
	// is used as the column id, e.g. "someName=480|otherName=16".
	private static String MAPPER_FILE = "D:\\show.txt";
	
	/**
	 * Reads the mapping file {@code MAPPER_FILE} (UTF-8) line by line and migrates
	 * the articles of every mapped column pair.
	 *
	 * <p>Each non-blank line must look like {@code name=sourceId|name=destId}; the
	 * text after {@code '='} on each side of {@code '|'} is the column id. Blank
	 * lines are skipped. The reader is always closed, also when a migration step
	 * throws.
	 *
	 * @throws IOException if the file cannot be read or a non-blank line does not
	 *                     follow the expected format
	 */
	private static void catchData() throws IOException {
		// Sub-site group id; handed through unchanged to moveArticleByNewsClassId.
		String subsiteId = "1";

		BufferedReader br = new BufferedReader(
				new InputStreamReader(new FileInputStream(new File(MAPPER_FILE)), "UTF-8"));
		try {
			String line;
			int lineNumber = 0;
			while ((line = br.readLine()) != null) {
				lineNumber++;
				if (line.trim().length() == 0) {
					// Tolerate empty separator lines instead of failing on split()[1].
					continue;
				}

				String[] sides = line.split("\\|");
				String[] source = sides.length >= 2 ? sides[0].split("=") : null;
				String[] destination = sides.length >= 2 ? sides[1].split("=") : null;
				if (source == null || source.length < 2 || destination.length < 2) {
					throw new IOException("Malformed mapping in " + MAPPER_FILE
							+ " at line " + lineNumber + ": " + line);
				}

				String sourceNewsClassId = source[1].trim();
				String desNewsClassId = destination[1].trim();
				moveArticleByNewsClassId(sourceNewsClassId, desNewsClassId, subsiteId);
			}
		} finally {
			br.close();
		}
	}
	
	/**
	 * Copies the articles of one source column into one destination column.
	 *
	 * <p>For every article returned by the source query this inserts the article
	 * into {@code aykj_article}, links it to {@code desNewsClassId} in
	 * {@code aykj_newsclass_article}, copies the first annex row found for the
	 * source article (stored as type {@code image}) and, if the article carries a
	 * video path, adds it as an annex of type {@code annex}.
	 *
	 * <p>If the article insert does not yield an id, the article is reported on
	 * {@code System.err} and skipped, so no link or annex rows are written for it.
	 *
	 * @param sourceNewsClassId column id in the source database
	 * @param desNewsClassId    column id in the destination database
	 * @param subsiteId         sub-site group id (currently not used here)
	 */
	private static void moveArticleByNewsClassId(String sourceNewsClassId, String desNewsClassId, String subsiteId) {
		System.out.println("======================= 查詢start ======================");
		// Load the articles of the source column. queryArticleByNewsClassId_junfa and
		// queryArticleByNewsClassId_nongkeyuan are alternative loaders that read the
		// list_info_news table instead.
		List<SourceArticle> list = queryArticleByNewsClassId_aykj_old_subsite(sourceNewsClassId);
		if (list != null) {
			System.out.println("查詢到" + list.size() + "條記錄");
			for (SourceArticle sourceArticle : list) {
				// Insert the article itself; the returned value is the generated id.
				String articleId = insertDesArticle(sourceArticle);
				if ("-1".equals(articleId)) {
					// insertDesBySql reports failure (or no generated key) as -1; linking
					// article_id -1 to a column would only create orphan rows.
					System.err.println("insert failed, skipped: " + sourceArticle.getTitle());
					continue;
				}

				// Link the new article to the destination column.
				JdbcUtil2.insertDesBySql("insert into aykj_newsclass_article (article_id, newsClass_id) values ("+ articleId +","+ desNewsClassId + ")");

				// Copy the thumbnail annex of the source article, if there is one.
				// Text values go through parseContent so a quote cannot end the SQL literal.
				Annex annex = JdbcUtil2.queryAnnex(sourceArticle.getId());
				if (annex != null) {
					JdbcUtil2.insertDesBySql("insert into aykj_annex (obj,objId,path,type,createtime, ext,name) values ('article',"
							+ articleId + ",'" + parseContent(annex.getPath()) + "','image','"
							+ parseContent(annex.getCreatetime()) + "','" + parseContent(annex.getExt())
							+ "','" + parseContent(annex.getName()) + "')");
				}

				// Copy the video attached to the article, if any.
				String videopath = sourceArticle.getVideoPath();
				if (GeneralUtil.isNotNull(videopath) && !"null".equals(videopath)) {
					if (videopath.startsWith("/upload/")) {
						videopath = videopath.replace("/upload/", "/uploadDir/");
					}
					videopath = parseContent(videopath);
					// Extension is what follows the LAST dot, so dots in directory names
					// (e.g. /uploadDir/2019.01/a.mp4) do not leak into it.
					String videoext = videopath.substring(videopath.lastIndexOf(".") + 1);
					String videoname = videopath.substring(videopath.lastIndexOf("/") + 1);
					System.out.println("--------------視頻擴展名:" + videoext);
					System.out.println("--------------視頻名稱:" + videoname);
					JdbcUtil2.insertDesBySql("insert into aykj_annex (obj,objId,path,type,createtime, ext,name) values ('article',"
							+ articleId + ",'" + videopath + "','annex','" + sourceArticle.getCreatetime()
							+ "','" + videoext + "','" + videoname + "')");
				}

				System.out.println("already insert " + sourceArticle.getTitle());
			}
		}

		System.out.println("======================== 查詢結束end =================================");
	}
	
	/**
	 * Inserts one article into the destination table {@code aykj_article}.
	 *
	 * <p>Text fields are passed through {@code parseContent} before being placed
	 * into the SQL statement. Content, introduce, author and source fall back to
	 * an empty string when missing or equal to the text {@code "null"}.
	 * {@code width="..."} and {@code height="..."} attributes are removed from
	 * {@code <img>} tags in the content. The cleaned content, title and introduce
	 * are written back to {@code sourceArticle}.
	 *
	 * @param sourceArticle article read from the source database; modified in place
	 * @return the generated article id as text, or {@code "-1"} if the insert
	 *         failed or produced no generated key
	 */
	private static String insertDesArticle(SourceArticle sourceArticle) {
		String content = blankIfMissing(parseContent(sourceArticle.getContent()));
		// [^>] keeps the match inside a single <img ...> tag; the former ".*?" could
		// run past the end of the tag and strip width/height from a later element.
		content = content
				.replaceAll("(<img[^>]*?)width=\"[^\"]*\"", "$1")
				.replaceAll("(<img[^>]*?)height=\"[^\"]*\"", "$1");
		String introduce = blankIfMissing(parseContent(sourceArticle.getIntroduce()));
		String author = blankIfMissing(parseContent(sourceArticle.getAuthor()));
		String source = blankIfMissing(parseContent(sourceArticle.getSource()));

		// Callers read title/createtime from the article afterwards, so keep it updated.
		sourceArticle.setContent(content);
		sourceArticle.setTitle(parseContent(sourceArticle.getTitle()));
		sourceArticle.setIntroduce(introduce);

		// Value order: author,content,introduce,source,audit,hot,commend,orderValue,title,createtime,hits,deleteFlag
		StringBuilder values = new StringBuilder();
		values.append('\'').append(author).append('\'');
		values.append(",'").append(sourceArticle.getContent()).append('\'');
		values.append(",'").append(sourceArticle.getIntroduce()).append('\'');
		values.append(",'").append(source).append('\'');
		values.append(',').append(sourceArticle.getIsAudit());
		values.append(',').append(sourceArticle.getIsHot());
		values.append(',').append(sourceArticle.getIsCommend());
		values.append(',').append(sourceArticle.getOrderValue());
		values.append(",'").append(sourceArticle.getTitle()).append('\'');
		values.append(",'").append(sourceArticle.getCreatetime()).append('\'');
		values.append(',').append(sourceArticle.getHits());
		values.append(',').append(sourceArticle.getDeleteFlag());

		int articleId = JdbcUtil2.insertDesBySql("insert into aykj_article(author,content,introduce,source,audit,hot,commend,orderValue,title,createtime,hits,deleteFlag) values (" + values + ")");
		return String.valueOf(articleId);
	}

	/**
	 * Returns {@code value} unchanged when it is present and not the text
	 * {@code "null"}; otherwise returns an empty string.
	 */
	private static String blankIfMissing(String value) {
		if (GeneralUtil.isNotNull(value) && !"null".equals(value)) {
			return value;
		}
		return "";
	}

	

	/**
	 * Makes a text value safe to place between single quotes in a MySQL statement.
	 *
	 * <p>Backslashes are doubled and single quotes are backslash-escaped, so the
	 * stored value equals the input. (Previously apostrophes were deleted, which
	 * altered the text, and a backslash in front of the closing quote could break
	 * the statement.) Assumes the server does not run with
	 * {@code NO_BACKSLASH_ESCAPES}.
	 *
	 * @param content raw text, may be {@code null}
	 * @return the escaped text, or {@code null} if {@code content} is {@code null}
	 */
	private static String parseContent(String content) {
		if (content == null) {
			return null;
		}
		// Backslashes first, otherwise the escapes added for quotes would be doubled.
		return content.replace("\\", "\\\\").replace("'", "\\'");
	}

	/**
	 * Loads articles from the source table {@code list_info_news} where
	 * {@code NClass = 53} and {@code Original} equals the given value, ordered by
	 * {@code DateAndTime} ascending.
	 *
	 * <p>Nullable text columns are trimmed null-safely, and the result set, its
	 * statement and its connection are released before returning.
	 *
	 * @param newsClassId value compared against the {@code Original} column
	 * @return the articles found, or {@code null} if the query or the row mapping failed
	 */
	@SuppressWarnings("unused")
	private static List<SourceArticle> queryArticleByNewsClassId_junfa(String newsClassId) {
		ResultSet rs = null;
		try {
			// NClass 7 = major events, 28 = honours, 53 = public-welfare milestones
			rs = JdbcUtil2.querySourceBySql("select NID,DateAndTime,Original,Title,Author,Content,`Describe`,SmallPic,SmallPicPath,Click,IsShow,isDel from list_info_news where NClass = 53 and Original = "+ newsClassId+" ORDER BY DateAndTime asc ");
			if (rs == null) {
				return null;
			}
			List<SourceArticle> sourceArticleList = new ArrayList<SourceArticle>();
			while (rs.next()) {
				// A NULL column must not abort the whole load with a NullPointerException.
				String author = rs.getString("Author");
				String original = rs.getString("Original");
				String title = rs.getString("Title");

				SourceArticle sourceArticle = new JdbcUtil2().new SourceArticle();
				sourceArticle.setAuthor(author == null ? null : author.trim());
				sourceArticle.setContent(rs.getString("Content"));
				sourceArticle.setIntroduce(rs.getString("Describe"));
				sourceArticle.setSource(original == null ? null : original.trim());
				sourceArticle.setIsAudit(rs.getInt("IsShow"));
				sourceArticle.setIsHot(0);
				sourceArticle.setDeleteFlag(rs.getInt("isDel"));
				sourceArticle.setIsCommend(0);
				sourceArticle.setOrderValue(100);
				sourceArticle.setTitle(title == null ? null : title.trim());
				sourceArticle.setCreatetime(rs.getString("DateAndTime"));
				sourceArticle.setHits(rs.getInt("Click"));
				sourceArticle.setNewsClass_id(rs.getString("NID"));
				sourceArticle.setPicture(rs.getString("SmallPicPath")+rs.getString("SmallPic"));
				sourceArticle.setVideoPath("");

				sourceArticleList.add(sourceArticle);
			}
			return sourceArticleList;
		} catch (Exception e) {
			e.printStackTrace();
			return null;
		} finally {
			// Every source query opens its own connection; release it here.
			if (rs != null) {
				try {
					Statement statement = rs.getStatement();
					Connection connection = statement == null ? null : statement.getConnection();
					rs.close();
					if (statement != null) {
						statement.close();
					}
					if (connection != null) {
						connection.close();
					}
				} catch (SQLException closeError) {
					closeError.printStackTrace();
				}
			}
		}
	}
	
	/**
	 * Loads articles from the source table {@code list_info_news} whose
	 * {@code NClass} equals the given column id, ordered by {@code DateAndTime}
	 * ascending. The {@code sValue6} column is used as the video path.
	 *
	 * <p>A NULL title is tolerated, and the result set, its statement and its
	 * connection are released before returning.
	 *
	 * @param newsClassId value compared against the {@code NClass} column
	 * @return the articles found, or {@code null} if the query or the row mapping failed
	 */
	@SuppressWarnings("unused")
	private static List<SourceArticle> queryArticleByNewsClassId_nongkeyuan(String newsClassId) {
		ResultSet rs = null;
		try {
			rs = JdbcUtil2.querySourceBySql("select sValue6,NID,DateAndTime,Original,Title,Author,Content,`Describe`,SmallPic,SmallPicPath,Click,IsShow,isDel from list_info_news where NClass = "+ newsClassId+" ORDER BY DateAndTime asc ");
			if (rs == null) {
				return null;
			}
			List<SourceArticle> sourceArticleList = new ArrayList<SourceArticle>();
			while (rs.next()) {
				// A NULL title must not abort the whole load with a NullPointerException.
				String title = rs.getString("Title");

				SourceArticle sourceArticle = new JdbcUtil2().new SourceArticle();
				sourceArticle.setAuthor(rs.getString("Author"));
				sourceArticle.setContent(rs.getString("Content"));
				sourceArticle.setIntroduce(rs.getString("Describe"));
				sourceArticle.setSource(rs.getString("Original"));
				sourceArticle.setIsAudit(rs.getInt("IsShow"));
				sourceArticle.setIsHot(0);
				sourceArticle.setDeleteFlag(rs.getInt("isDel"));
				sourceArticle.setIsCommend(0);
				sourceArticle.setOrderValue(100);
				sourceArticle.setTitle(title == null ? null : title.trim());
				sourceArticle.setCreatetime(rs.getString("DateAndTime"));
				sourceArticle.setHits(rs.getInt("Click"));
				sourceArticle.setNewsClass_id(rs.getString("NID"));
				sourceArticle.setPicture(rs.getString("SmallPicPath")+rs.getString("SmallPic"));
				sourceArticle.setVideoPath(rs.getString("sValue6"));

				sourceArticleList.add(sourceArticle);
			}
			return sourceArticleList;
		} catch (Exception e) {
			e.printStackTrace();
			return null;
		} finally {
			// Every source query opens its own connection; release it here.
			if (rs != null) {
				try {
					Statement statement = rs.getStatement();
					Connection connection = statement == null ? null : statement.getConnection();
					rs.close();
					if (statement != null) {
						statement.close();
					}
					if (connection != null) {
						connection.close();
					}
				} catch (SQLException closeError) {
					closeError.printStackTrace();
				}
			}
		}
	}
	
	
	/**
	 * Loads articles from the old site's {@code aykj_article} table.
	 *
	 * <p>NOTE(review): {@code newsClassId} is currently ignored. The query is
	 * pinned to source column {@code '480'} and to {@code createtime} between
	 * 2018-12-18 and 2019-09-29 (inclusive), so every mapping line loads the same
	 * articles. Confirm whether the parameter should be used instead.
	 *
	 * <p>Each article gets {@code deleteFlag = 0} and {@code orderValue = 200};
	 * picture and video path are not populated. The result set, its statement and
	 * its connection are released before returning.
	 *
	 * @param newsClassId source column id (not used by the current query)
	 * @return the articles found, or {@code null} if the query or the row mapping failed
	 */
	private static List<SourceArticle> queryArticleByNewsClassId_aykj_old_subsite(String newsClassId) {
		ResultSet rs = null;
		try {
			rs = JdbcUtil2.querySourceBySql("SELECT * FROM aykj_article WHERE id IN (select article_id from aykj_newsclass_article where newsClass_id IN ('480'))"
                                                     +" AND DATE_FORMAT(createtime,'%Y-%m-%d')>='2018-12-18' AND DATE_FORMAT(createtime,'%Y-%m-%d')<='2019-09-29'");
			if (rs == null) {
				return null;
			}
			List<SourceArticle> sourceArticleList = new ArrayList<SourceArticle>();
			while (rs.next()) {
				SourceArticle sourceArticle = new JdbcUtil2().new SourceArticle();
				sourceArticle.setId(rs.getInt("id"));
				sourceArticle.setTitle(rs.getString("title"));
				sourceArticle.setAuthor(rs.getString("author"));
				sourceArticle.setSource(rs.getString("source"));
				sourceArticle.setIntroduce(rs.getString("introduce"));
				sourceArticle.setContent(rs.getString("content"));
				sourceArticle.setCreatetime(rs.getString("createtime"));

				// Audit/hot/commend flags and hit counter are copied; the delete flag is reset.
				sourceArticle.setIsAudit(rs.getInt("audit"));
				sourceArticle.setDeleteFlag(0);
				sourceArticle.setIsHot(rs.getInt("hot"));
				sourceArticle.setIsCommend(rs.getInt("commend"));
				sourceArticle.setHits(rs.getInt("hits"));
				sourceArticle.setOrderValue(200);

				sourceArticleList.add(sourceArticle);
			}
			return sourceArticleList;
		} catch (Exception e) {
			e.printStackTrace();
			return null;
		} finally {
			// Every source query opens its own connection; release it here.
			if (rs != null) {
				try {
					Statement statement = rs.getStatement();
					Connection connection = statement == null ? null : statement.getConnection();
					rs.close();
					if (statement != null) {
						statement.close();
					}
					if (connection != null) {
						connection.close();
					}
				} catch (SQLException closeError) {
					closeError.printStackTrace();
				}
			}
		}
	}
	
	/**
	 * Fetches the first annex row of the old site's {@code aykj_annex} table whose
	 * {@code objId} equals the given id (used as the article thumbnail).
	 *
	 * <p>The returned object always has {@code obj = "article"} and
	 * {@code type = "image"}; ext, name, path and createtime come from the row.
	 * The result set, its statement and its connection are released before
	 * returning.
	 *
	 * @param objId id of the owning article in the source database
	 * @return the annex, or {@code null} if none exists or the query failed
	 */
	public static Annex queryAnnex (Integer objId) {
		ResultSet rs = JdbcUtil2.querySourceBySql("select * from aykj_annex where objId=" + objId);
		if (rs == null) {
			return null;
		}
		try {
			// Only the first row is of interest.
			if (!rs.next()) {
				return null;
			}
			Annex annex = new JdbcUtil2().new Annex();
			annex.setObj("article");
			annex.setObjId(objId);
			annex.setExt(rs.getString("ext"));
			annex.setName(rs.getString("name"));
			annex.setPath(rs.getString("path"));
			annex.setType("image");
			annex.setCreatetime(rs.getString("createtime"));
			return annex;
		} catch (SQLException e) {
			e.printStackTrace();
			return null;
		} finally {
			// Every source query opens its own connection; release it here.
			try {
				Statement statement = rs.getStatement();
				Connection connection = statement == null ? null : statement.getConnection();
				rs.close();
				if (statement != null) {
					statement.close();
				}
				if (connection != null) {
					connection.close();
				}
			} catch (SQLException closeError) {
				closeError.printStackTrace();
			}
		}
	}
	
	/**
	 * 源文章pojo類
	 * @Description 
	 * @author Bingyong.Wang
	 * @Date   2018年9月4日
	 */
	class SourceArticle {
		private String newsClass_id;
		private String author;
		private String content;
		private String introduce;
		private String keyword;
		private String source;
		private Integer isAudit;
		private Integer isHot;
		private Integer deleteFlag;
		private Integer isCommend;
		private Integer orderValue;
		private String title;
		private String picture;
		private String createtime;
		private String videoPath;
		private Integer hits;
		private Integer id;
		
		/*
		private String linkUrl;
		private String secondtitle;
		private String readTotal;
		private Date recordTime;
		private String picture;
		private String annexPath;
		private String hotArticle;
		*/
		
		/*author,content,introduce,keyword,source,isAudit,isHot,isCommend,orderValue,title,createtime,hits*/
		
		//title,secondtitle,author,content,keyword,newsClass_id,readTotal,recordTime,source,picture,annexPath
		
		public String getVideoPath() {
			return videoPath;
		}
		public void setVideoPath(String videoPath) {
			this.videoPath = videoPath;
		}
		public String getNewsClass_id() {
			return newsClass_id;
		}
		public String getPicture() {
			return picture;
		}
		public void setPicture(String picture) {
			this.picture = picture;
		}
		public void setNewsClass_id(String newsClass_id) {
			this.newsClass_id = newsClass_id;
		}
		public String getAuthor() {
			return author;
		}
		public void setAuthor(String author) {
			this.author = author;
		}
		public String getContent() {
			return content;
		}
		public void setContent(String content) {
			this.content = content;
		}
		public String getIntroduce() {
			return introduce;
		}
		public void setIntroduce(String introduce) {
			this.introduce = introduce;
		}
		public String getKeyword() {
			return keyword;
		}
		public void setKeyword(String keyword) {
			this.keyword = keyword;
		}
		public String getSource() {
			return source;
		}
		public void setSource(String source) {
			this.source = source;
		}
		public Integer getIsAudit() {
			return isAudit;
		}
		public void setIsAudit(Integer isAudit) {
			this.isAudit = isAudit;
		}
		public Integer getIsHot() {
			return isHot;
		}
		public void setIsHot(Integer isHot) {
			this.isHot = isHot;
		}
		public Integer getIsCommend() {
			return isCommend;
		}
		public void setIsCommend(Integer isCommend) {
			this.isCommend = isCommend;
		}
		public Integer getOrderValue() {
			return orderValue;
		}
		public void setOrderValue(Integer orderValue) {
			this.orderValue = orderValue;
		}
		public String getTitle() {
			return title;
		}
		public void setTitle(String title) {
			this.title = title;
		}
		public String getCreatetime() {
			return createtime;
		}
		public void setCreatetime(String createtime) {
			this.createtime = createtime;
		}
		public Integer getHits() {
			return hits;
		}
		public void setHits(Integer hits) {
			this.hits = hits;
		}
		public Integer getDeleteFlag() {
			return deleteFlag;
		}
		public void setDeleteFlag(Integer deleteFlag) {
			this.deleteFlag = deleteFlag;
		}
		public Integer getId() {
			return id;
		}
		public void setId(Integer id) {
			this.id = id;
		}
		
	}
	
	/**
	 * Plain data holder describing an article in the destination database.
	 * All properties are strings with simple getters and setters.
	 */
	class DesArticle {
		private String title;
		private String author;
		private String source;
		private String hits;
		private String createtime;
		private String introduce;
		private String content;

		public String getTitle() { return title; }
		public void setTitle(String title) { this.title = title; }

		public String getAuthor() { return author; }
		public void setAuthor(String author) { this.author = author; }

		public String getSource() { return source; }
		public void setSource(String source) { this.source = source; }

		public String getHits() { return hits; }
		public void setHits(String hits) { this.hits = hits; }

		public String getCreatetime() { return createtime; }
		public void setCreatetime(String createtime) { this.createtime = createtime; }

		public String getIntroduce() { return introduce; }
		public void setIntroduce(String introduce) { this.introduce = introduce; }

		public String getContent() { return content; }
		public void setContent(String content) { this.content = content; }
	}
	
	/**
	 * Plain data holder for one annex (attachment) row: the owning object kind
	 * and id, plus the file's path, name, extension, type and creation time.
	 *
	 * @author Bingyong.Wang (2019-01-08)
	 */
	class Annex {
		// Owner of the attachment
		private String obj;
		private Integer objId;

		// File description
		private String path;
		private String name;
		private String ext;
		private String type;
		private String createtime;

		public String getObj() { return obj; }
		public void setObj(String obj) { this.obj = obj; }

		public Integer getObjId() { return objId; }
		public void setObjId(Integer objId) { this.objId = objId; }

		public String getPath() { return path; }
		public void setPath(String path) { this.path = path; }

		public String getName() { return name; }
		public void setName(String name) { this.name = name; }

		public String getExt() { return ext; }
		public void setExt(String ext) { this.ext = ext; }

		public String getType() { return type; }
		public void setType(String type) { this.type = type; }

		public String getCreatetime() { return createtime; }
		public void setCreatetime(String createtime) { this.createtime = createtime; }
	}
	
	/**
	 * Runs a query against the source database.
	 *
	 * <p>A new statement (on its own connection) is created for every call. On
	 * success the caller owns the returned result set and, through
	 * {@code ResultSet.getStatement()}, the statement and connection behind it.
	 * If the query fails, the statement and connection are closed here.
	 *
	 * @param sql complete SELECT statement
	 * @return the open result set, or {@code null} if no statement could be
	 *         created or the query failed
	 */
	private static ResultSet querySourceBySql(String sql) {
		Statement statement = createStatement(sourceDirver, sourceUrl);
		if (statement == null) {
			// createStatement has already printed the cause.
			return null;
		}
		try {
			return statement.executeQuery(sql);
		} catch (Exception e) {
			e.printStackTrace();
			// Nobody will ever see this statement again; do not leave its connection open.
			try {
				Connection connection = statement.getConnection();
				statement.close();
				if (connection != null) {
					connection.close();
				}
			} catch (SQLException closeError) {
				closeError.printStackTrace();
			}
			return null;
		}
	}
	
	/**
	 * Executes one INSERT against the destination database and returns the
	 * generated key.
	 *
	 * <p>A new statement (on its own connection) is created for every call and is
	 * closed again before returning, together with the generated-keys result set.
	 *
	 * @param sql complete INSERT statement
	 * @return the first generated key, or {@code -1} if the statement failed, no
	 *         statement could be created, or the insert produced no generated key
	 */
	private static int insertDesBySql(String sql) {
		Statement statement = createStatement(destinationDirver, destinationUrl);
		if (statement == null) {
			// createStatement has already printed the cause.
			return -1;
		}
		ResultSet generatedKeys = null;
		try {
			statement.executeUpdate(sql, Statement.RETURN_GENERATED_KEYS);
			generatedKeys = statement.getGeneratedKeys();
			if (generatedKeys != null && generatedKeys.next()) {
				return generatedKeys.getInt(1);
			}
			return -1;
		} catch (Exception e) {
			e.printStackTrace();
			return -1;
		} finally {
			// One connection is opened per call, so it has to be released per call.
			try {
				Connection connection = statement.getConnection();
				if (generatedKeys != null) {
					generatedKeys.close();
				}
				statement.close();
				if (connection != null) {
					connection.close();
				}
			} catch (SQLException closeError) {
				closeError.printStackTrace();
			}
		}
	}
	
	/**
	 * Loads the JDBC driver, opens a new connection to {@code url} and creates a
	 * statement on it.
	 *
	 * <p>The connection is reachable through {@code Statement.getConnection()};
	 * whoever uses the statement is responsible for closing both. If the
	 * statement cannot be created, the freshly opened connection is closed here.
	 *
	 * @param driver fully qualified JDBC driver class name
	 * @param url    JDBC URL including credentials
	 * @return a new statement, or {@code null} if the driver could not be loaded,
	 *         the connection could not be opened or the statement could not be created
	 */
	private static Statement createStatement(String driver, String url) {
		Connection conn = null;
		try {
			Class.forName(driver);
			conn = DriverManager.getConnection(url);
			return conn.createStatement();
		} catch (Exception e) {
			e.printStackTrace();
			if (conn != null) {
				// Connection was opened but is unusable for the caller; do not leak it.
				try {
					conn.close();
				} catch (SQLException closeError) {
					closeError.printStackTrace();
				}
			}
			return null;
		}
	}
}

/*
            集團要聞=479|集團新聞=16
	實時動態=480|集團新聞=16
	媒體聚焦=481|行業動態=19
*/

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章