1、對於自建ElasticSearch的分詞配置相信大家都知道了,不多說
2、對於購買的阿里雲ElasticSearch服務,無法直接修改服務器上的詞典文件,需要通過阿里雲API遠程更新IK分詞詞典配置
條件和步驟:
1、首先購買阿里雲的OSS服務,創建存儲空間,這個費用不高,只收取存儲和上傳下載費用
2、開發上傳文件到OSS的接口
3、開發通知elasticsearch熱更新IK分詞插件的接口
4、使用流程:先調用上傳文件接口把字典文件更新到OSS,再調用通知接口讓elasticsearch熱更新IK詞典
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.aliyun.oss.ClientException;
import com.aliyun.oss.OSS;
import com.aliyun.oss.OSSClientBuilder;
import com.aliyun.oss.OSSException;
import com.aliyun.oss.model.GetObjectRequest;
import com.aliyun.oss.model.OSSObject;
import com.aliyuncs.DefaultAcsClient;
import com.aliyuncs.elasticsearch.model.v20170613.UpdateHotIkDictsRequest;
import com.aliyuncs.http.FormatType;
import com.aliyuncs.http.HttpResponse;
import com.aliyuncs.profile.DefaultProfile;
import com.aliyuncs.profile.IClientProfile;
import com.oa.common.error.exception.OaBusinessException;
import com.oa.common.util.DateUtil;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.web.bind.annotation.*;
import org.springframework.web.multipart.MultipartFile;
import javax.servlet.ServletOutputStream;
import javax.servlet.http.HttpServletResponse;
import java.io.*;
import java.nio.charset.StandardCharsets;
/**
* 功能: ElasticSearch分詞字典熱更新<br/>
*
* @author ql
* @create 2020-01-21 14:06
**/
@Slf4j
@RestController
@RequestMapping
public class ElasticIkDictEndpoint {
/**
* endpoint是訪問OSS的域名
*/
@Value("${oss.endpoint}")
private String endpoint;
/**
* 阿里雲平臺訪問key
*/
@Value("${oss.accessKeyId}")
private String accessKeyId;
/**
* 阿里雲平臺訪問Secret
*/
@Value("${oss.accessKeySecret}")
private String accessKeySecret;
/**
* 存儲空間
*/
@Value("${oss.bucketName}")
private String bucketName;
/**
* oss存儲目錄
*/
@Value("${oss.fileDir}")
private String fileDir;
/**
* es地域
*/
@Value("${elasticsearch.regionId}")
private String regionId;
/**
* es實例
*/
@Value("${elasticsearch.instanceId}")
private String esInstanceId;
/**
* 阿里雲Elasticsearch API的服務接入地址(Endpoint)
*
* | 華東1(杭州) | elasticsearch.cn-hangzhou.aliyuncs.com |
* | 華東2(上海) | elasticsearch.cn-shanghai.aliyuncs.com |
*/
@Value("${elasticsearch.domain}")
private String esDomain;
/**
* ik主分詞文件名稱,需要和es插件配置上的文件名一致,否則會導致es實例重啓
*/
@Value("${elasticsearch.ikword}")
private String mainFileName;
/**
* ik停止詞文件名稱,需要和es插件配置上的文件名一致,否則會導致es實例重啓
*/
@Value("${elasticsearch.stopword}")
private String stopFileName;
/**
* 功能: 上傳es分詞文件<br/>
*
* @author ql
* @create 2019/12/25 18:34
* @param file 上傳文件
* @param wordType 分詞類型:MAIN -IK主分詞詞庫;STOP-IK停用詞庫
* @return java.lang.String
**/
@PostMapping("/es/ikWords/upload")
public String uploadIkDict2Oss(@RequestParam("file") MultipartFile file,@RequestParam("wordType") String wordType) throws Exception{
log.info("OSS文件開始上傳,時間={}", DateUtil.getTime());
String fileName = "";
switch (wordType){
case "MAIN" :
fileName = mainFileName;
break;
case "STOP" :
fileName = stopFileName;
break;
default:
throw new OaBusinessException("分詞類型不對。");
}
// 生成OSSClient,您可以指定一些參數,詳見“SDK手冊 > Java-SDK > 初始化”,
// 鏈接地址是:https://help.aliyun.com/document_detail/oss/sdk/java-sdk/init.html?spm=5176.docoss/sdk/java-sdk/get-start
OSS ossClient = new OSSClientBuilder().build(endpoint, accessKeyId, accessKeySecret);
try {
// 判斷Bucket是否存在。詳細請參看“SDK手冊 > Java-SDK > 管理Bucket”。
// 鏈接地址是:https://help.aliyun.com/document_detail/oss/sdk/java-sdk/manage_bucket.html?spm=5176.docoss/sdk/java-sdk/init
if (ossClient.doesBucketExist(bucketName)) {
log.debug("您已經創建Bucket:{}" , bucketName );
} else {
log.info("您的Bucket不存在,創建Bucket:{}" , bucketName );
// 創建Bucket。詳細請參看“SDK手冊 > Java-SDK > 管理Bucket”。
// 鏈接地址是:https://help.aliyun.com/document_detail/oss/sdk/java-sdk/manage_bucket.html?spm=5176.docoss/sdk/java-sdk/init
ossClient.createBucket(bucketName);
}
//存入oss的url
String ossFileUrl = fileDir + fileName;
// 上傳文件流。
ossClient.putObject(bucketName, ossFileUrl, file.getInputStream());
log.info("Object:" + fileName + "存入OSS成功。");
} catch (OSSException oe) {
log.error("OSS文件上傳失敗,error={}",oe.getMessage(),oe);
return "failure";
} catch (ClientException ce) {
log.error("OSS文件上傳失敗,error={}",ce.getMessage(),ce);
return "failure";
} catch (Exception e) {
log.error("OSS文件上傳失敗,error={}",e.getMessage(),e);
return "failure";
} finally {
ossClient.shutdown();
log.debug("============流關閉===============");
}
log.info("OSS文件上傳成功,時間={}", DateUtil.getTime());
return "success";
}
/**
* 功能: 下載分詞文件<br/>
*
* @author ql
* @create 2020/01/21 19:34
* @param response
* @param wordType 分詞類型:MAIN -IK主分詞詞庫;STOP-IK停用詞庫
* @return java.lang.String
**/
@GetMapping("/es/ikWords/download")
public String downloadIkDict2Oss(HttpServletResponse response, @RequestParam("wordType") String wordType) throws Exception{
log.info("OSS文件開始下載,時間={}", DateUtil.getTime());
String fileName = "";
switch (wordType){
case "MAIN" :
fileName = mainFileName;
break;
case "STOP" :
fileName = stopFileName;
break;
default:
throw new OaBusinessException("分詞類型不對。");
}
// 生成OSSClient,您可以指定一些參數,詳見“SDK手冊 > Java-SDK > 初始化”,
// 鏈接地址是:https://help.aliyun.com/document_detail/oss/sdk/java-sdk/init.html?spm=5176.docoss/sdk/java-sdk/get-start
OSS ossClient = new OSSClientBuilder().build(endpoint, accessKeyId, accessKeySecret);
//oss的url
String ossFileUrl = fileDir + fileName;
OSSObject object = ossClient.getObject(new GetObjectRequest(bucketName, ossFileUrl));
response.setContentType("application/binary;charset=UTF-8");
response.setHeader("Content-disposition", "attachment; filename=" + fileName);
BufferedInputStream bis = null;
OutputStream out = null;
try {
// 實現文件下載
byte[] buffer = new byte[1024];
bis = new BufferedInputStream(object.getObjectContent());
out = response.getOutputStream();
int i = bis.read(buffer);
while (i != -1) {
out.write(buffer, 0, i);
i = bis.read(buffer);
}
log.info("Object:" + fileName + "下載成功。");
} catch (OSSException oe) {
log.error("OSS文件下載失敗,error={}",oe.getMessage(),oe);
return "failure";
} catch (ClientException ce) {
log.error("OSS文件下載失敗,error={}",ce.getMessage(),ce);
return "failure";
} catch (Exception e) {
log.error("OSS文件下載失敗,error={}",e.getMessage(),e);
return "failure";
} finally {
ossClient.shutdown();
if(bis != null){
bis.close();
}
if(out != null){
out.close();
}
log.debug("============流關閉===============");
}
log.info("OSS文件下載成功,時間={}", DateUtil.getTime());
return "success";
}
/**
* 功能: 刷新es的字典<br/>
*
* @author ql
* @create 2020/01/21 19:09
* @return java.lang.String
**/
@GetMapping("/es/ikWords/refresh")
public String refreshIKDictFromOss() throws Exception{
//針對上海區域的阿里雲ES實例
IClientProfile profile = DefaultProfile.getProfile(regionId,accessKeyId, accessKeySecret);
//添加自定義endpoint。
DefaultProfile.addEndpoint(regionId,"elasticsearch", esDomain);
DefaultAcsClient client = new DefaultAcsClient(profile);
// updating es hot ik dicts
log.info(" Start updating es hot ik dicts...");
UpdateHotIkDictsRequest updateHotIkDictsRequest = new UpdateHotIkDictsRequest();
updateHotIkDictsRequest.setAcceptFormat(FormatType.JSON);
updateHotIkDictsRequest.setInstanceId(esInstanceId);
JSONObject mainObject = this.buildJSONObject(mainFileName, "MAIN", bucketName, fileDir + mainFileName);
JSONObject stopObject = this.buildJSONObject(stopFileName, "STOP", bucketName, fileDir + stopFileName);
JSONArray jsonArray = new JSONArray();
jsonArray.add(mainObject);
jsonArray.add(stopObject);
String content = JSONArray.toJSONString(jsonArray);
log.info("刷新es的字典,請求內容:"+content);
updateHotIkDictsRequest.setHttpContent(content.getBytes(), "UTF-8", FormatType.JSON);
try {
HttpResponse response = client.doAction(updateHotIkDictsRequest);
log.info("刷新時間={},status={},result={}",DateUtil.getTime(),response.getStatus(),response.getHttpContentString());
} catch (ClientException e) {
log.error("======刷新es的字典失敗============{}",e.getMessage(),e);
return "failure";
}
return "success";
}
/**
* 功能: 組裝請求報文 <br/>
*
* @author ql
* @create 2020/01/21 19:33
* @param fileName 文件名稱
* @param type 文件類型:IK主分詞詞庫取值爲MAIN,如果是作爲IK停用詞庫取值爲STOP
* @param bucketName 桶名稱
* @param key oss中的對應IK熱詞典文件路徑
* @return com.alibaba.fastjson.JSONObject
**/
private JSONObject buildJSONObject(String fileName, String type, String bucketName, String key) {
JSONObject object = new JSONObject();
//name參數表示上傳對應文件的文件名。
object.put("name", fileName);
//type參數值,如果是作爲IK主分詞詞庫取值爲MAIN,如果是作爲IK停用詞庫取值爲STOP。
object.put("type", type);
//sourceType參數表示數據源類型,例如此處取值爲OSS。
object.put("sourceType", "OSS");
JSONObject ossObject = new JSONObject();
//bucketName參數表示OSS數據源中的project名稱
ossObject.put("bucketName", bucketName);
//key參數值表示oss中的對應IK熱詞典文件路徑
ossObject.put("key", key);
//etag是對這個文件中的內容的md5值,可以取MD5後的32位大寫數值,用來比對是否和上次的內容有差異,如果有差異或做更新。
// ossObject.put("etag","35C44BC0C7AFF5AE6E73B195F933810E");
//OSS中的熱詞文件需要有公共讀權限。
object.put("ossObject", ossObject);
return object;
}
}