人工智能:文本相似度分析

我們經常會遇到這樣一個問題:用戶在評論、發文的時候,時不時會發布一些高度相似的內容,這顯然沒有任何意義。這時候我們可以用一些算法來計算文本之間的相似度究竟是多少,並據此制定相應的回應策略。這個功能可以用多種語言來實現,你也可以點擊鏈接獲取其他語言的做法,這裏先列出如何用 Go 語言來實現這個功能:

package main

import (
    "io/ioutil"
    "net/http"
    "net/url"
    "fmt"
    "strings"
)
 
// API credentials: replace the placeholders with the appKey and openId
// that were issued to you.
const (
	APP_KEY = "yours"
	OPEN_ID = "yours"
)

// requestContent sends params to requestUrl using the HTTP verb named by
// method. The verb is upper-cased before comparison, so "get" and "GET" are
// equivalent; any value other than GET is sent through post.
func requestContent(requestUrl string, params url.Values, method string) (rs []byte, err error) {
	switch strings.ToUpper(method) {
	case "GET":
		return get(requestUrl, params)
	default:
		return post(requestUrl, params)
	}
}

// get issues an HTTP GET to requestUrl with params encoded into the query
// string and returns the response body.
//
// Query parameters already present in requestUrl are preserved; a key that
// also appears in params takes the value from params. url.Values.Encode
// percent-encodes the values, so non-ASCII text such as Chinese is safe.
//
// An error is returned when requestUrl cannot be parsed, when the request
// itself fails, when the server answers with a non-2xx status, or when the
// body cannot be read. Errors are returned rather than printed so that they
// are reported exactly once, by the caller.
func get(requestUrl string, params url.Values) (rs []byte, err error) {
	u, err := url.Parse(requestUrl)
	if err != nil {
		return nil, fmt.Errorf("building GET request: %w", err)
	}

	// Merge instead of overwriting RawQuery so that a query string that is
	// already part of requestUrl is not silently dropped.
	query := u.Query()
	for key, values := range params {
		query[key] = values
	}
	u.RawQuery = query.Encode()

	resp, err := http.Get(u.String())
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	// The message names requestUrl rather than the full URL so that the
	// credentials carried in the query string do not end up in error text.
	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
		return nil, fmt.Errorf("GET %s: unexpected status %s", requestUrl, resp.Status)
	}
	return ioutil.ReadAll(resp.Body)
}
 
// post sends params to requestUrl as an application/x-www-form-urlencoded
// HTTP POST (via http.PostForm) and returns the response body.
//
// An error is returned when the request fails, when the server answers with
// a non-2xx status, or when the body cannot be read.
func post(requestUrl string, params url.Values) (rs []byte, err error) {
	resp, err := http.PostForm(requestUrl, params)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	// A 4xx/5xx reply is a failed call; do not hand its body back as a result.
	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
		return nil, fmt.Errorf("POST %s: unexpected status %s", requestUrl, resp.Status)
	}
	return ioutil.ReadAll(resp.Body)
}

// main calls the content-similarity endpoint with two sample sentences and
// prints the raw response body. If the request fails, only the error is
// printed.
func main() {
	domain := "http://api.xiaocongjisuan.com/"
	servlet := "data/contentsimilarity/analysis"
	method := "get"
	requestUrl := domain + servlet

	// Credentials required on every call.
	params := url.Values{}
	params.Set("appKey", APP_KEY)
	params.Set("openId", OPEN_ID)

	// The two texts to compare; change these per call.
	params.Set("content1", "我是最可愛的小夥子")
	params.Set("content2", "我是最漂亮的小姑娘")

	// Check the error before touching data: data carries no meaning when the
	// request failed. The message is generic because the failure can be any
	// request error, not only a URL parse error.
	data, err := requestContent(requestUrl, params, method)
	if err != nil {
		fmt.Printf("請求失敗:\r\n%v", err)
		return
	}
	fmt.Println(string(data))
}

C# 語言的實現方式也非常簡單,列在下面:

using System;
using System.Collections.Generic;
using System.Text;
using System.Net;
using System.IO;

namespace ConsoleApplication1
{
    /// <summary>
    /// Console sample that calls the content-similarity HTTP API with two
    /// sample sentences and prints the raw response.
    /// </summary>
    class Program
    {
        // API credentials; replace the placeholders with the values issued to you.
        private static string appKey="yours";
        private static string openId = "yours";

        /// <summary>
        /// Reads the whole body of <paramref name="rsp"/> as text and closes the
        /// reader, the stream and the response afterwards.
        /// </summary>
        /// <param name="rsp">Response to read; it is closed by this method.</param>
        /// <param name="encoding">Encoding used to decode the body.</param>
        /// <returns>The decoded response body.</returns>
        static string getResponseAsString(HttpWebResponse rsp, Encoding encoding)
        {
            System.IO.Stream stream = null;
            StreamReader reader = null;
            try
            {
                // Read the HTTP response as a character stream.
                stream = rsp.GetResponseStream();
                reader = new StreamReader(stream, encoding);
                return reader.ReadToEnd();
            }
            finally
            {
                // Release resources.
                if (reader != null) reader.Close();
                if (stream != null) stream.Close();
                if (rsp != null) rsp.Close();
            }
        }

        /// <summary>
        /// Picks the encoding for a response body from its declared charset,
        /// falling back to UTF-8 when the charset is missing or not recognised
        /// (Encoding.GetEncoding throws in both cases).
        /// </summary>
        /// <param name="charset">Charset name reported by the response; may be null or empty.</param>
        /// <returns>The matching encoding, or UTF-8.</returns>
        static Encoding resolveEncoding(string charset)
        {
            if (string.IsNullOrEmpty(charset))
            {
                return Encoding.UTF8;
            }
            try
            {
                return Encoding.GetEncoding(charset);
            }
            catch (ArgumentException)
            {
                return Encoding.UTF8;
            }
        }

        /// <summary>
        /// Builds a "name=value&amp;name=value" query string from
        /// <paramref name="parameters"/>.
        /// </summary>
        /// <param name="parameters">
        /// Request parameters. A null dictionary yields an empty string and a
        /// null value is sent as an empty value.
        /// </param>
        /// <param name="encode">
        /// "gb2312" or "utf8" to URL-encode values with that encoding; any other
        /// value appends the values unencoded.
        /// </param>
        /// <returns>The query string, without a leading "?".</returns>
        static string buildQuery(IDictionary<string,object> parameters, string encode)
        {
            StringBuilder postData = new StringBuilder();
            if (parameters == null)
            {
                return postData.ToString();
            }

            bool hasParam = false;
            foreach (KeyValuePair<string, object> entry in parameters)
            {
                string name = entry.Key;
                // A null value would throw on ToString(); send it as an empty value.
                string value = entry.Value == null ? string.Empty : entry.Value.ToString();

                // Entries with an empty name are skipped; empty values are still sent.
                if (string.IsNullOrEmpty(name))
                {
                    continue;
                }

                if (hasParam)
                {
                    postData.Append("&");
                }
                postData.Append(name);
                postData.Append("=");
                if (encode == "gb2312")
                {
                    postData.Append(System.Web.HttpUtility.UrlEncode(value, Encoding.GetEncoding("gb2312")));
                }
                else if (encode == "utf8")
                {
                    postData.Append(System.Web.HttpUtility.UrlEncode(value, Encoding.UTF8));
                }
                else
                {
                    postData.Append(value);
                }
                hasParam = true;
            }
            return postData.ToString();
        }

        /// <summary>
        /// Sends <paramref name="parameters"/> to <paramref name="url"/> and
        /// returns the response body as text.
        /// </summary>
        /// <param name="url">Request address.</param>
        /// <param name="parameters">Request parameters.</param>
        /// <param name="method">
        /// "post" (any casing) sends a form-encoded POST; every other value,
        /// including null, sends a GET with the parameters in the query string.
        /// </param>
        /// <returns>
        /// The response body. On the POST path an exception is caught and its
        /// message is returned instead; on the GET path exceptions propagate to
        /// the caller.
        /// </returns>
        static string requestContent(string url, IDictionary<string,object> parameters, string method)
        {
            if (string.Equals(method, "post", StringComparison.OrdinalIgnoreCase))
            {
                HttpWebRequest req = null;
                HttpWebResponse rsp = null;
                try
                {
                    req = (HttpWebRequest)WebRequest.Create(url);
                    // Send the canonical verb rather than whatever casing the caller used.
                    req.Method = "POST";
                    req.KeepAlive = false;
                    req.ProtocolVersion = HttpVersion.Version10;
                    req.Timeout = 5000;
                    req.ContentType = "application/x-www-form-urlencoded;charset=utf-8";
                    byte[] postData = Encoding.UTF8.GetBytes(buildQuery(parameters, "utf8"));

                    // Close the request stream before asking for the response.
                    using (System.IO.Stream reqStream = req.GetRequestStream())
                    {
                        reqStream.Write(postData, 0, postData.Length);
                    }

                    rsp = (HttpWebResponse)req.GetResponse();
                    Encoding encoding = resolveEncoding(rsp.CharacterSet);
                    return getResponseAsString(rsp, encoding);
                }
                catch (Exception ex)
                {
                    return ex.Message;
                }
                finally
                {
                    if (rsp != null) rsp.Close();
                }
            }
            else
            {
                // Create the request with the parameters in the query string.
                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url + "?" + buildQuery(parameters, "utf8"));

                // GET request.
                request.Method = "GET";
                request.ReadWriteTimeout = 5000;
                request.ContentType = "text/html;charset=UTF-8";

                // Dispose the response, stream and reader even when reading fails.
                using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                using (Stream myResponseStream = response.GetResponseStream())
                using (StreamReader myStreamReader = new StreamReader(myResponseStream, Encoding.GetEncoding("utf-8")))
                {
                    // Return the content.
                    return myStreamReader.ReadToEnd();
                }
            }
        }

        /// <summary>
        /// Compares two sample sentences through the API, prints the result and
        /// waits for a key press.
        /// </summary>
        static void Main(string[] args)
        {

            String domain = "http://api.xiaocongjisuan.com/";
            String servlet = "data/contentsimilarity/analysis";
            String method = "get";
            String url = domain + servlet;


            var parameters = new Dictionary<string,object>();

            parameters.Add("appKey", appKey);
            parameters.Add("openId", openId);

            // The two texts to compare; change these per call.
            parameters.Add("content1", "我是最可愛的小夥子");
            parameters.Add("content2", "我是最漂亮的小姑娘");

            string result = requestContent(url, parameters, method);
            Console.WriteLine(result);
            Console.Read();

        }

    }
}
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章