ElasticSearch學習筆記-ngram、中文拼音、簡繁體搜索記錄

ElasticSearch版本:elasticsearch-7.3.0

ElasticSearch相關插件安裝可以參考:

https://blog.csdn.net/fighting_one_piece/article/details/100183135

在 Elasticsearch 服務上創建索引 book_v2(含自定義分析器)及其 Mapping

# Create index book_v2 with custom analyzers (pinyin, stconvert, IK + edge_ngram) and a
# strict mapping; "author" is additionally indexed through each analyzer as a sub-field.
# "id" is indexed but excluded from the stored _source.
# "author.ac" indexes edge n-grams but is searched with plain ik_smart, so a query is
# matched as typed instead of being expanded into its own prefixes.
curl --header "Content-Type: application/json" --request PUT 'http://192.168.0.1:9200/book_v2/' --data '
{
  "settings": {
    "analysis": {
      "analyzer": {
        "pinyin_analyzer_1": {
          "tokenizer": "pinyin_tokenizer_1"
        },
        "pinyin_analyzer_2": {
          "tokenizer": "pinyin_tokenizer_2"
        },
        "tsconvert": {
          "tokenizer": "tsconvert_tokenizer",
          "filter": [
            "tsconvert_filter"
          ]
        },
        "autocomplete_analyzer": {
          "type": "custom",
          "tokenizer": "ik_smart",
          "filter": [
            "autocomplete_filter"
          ]
        },
        "custom_analyzer_1": {
          "type": "custom",
          "tokenizer": "ik_smart",
          "filter": [
            "cus_pinyin_filter_1"
          ]
        },
        "custom_analyzer_2": {
          "type": "custom",
          "tokenizer": "ik_max_word",
          "filter": [
            "stconvert_filter"
          ]
        },
        "custom_analyzer_3": {
          "type": "custom",
          "tokenizer": "ik_max_word",
          "filter": [
            "tsconvert_filter"
          ]
        }
      },
      "tokenizer": {
        "pinyin_tokenizer_1": {
          "type": "pinyin",
          "keep_separate_first_letter": false,
          "keep_full_pinyin": true,
          "keep_original": true,
          "limit_first_letter_length": 16,
          "lowercase": true,
          "remove_duplicated_term": true
        },
        "pinyin_tokenizer_2": {
          "type": "pinyin",
          "keep_separate_first_letter": true,
          "keep_full_pinyin": false
        },
        "tsconvert_tokenizer": {
          "type": "stconvert",
          "delimiter": "#",
          "keep_both": false,
          "convert_type": "t2s"
        }
      },
      "filter": {
        "stconvert_filter": {
          "type": "stconvert",
          "delimiter": "#",
          "keep_both": false,
          "convert_type": "s2t"
        },
        "tsconvert_filter": {
          "type": "stconvert",
          "delimiter": "#",
          "keep_both": false,
          "convert_type": "t2s"
        },
        "autocomplete_filter": {
          "type": "edge_ngram",
          "min_gram": 1,
          "max_gram": 10
        },
        "cus_pinyin_filter_1": {
          "type": "pinyin",
          "keep_joined_full_pinyin": true,
          "keep_separate_first_letter": true,
          "none_chinese_pinyin_tokenize": true
        }
      },
      "char_filter": {
        "tsconvert": {
          "type": "stconvert",
          "convert_type": "t2s"
        }
      }
    }
  },
  "mappings": {
    "dynamic": "strict",
    "_source": {
      "excludes": [
        "id"
      ]
    },
    "properties": {
      "id": {
        "type": "keyword"
      },
      "author": {
        "analyzer": "ik_max_word",
        "type": "text",
        "fields": {
          "raw": {
            "type": "keyword"
          },
          "pinyin0": {
            "analyzer": "pinyin",
            "type": "text"
          },
          "pinyin1": {
            "analyzer": "pinyin_analyzer_1",
            "type": "text"
          },
          "pinyin2": {
            "analyzer": "pinyin_analyzer_2",
            "type": "text"
          },
          "s2t": {
            "analyzer": "stconvert",
            "type": "text"
          },
          "t2s": {
            "analyzer": "tsconvert",
            "type": "text"
          },
          "ac": {
            "analyzer": "autocomplete_analyzer",
            "search_analyzer": "ik_smart",
            "type": "text"
          },
          "cus1": {
            "analyzer": "custom_analyzer_1",
            "type": "text"
          },
          "cus2": {
            "analyzer": "custom_analyzer_2",
            "type": "text"
          },
          "cus3": {
            "analyzer": "custom_analyzer_3",
            "type": "text"
          }
        }
      },
      "title": {
        "analyzer": "ik_max_word",
        "type": "text"
      },
      "describe": {
        "analyzer": "ik_smart",
        "type": "text"
      },
      "publish_time": {
        "type": "date",
        "format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis"
      }
    }
  }
}
'

ElasticSearch中新增數據

# Index sample document 1 (author name followed by digits).
curl --header "Content-Type: application/json" --request POST 'http://192.168.0.1:9200/book_v2/_doc/1' --data '
{
  "id": "1",
  "author": "張三丰123",
  "title": "太極拳三十天入門到精通123",
  "describe": "手把手、一對一教學、三十天入門到精通,包教包會",
  "publish_time": "2019-08-22 17:48:16"
}
'

# Index sample document 2 (author name followed by Latin letters).
curl --header "Content-Type: application/json" --request POST 'http://192.168.0.1:9200/book_v2/_doc/2' --data '
{
  "id": "2",
  "author": "張三丰abc",
  "title": "太極拳三十天入門到精通abc",
  "describe": "手把手、一對一教學、三十天入門到精通,包教包會",
  "publish_time": "2019-08-22 17:48:16"
}
'

# Index sample document 3 (author name written fully in traditional characters).
curl --header "Content-Type: application/json" --request POST 'http://192.168.0.1:9200/book_v2/_doc/3' --data '
{
  "id": "3",
  "author": "張三豐",
  "title": "太極拳三十天入門到精通",
  "describe": "手把手、一對一教學、三十天入門到精通,包教包會,龍馬精神",
  "publish_time": "2019-08-22 17:48:16"
}
'

驗證相關分詞器(每條命令下方一行為返回的分詞結果)

# Run the "pinyin" analyzer of index book_v2 on the sample name; the listed tokens follow.
curl --header "Content-Type: application/json" --request GET 'http://192.168.0.1:9200/book_v2/_analyze?pretty=true' --data '
{
  "analyzer": "pinyin",
  "text": "張三丰"
}
'
"zhang","zsf","san","feng"

# Run pinyin_analyzer_1 (keep_original is enabled, so the input itself is listed as a token too).
curl --header "Content-Type: application/json" --request GET 'http://192.168.0.1:9200/book_v2/_analyze?pretty=true' --data '
{
  "analyzer": "pinyin_analyzer_1",
  "text": "張三丰"
}
'
"張三丰","zhang","zsf","san","feng"


# Run pinyin_analyzer_2 (separate first letters kept, full pinyin disabled).
curl --header "Content-Type: application/json" --request GET 'http://192.168.0.1:9200/book_v2/_analyze?pretty=true' --data '
{
  "analyzer": "pinyin_analyzer_2",
  "text": "張三丰"
}
'
"z","zsf","s","f"

# Run the "stconvert" analyzer on the sample name; the listed result follows.
curl --header "Content-Type: application/json" --request GET 'http://192.168.0.1:9200/book_v2/_analyze?pretty=true' --data '
{
  "analyzer": "stconvert",
  "text": "張三丰"
}
'
"張三豐"

# Run the custom "tsconvert" analyzer (convert_type t2s): the traditional input is
# returned in simplified characters.
curl --header "Content-Type: application/json" --request GET 'http://192.168.0.1:9200/book_v2/_analyze?pretty=true' --data '
{
  "analyzer": "tsconvert",
  "text": "張三豐"
}
'
"张三丰"

# Run autocomplete_analyzer (ik_smart tokenizer + edge_ngram filter, min_gram 1, max_gram 10).
# The input is given in simplified characters: the listed prefixes presuppose that
# ik_smart emits the whole name as one token.
curl --header "Content-Type: application/json" --request GET 'http://192.168.0.1:9200/book_v2/_analyze?pretty=true' --data '
{
  "analyzer": "autocomplete_analyzer",
  "text": "张三丰"
}
'
"张","张三","张三丰"

# Run custom_analyzer_1 (ik_smart tokenizer + pinyin filter with joined full pinyin).
# The input is given in simplified characters: the joined "zhangsanfeng" / "zsf" tokens
# presuppose that ik_smart emits the whole name as one token.
curl --header "Content-Type: application/json" --request GET 'http://192.168.0.1:9200/book_v2/_analyze?pretty=true' --data '
{
  "analyzer": "custom_analyzer_1",
  "text": "张三丰"
}
'
"z","zhang","zhangsanfeng","zsf","s","san","f","feng"

# Run custom_analyzer_2 (ik_max_word tokenizer + s2t filter): simplified input is split
# into words by IK and every token is then converted to traditional characters.
curl --header "Content-Type: application/json" --request GET 'http://192.168.0.1:9200/book_v2/_analyze?pretty=true' --data '
{
  "analyzer": "custom_analyzer_2",
  "text": "张三丰"
}
'
"張三豐","張三","三","豐"

# Run custom_analyzer_3 (ik_max_word tokenizer + t2s filter): the traditional input is
# tokenized and every token is then converted to simplified characters.
curl --header "Content-Type: application/json" --request GET 'http://192.168.0.1:9200/book_v2/_analyze?pretty=true' --data '
{
  "analyzer": "custom_analyzer_3",
  "text": "張三豐"
}
'
"张","三","丰"

驗證查詢

# Match query on the ik_max_word-analyzed "author" field, first 10 hits, with highlighting.
curl --header "Content-Type: application/json" --request GET 'http://192.168.0.1:9200/book_v2/_search?pretty=true' --data '
{
  "query": {
    "match": {
      "author": "張三丰"
    }
  },
  "from": 0,
  "size": 10,
  "highlight": {
    "pre_tags": ["<span style=\"color:red\">"],
    "post_tags": ["</span>"],
    "fields": {
      "author": {}
    }
  }
}
'

# Phrase query on the t2s sub-field.
curl --header "Content-Type: application/json" --request GET 'http://192.168.0.1:9200/book_v2/_search?pretty=true' --data '
{
  "query": {
    "match_phrase": {
      "author.t2s": "張三丰"
    }
  }
}
'

# Phrase query by full pinyin on the sub-field using the "pinyin" analyzer.
curl --header "Content-Type: application/json" --request GET 'http://192.168.0.1:9200/book_v2/_search?pretty=true' --data '
{
  "query": {
    "match_phrase": {
      "author.pinyin0": "zhang"
    }
  }
}
'

# Phrase query by full pinyin on the pinyin_analyzer_1 sub-field.
curl --header "Content-Type: application/json" --request GET 'http://192.168.0.1:9200/book_v2/_search?pretty=true' --data '
{
  "query": {
    "match_phrase": {
      "author.pinyin1": "zhang"
    }
  }
}
'

# Phrase query by the original characters on the pinyin_analyzer_1 sub-field.
curl --header "Content-Type: application/json" --request GET 'http://192.168.0.1:9200/book_v2/_search?pretty=true' --data '
{
  "query": {
    "match_phrase": {
      "author.pinyin1": "張三丰"
    }
  }
}
'

# Phrase query by first letters on the pinyin_analyzer_2 sub-field.
curl --header "Content-Type: application/json" --request GET 'http://192.168.0.1:9200/book_v2/_search?pretty=true' --data '
{
  "query": {
    "match_phrase": {
      "author.pinyin2": "zsf"
    }
  }
}
'

# Phrase query by first letters on the custom_analyzer_1 sub-field.
curl --header "Content-Type: application/json" --request GET 'http://192.168.0.1:9200/book_v2/_search?pretty=true' --data '
{
  "query": {
    "match_phrase": {
      "author.cus1": "zsf"
    }
  }
}
'

# Phrase query for a single character on the custom_analyzer_2 (s2t) sub-field.
curl --header "Content-Type: application/json" --request GET 'http://192.168.0.1:9200/book_v2/_search?pretty=true' --data '
{
  "query": {
    "match_phrase": {
      "author.cus2": "豐"
    }
  }
}
'

# Phrase query for a single character on the custom_analyzer_3 (t2s) sub-field.
curl --header "Content-Type: application/json" --request GET 'http://192.168.0.1:9200/book_v2/_search?pretty=true' --data '
{
  "query": {
    "match_phrase": {
      "author.cus3": "豐"
    }
  }
}
'

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章