環境
分詞插件
1. 中文: https://github.com/medcl/elasticsearch-analysis-ik/releases/
2. 拼音: https://github.com/medcl/elasticsearch-analysis-pinyin/releases/
3. 簡體/繁體轉換: https://github.com/medcl/elasticsearch-analysis-stconvert/releases/
安裝插件
1. 下載 es 對應版本的插件，解壓，copy to ES_HOME/plugins
2. restart es
ik 分詞器使用
GET /_analyze
{
"text":"中華人民共和國國徽",
"analyzer":"ik_smart"
}
GET /_analyze
{
"text":"中華人民共和國國徽",
"analyzer":"ik_max_word"
}
pinyin分詞器使用
GET /_analyze
{
"text": "劉德華",
"analyzer": "pinyin"
}
簡體/繁體轉換 分詞
PUT /stconvert/
{
"settings" : {
"analysis" : {
"analyzer" : {
"tsconvert" : {
"tokenizer" : "tsconvert"
}
},
"tokenizer" : {
"tsconvert" : {
"type" : "stconvert",
"delimiter" : "#",
"keep_both" : false,
"convert_type" : "t2s"
}
},
"filter": {
"tsconvert" : {
"type" : "stconvert",
"delimiter" : "#",
"keep_both" : false,
"convert_type" : "t2s"
}
},
"char_filter" : {
"tsconvert" : {
"type" : "stconvert",
"convert_type" : "t2s"
}
}
}
}
}
GET stconvert/_analyze
{
"tokenizer" : "keyword",
"filter" : ["lowercase"],
"char_filter" : ["tsconvert"],
"text" : "國際國際"
}
ik分詞 和 pinyin分詞 組合使用
PUT /my_index
{
"settings": {
"analysis": {
"analyzer": {
"ik_smart_pinyin": {
"type": "custom",
"tokenizer": "ik_smart",
"filter": ["my_pinyin", "word_delimiter"]
},
"ik_max_word_pinyin": {
"type": "custom",
"tokenizer": "ik_max_word",
"filter": ["my_pinyin", "word_delimiter"]
}
},
"filter": {
"my_pinyin": {
"type" : "pinyin",
"keep_separate_first_letter" : true,
"keep_full_pinyin" : true,
"keep_original" : true,
"limit_first_letter_length" : 16,
"lowercase" : true,
"remove_duplicated_term" : true
}
}
}
}
}
PUT /my_index/my_type/_mapping
{
"my_type":{
"properties": {
"id":{
"type": "integer"
},
"name":{
"type": "text",
"analyzer": "ik_smart_pinyin"
}
}
}
}
POST /my_index/my_type/_bulk
{ "index": { "_id":1}}
{ "name": "張三"}
{ "index": { "_id": 2}}
{ "name": "張四"}
{ "index": { "_id": 3}}
{ "name": "李四"}
GET /my_index/my_type/_search
{
"query": {
"match": {
"name": "zhang"
}
}
}
GET /my_index/my_type/_search
{
"query": {
"match_phrase": {
"name": "zs"
}
}
}