系列文章:
- 从零学Elasticsearch系列——基础概念
- 从零学Elasticsearch系列——环境搭建
- 从零学Elasticsearch系列——使用kibana实现ES基本的操作
- 从零学Elasticsearch系列——深入搜索(Query、Filter、Aggregation)
- 从零学Elasticsearch系列——JAVA API操作
- 从零学Elasticsearch系列——集成中文分词器IK
- 从零学Elasticsearch系列——构建ES集群
- 从零学Elasticsearch系列——搭建ELK Nginx日志分析平台
参考资料:https://github.com/medcl/elasticsearch-analysis-ik
安装
[es@localhost root]$ cd /usr/elasticsearch-6.4.0/
[es@localhost elasticsearch-6.4.0]$ ./bin/elasticsearch-plugin install https://github.com/medcl/elasticsearch-analysis-ik/releases/download/v6.4.0/elasticsearch-analysis-ik-6.4.0.zip
-> Downloading https://github.com/medcl/elasticsearch-analysis-ik/releases/download/v6.4.0/elasticsearch-analysis-ik-6.4.0.zip
[=================================================] 100%
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@ WARNING: plugin requires additional permissions @
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
* java.net.SocketPermission * connect,resolve
See http://docs.oracle.com/javase/8/docs/technotes/guides/security/permissions.html
for descriptions of what these permissions allow and the associated risks.
Continue with installation? [y/N]y
-> Installed analysis-ik
[es@localhost elasticsearch-6.4.0]$ ll plugins/
total 0
drwxr-xr-x. 2 es es 229 Jan 3 10:04 analysis-ik
# 重新启动 ES 服务,使新安装的插件生效(建议先用 kill <pid> 发送 SIGTERM 让 ES 正常关闭,无效时再使用 kill -9)
[root@localhost ~]# jps
2673 Elasticsearch
46151 Jps
40921 PluginCli
[root@localhost ~]# kill -9 2673
[root@localhost ~]# cd /usr/elasticsearch-6.4.0/
[root@localhost elasticsearch-6.4.0]# su es
[es@localhost elasticsearch-6.4.0]$ bin/elasticsearch
测试
创建测试索引
PUT /news
创建类型映射(analyzer 使用 ik_max_word,会将文本做最细粒度的拆分)
POST /news/international/_mapping
{
"properties": {
"content":{
"type": "text",
"analyzer": "ik_max_word",
"search_analyzer": "ik_max_word"
}
}
}
插入测试数据
POST /news/international/_bulk
{"index":{"_id":1}}
{"content":"美国留给伊拉克的是个烂摊子吗"}
{"index":{"_id":2}}
{"content":"公安部:各地校车将享最高路权"}
{"index":{"_id":3}}
{"content":"中韩渔警冲突调查:韩警平均每天扣1艘中国渔船"}
{"index":{"_id":4}}
{"content":"中国驻洛杉矶领事馆遭亚裔男子枪击 嫌犯已自首"}
根据关键词高亮查询
GET /news/international/_search
{
"query": {
"match": {
"content": "中国"
}
},
"highlight": {
"fields": {"content": {}}
}
}
-------------------------------------------------------------------
{
"took": 177,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 0.6489038,
"hits": [
{
"_index": "news",
"_type": "international",
"_id": "4",
"_score": 0.6489038,
"_source": {
"content": "中国驻洛杉矶领事馆遭亚裔男子枪击 嫌犯已自首"
},
"highlight": {
"content": [
"<em>中国</em>驻洛杉矶领事馆遭亚裔男子枪击 嫌犯已自首"
]
}
},
{
"_index": "news",
"_type": "international",
"_id": "3",
"_score": 0.2876821,
"_source": {
"content": "中韩渔警冲突调查:韩警平均每天扣1艘中国渔船"
},
"highlight": {
"content": [
"中韩渔警冲突调查:韩警平均每天扣1艘<em>中国</em>渔船"
]
}
}
]
}
}
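IK词典配置(通过 IKAnalyzer.cfg.xml 的 ext_dict 指定自定义扩展词典;mydict.dic 的路径相对于该配置文件所在目录,文件内容每行一个词,使用 UTF-8 编码,本地词典修改后需重启 ES 才能生效)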
[root@localhost analysis-ik]# vim /usr/elasticsearch-6.4.0/config/analysis-ik/IKAnalyzer.cfg.xml
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
<properties>
<comment>IK Analyzer 扩展配置</comment>
<!--用户可以在这里配置自己的扩展字典 -->
<entry key="ext_dict">mydict.dic;</entry>
<!--用户可以在这里配置自己的扩展停止词字典-->
<entry key="ext_stopwords"></entry>
<!--用户可以在这里配置远程扩展字典 -->
<!--<entry key="remote_ext_dict">location</entry>-->
<!--用户可以在这里配置远程扩展停止词字典-->
<!--<entry key="remote_ext_stopwords">http://xxx.com/xxx.dic</entry>-->
</properties>
自定义扩展词测试
POST /_analyze
{
"analyzer": "ik_max_word",
"text": ["抖音视频真的好火啊"]
}
# 未添加扩展词 效果如下
---------------------------------------------------
{
"tokens": [
{
"token": "抖",
"start_offset": 0,
"end_offset": 1,
"type": "CN_CHAR",
"position": 0
},
{
"token": "音视频",
"start_offset": 1,
"end_offset": 4,
"type": "CN_WORD",
"position": 1
},
......
]
}
# 添加扩展词(如在 mydict.dic 中加入"抖音"、"腰子姐")并重启 ES 后 效果如下
# 注:下面的输出来自一条更长的测试文本(其中包含"腰子姐"),因此偏移量超出了上文 9 个字的示例文本
---------------------------------------------------
{
"tokens": [
{
"token": "抖音",
"start_offset": 0,
"end_offset": 2,
"type": "CN_WORD",
"position": 0
},
......
{
"token": "腰子姐",
"start_offset": 17,
"end_offset": 20,
"type": "CN_WORD",
"position": 8
}
]
}