再次强调,我安装的Elasticsearch 版本是 7.8.0 ,C# 操作 Elasticsearch 的驱动有 NEST、Elasticsearch.net 、PlainElastic.Net 等,当然要注意版本是否匹配,
PlainElastic.Net 是比较旧的操作 Elasticsearch 的方式 ,但是看到不少园友用这个,所以本篇也用 PlainElastic.Net ,稍后有时间会给出 NEST 、 Elasticsearch.net 的 Demo
PlainElastic.Net 的参考文档、使用方法可以看 https://github.com/Yegoroff/PlainElastic.Net
完整的 Demo 见 https://github.com/fhrddx/ES_Query ,前端 UI 框架是 aceadmin ,见 http://ace.jeka.by/
先看一下效果图
1、首先是引用 PlainElastic.Net ,然后封装 ElasticSearchHelper
/// <summary>
/// Singleton wrapper around a PlainElastic.Net <c>ElasticConnection</c>
/// that targets the Elasticsearch node at localhost:9200.
/// </summary>
public class ElasticSearchHelper
{
    // The single shared instance. The identifier spelling is kept as-is
    // because callers reference it by this name.
    public static readonly ElasticSearchHelper Intance = new ElasticSearchHelper();

    // Connection used by every helper method for HTTP calls to the node.
    private ElasticConnection Client = new ElasticConnection("localhost", 9200);

    // Private so that the only instance is the one exposed through Intance.
    private ElasticSearchHelper()
    {
    }
}
2、在 ElasticSearchHelper 设定字段mapping 并生成索引库 db_student_test1
/// <summary>
/// Creates the index <c>db_student_test1</c> with an explicit field mapping
/// for student documents.
/// </summary>
/// <returns>
/// <c>true</c> when the response deserializes to a command result whose
/// <c>acknowledged</c> flag is set; otherwise <c>false</c>.
/// </returns>
public bool BuildStudentMapping()
{
    // Field definitions: "name" uses the standard analyzer, "school" and
    // "desc" use the ik_max_word analyzer, the remaining fields are integers.
    var properties = new
    {
        name = new { type = "text", analyzer = "standard" },
        school = new { type = "text", analyzer = "ik_max_word" },
        desc = new { type = "text", analyzer = "ik_max_word" },
        @class = new { type = "integer" },
        chinese = new { type = "integer" },
        english = new { type = "integer" },
        math = new { type = "integer" }
    };
    var indexDefinition = new { mappings = new { properties } };

    var serializer = new JsonNetSerializer();
    string body = serializer.Serialize(indexDefinition);

    // PUT /db_student_test1 with the mapping as the request body.
    OperationResult response = Client.Put("db_student_test1", body);
    CommandResult ack = serializer.ToCommandResult(response.Result);

    return ack != null && ack.acknowledged;
}
3、在 ElasticSearchHelper 中,给予索引库 db_student_test1 一个别名 student_test1
/// <summary>
/// Registers the alias <c>student_test1</c> for the index
/// <c>db_student_test1</c>.
/// </summary>
/// <returns>
/// <c>true</c> when the response deserializes to a command result whose
/// <c>acknowledged</c> flag is set; otherwise <c>false</c>.
/// </returns>
public bool Alias()
{
    // PUT /db_student_test1/_alias/student_test1 (no request body).
    OperationResult response = Client.Put("db_student_test1/_alias/student_test1");
    CommandResult ack = new JsonNetSerializer().ToCommandResult(response.Result);

    return ack != null && ack.acknowledged;
}
4、创建索引文档
/// <summary>
/// Indexes one already-serialized JSON document under the given id.
/// </summary>
/// <param name="indexName">Target index (or alias) name.</param>
/// <param name="id">Document id used in the request path.</param>
/// <param name="jsonDocument">The document body as a JSON string.</param>
/// <returns>The response deserialized into an <c>IndexResult</c>.</returns>
public IndexResult CreateIndex(string indexName, string id, string jsonDocument)
{
    var serializer = new JsonNetSerializer();

    // Note: the Elasticsearch version used here is 7.8.0, where the type can
    // only be the single default one, "_doc".
    string cmd = new IndexCommand(indexName, "_doc", id);

    // Sets the timeout on the shared connection before sending the request.
    Client.Timeout = 30000;
    OperationResult result = Client.Put(cmd, jsonDocument);

    return serializer.ToIndexResult(result.Result);
}

/// <summary>
/// Serializes <paramref name="document"/> to JSON and indexes it under the
/// given id by delegating to the string overload.
/// </summary>
/// <param name="indexName">Target index (or alias) name.</param>
/// <param name="id">Document id used in the request path.</param>
/// <param name="document">Object to serialize as the document body.</param>
/// <returns>The response deserialized into an <c>IndexResult</c>.</returns>
public IndexResult CreateIndex(string indexName, string id, object document)
{
    string jsonDocument = new JsonNetSerializer().Serialize(document);
    return CreateIndex(indexName, id, jsonDocument);
}
5、随机生成测试数据,网上下载一篇txt的长篇小说,随机截取字段,然后写入Elasticsearch
测试数据如下:
// Static seed data used to build randomized student test documents.
//   xing    - one long string of surname characters; individual characters are
//             meant to be picked by index. NOTE(review): the string also contains
//             two-character surnames (e.g. 欧阳, 诸葛), which per-character
//             indexing would split - confirm this is acceptable.
//   name    - one long string of given-name characters.
//   school  - array of university names.
//   content - multi-line verbatim string holding novel text used as the source
//             for random excerpts. NOTE(review): the "...... ......" inside the
//             literal looks like an elision of the full text - confirm against
//             the complete demo before relying on its length.
public class TestData { public static string xing = @"王李张刘陈杨赵黄周吴徐孙胡朱高林何郭马罗梁宋郑谢韩唐冯于董萧程曹袁邓许傅沈曾彭吕苏卢蒋蔡贾丁魏薛叶阎余潘杜戴夏钟汪田任姜范方石姚谭廖邹熊金陆郝孔白崔康毛邱秦江史顾侯邵孟龙万段雷钱汤尹黎易常武乔贺赖龚文庞樊兰殷施陶洪翟安颜倪严牛温芦季俞章鲁葛伍韦申尤毕聂丛焦向柳邢路岳齐沿梅莫庄辛管祝左涂谷祁时舒耿牟卜肖詹关苗凌费纪靳盛童欧甄项曲成游阳裴席卫查屈鲍位覃霍翁隋植甘景薄单包司柏宁柯阮桂闵欧阳解强柴华车冉房边净阴闫佘练骆付代麦容悲初瞿褚班全名井米谈宫虞奚佟符蒲穆漆卞东储党从艾苻厉岑燕吉冷仇伊首郁娄楚邝历狄简胥连帅封危支原滕苑信索栗官沙池藏师国巩刁茅杭巫居窦皮戈麻饶习巴旷宗荆荣孝蔺廉员西寇刃见底区郦卓琚续朴蒙敖花应喻冀尚顿菅嵇雒弓忻权谌卿扈海冼伦鹿宿山桑裘达么智宣尉迟东方幺郎农戚屠楼步鞠仲尉蓝招攀栾籍寿邬荚税逄加勾由福缑钦鲜于但邸逢况鄢古乐斯钮盖旦毅邰哈鄂商英迟仝亓玄黑腾晏禹诸苟湛殳亢奉占闻粟种匡宾劳申屠伏过水真宇巢计羌相辜展丑银丰矫上昝绳臧舍郅布糜乌衣来恒那满门司徒皋旺公言藤释尧缪干阚靖渠契晋六束良鹗贝邴沃竺扬励归上官荃焉多都果郜隆诸葛令狐慕礼祖翦力朗撖修呼富明站虢冶茹禚笪云肇平弋候尔姬宝畅冒邾延禅浦敬颉南巍补"; public static string name = @"帆栋祜权锟坤允骞谛初盛炳初泽荣喆恒鹤礼华帝宇中鑫彬槐禧允翱鹏皓中伟炳皓槐帆芃欣鑫振杰诚锟潍吉轩福宇初柏芃翰浩峰延帆欣帆奇郁烁卓仕吉帝潍钊杰鑫星谛鑫铭锋沛芃泽禄勇峰欣延鹤郁信侠翰邦寅轩泽哲佑福翱恒文枫澄栋翰中震杞斌凯锦升逸延腾谛权盛弘烁俊强博禄中欣权浩阳裕延盛平畅沛吉强骏起华炳腾柏佑畅杰凯鸿斌加振晨沛祥祜盛濡彬成弘天福锦颖嘉茜芸格美漫慧漫妍钰琪玥沛玥鑫洁岚采曼珍雪昕婷碧弦雪洁馨昕香弦帆芳菲楠俊月珊函蔚帆灵灵莲优蔚碧文蕾娅林婧妮婷薇馨淑惠杉美栀怡薇琪曦云漫瑶韵楠妮颖妮杉媛诗芳菲锦锦蕾芸欢珍岚鹤莉优云舒舒璇慧依菡雅妍楠雅慧灵阳漫珠帆媛可雅欣鑫妮雯霞柔芳芝琳彩冰林媛柔初倩玉冰薇洁妍洁璐采彩颖呈雪云欢琪璟紫静蓓薇欢薇柔晨萱云歆鑫月阳娅媛露露琳"; public static string[] school = new string[] { "中山大学", "暨南大学", "汕头大学", "华南理工大学", "华南农业大学", "广东海洋大学", "广州医科大学", "广州中医药大学", "华南师范大学", "韶关学院", "深圳大学", "广东财经大学", "广东工业大学", "东莞理工学院", "南方科技大学", "香港中文大学", "广州商学院", "上海交通大学", "同济大学", "复旦大学", "上海大学", "上海财经大学", "北京大学", "清华大学", "北京邮电大学", "中国人民大学", "北京理工大学" }; public static string content = @"第一回 甄士隐梦幻识通灵 贾雨村风尘怀闺秀() 此开卷第一回也.作者自云:因曾历过一番梦幻之后,故将真事隐去,而借”通灵”之说,撰此《石头记》一书也.故曰”甄士隐”云云.但书所记何事何人?自又云:“今风尘碌碌,一事无成,忽念及当日所有之女子,一一细考较去,觉其行止见识,皆出于我之上.何我堂堂须眉,诚不若彼裙钗哉?实愧则有余,悔又无益之大无可如何之日也!当此,则自欲将已往所赖天恩祖德,锦衣纨绔之时,饫甘餍肥之日,背父兄教育之恩,负师友规谈之德,以至今日一技无成,半生潦倒之罪,编述一集,以告天下人:我之罪固不免,然闺阁本自历历有人,万不可因我之不肖,自护己短,一并使其泯灭也.虽今日之茅椽蓬牖,瓦灶绳床,其晨夕风露,阶柳庭花,亦未有妨我之襟怀笔墨者.虽我未学,下笔无,又何妨用假语村言,敷演出一段故事来,亦可使闺阁昭传,复可悦世之目,破人愁闷,不亦宜乎?”故曰”贾雨村”云云. ...... ......
由来同一梦,休笑世人痴!"; }
随机生成学生记录的代码如下:
// Generates randomized student documents in parallel and writes each one
// to the index "db_student_test1".
int xing_length = TestData.xing.Length;
int name_length = TestData.name.Length;
int school_length = TestData.school.Length;
int content_length = TestData.content.Length;

// Cap the number of concurrent workers at 4.
ParallelOptions _po = new ParallelOptions();
_po.MaxDegreeOfParallelism = 4;

Parallel.For(0, 100000000, _po, c =>
{
    // r is seeded with the loop index; r2 is unseeded, so the values it
    // contributes are not determined by c.
    Random r = new Random(c);
    Random r2 = new Random();
    try
    {
        // Take a 20-character excerpt and strip line breaks from it.
        // The previous code replaced the literal text "/r/n" (forward
        // slashes), which never matches a real line break, so newlines
        // were left inside desc. CR and LF are removed separately so that
        // an excerpt cut in the middle of a CRLF pair is cleaned as well.
        string desc = TestData.content
            .Substring(r.Next(0, content_length - 700), 20)
            .Replace("\r", string.Empty)
            .Replace("\n", string.Empty)
            .Trim();

        Student model = new Student()
        {
            // One surname character plus two given-name characters taken
            // at an even offset.
            name = TestData.xing[r.Next(0, xing_length)].ToString()
                + TestData.name.Substring(r.Next(0, name_length / 2) * 2, 2),
            school = TestData.school[r.Next(0, school_length)],
            chinese = r.Next(25, 80) + r2.Next(0, 20),
            math = r.Next(15, 60) + r2.Next(0, 40),
            english = r.Next(21, 70) + r2.Next(0, 30),
            @class = c,
            desc = desc + TestData.school[r2.Next(0, school_length)]
        };

        // A fresh GUID is used as the document id.
        ElasticSearchHelper.Intance.CreateIndex("db_student_test1", Guid.NewGuid().ToString(), model);
    }
    catch (Exception ex)
    {
        // Best effort: report the failure and continue with the next record.
        Console.Write(ex.ToString());
    }
});
我总共跑了大概是5千万条数据,可以打开 head 插件或者是 Kibana 看到数据总共有多少
Kibana 如下
6、单个词语查询,例如查询满足以下条件的文档:(1) desc 包含 “黛玉” ;(2)chinese、math、english 都大于90;(3)分页,取前10条记录;(4)关键词高亮;(5)排序按照语文、数学、英语倒序
controller 代码如下
/// <summary>
/// Runs the single-term search for <paramref name="key"/>, takes the first
/// 10 hits, and renders them together with the elapsed time.
/// </summary>
/// <param name="key">Search keyword; defaults to "黛玉".</param>
/// <returns>The default view for this action with the search result as its model.</returns>
public ActionResult Index(string key = "黛玉")
{
    Stopwatch sw = new Stopwatch();
    sw.Restart();

    // key?.Trim() instead of key.Trim(): a null key is passed through to
    // Term, which has its own null/empty guard and returns null, rather
    // than throwing NullReferenceException here.
    var model = ElasticSearchHelper.Intance.Term(key?.Trim(), 0, 10);

    sw.Stop();
    ViewBag.Message = $"共耗时{sw.ElapsedMilliseconds}毫秒";
    return View(model);
}
ElasticsearchHelper 代码如下
/// <summary>
/// Searches the alias <c>student_test1</c> for documents whose <c>desc</c>
/// field matches <paramref name="key"/> as a term query, combined with
/// range filters on the <c>chinese</c>, <c>math</c> and <c>english</c> fields.
/// Hits are sorted by chinese, math, english (all descending) and
/// <c>desc</c> is highlighted.
/// </summary>
/// <param name="key">Keyword to look up; surrounding whitespace is trimmed.</param>
/// <param name="from">Offset of the first hit to return.</param>
/// <param name="size">Number of hits to return.</param>
/// <returns>
/// The deserialized search response, or <c>null</c> when
/// <paramref name="key"/> is null or empty.
/// </returns>
public ElasticsearchResult<Student> Term(string key, int from = 0, int size = 10)
{
    if (string.IsNullOrEmpty(key))
    {
        return null;
    }

    string keyword = key.Trim();
    string searchPath = new SearchCommand("student_test1", "_doc");

    // Markup wrapped around each highlighted fragment.
    const string highlightOpen = "<span class=\"label label-sm label-danger\">";
    const string highlightClose = "</span>";

    var requestBody = new QueryBuilder<Student>()
        .Query(q => q.Bool(b => b.Must(must => must
            .Term(term => term.Field("desc").Value(keyword))
            .Range(range => range.Field("chinese").From("90").To("100"))
            .Range(range => range.Field("math").Gt("90"))
            .Range(range => range.Field("english").Gt("90")))))
        .From(from)
        .Size(size)
        .Sort(order => order
            .Field("chinese", SortDirection.desc)
            .Field("math", SortDirection.desc)
            .Field("english", SortDirection.desc))
        .Highlight(hl => hl
            .PreTags(highlightOpen)
            .PostTags(highlightClose)
            .Fields(field => field.FieldName("desc").Order(HighlightOrder.score)))
        .Build();

    string responseJson = Client.Post(searchPath, requestBody);
    return new JsonNetSerializer().Deserialize<ElasticsearchResult<Student>>(responseJson);
}
效果是
7、语句匹配查询,关键词是“黛玉”,当然是可以查出来,但是如果用户知道红楼梦有个情节,是关于大观园里众人举办螃蟹宴,作诗玩乐的,想把相关文段查询出来,这时用户输入的关键词是 “宝玉黛玉螃蟹宴作诗” ,这时候会查询出什么呢?
首先改一下逻辑,controller 代码为
/// <summary>
/// Runs the query-string search for <paramref name="key"/>, takes the first
/// 10 hits, and renders them with the shared ES index view together with
/// the elapsed time.
/// </summary>
/// <param name="key">Search phrase; defaults to "宝玉黛玉螃蟹宴作诗".</param>
/// <returns>The view <c>~/Views/ES/Index.cshtml</c> with the search result as its model.</returns>
public ActionResult Query(string key = "宝玉黛玉螃蟹宴作诗")
{
    Stopwatch sw = new Stopwatch();
    sw.Restart();

    // key?.Trim() instead of key.Trim(): a null key is passed through to
    // the helper, which has its own null/empty guard and returns null,
    // rather than throwing NullReferenceException here.
    var model = ElasticSearchHelper.Intance.Query(key?.Trim(), 0, 10);

    sw.Stop();
    ViewBag.Message = $"共耗时{sw.ElapsedMilliseconds}毫秒";
    return View("~/Views/ES/Index.cshtml", model);
}
ElasticsearchHelper 代码如下
/// <summary>
/// Searches the alias <c>student_test1</c> with a query-string query on the
/// <c>desc</c> field, combined with range filters on the <c>chinese</c>,
/// <c>math</c> and <c>english</c> fields. No explicit sort is applied and
/// <c>desc</c> is highlighted.
/// </summary>
/// <param name="key">Search phrase; surrounding whitespace is trimmed.</param>
/// <param name="from">Offset of the first hit to return.</param>
/// <param name="size">Number of hits to return.</param>
/// <returns>
/// The deserialized search response, or <c>null</c> when
/// <paramref name="key"/> is null or empty.
/// </returns>
public ElasticsearchResult<Student> Query(string key, int from = 0, int size = 10)
{
    if (string.IsNullOrEmpty(key))
    {
        return null;
    }

    string phrase = key.Trim();
    string searchPath = new SearchCommand("student_test1", "_doc");

    // Markup wrapped around each highlighted fragment.
    const string highlightOpen = "<span class=\"label label-sm label-danger\">";
    const string highlightClose = "</span>";

    var requestBody = new QueryBuilder<Student>()
        .Query(q => q.Bool(b => b.Must(must => must
            // A match query could be used here as well; worth trying.
            .QueryString(qs => qs.DefaultField("desc").Query(phrase))
            .Range(range => range.Field("chinese").From("90").To("100"))
            .Range(range => range.Field("math").Gt("90"))
            .Range(range => range.Field("english").Gt("90")))))
        .From(from)
        .Size(size)
        // Deliberately no sort on the score fields (chinese/math/english):
        // without one, ES orders hits by how well they match the keywords,
        // so the best matches should come first.
        .Highlight(hl => hl
            .PreTags(highlightOpen)
            .PostTags(highlightClose)
            .Fields(field => field.FieldName("desc").Order(HighlightOrder.score)))
        .Build();

    string responseJson = Client.Post(searchPath, requestBody);
    return new JsonNetSerializer().Deserialize<ElasticsearchResult<Student>>(responseJson);
}
查询结果是
可以看到,ES 会把 “宝玉黛玉螃蟹宴作诗” 进行分词,然后进行文本匹配。
PlainElastic.Net 是比较旧的版本了,不是很适合 ES 7.8.0,建议改用 NEST 或者 Elasticsearch.net;我这里是参考一些园友的做法,写个 Demo 出来测试一下。
(未完,ES 聚合统计的,待续)