一、Lucene介绍
1.1功能介绍
Lucene是apache下的一个开源的全文检索引擎工具包。
全文检索就是先分词创建索引,再执行搜索的过程。
分词:就是将一段文字分成一个个单词
全文检索就是将一段文字分成一个个单词,再用这些单词去查询数据。
1.2Lucene实现全文检索的流程
全文检索的流程分为两大部分:索引流程、搜索流程。
索引流程:采集数据--->构建文档对象--->创建索引(将文档写入索引库)。
搜索流程:创建查询--->执行搜索--->渲染搜索结果。
二、配置
导入4个包,最后一个为MySQL包,因为此次数据是从MySQL数据库读取出来再由lucene处理
核心包:lucene-core-4.10.3.jar
分析器通用包:lucene-analyzers-common-4.10.3.jar
查询解析器包:lucene-queryparser-4.10.3.jar
mysql5.1驱动包:mysql-connector-java-5.1.7-bin.jar
三、使用
3.1
此步骤主要确保程序能从数据库中读取数据,这里使用JDBC技术
创建如下4个类:UBook是实体类(数据模板),BookDao存放与数据库交换数据的程序,DButil存放数据库连接信息,Text是我们的测试程序
package util;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
/**
 * Helper for opening JDBC connections to the MySQL database used by this example.
 */
public class DButil {
    /**
     * Loads the MySQL JDBC driver and opens a new connection to the
     * {@code lucenet} database on {@code localhost:3306}.
     *
     * <p>A missing driver class is reported as an {@link SQLException} (with the
     * {@link ClassNotFoundException} as its cause) instead of being printed and
     * turned into a {@code null} return value, so callers never have to
     * null-check the result.
     *
     * @return a newly opened connection; the caller is responsible for closing it
     * @throws SQLException if the driver class cannot be loaded or the
     *                      connection cannot be established
     */
    public static Connection getConnection() throws SQLException {
        // 1. driver class name
        String drive = "org.gjt.mm.mysql.Driver";
        // 2. JDBC URL
        String url = "jdbc:mysql://localhost:3306/lucenet";
        // 3. user name
        String user = "root";
        // 4. password
        String password = "1234";
        try {
            Class.forName(drive);
        } catch (ClassNotFoundException e) {
            // Surface the configuration problem to the caller and keep the cause.
            throw new SQLException("JDBC driver not found on classpath: " + drive, e);
        }
        return DriverManager.getConnection(url, user, password);
    }
}
package dao;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.TextField;
import pdjo.UBook;
import util.DButil;
/**
 * Data-access object that reads books from the {@code book} table and converts
 * them into Lucene {@link Document}s.
 */
public class BookDao {
    /**
     * Reads every row of the {@code book} table.
     *
     * <p>The connection, statement and result set are managed with
     * try-with-resources so they are released even when the query or the row
     * mapping fails.
     *
     * @return all books found, in the order returned by the query; never {@code null}
     * @throws SQLException if no connection can be obtained or the query fails
     */
    public List<UBook> getAll() throws SQLException {
        List<UBook> books = new ArrayList<>();
        // 1. obtain the connection
        try (Connection connection = DButil.getConnection()) {
            if (connection == null) {
                throw new SQLException("Could not obtain a database connection");
            }
            // 2. create the statement, 3. run the query
            try (Statement statement = connection.createStatement();
                 ResultSet rs = statement.executeQuery("select * from book")) {
                while (rs.next()) {
                    UBook uBook = new UBook();
                    uBook.setBookid(rs.getInt("id"));
                    uBook.setName(rs.getString("name"));
                    uBook.setPic(rs.getString("pic"));
                    uBook.setPrice(rs.getFloat("price"));
                    uBook.setDescription(rs.getString("description"));
                    // collect the mapped row
                    books.add(uBook);
                }
            }
        }
        return books;
    }

    /**
     * Converts each book into a Lucene document with the stored text fields
     * {@code id}, {@code name}, {@code price}, {@code pic} and {@code description}.
     *
     * <p>A property whose value is {@code null} is left out of the document
     * rather than causing a {@link NullPointerException}.
     *
     * @param books the books to convert
     * @return one document per book, in the same order
     */
    public List<Document> getDocument(List<UBook> books) {
        List<Document> docList = new ArrayList<>();
        for (UBook book : books) {
            Document doc = new Document();
            addTextField(doc, "id", book.getBookid());
            addTextField(doc, "name", book.getName());
            addTextField(doc, "price", book.getPrice());
            addTextField(doc, "pic", book.getPic());
            addTextField(doc, "description", book.getDescription());
            docList.add(doc);
        }
        return docList;
    }

    /** Adds a stored {@link TextField} to {@code doc} unless {@code value} is {@code null}. */
    private static void addTextField(Document doc, String fieldName, Object value) {
        if (value != null) {
            doc.add(new TextField(fieldName, value.toString(), Store.YES));
        }
    }
}
package pdjo;
/**
 * Plain data holder for one book record.
 */
public class UBook {
    /** Book id. */
    private Integer bookid;
    /** Book name. */
    private String name;
    /** Book price. */
    private Float price;
    /** Book picture. */
    private String pic;
    /** Book description. */
    private String description;

    /** Creates a book with every property unset ({@code null}). */
    public UBook() {
    }

    /**
     * Creates a fully populated book.
     *
     * @param bookid      book id
     * @param name        book name
     * @param price       book price
     * @param pic         book picture
     * @param description book description
     */
    public UBook(Integer bookid, String name, Float price, String pic, String description) {
        this.bookid = bookid;
        this.name = name;
        this.price = price;
        this.pic = pic;
        this.description = description;
    }

    /** @return the book id */
    public Integer getBookid() {
        return this.bookid;
    }

    /** @param bookid the book id to set */
    public void setBookid(Integer bookid) {
        this.bookid = bookid;
    }

    /** @return the book name */
    public String getName() {
        return this.name;
    }

    /** @param name the book name to set */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the book price */
    public Float getPrice() {
        return this.price;
    }

    /** @param price the book price to set */
    public void setPrice(Float price) {
        this.price = price;
    }

    /** @return the book picture */
    public String getPic() {
        return this.pic;
    }

    /** @param pic the book picture to set */
    public void setPic(String pic) {
        this.pic = pic;
    }

    /** @return the book description */
    public String getDescription() {
        return this.description;
    }

    /** @param description the book description to set */
    public void setDescription(String description) {
        this.description = description;
    }

    /** Renders all five properties as {@code UBook [bookid=..., name=..., ...]}. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("UBook [");
        text.append("bookid=").append(this.bookid);
        text.append(", name=").append(this.name);
        text.append(", price=").append(this.price);
        text.append(", pic=").append(this.pic);
        text.append(", description=").append(this.description);
        return text.append("]").toString();
    }
}
package textpage;
import static org.junit.Assert.*;
import java.io.File;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;
import dao.BookDao;
import pdjo.UBook;
public class Text {
@Test
public void testName() throws Exception {
BookDao bookDao = new BookDao();
List<UBook> books = bookDao.getAll();
for (UBook uBook : books) {
System.out.println("id="+uBook.getBookid()+"name="+uBook.getName());
}
}
//创建索引库
@Test
public void testLcene() throws Exception {
BookDao dao = new BookDao();
//分析文档,对文档中的field域进行分词
Analyzer analyzer = new StandardAnalyzer();
//创建索引
//1)创建索引库目录
Directory directory = FSDirectory.open(new File("C:\\lucene\\123"));
//2创建IndexWriterConfig对象
IndexWriterConfig config = new IndexWriterConfig(Version.LATEST, analyzer);
//3.创建IndexWriter对象
IndexWriter writer = new IndexWriter(directory, config);
//4.通过IndexWriter对象添加文档对象
writer.addDocuments(dao.getDocument(dao.getAll()));
//5.关闭IndexWriter
writer.close();
System.out.println("索引库创建成功");
}
//执行搜索
@Test
public void testLucene1() throws Exception {
//1.创建查询(Query对象,即构建查询对象)
//创建分析器
Analyzer analyzer = new StandardAnalyzer();
QueryParser queryParser = new QueryParser("name", analyzer);
Query query = queryParser.parse("name:lucene");
//2.读取索引库
//2.1指定搜索目录directory
Directory directory = FSDirectory.open(new File("C:\\lucene\\123"));
//2.2读取索引
IndexReader reader = DirectoryReader.open(directory);
//2.3索引库查询
IndexSearcher searcher = new IndexSearcher(reader);
//3.获得记录
//3.1参数一,查询对象;参数二,指定返回的最大记录
TopDocs topDocs = searcher.search(query, 10);
//获得返回记录
ScoreDoc [] scoreDocs = topDocs.scoreDocs;
for (ScoreDoc scoreDoc : scoreDocs) {
//获得id
int docID = scoreDoc.doc;
Document doc = searcher.doc(docID);
System.out.println("索引ID"+docID);
System.out.println("编号"+doc.get("id"));
System.out.println("名称"+doc.get("name"));
System.out.println("价格"+doc.get("price"));
System.out.println("图片"+doc.get("pic"));
}
reader.close();
}
}
最后一个代码段为Text程序
数据库内容
第一个程序为测试程序,测试与数据库连接,成功连接输出如下(根据数据库中数据不同而不同)
第二个为创建索引库程序,生成如下索引库
第三个程序为搜索程序,即搜索name域中包含lucene一词的记录
Lucene自带的StandardAnalyzer只能按单词切分英文;对于汉字,只会一个字一个字地切分并搜索