之前我們已經將索引的創建基本說明了,下面我們看搜索的相關方法(工程 lucene_searcher01)。
一、精確搜索 TermQuery
這裡我們先給出相關代碼:
SearcherUtil.java
package cn.itcast.searcher;
import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
/**
 * Self-contained search demo: builds a six-document in-memory index of sample
 * e-mail records and offers query helpers that print the matching documents.
 */
public class SearcherUtil {
    private Directory directory;
    private IndexReader reader;
    private String[] ids = { "1", "2", "3", "4", "5", "6" };
    private String[] emails = { "aa@qq.com", "bb@sina.edu", "cc@yahu.org",
            "ss@sina.com", "dd@gmail.com", "ee@163.com" };
    private String[] content = { "welcom to visited the space,I like football",
            "hello boy, i like someone", "come on baby", "first blood",
            "I like football,I like football",
            "my girlfriend is so beatiful, every body like game" };
    private int[] attaches = { 2, 5, 6, 5, 8, 4 };
    private String[] names = { "tom", "jack", "goudan", "alibaba", "jerry", "kitty" };
    private Date[] dates = null;
    // Boost per e-mail domain. Nothing in this class populates it, so index()
    // falls back to 0.5f for every document.
    private Map<String, Float> scores = new HashMap<String, Float>();

    /** Creates the in-memory directory, prepares the sample dates and builds the index. */
    public SearcherUtil(){
        directory = new RAMDirectory();
        setDates();
        index();
    }

    /**
     * Returns a searcher over the current index, reopening the shared reader
     * when the index has changed since it was last opened.
     *
     * @return a new searcher, or {@code null} if the reader could not be opened
     */
    public IndexSearcher getSearcher(){
        try {
            if (reader == null) {
                reader = IndexReader.open(directory);
            }else{
                IndexReader tr = IndexReader.openIfChanged(reader);
                if(tr != null){
                    reader.close();
                    reader = tr;
                }
            }
            return new IndexSearcher(reader);
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Exact-term search: prints the total hit count and up to {@code num} hits.
     *
     * @param field field to search
     * @param name  exact term to match in that field
     * @param num   maximum number of hits to fetch and print
     */
    public void searchByTerm(String field, String name, int num){
        IndexSearcher searcher = getSearcher();
        if (searcher == null) {
            // getSearcher() has already printed the cause of the failure.
            return;
        }
        Query query = new TermQuery(new Term(field, name));
        try {
            // totalHits counts every match; scoreDocs holds at most num of them.
            TopDocs tds = searcher.search(query, num);
            System.out.println("總共查詢了: " + tds.totalHits);
            for(ScoreDoc sd : tds.scoreDocs){
                Document doc = searcher.doc(sd.doc);
                // NOTE(review): Lucene documents that the boost is not restored on
                // documents loaded from an index, so getBoost() presumably prints
                // 1.0 here rather than the value set in index() -- confirm.
                System.out.println("id號(hào):" + doc.get("id")
                        + ",權(quán)值:"+ doc.getBoost()
                        + "酝静,名字:" + doc.get("name")
                        + "卡骂,郵箱:" + doc.get("email")
                        + ",附件條數(shù):" +doc.get("attach")
                        + "形入,日期:" + doc.get("date"));
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Close the searcher even when the search itself failed.
            try {
                searcher.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /** Rebuilds the index from the sample arrays, discarding any previous content. */
    public void index() {
        IndexWriter writer = null;
        try {
            writer = new IndexWriter(directory, new IndexWriterConfig(
                    Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));
            // Start from an empty index.
            writer.deleteAll();
            Document document = null;
            for (int i = 0; i < ids.length; i++) {
                document = new Document();
                // id and name: stored, not analyzed, no norms.
                // email: stored, not analyzed (norms kept).
                // content: analyzed but not stored.
                document.add(new Field("id", ids[i], Field.Store.YES,
                        Field.Index.NOT_ANALYZED_NO_NORMS));
                document.add(new Field("email", emails[i], Field.Store.YES,
                        Field.Index.NOT_ANALYZED));
                document.add(new Field("content", content[i], Field.Store.NO,
                        Field.Index.ANALYZED));
                document.add(new Field("name", names[i], Field.Store.YES,
                        Field.Index.NOT_ANALYZED_NO_NORMS));
                // Numeric fields; the third constructor argument enables indexing.
                document.add(new NumericField("attach", Field.Store.YES, true)
                        .setIntValue(attaches[i]));
                // Dates are indexed as epoch milliseconds.
                document.add(new NumericField("date", Field.Store.YES, true)
                        .setLongValue(dates[i].getTime()));
                // Domain part of the address (text after the last '@') selects the boost.
                String et = emails[i].substring(emails[i].lastIndexOf("@") + 1);
                if (scores.containsKey(et)) {
                    document.setBoost(scores.get(et));
                } else {
                    document.setBoost(0.5f);
                }
                writer.addDocument(document);
            }
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (LockObtainFailedException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (writer != null) {
                try {
                    writer.close();
                } catch (CorruptIndexException e) {
                    e.printStackTrace();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /** Fills {@code dates} with one fixed sample date per document. */
    private void setDates() {
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
        try {
            dates = new Date[ids.length];
            dates[0] = sdf.parse("2015-02-15");
            dates[1] = sdf.parse("2015-03-01");
            dates[2] = sdf.parse("2015-05-18");
            dates[3] = sdf.parse("2015-09-05");
            dates[4] = sdf.parse("2015-12-15");
            dates[5] = sdf.parse("2015-08-29");
        } catch (ParseException e) {
            e.printStackTrace();
        }
    }
}
TestSearch.java
package cn.lucene.test;
import org.junit.Before;
import org.junit.Test;
import cn.itcast.searcher.SearcherUtil;
/** JUnit driver for the query helpers in {@code SearcherUtil}. */
public class TestSearch {
    private SearcherUtil util;

    /** Builds a fresh in-memory index before each test. */
    @Before
    public void init(){
        this.util = new SearcherUtil();
    }

    /** Exact-term search on the analyzed content field. */
    @Test
    public void searchByTerm(){
        // Alternative that was tried: util.searchByTerm("name", "Jack", 3);
        final String field = "content";
        final String term = "like";
        // The reported total hit count is independent of how many hits are displayed.
        final int maxHits = 3;
        util.searchByTerm(field, term, maxHits);
    }
}
說明:因為這裡我們將索引存放在內存中,所以當程序運行完也就沒有了,所以不能使用之前的類創建索引,這裡我們將創建索引的方法寫在本類中。這裡我們使用
Query query = new TermQuery(new Term(field, name));
進行精確搜索。注意:查詢到的總記錄數和我們想要顯示的記錄數沒有關係。
二、範圍查詢 TermRangeQuery
/**
 * Term range search: prints the total hit count and up to {@code num} documents
 * whose {@code field} term lies between {@code start} and {@code end}.
 *
 * @param field field to search
 * @param start lower bound term (inclusive)
 * @param end   upper bound term (inclusive)
 * @param num   maximum number of hits to fetch and print
 */
public void searchByTermRang(String field, String start, String end, int num){
    IndexSearcher searcher = getSearcher();
    if (searcher == null) {
        // getSearcher() has already printed the cause of the failure.
        return;
    }
    // The last two arguments make the lower/upper bound inclusive. Terms are
    // compared as strings, so the flags only make a difference when an indexed
    // term equals a bound exactly; e.g. "jack" sorts after "j" and is excluded
    // whether or not the upper bound is inclusive.
    Query query = new TermRangeQuery(field, start, end, true, true);
    try {
        TopDocs tds = searcher.search(query, num);
        System.out.println("總共查詢了: " + tds.totalHits);
        for(ScoreDoc sd : tds.scoreDocs){
            Document doc = searcher.doc(sd.doc);
            System.out.println("id號(hào):" + doc.get("id")
                    + "悉尾,權(quán)值:"+ doc.getBoost()
                    + "突那,名字:" + doc.get("name")
                    + ",郵箱:" + doc.get("email")
                    + "构眯,附件條數(shù):" +doc.get("attach")
                    + "愕难,日期:" + doc.get("date"));
        }
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // Close the searcher even when the search itself failed.
        try {
            searcher.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
測試:
/** Term range search over the name field. */
@Test
public void searchByTermRang(){
    // Other ranges that were tried:
    //   util.searchByTermRang("id", "1", "3", 10);
    //   util.searchByTermRang("attach", "1", "3", 10);  -- the numeric field yields no hits
    final String lower = "a";
    final String upper = "j";
    // Note: the range comparison is case-sensitive.
    util.searchByTermRang("name", lower, upper, 10);
}
說明:這裡是範圍查詢,我們使用
Query query = new TermRangeQuery(field, start, end, true, true);
進行查詢,給出相關的域和範圍區間即可。最後兩個參數表示區間兩端是否包含邊界值;試驗時看起來它們不起作用,這是因為詞項是按字典序比較的,只有當某個詞項恰好等於邊界值時開閉區間才會有差別(例如 "jack" 在字典序上大於 "j",所以無論上界開閉都不會被匹配)。另外這種範圍查詢對數字無效。
三饵骨、數(shù)字范圍查詢 NumericRangeQuery
/**
 * Numeric range search on an int field: prints the total hit count and up to
 * {@code num} documents whose value lies in {@code [start, end]}.
 *
 * @param field numeric field to search
 * @param start lower bound (inclusive)
 * @param end   upper bound (inclusive)
 * @param num   maximum number of hits to fetch and print
 */
public void searchByNumricRange(String field, int start, int end, int num){
    IndexSearcher searcher = getSearcher();
    if (searcher == null) {
        // getSearcher() has already printed the cause of the failure.
        return;
    }
    // Both bounds are inclusive (the last two arguments).
    Query query = NumericRangeQuery.newIntRange(field, start, end, true, true);
    try {
        TopDocs tds = searcher.search(query, num);
        System.out.println("總共查詢了: " + tds.totalHits);
        for(ScoreDoc sd : tds.scoreDocs){
            Document doc = searcher.doc(sd.doc);
            System.out.println("id號(hào):" + doc.get("id")
                    + "茫打,權(quán)值:"+ doc.getBoost()
                    + "居触,名字:" + doc.get("name")
                    + ",郵箱:" + doc.get("email")
                    + "老赤,附件條數(shù):" +doc.get("attach")
                    + "轮洋,日期:" + doc.get("date"));
        }
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // Close the searcher even when the search itself failed.
        try {
            searcher.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
測試:
/** Numeric range search: attachment count between 2 and 3. */
@Test
public void searchByNumricRange(){
    final int minAttach = 2;
    final int maxAttach = 3;
    util.searchByNumricRange("attach", minAttach, maxAttach, 10);
}
說明:這里我們使用
Query query = NumericRangeQuery.newIntRange(field, start, end, true, true);
進行查詢。
四、前綴搜索 PrefixQuery
/**
 * Prefix search: prints the total hit count and up to {@code num} documents
 * having a term in {@code field} that starts with {@code value}.
 *
 * @param field field to search
 * @param value prefix the term must start with
 * @param num   maximum number of hits to fetch and print
 */
public void searchByPrefix(String field, String value, int num){
    IndexSearcher searcher = getSearcher();
    if (searcher == null) {
        // getSearcher() has already printed the cause of the failure.
        return;
    }
    Query query = new PrefixQuery(new Term(field, value));
    try {
        TopDocs tds = searcher.search(query, num);
        System.out.println("總共查詢了: " + tds.totalHits);
        for(ScoreDoc sd : tds.scoreDocs){
            Document doc = searcher.doc(sd.doc);
            System.out.println("id號(hào):" + doc.get("id")
                    + "弊予,權(quán)值:"+ doc.getBoost()
                    + ",名字:" + doc.get("name")
                    + "开财,郵箱:" + doc.get("email")
                    + "汉柒,附件條數(shù):" +doc.get("attach")
                    + ",日期:" + doc.get("date"));
        }
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // Close the searcher even when the search itself failed.
        try {
            searcher.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
測試:
/** Prefix search on the analyzed content field. */
@Test
public void searchByPrefix(){
    // Alternative that was tried: util.searchByPrefix("name", "j", 10);
    final String prefix = "s";
    util.searchByPrefix("content", prefix, 10);
}
說明:這里我們使用
Query query = new PrefixQuery(new Term(field, value));
其中 value 就是前綴值。
五、通配符搜索 WildcardQuery
/**
 * Wildcard search: prints the total hit count and up to {@code num} documents
 * having a term in {@code field} that matches the pattern {@code value}.
 *
 * @param field field to search
 * @param value wildcard pattern ({@code *} for any run of characters, {@code ?} for one character)
 * @param num   maximum number of hits to fetch and print
 */
public void searchByWildCard(String field, String value, int num){
    IndexSearcher searcher = getSearcher();
    if (searcher == null) {
        // getSearcher() has already printed the cause of the failure.
        return;
    }
    Query query = new WildcardQuery(new Term(field, value));
    try {
        TopDocs tds = searcher.search(query, num);
        System.out.println("總共查詢了: " + tds.totalHits);
        for(ScoreDoc sd : tds.scoreDocs){
            Document doc = searcher.doc(sd.doc);
            System.out.println("id號(hào):" + doc.get("id")
                    + ",權(quán)值:"+ doc.getBoost()
                    + "历葛,名字:" + doc.get("name")
                    + "正塌,郵箱:" + doc.get("email")
                    + ",附件條數(shù):" +doc.get("attach")
                    + "恤溶,日期:" + doc.get("date"));
        }
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // Close the searcher even when the search itself failed.
        try {
            searcher.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
測試:
/** Wildcard search over the name field. */
@Test
public void searchByWildCard(){
    // '*' stands for any run of characters, '?' for exactly one character.
    // util.searchByWildCard("name", "j?", 10);  -- found nothing
    final String pattern = "j*";
    util.searchByWildCard("name", pattern, 10);
}
說明:這里我們使用
Query query = new WildcardQuery(new Term(field, value));
其中我們使用 ? 表示任意一個字符,而使用 * 號表示任意多個字符。
六、多條件查詢 BooleanQuery
/**
 * Boolean search: prints the total hit count and up to {@code num} documents
 * whose name is "tom" and whose content contains "like".
 *
 * @param num maximum number of hits to fetch and print
 */
public void searchByBoolean(int num){
    IndexSearcher searcher = getSearcher();
    if (searcher == null) {
        // getSearcher() has already printed the cause of the failure.
        return;
    }
    BooleanQuery query = new BooleanQuery();
    // MUST: the clause has to match; MUST_NOT: it must not match;
    // SHOULD: the clause is optional.
    query.add(new TermQuery(new Term("name", "tom")), Occur.MUST);
    query.add(new TermQuery(new Term("content", "like")), Occur.MUST);
    try {
        TopDocs tds = searcher.search(query, num);
        System.out.println("總共查詢了: " + tds.totalHits);
        for(ScoreDoc sd : tds.scoreDocs){
            Document doc = searcher.doc(sd.doc);
            System.out.println("id號(hào):" + doc.get("id")
                    + "粮宛,權(quán)值:"+ doc.getBoost()
                    + "窥淆,名字:" + doc.get("name")
                    + ",郵箱:" + doc.get("email")
                    + "巍杈,附件條數(shù):" +doc.get("attach")
                    + "忧饭,日期:" + doc.get("date"));
        }
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // Close the searcher even when the search itself failed.
        try {
            searcher.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
測試:
/** Runs the combined name/content boolean query. */
@Test
public void searchByBoolean(){
    final int maxHits = 10;
    util.searchByBoolean(maxHits);
}
說明:這裡我們使用
BooleanQuery query = new BooleanQuery();
//下面的查詢表示名字為tom同時內容中有like的索引
query.add(new TermQuery(new Term("name", "tom")), Occur.MUST);
query.add(new TermQuery(new Term("content", "like")), Occur.MUST);
這裡 Occur.MUST 表示必須要有,Occur.MUST_NOT 表示必須沒有,Occur.SHOULD 表示可有可無。這樣我們就將多個條件組合起來了。
七、短語查詢 PhraseQuery
/**
 * Phrase search: prints the total hit count and up to {@code num} documents
 * whose content contains "i" followed by "football" with a slop of 1.
 *
 * @param num maximum number of hits to fetch and print
 */
public void searchByPhrase(int num){
    IndexSearcher searcher = getSearcher();
    if (searcher == null) {
        // getSearcher() has already printed the cause of the failure.
        return;
    }
    PhraseQuery query = new PhraseQuery();
    // Slop 1 tolerates one intervening word, so "I football" matches "I like football".
    query.setSlop(1);
    // Terms must be given in lower case; text indexed as "I" is still found.
    // Presumably the analyzer lower-cased the content at index time.
    query.add(new Term("content", "i"));
    query.add(new Term("content", "football"));
    try {
        TopDocs tds = searcher.search(query, num);
        System.out.println("總共查詢了: " + tds.totalHits);
        for(ScoreDoc sd : tds.scoreDocs){
            Document doc = searcher.doc(sd.doc);
            System.out.println("id號(hào):" + doc.get("id")
                    + "渔肩,權(quán)值:"+ doc.getBoost()
                    + ",名字:" + doc.get("name")
                    + "拇惋,郵箱:" + doc.get("email")
                    + "周偎,附件條數(shù):" +doc.get("attach")
                    + ",日期:" + doc.get("date"));
        }
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // Close the searcher even when the search itself failed.
        try {
            searcher.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
測試:
/** Runs the "i ... football" phrase query. */
@Test
public void searchByPhrase(){
    final int maxHits = 10;
    util.searchByPhrase(maxHits);
}
說明:這里我們使用
PhraseQuery query = new PhraseQuery();
query.setSlop(1);
query.add(new Term("content", "i"));
query.add(new Term("content", "football"));
這裡我們在給值的時候需要給小寫,但是會將小寫和大寫匹配的索引都查詢出來。當然這種查詢很消耗資源,同時對中文搜索作用不大。
八、模糊查詢 FuzzyQuery
/**
 * Fuzzy search: prints the total hit count and up to {@code num} documents
 * whose name is similar to the misspelled term "tome".
 *
 * @param num maximum number of hits to fetch and print
 */
public void searchByFuzzy(int num){
    IndexSearcher searcher = getSearcher();
    if (searcher == null) {
        // getSearcher() has already printed the cause of the failure.
        return;
    }
    // The misspelled name still matches. Per the Lucene 3.x FuzzyQuery API the
    // second argument is the minimum similarity (0.5f here) and the third is the
    // prefix length: the first 2 characters must match exactly. It is not an
    // edit distance.
    FuzzyQuery query = new FuzzyQuery(new Term("name", "tome"), 0.5f, 2);
    try {
        TopDocs tds = searcher.search(query, num);
        System.out.println("總共查詢了: " + tds.totalHits);
        for(ScoreDoc sd : tds.scoreDocs){
            Document doc = searcher.doc(sd.doc);
            System.out.println("id號(hào):" + doc.get("id")
                    + "逢享,權(quán)值:"+ doc.getBoost()
                    + ",名字:" + doc.get("name")
                    + "吴藻,郵箱:" + doc.get("email")
                    + "瞒爬,附件條數(shù):" +doc.get("attach")
                    + ",日期:" + doc.get("date"));
        }
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // Close the searcher even when the search itself failed.
        try {
            searcher.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
測試:
/** Runs the fuzzy query for the misspelled name. */
@Test
public void searchByFuzzy(){
    final int maxHits = 10;
    util.searchByFuzzy(maxHits);
}
說明:這里我們使用
FuzzyQuery query = new FuzzyQuery(new Term("name", "tome"), 0.5f, 2);
進行查詢,模糊查詢和通配符查詢不一樣,可以理解為容錯查詢。如果不給出後面兩個參數,則使用默認值(最小相似度 0.5,前綴長度 0),當然這可以使用後面兩個參數進行設定。倒數第二個參數是最小相似度,取值必須小於 1.0,默認為 0.5,值越小匹配越寬鬆;最後一個參數是前綴長度,即詞項開頭必須完全相同的字符個數(並不是允許出錯的字符個數)。此方法用的不多,了解即可。
九、QueryParser
/**
 * Runs an already-built query (typically produced by a QueryParser) and prints
 * the total hit count and up to {@code num} hits.
 *
 * @param query query to execute
 * @param num   maximum number of hits to fetch and print
 */
public void searchByQueryParser(Query query, int num){
    IndexSearcher searcher = getSearcher();
    if (searcher == null) {
        // getSearcher() has already printed the cause of the failure.
        return;
    }
    try {
        TopDocs tds = searcher.search(query, num);
        System.out.println("總共查詢了: " + tds.totalHits);
        for(ScoreDoc sd : tds.scoreDocs){
            Document doc = searcher.doc(sd.doc);
            System.out.println("id號(hào):" + doc.get("id")
                    + "妇蛀,權(quán)值:"+ doc.getBoost()
                    + ",名字:" + doc.get("name")
                    + "笤成,郵箱:" + doc.get("email")
                    + "评架,附件條數(shù):" +doc.get("attach")
                    + ",日期:" + doc.get("date"));
        }
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // Close the searcher even when the search itself failed.
        try {
            searcher.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
測試:
// Walks through several QueryParser syntax examples. Every parse() result
// overwrites `query`, so only the last one (the sloppy phrase query) is
// actually executed by util.searchByQueryParser.
@Test
public void searchByQueryParser() throws ParseException{
// 1. Create the QueryParser; the default search field is "content".
QueryParser parser = new QueryParser(Version.LUCENE_35, "content", new StandardAnalyzer(Version.LUCENE_35));
parser.setDefaultOperator(Operator.AND);// whitespace-separated terms are now combined with AND instead of the default OR
parser.setAllowLeadingWildcard(true);// allow patterns that start with a wildcard; off by default because such queries are slow
// Search the default field for "like".
Query query = parser.parse("like");// without the AND setting above, whitespace between terms would mean OR
//query = parser.parse("I football");// with the default OR operator: contains "I" or "football"
query = parser.parse("I AND football");// contains both "I" and "football"
// Prefix a term with "field:" to search a field other than the default one.
query = parser.parse("name:like");
query = parser.parse("name:j*");
query = parser.parse("email:*@sina.com");// leading wildcard: slow and disabled by default, enabled above
// '-' excludes and '+' requires the clause that follows; here: name must not be "tom", "football" is required.
query = parser.parse("- name:tom + football");
query = parser.parse("id:[1 TO 3]");// square brackets: inclusive range; TO must be upper case
query = parser.parse("id:{1 TO 3}");// curly braces: exclusive range; mixed (half-open) brackets are not accepted
query = parser.parse("\"I like football\"");// exact phrase "I like football"
query = parser.parse("\"I football\"~1");// phrase with slop 1: one word may sit between "I" and "football"
util.searchByQueryParser(query, 10);
}
說明:此類會將我們傳入的參數進行處理之後再進行查詢。首先我們構造一個 QueryParser 對象,此對象中第二個參數(此處是 content)表示默認搜索的域,當然可以改變。當我們使用
query = parser.parse("I football");
進行查詢時會將中間的空格默認為“或”,也就是有 "I" 或者有 "football" 的索引,當然我們可以通過
parser.setDefaultOperator(Operator.AND);
將默認的“或”設置為“與”,此時就表示查詢既有 "I" 也有 "football" 的索引,當然我們一般不改,而使用 "AND" 來進行條件“與”操作。而這裡通配符查詢由於效率較低,默認是不允許放在首位的,當然我們可以使用
parser.setAllowLeadingWildcard(true);
讓其允許。可以查詢的組合還有很多,下面給出:
| 條件 | 含義 |
|---|---|
| mike | 默認域包含 mike |
| mike john、mike OR john | 默認域包含 mike 或者 john |
| + mike + address:zhaotong、mike AND address:zhaotong | 默認域包含 mike,同時 address 是 zhaotong 的 |
| id:2 | id 域為 2 |
| address:Kunming - desc:she、address:Kunming AND NOT desc:she | address 是 Kunming 並且 desc 不是 she 的 |
| (mike OR john) AND address:zhaotong | 默認域是 mike 或者 john,並且 address 是 zhaotong |
| desc:"she like" | desc 域是 she like |
| j* | 默認域是 j 開頭 |
| johe~ | 模糊搜索 johe |
| id:[1 TO 3] | id 從 1 到 3 |
十、分頁搜索
在做分頁查詢的時候我們需要很多文件,不然看不出效果。這裡我們在路徑 `E:/myeclipse/Lucene/somefile/` 中準備了一些文件,如果文件不夠,我們可以使用下面的方法進行拷貝:
TestSearch.java
/**
 * Multiplies the sample files: copies every regular file in the sample folder
 * to a sibling with the same base name and a ".py" extension.
 */
@Test
public void testCopuFiles(){
    File file = new File("E:/myeclipse/Lucene/somefile/");
    File[] children = file.listFiles();
    if (children == null) {
        // listFiles() returns null when the path is missing or is not a directory.
        throw new IllegalStateException("Cannot list sample directory: " + file);
    }
    for(File f : children){
        if (!f.isFile()) {
            // Sub-directories cannot be copied with copyFile.
            continue;
        }
        String destFileName = FilenameUtils.getFullPath(f.getAbsolutePath())
                + FilenameUtils.getBaseName(f.getName()) + ".py";
        File dest = new File(destFileName);
        if (dest.equals(f)) {
            // Already a ".py" file; copying it onto itself would only fail.
            continue;
        }
        try {
            FileUtils.copyFile(f, dest);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
當然這個方法中我們需要用到依賴包 commons-io.jar。之後我們不能再使用之前的索引創建方法了,因為之前創建的索引保存在內存中,這裡我們需要保存在硬盤上。於是我們給出創建索引的類,其實在之前我們已經寫過了,這裡作為複習:
FileIndexUtil.java
package cn.itcast.searcher;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;
/**
 * Builds an on-disk index (under E:/myeclipse/Lucene/index) from the files in
 * E:/myeclipse/Lucene/somefile.
 */
public class FileIndexUtil {
    private static Directory directory = null;
    static {
        try {
            directory = FSDirectory.open(new File(
                    "E:/myeclipse/Lucene/index"));
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** @return the shared on-disk index directory ({@code null} if it could not be opened) */
    public static Directory getDirectory() {
        return directory;
    }

    /**
     * Indexes every regular file of the sample folder: content (from a reader),
     * file name, absolute path, last-modified time and size in KB.
     *
     * @param hasNew when {@code true}, the existing index content is deleted first
     * @throws IllegalStateException if the sample folder cannot be listed
     */
    public static void index(boolean hasNew) {
        File file = new File("E:/myeclipse/Lucene/somefile");
        // List the source files before touching the index, so that a missing
        // folder cannot leave a freshly wiped index behind.
        File[] files = file.listFiles();
        if (files == null) {
            throw new IllegalStateException("Cannot list source directory: " + file);
        }
        IndexWriter writer = null;
        try {
            writer = new IndexWriter(directory, new IndexWriterConfig(
                    Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));
            if (hasNew) {
                // Rebuild from scratch: drop everything indexed before.
                writer.deleteAll();
            }
            Document document = null;
            for (File f : files) {
                if (!f.isFile()) {
                    // A sub-directory cannot be opened with FileReader and would
                    // abort the whole run with an IOException.
                    continue;
                }
                document = new Document();
                document.add(new Field("content", new FileReader(f)));
                document.add(new Field("filename", f.getName(),
                        Field.Store.YES, Field.Index.NOT_ANALYZED));
                document.add(new Field("path", f.getAbsolutePath(),
                        Field.Store.YES, Field.Index.NOT_ANALYZED));
                document.add(new NumericField("date", Field.Store.YES, true)
                        .setLongValue(f.lastModified()));
                // File size is stored in whole kilobytes.
                document.add(new NumericField("size", Field.Store.YES, true)
                        .setIntValue((int) (f.length() / 1024)));
                writer.addDocument(document);
            }
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (LockObtainFailedException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (writer != null) {
                try {
                    writer.close();
                } catch (CorruptIndexException e) {
                    e.printStackTrace();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }
}
下面我們看分頁查詢的方法,當然這裡我們同時給出不使用分頁查詢的方法作為比較:
// 分頁查詢
/**
 * Prints one page of hits for a query string parsed against the "content" field.
 * The top {@code pageIndex * pageSize} hits are fetched and the ones belonging
 * to earlier pages are skipped. A page past the last hit prints nothing.
 *
 * @param query     query string in QueryParser syntax
 * @param pageIndex 1-based page number
 * @param pageSize  number of hits per page
 * @throws IllegalArgumentException if {@code pageIndex} or {@code pageSize} is smaller than 1
 */
public void searchPage(String query, int pageIndex, int pageSize) {
    if (pageIndex < 1 || pageSize < 1) {
        throw new IllegalArgumentException("pageIndex and pageSize must be >= 1, got pageIndex="
                + pageIndex + ", pageSize=" + pageSize);
    }
    IndexSearcher searcher = null;
    try {
        Directory dir = FileIndexUtil.getDirectory();
        searcher = getSearcher(dir);
        if (searcher == null) {
            return;
        }
        QueryParser parser = new QueryParser(Version.LUCENE_35, "content",
                new StandardAnalyzer(Version.LUCENE_35));
        Query q = parser.parse(query);
        int start = (pageIndex - 1) * pageSize;
        int wanted = pageIndex * pageSize;
        // Fetch exactly as many hits as are needed to reach the end of this page.
        TopDocs tds = searcher.search(q, wanted);
        ScoreDoc[] sds = tds.scoreDocs;
        // The last page may be short (or empty): never read past the returned hits.
        int end = Math.min(wanted, sds.length);
        for(int i = start; i < end; i++ ){
            Document doc = searcher.doc(sds[i].doc);
            System.out.println("id號(hào):" + sds[i].doc + ",路徑:" + doc.get("path") + "丙挽,名稱:" + doc.get("filename"));
        }
    } catch (org.apache.lucene.queryParser.ParseException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // Close the searcher on every path, including parse and search failures.
        if (searcher != null) {
            try {
                searcher.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
//不加分頁的查詢
/**
 * Prints up to the first 100 hits for a query string parsed against the
 * "content" field, without any paging.
 *
 * @param query query string in QueryParser syntax
 */
public void searchNoPage(String query) {
    IndexSearcher searcher = null;
    try {
        Directory dir = FileIndexUtil.getDirectory();
        searcher = getSearcher(dir);
        if (searcher == null) {
            return;
        }
        QueryParser parser = new QueryParser(Version.LUCENE_35, "content",
                new StandardAnalyzer(Version.LUCENE_35));
        Query q = parser.parse(query);
        TopDocs tds = searcher.search(q, 100);
        ScoreDoc[] sds = tds.scoreDocs;
        for(int i = 0; i < sds.length; i++ ){
            Document doc = searcher.doc(sds[i].doc);
            System.out.println("id號(hào):" + sds[i].doc + "肺孵,路徑:" + doc.get("path") + ",名稱:" + doc.get("filename"));
        }
    } catch (org.apache.lucene.queryParser.ParseException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // Close the searcher on every path, including parse and search failures.
        if (searcher != null) {
            try {
                searcher.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
測試:
/** Prints page 1, page 2 and then the unpaged result list for the same query. */
@Test
public void searchPage01(){
    final String keyword = "java";
    final int pageSize = 10;
    final String separator = "*****************";
    util.searchPage(keyword, 1, pageSize);
    System.out.println(separator);
    util.searchPage(keyword, 2, pageSize);
    System.out.println(separator);
    util.searchNoPage(keyword);
}
從結果中我們可以看出分頁查詢的效果。但是雖然是分頁查詢,其實每次都將給定的查詢條數的索引全部查詢出來了,效率有點低。下面看第二種分頁查詢方式:
/**
 * Prints one page of hits using {@code searchAfter}. The last hit of the
 * previous page is taken from a preliminary search of the top 100 hits, so
 * pages that start beyond hit 100 (or beyond the last hit) print nothing.
 *
 * @param query     query string in QueryParser syntax
 * @param pageIndex 1-based page number
 * @param pageSize  number of hits per page
 * @throws IllegalArgumentException if {@code pageIndex} or {@code pageSize} is smaller than 1
 */
public void searchPageByAfter(String query, int pageIndex, int pageSize){
    if (pageIndex < 1 || pageSize < 1) {
        throw new IllegalArgumentException("pageIndex and pageSize must be >= 1, got pageIndex="
                + pageIndex + ", pageSize=" + pageSize);
    }
    IndexSearcher searcher = null;
    try {
        Directory dir = FileIndexUtil.getDirectory();
        searcher = getSearcher(dir);
        if (searcher == null) {
            return;
        }
        QueryParser parser = new QueryParser(Version.LUCENE_35, "content",
                new StandardAnalyzer(Version.LUCENE_35));
        Query q = parser.parse(query);
        // Index of the last hit on the previous page; -1 means this is the first page.
        int last = (pageIndex - 1) * pageSize - 1;
        ScoreDoc after = null;
        if (last >= 0) {
            ScoreDoc[] sds = searcher.search(q, 100).scoreDocs;
            if (last >= sds.length) {
                // The requested page lies beyond the fetched hits.
                return;
            }
            after = sds[last];
        }
        // A null "after" starts from the first hit; honor pageSize instead of a fixed 10.
        TopDocs tds = searcher.searchAfter(after, q, pageSize);
        for(ScoreDoc sd : tds.scoreDocs){
            Document doc = searcher.doc(sd.doc);
            System.out.println("id號(hào):" + sd.doc + "瑰艘,路徑:" + doc.get("path") + ",名稱:" + doc.get("filename"));
        }
    } catch (org.apache.lucene.queryParser.ParseException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // Close the searcher on every path, including early returns and failures.
        if (searcher != null) {
            try {
                searcher.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
說明:這裡
int last = (pageIndex - 1) * pageSize - 1;
tds = searcher.searchAfter(sds[last], q, 10);//表示從第十條索引開始
表示我們從前一頁的最後一條索引的後一條索引開始顯示。測試方法這裡省略了,和之前的類似。但是我們看到我們給出的查詢條數依然是100,也就是說我們依然是查詢出了100條數據(如果有100條數據的話),這樣其實和第一種分頁查詢在效率上相差不大。下面我們進行改進:
/**
 * Finds the last hit of the page preceding {@code pageIndex}, for use as the
 * "after" argument of {@code searchAfter}.
 *
 * @param pageIndex 1-based page number that is about to be displayed
 * @param pageSize  number of hits per page
 * @param query     query being paged
 * @param searcher  searcher to run the preliminary search on
 * @return the last hit before the requested page; the very last hit when the
 *         page lies beyond the results; {@code null} for the first page or
 *         when there are no hits
 * @throws IOException if the preliminary search fails
 */
private ScoreDoc getLaScoreDoc(int pageIndex, int pageSize, Query query, IndexSearcher searcher) throws IOException{
    if(pageIndex <= 1 || pageSize < 1){
        return null;
    }
    // Lucene cannot skip hits the way SQL LIMIT/OFFSET does: every hit that
    // precedes the requested page has to be collected to find the last one.
    int num = pageSize * (pageIndex - 1);
    TopDocs tds = searcher.search(query, num);
    ScoreDoc[] hits = tds.scoreDocs;
    if (hits.length == 0) {
        return null;
    }
    // Fewer hits than requested: clamp to the last available hit.
    return hits[Math.min(num, hits.length) - 1];
}
/**
 * Prints one page of hits using {@code searchAfter}, fetching only the hits
 * that precede the page (via {@code getLaScoreDoc}) plus the page itself.
 *
 * @param query     query string in QueryParser syntax
 * @param pageIndex 1-based page number
 * @param pageSize  number of hits per page
 */
public void searchPageByAfter01(String query, int pageIndex, int pageSize){
    IndexSearcher searcher = null;
    try {
        Directory dir = FileIndexUtil.getDirectory();
        searcher = getSearcher(dir);
        if (searcher == null) {
            return;
        }
        QueryParser parser = new QueryParser(Version.LUCENE_35, "content",
                new StandardAnalyzer(Version.LUCENE_35));
        Query q = parser.parse(query);
        // Last hit of the previous page (null for the first page).
        ScoreDoc lastSd = getLaScoreDoc(pageIndex, pageSize, q, searcher);
        TopDocs tds = searcher.searchAfter(lastSd, q, pageSize);
        for(ScoreDoc sd : tds.scoreDocs){
            Document doc = searcher.doc(sd.doc);
            System.out.println("id號(hào):" + sd.doc + "慰安,路徑:" + doc.get("path") + "腋寨,名稱:" + doc.get("filename"));
        }
    } catch (org.apache.lucene.queryParser.ParseException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // Close the searcher on every path, including parse and search failures.
        if (searcher != null) {
            try {
                searcher.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
說明:這裡我們先通過每頁顯示的條數和要顯示的頁碼,將上一頁最後一條索引的位置和需要預先查詢的總條數計算出來(在方法 getLaScoreDoc 中)。這裡注意,如果我們每頁顯示條數為10,而頁碼為2,那麼要顯示的第一條數據的下標就是10,而這裡的 num 為10,取出的是下標為 num - 1 = 9 的那一條(即上一頁的最後一條)。因為我們不能像數據庫那樣只查詢出給定的條數,必須將該頁之前的所有條數都查詢出來,不過這樣或多或少可以提高一定的效率。當然 searchAfter 方法從 3.5 版本開始才提供。