這里我們拷貝之前的工程(`lucene_index01`)為工程`lucene_index02`。然后在創建索引和搜索方法上進行改進測試。
相關代碼:
IndexUtil.java
package cn.lucene.index;
import java.io.File;
import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.StaleReaderException;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;
/**
 * Demo helper that builds a small Lucene index of sample "e-mail" records
 * and runs a term query against it.
 *
 * <p>Each record consists of an id, a sender e-mail address, a body text,
 * an attachment count, a sender name and a date. Documents are boosted at
 * index time according to the domain of the sender's e-mail address.
 */
public class IndexUtil {
    private final String[] ids = {"1", "2", "3", "4", "5", "6"};
    // Sender e-mail addresses.
    private final String[] emails = {"aa@qq.com", "bb@sina.edu", "cc@yahu.org", "ss@sina.com", "dd@gmail.com", "ee@163.com"};
    // E-mail bodies.
    private final String[] content = {
        "welcom to visited the space,I like football",
        "hello boy, i like someone",
        "come on baby",
        "first blood",
        "I like football,I like football",
        "my girlfriend is so beatiful, every body like game"
    };
    // Number of attachments per e-mail.
    private final int[] attaches = {2, 5, 6, 5, 8, 4};
    // Sender names.
    private final String[] names = {"Tom", "Jack", "goudan", "alibaba", "jerry", "kitty"};
    // E-mail dates; filled in by setDates().
    private Date[] dates = null;
    private Directory directory = null;
    // Maps an e-mail domain to the boost applied to documents sent from it.
    private final Map<String, Float> scores = new HashMap<String, Float>();

    /**
     * Initializes the sample dates, the domain-to-boost table and opens the
     * on-disk index directory. An I/O failure while opening the directory is
     * printed and leaves {@code directory} unset.
     */
    public IndexUtil() {
        try {
            setDates();
            // Documents from "qq.com" get boost 2.0, "sina.edu" gets 1.5;
            // every other domain is given 0.5 in index().
            scores.put("qq.com", 2.0f);
            scores.put("sina.edu", 1.5f);
            directory = FSDirectory.open(new File("E:/myeclipse/Lucene/index"));
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Rebuilds the index from scratch: deletes all existing documents and
     * adds one document per sample record.
     */
    public void index() {
        IndexWriter writer = null;
        try {
            writer = new IndexWriter(directory,
                    new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));
            // Start from an empty index.
            writer.deleteAll();
            for (int i = 0; i < ids.length; i++) {
                Document document = new Document();
                // id and name: stored, not analyzed, no norms.
                // email: stored, not analyzed (norms kept).
                // content: analyzed but not stored.
                document.add(new Field("id", ids[i], Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
                document.add(new Field("email", emails[i], Field.Store.YES, Field.Index.NOT_ANALYZED));
                document.add(new Field("content", content[i], Field.Store.NO, Field.Index.ANALYZED));
                document.add(new Field("name", names[i], Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
                // Numeric field for the attachment count; the third argument (true) enables indexing.
                document.add(new NumericField("attach", Field.Store.YES, true).setIntValue(attaches[i]));
                // The date is indexed as its epoch-millisecond long value.
                document.add(new NumericField("date", Field.Store.YES, true).setLongValue(dates[i].getTime()));

                // Domain part of the address, i.e. everything after the last '@'.
                String et = emails[i].substring(emails[i].lastIndexOf("@") + 1);
                System.out.println(et);
                // Apply the configured boost for this domain, or 0.5 if none is configured.
                if (scores.containsKey(et)) {
                    document.setBoost(scores.get(et));
                } else {
                    document.setBoost(0.5f);
                }
                writer.addDocument(document);
            }
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (LockObtainFailedException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (writer != null) {
                try {
                    writer.close();
                } catch (CorruptIndexException e) {
                    e.printStackTrace();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Populates {@link #dates} with one hard-coded date per sample record,
     * parsed with the pattern {@code yyyy-MM-dd}.
     */
    private void setDates() {
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
        try {
            dates = new Date[ids.length];
            dates[0] = sdf.parse("2015-02-15");
            dates[1] = sdf.parse("2015-03-01");
            dates[2] = sdf.parse("2015-05-18");
            dates[3] = sdf.parse("2015-09-05");
            dates[4] = sdf.parse("2015-12-15");
            dates[5] = sdf.parse("2015-08-29");
        } catch (ParseException e) {
            e.printStackTrace();
        }
    }

    /**
     * Searches the {@code content} field for the term {@code like} and prints
     * up to ten hits with their stored fields.
     *
     * <p>The index reader is closed exactly once, after all hits have been
     * printed (or on failure). Closing it inside the hit loop would make the
     * lookup of every hit after the first one run against a closed reader.
     */
    public void search() {
        IndexReader reader = null;
        try {
            reader = IndexReader.open(directory);
            IndexSearcher searcher = new IndexSearcher(reader);
            // Match documents whose content contains the term "like".
            TermQuery query = new TermQuery(new Term("content", "like"));
            TopDocs tds = searcher.search(query, 10);
            for (ScoreDoc sd : tds.scoreDocs) {
                Document doc = searcher.doc(sd.doc);
                // getBoost() prints 1.0 for every hit: the Document loaded here is a
                // fresh object unrelated to the one that was indexed. Use the Luke
                // tool to inspect the index-time boost.
                // The "date" value is printed as the raw stored long; convert it
                // if a formatted date is wanted.
                System.out.println("(" + sd.doc + "權(quán)值:"+ doc.getBoost() + ")" + doc.get("name") + "[" + doc.get("email") + "]-->"
                        + doc.get("id") + "-->" + doc.get("attach") + "-->" + doc.get("date"));
            }
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Release the reader once, whether or not the search succeeded.
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }
}
說明:
- 這里我們首先加入日期，同時使用
//為數字添加索引,第三個參數設置為true表示默認索引
document.add(new NumericField("attach", Field.Store.YES, true).setIntValue(attaches[i]));
//為日期添加索引(以毫秒時間戳的long值存儲)
document.add(new NumericField("date", Field.Store.YES, true).setLongValue(dates[i].getTime()));
為數字和日期加入索引。
- 這里要注意的是我們要想查看索引的權值不能使用 `getBoost` 方法，不然查出來的權值都是1.0，可以使用 luke 工具查看。