需求:每晚2點開始對所有數據建立索引,其它時間,每隔一定的時間更新索引。
經過測試,5000條數據建立索引只需600ms,20000條數據約1000ms...幾十萬的數據也只需要幾秒。
若根據初步方案,白天更新數據索引只更新新添加或者改動的數據,需要將數據庫查出的數據與IndexReader中的數據進行檢索剔除,此操作耗時較多。初步測試結果:5000條數據需要50s;20000條數據需要220s...
若有20w條數據,則光剔除數據的時間就需要4h,明顯行不通。
故還不如直接每次都重建所有索引。
不多說,貼初步方案的代碼888:
package net.lucene.buildindex;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;
import com.mongodb.DBObject;
/**
 * Builds a Lucene index over the records returned by {@code Getdatas#getDatas()}.
 *
 * <p>{@link #buildIndex()} creates the main index when its directory holds at most one file;
 * otherwise it looks for records whose {@code _id} is not yet in the main index and writes
 * those to a second, separate index directory. {@link #search(String)} runs a fixed sample
 * query against an index directory and prints the hit counts.
 *
 * <p>Errors are reported with {@code printStackTrace()} at the public entry points; neither
 * public method throws.
 *
 * @author liuyang
 * @version 2015.08.06
 */
public class BuildIndex {

    /** Directory of the main index. */
    private static final String INDEX_DIR = "C:/Users/365/Desktop/8月/Lucene/buildIndex";

    /** Directory of the index that receives records missing from the main index. */
    private static final String PENDING_INDEX_DIR = "C:/Users/365/Desktop/8月/Lucene/buildIndex2";

    /** {@link #buildIndex()} takes the update path only when the main directory holds more files than this. */
    private static final int MIN_FILES_FOR_EXISTING_INDEX = 1;

    /**
     * File-count threshold used by {@link #updateIndex()} to decide whether a directory holds a
     * populated index. NOTE(review): the value 3 is carried over unchanged; confirm it matches
     * the file layout Lucene actually produces for this data.
     */
    private static final int MIN_FILES_FOR_POPULATED_INDEX = 3;

    /**
     * Creates the main index if it does not exist yet, otherwise performs an incremental update.
     * Any failure is printed to standard error and not rethrown.
     */
    public void buildIndex() {
        try {
            if (countFiles(new File(INDEX_DIR)) > MIN_FILES_FOR_EXISTING_INDEX) {
                this.updateIndex();
            } else {
                this.firstIndex();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Opens an {@link IndexWriter} on the given directory.
     *
     * <p>The configuration is fixed at construction time: changing the
     * {@link IndexWriterConfig} afterwards does not affect the returned writer.
     *
     * @param openMode how to open the index (create, append, or create-or-append)
     * @param fileURL  file-system path of the index directory
     * @return an open writer; the caller must close it (see {@link #closeWriter(IndexWriter)})
     * @throws IOException if the directory or the writer cannot be opened
     */
    private IndexWriter initLucene(OpenMode openMode, String fileURL) throws IOException {
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_46);
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_46, analyzer);
        config.setOpenMode(openMode);
        // The original note says: SimpleFSDirectory on Windows, NIOFSDirectory elsewhere.
        Directory directory = new SimpleFSDirectory(new File(fileURL));
        try {
            return new IndexWriter(directory, config);
        } catch (IOException e) {
            // Do not leak the directory handle when the writer cannot be created.
            directory.close();
            throw e;
        }
    }

    /**
     * Builds the main index from all available records. Does nothing when there are no records.
     *
     * @throws IOException if the records cannot be loaded or the index cannot be written
     */
    private void firstIndex() throws IOException {
        List<DBObject> results = this.fetchRecords();
        if (results.isEmpty()) {
            return;
        }
        IndexWriter indexWriter = this.initLucene(OpenMode.CREATE_OR_APPEND, INDEX_DIR);
        try {
            this.addFields(results, indexWriter);
        } finally {
            // Closing releases the write lock; it also commits whatever was added so far.
            closeWriter(indexWriter);
        }
    }

    /**
     * Incremental update: finds the records whose {@code _id} is absent from the main index and
     * writes them to the index at {@link #PENDING_INDEX_DIR}. The main index itself is only read.
     *
     * @throws IOException if the records cannot be loaded or an index cannot be read or written
     */
    private void updateIndex() throws IOException {
        List<DBObject> results = this.fetchRecords();

        File pendingDir = new File(PENDING_INDEX_DIR);
        if (!pendingDir.exists()) {
            // mkdirs also creates missing parent directories.
            pendingDir.mkdirs();
        }

        File indexDir = new File(INDEX_DIR);
        List<DBObject> updateDatas = this.findUnindexed(results, indexDir);
        if (updateDatas.isEmpty()) {
            return;
        }
        if (countFiles(indexDir) <= MIN_FILES_FOR_POPULATED_INDEX) {
            return;
        }

        // A populated pending index is overwritten; otherwise it is created or appended to.
        OpenMode openMode = countFiles(pendingDir) > MIN_FILES_FOR_POPULATED_INDEX
                ? OpenMode.CREATE
                : OpenMode.CREATE_OR_APPEND;
        IndexWriter indexWriterAdd = this.initLucene(openMode, PENDING_INDEX_DIR);
        try {
            this.addFields(updateDatas, indexWriterAdd);
        } finally {
            closeWriter(indexWriterAdd);
        }
    }

    /**
     * Returns the records whose {@code _id} has no match in the index at {@code indexDir} and
     * prints how long the filtering took.
     *
     * @param results  candidate records
     * @param indexDir directory of the index to check against
     * @return the records not found in the index, in their original order
     * @throws IOException if the index cannot be opened or searched
     */
    private List<DBObject> findUnindexed(List<DBObject> results, File indexDir) throws IOException {
        List<DBObject> missing = new ArrayList<DBObject>();
        Directory dir = null;
        IndexReader indexReader = null;
        try {
            dir = FSDirectory.open(indexDir);
            indexReader = DirectoryReader.open(dir);
            IndexSearcher searcher = new IndexSearcher(indexReader);
            long startTime = System.currentTimeMillis();
            for (DBObject record : results) {
                if (!this.isInIndex(record.get("_id").toString(), searcher)) {
                    missing.add(record);
                }
            }
            long endTime = System.currentTimeMillis();
            System.out.println("剔除數(shù)據(jù)耗時:" + (endTime - startTime) + "ms");
        } finally {
            closeReader(indexReader, dir);
        }
        return missing;
    }

    /**
     * Adds one document per record to the writer and commits.
     *
     * <p>Each document stores {@code _id}, {@code countdate} and {@code app} as untokenized
     * strings, {@code averprice} as a long and {@code count} as an int. The numeric values are
     * read through {@link Number}, so any numeric type is accepted; fractional values are
     * truncated.
     *
     * @param results     records to index; every record must contain all five keys
     * @param indexWriter open writer that receives the documents; it is left open
     * @throws IOException if a document cannot be added or the commit fails
     */
    private void addFields(List<DBObject> results, IndexWriter indexWriter) throws IOException {
        long startTime = System.currentTimeMillis();
        for (DBObject record : results) {
            Document doc = new Document();
            doc.add(new StringField("_id", record.get("_id").toString(), Store.YES));
            doc.add(new StringField("countdate", record.get("countdate").toString(), Store.YES));
            doc.add(new LongField("averprice",
                    ((Number) record.get("averprice")).longValue(), Store.YES));
            doc.add(new IntField("count",
                    ((Number) record.get("count")).intValue(), Store.YES));
            doc.add(new StringField("app", record.get("app").toString(), Store.YES));
            indexWriter.addDocument(doc);
        }
        long endTime = System.currentTimeMillis();
        System.out.println("創(chuàng)建索引耗時:" + (endTime - startTime) + "ms");
        // A freshly created index must be committed before it can be opened for reading
        // (otherwise opening fails with a "no segments*" error).
        indexWriter.commit();
    }

    /**
     * Checks whether a document with the given {@code _id} exists in the index.
     *
     * <p>{@code _id} is indexed as an untokenized {@link StringField}, so an exact
     * {@link TermQuery} finds it without reading every stored document.
     *
     * @param id       value of the {@code _id} field to look for
     * @param searcher searcher over the index to check
     * @return {@code true} if at least one matching document exists
     * @throws IOException if the index cannot be searched
     */
    private boolean isInIndex(String id, IndexSearcher searcher) throws IOException {
        TopDocs hits = searcher.search(new TermQuery(new Term("_id", id)), 1);
        return hits.totalHits > 0;
    }

    /**
     * Runs the sample query {@code app:test1} against the index at {@code path} and prints the
     * index size and the number of hits (at most 10000). Does nothing when the directory is
     * missing or holds fewer than two files. Failures are printed, not rethrown.
     *
     * @param path file-system path of the index directory
     */
    public void search(String path) {
        Directory dir = null;
        IndexReader indexReader2 = null;
        try {
            File file = new File(path);
            if (countFiles(file) < 2) {
                return;
            }
            dir = FSDirectory.open(file);
            indexReader2 = DirectoryReader.open(dir);
            IndexSearcher searcher = new IndexSearcher(indexReader2);
            TermQuery query = new TermQuery(new Term("app", "test1"));
            TopDocs hits = searcher.search(query, 10000);
            System.out.println("total " + indexReader2.maxDoc() + " datas!");
            System.out.println("find " + hits.scoreDocs.length + " results!");
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            try {
                closeReader(indexReader2, dir);
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Loads the records to index.
     *
     * <p>{@code Getdatas} is declared outside this file, so whether {@code getDatas()} declares
     * checked exceptions is not visible here; any checked exception is wrapped in an
     * {@link IOException}. NOTE(review): assumes {@code getDatas()} returns
     * {@code List<DBObject>} - confirm against its declaration.
     *
     * @return the records, or an empty list when the source returns {@code null}
     * @throws IOException if loading fails with a checked exception
     */
    private List<DBObject> fetchRecords() throws IOException {
        try {
            List<DBObject> records = new Getdatas().getDatas();
            if (records == null) {
                return new ArrayList<DBObject>();
            }
            return records;
        } catch (RuntimeException e) {
            throw e;
        } catch (Exception e) {
            throw new IOException("Failed to load records to index", e);
        }
    }

    /**
     * Counts the entries directly inside {@code dir}.
     *
     * @param dir directory to inspect
     * @return the number of entries, or 0 when {@code dir} is missing or not a directory
     */
    private static int countFiles(File dir) {
        File[] entries = dir.listFiles();
        return entries == null ? 0 : entries.length;
    }

    /**
     * Closes a writer and then the directory it writes to. Closing the writer does not close
     * its directory.
     *
     * @param writer writer to close; may be {@code null}
     * @throws IOException if closing fails
     */
    private static void closeWriter(IndexWriter writer) throws IOException {
        if (writer == null) {
            return;
        }
        Directory directory = writer.getDirectory();
        try {
            writer.close();
        } finally {
            directory.close();
        }
    }

    /**
     * Closes a reader and then its directory; either argument may be {@code null}.
     *
     * @param reader    reader to close
     * @param directory directory the reader was opened on
     * @throws IOException if closing fails
     */
    private static void closeReader(IndexReader reader, Directory directory) throws IOException {
        try {
            if (reader != null) {
                reader.close();
            }
        } finally {
            if (directory != null) {
                directory.close();
            }
        }
    }
}