lucene 3.4 contrib/facet 切面搜索

来源:互联网

Solr 有 facet search，Bobo 也有；Lucene 从 3.4 开始也有了。这个功能以 contrib（贡献）模块的形式提供，包含在 Apache 官方发布的包里，对分组统计和类别统计是一个很好的帮手。

有了这个就不用羡慕 Solr 了。不是我抗拒 Solr，只是在公司给我们留有开发时间的情况下，我更偏向于使用更底层的 API 进行开发，用 Lucene 更得心应手。

再说现在的 Solr 还没有近实时搜索，听说要到 4.0 之后才有。

废话不说，直接上代码

/**
 * Demo indexer for the Lucene contrib/facet module.
 *
 * <p>Writes the sample documents into a regular search index and records the
 * category of each document in a separate taxonomy index.
 */
public class Indexer {
    /** Titles of the sample documents; entry i belongs to document i. */
    public static String[] docTitles = {
        "white car",
        "white dog",
    };

    /** Text bodies of the sample documents; entry i belongs to document i. */
    public static String[] docTexts = {
        "the white car is the one I want.",
        "the white dog does not belong to anyone.",
    };

    /** Category (facet) of each sample document; entry i belongs to document i. */
    public static CategoryPath[] categories = {
        new CategoryPath("root","a","f1"), new CategoryPath("root","a","f2")
    };

    /**
     * Indexes every sample document together with its category.
     *
     * @param indexDir directory that receives the search index
     * @param taxoDir  directory that receives the taxonomy index
     * @throws Exception if writing to either index fails
     */
    public static void index (Directory indexDir, Directory taxoDir) throws Exception {
        int nDocsAdded = 0;
        int nFacetsAdded = 0;

        // Plain index writer for the searchable documents.
        IndexWriter iw = new IndexWriter(indexDir, new IndexWriterConfig(ExampleUtils.EXAMPLE_VER, SimpleUtils.analyzer));
        try {
            // Taxonomy writer that stores the category hierarchy.
            TaxonomyWriter taxo = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);
            try {
                for (int docNum = 0; docNum < docTexts.length; docNum++) {
                    // Facets of the current document: each document gets its own
                    // category, not always the first one.
                    List<CategoryPath> facetList = SimpleUtils.categoryPathArrayToList(categories[docNum]);
                    CategoryDocumentBuilder categoryDocBuilder = new CategoryDocumentBuilder(taxo).setCategoryPaths(facetList);

                    // Create the document with its regular fields.
                    Document doc = new Document();
                    doc.add(new Field(SimpleUtils.TITLE, docTitles[docNum], Store.YES, Index.ANALYZED));
                    doc.add(new Field(SimpleUtils.TEXT, docTexts[docNum], Store.NO, Index.ANALYZED));

                    // Add the facet indexing information to the document.
                    categoryDocBuilder.build(doc);

                    // Finally write the document to the index.
                    iw.addDocument(doc);
                    nDocsAdded++;
                    nFacetsAdded += facetList.size();
                }

                // Commit the taxonomy index before the search index, so that all
                // facets referred to by documents in the search index will indeed
                // exist in the taxonomy index.
                taxo.commit();
                iw.commit();
            } finally {
                // Closed before the index writer, and also when indexing failed.
                taxo.close();
            }
        } finally {
            iw.close();
        }

        System.out.println("Indexed "+nDocsAdded+" documents with overall "+nFacetsAdded+" facets.");
    }

    /**
     * Builds the demo index and taxonomy under fixed local paths.
     *
     * @param args ignored
     */
    public static void main(String[] args){
        String indexp = "D:/work/data/index/facet_index/n_index";
        String indexp_t = "D:/work/data/index/facet_index/t_index";
        try {
            Directory directory = FSDirectory.open(new File(indexp));
            try {
                Directory directory_t = FSDirectory.open(new File(indexp_t));
                try {
                    // index(...) is static, so no Indexer instance is needed.
                    index(directory, directory_t);
                } finally {
                    directory_t.close();
                }
            } finally {
                directory.close();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

/**
 * Demo searcher for the Lucene contrib/facet module.
 *
 * <p>Runs a term query against the search index and prints the facet counts
 * collected below the category path {@code root/a}.
 */
public class Searcher {
    /**
     * Searches the text field for "white", accumulates facet counts for the
     * matching documents and prints each facet result to standard output.
     *
     * @param indexDir directory holding the search index
     * @param taxoDir  directory holding the taxonomy index
     * @throws CorruptIndexException if the search index is corrupt
     * @throws IOException if reading either index fails
     */
    public void query(Directory indexDir, Directory taxoDir) throws CorruptIndexException, IOException
    {
        Query q = new TermQuery(new Term(SimpleUtils.TEXT, "white"));
        IndexReader indexReader = IndexReader.open(indexDir, true);
        try {
            TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
            try {
                IndexSearcher searcher = new IndexSearcher(indexReader);
                TopScoreDocCollector topDocsCollector = TopScoreDocCollector.create(10, true);

                // Request up to 10 counted results below root/a.
                FacetIndexingParams indexingParams = new DefaultFacetIndexingParams();
                FacetSearchParams facetSearchParams = new FacetSearchParams(indexingParams);
                facetSearchParams.addFacetRequest(new CountFacetRequest(new CategoryPath("root","a"), 10));
                FacetsCollector facetsCollector = new FacetsCollector(facetSearchParams, indexReader, taxoReader);

                // Perform the document search and the facet accumulation in one pass.
                searcher.search(q, MultiCollector.wrap(topDocsCollector, facetsCollector));

                // Obtain the facet results and print them.
                List<FacetResult> res = facetsCollector.getFacetResults();
                System.out.println(res.size());
                int i = 0;
                for (FacetResult facetResult : res) {
                    System.out.println("Res " + (i++) + ": " + facetResult);
                    System.out.println("-------------------------------------");
                    System.out.println( facetResult.getFacetResultNode().getNumSubResults());
                    System.out.println( facetResult.getFacetResultNode().getOrdinal());
                    System.out.println( facetResult.getFacetResultNode().getValue());
                    System.out.println( facetResult.getFacetResultNode().getResidue());
                }
            } finally {
                // Release the taxonomy reader even when the search fails.
                taxoReader.close();
            }
        } finally {
            // Release the index reader even when the search fails.
            indexReader.close();
        }
    }

    /**
     * Runs the demo query against the index and taxonomy under fixed local paths.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String indexp = "D:/work/data/index/facet_index/n_index";
        String indexp_t = "D:/work/data/index/facet_index/t_index";
        try {
            Directory directory = FSDirectory.open(new File(indexp));
            try {
                Directory directory_t = FSDirectory.open(new File(indexp_t));
                try {
                    new Searcher().query(directory, directory_t);
                } finally {
                    directory_t.close();
                }
            } finally {
                directory.close();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}