added a simple query language (with a JavaCC-compiled parser) and the
appropriate query methods in IndexService
This commit is contained in:
parent
60f24d8412
commit
7da8104954
2
INSTALL
2
INSTALL
|
@ -24,12 +24,14 @@ Venice.
|
||||||
PACKAGES REQUIRED FOR VENICE
|
PACKAGES REQUIRED FOR VENICE
|
||||||
----------------------------
|
----------------------------
|
||||||
The following packages must be referenced from within build.properties:
|
The following packages must be referenced from within build.properties:
|
||||||
|
- JavaCC 3.0 (parser generator tool)
|
||||||
- Java Servlet API 2.3 (use the servlet.jar file from Tomcat)
|
- Java Servlet API 2.3 (use the servlet.jar file from Tomcat)
|
||||||
- Jakarta Bean Scripting Framework 2.3
|
- Jakarta Bean Scripting Framework 2.3
|
||||||
- Jakarta Commons Collections Library, 2.1
|
- Jakarta Commons Collections Library, 2.1
|
||||||
- Jakarta Commons Lang Library, 1.0.1
|
- Jakarta Commons Lang Library, 1.0.1
|
||||||
- Mozilla.org Rhino, 1.5R3
|
- Mozilla.org Rhino, 1.5R3
|
||||||
- Jakarta Log4J, 1.2.7
|
- Jakarta Log4J, 1.2.7
|
||||||
|
- Jakarta Lucene, 1.3RC1
|
||||||
- Jakarta Velocity, 1.3.1
|
- Jakarta Velocity, 1.3.1
|
||||||
|
|
||||||
Optionally:
|
Optionally:
|
||||||
|
|
|
@ -24,6 +24,11 @@
|
||||||
# [Logging directory]
|
# [Logging directory]
|
||||||
# logfile.dir=${user.home}
|
# logfile.dir=${user.home}
|
||||||
|
|
||||||
|
# [Location of JavaCC 3.0]
|
||||||
|
javacc.base=/usr/local/java/javacc-3.0
|
||||||
|
# javacc.lib=${javacc.base}/bin/lib
|
||||||
|
# javacc.jarfile=javacc.jar
|
||||||
|
|
||||||
# [Location of Servlet API 2.3]
|
# [Location of Servlet API 2.3]
|
||||||
servlet.base=/usr/local/jakarta/jakarta-tomcat-4.1.24-LE-jdk14
|
servlet.base=/usr/local/jakarta/jakarta-tomcat-4.1.24-LE-jdk14
|
||||||
servlet.lib=${servlet.base}/common/lib
|
servlet.lib=${servlet.base}/common/lib
|
||||||
|
|
22
build.xml
22
build.xml
|
@ -37,6 +37,11 @@
|
||||||
<!-- [Logging directory] -->
|
<!-- [Logging directory] -->
|
||||||
<property name="logfile.dir" value="${user.home}"/>
|
<property name="logfile.dir" value="${user.home}"/>
|
||||||
|
|
||||||
|
<!-- [Location of JavaCC 3.0] -->
|
||||||
|
<property name="javacc.base" value="../javacc"/>
|
||||||
|
<property name="javacc.lib" value="${javacc.base}/bin/lib"/>
|
||||||
|
<property name="javacc.jarfile" value="javacc.jar"/>
|
||||||
|
|
||||||
<!-- [Location of Servlet API 2.3] -->
|
<!-- [Location of Servlet API 2.3] -->
|
||||||
<property name="servlet.base" value="../servletapi"/>
|
<property name="servlet.base" value="../servletapi"/>
|
||||||
<property name="servlet.lib" value="${servlet.base}/lib"/>
|
<property name="servlet.lib" value="${servlet.base}/lib"/>
|
||||||
|
@ -140,9 +145,22 @@
|
||||||
"build-dynamo" - Builds the Dynamo framework classes (com.silverwrist.dynamo.*).
|
"build-dynamo" - Builds the Dynamo framework classes (com.silverwrist.dynamo.*).
|
||||||
============================================================================ -->
|
============================================================================ -->
|
||||||
<target name="build-dynamo" depends="jar-baseutil">
|
<target name="build-dynamo" depends="jar-baseutil">
|
||||||
|
<mkdir dir="workingarea/src-dynamo-framework/com/silverwrist/dynamo/index"/>
|
||||||
|
<!-- This rule doesn't work with JavaCC 3.0
|
||||||
|
<javacc target="src/dynamo-framework/com/silverwrist/dynamo/index/Parser.jj"
|
||||||
|
javacchome="/usr/local/java/javacc/bin/lib"
|
||||||
|
outputdirectory="workingarea/src-dynamo-framework/com/silverwrist/dynamo/index"/>
|
||||||
|
-->
|
||||||
|
<java fork="true" classname="javacc">
|
||||||
|
<arg value="-output_directory:workingarea/src-dynamo-framework/com/silverwrist/dynamo/index"/>
|
||||||
|
<arg value="src/dynamo-framework/com/silverwrist/dynamo/index/Parser.jj"/>
|
||||||
|
<classpath>
|
||||||
|
<pathelement location="${javacc.lib}/${javacc.jarfile}"/>
|
||||||
|
</classpath>
|
||||||
|
</java>
|
||||||
<mkdir dir="workingarea/dynamo-framework"/>
|
<mkdir dir="workingarea/dynamo-framework"/>
|
||||||
<javac srcdir="src/dynamo-framework" destdir="workingarea/dynamo-framework" source="1.4"
|
<javac srcdir="src/dynamo-framework:workingarea/src-dynamo-framework" destdir="workingarea/dynamo-framework"
|
||||||
debug="${compile.debug}" optimize="${compile.optimize}" deprecation="${compile.deprecation}">
|
source="1.4" debug="${compile.debug}" optimize="${compile.optimize}" deprecation="off">
|
||||||
<classpath>
|
<classpath>
|
||||||
<filelist dir="jars" files="baseutil.jar"/>
|
<filelist dir="jars" files="baseutil.jar"/>
|
||||||
<path refid="base.build.path"/>
|
<path refid="base.build.path"/>
|
||||||
|
|
|
@ -17,6 +17,8 @@
|
||||||
*/
|
*/
|
||||||
package com.silverwrist.dynamo.iface;
|
package com.silverwrist.dynamo.iface;
|
||||||
|
|
||||||
|
import java.util.Date;
|
||||||
|
import java.util.List;
|
||||||
import com.silverwrist.dynamo.except.IndexException;
|
import com.silverwrist.dynamo.except.IndexException;
|
||||||
|
|
||||||
public interface IndexService
|
public interface IndexService
|
||||||
|
@ -26,4 +28,10 @@ public interface IndexService
|
||||||
|
|
||||||
public boolean deleteItem(String item_namespace, String item_name, Object item) throws IndexException;
|
public boolean deleteItem(String item_namespace, String item_name, Object item) throws IndexException;
|
||||||
|
|
||||||
|
/**
 * Runs a search against the index and returns one window of the matching items.
 * Any of the matching criteria may be <code>null</code>, in which case that criterion is not applied.
 *
 * @param query_string Query expressed in the index query language (parsed by the JavaCC-generated
 *                     <code>Parser</code>), or <code>null</code>.
 * @param date_low Lower bound of the date range to match, or <code>null</code> for no lower bound.
 * @param date_high Upper bound of the date range to match, or <code>null</code> for no upper bound.
 * @param match_owner User whose items are to be matched, or <code>null</code>.
 * @param match_scope Scope value to match, or <code>null</code>.  NOTE(review): the implementation in
 *                    this package treats '?' and '*' in this value as wildcards - confirm that this is
 *                    part of the interface contract.
 * @param offset Number of leading hits to skip.
 * @param count Maximum number of hits to return.
 * @return A <code>List</code> of results; the implementation in this package returns
 *         <code>ItemAndScore</code> objects.
 * @exception com.silverwrist.dynamo.except.IndexException If the query could not be parsed or executed.
 */
public List query(String query_string, java.util.Date date_low, java.util.Date date_high, DynamoUser match_owner,
|
||||||
|
String match_scope, int offset, int count) throws IndexException;
|
||||||
|
|
||||||
|
/**
 * Returns the number of items in the index that match the given criteria.  The criteria have the same
 * meaning as the like-named parameters of <code>query()</code>; any of them may be <code>null</code>,
 * in which case that criterion is not applied.
 *
 * @param query_string Query expressed in the index query language, or <code>null</code>.
 * @param date_low Lower bound of the date range to match, or <code>null</code> for no lower bound.
 * @param date_high Upper bound of the date range to match, or <code>null</code> for no upper bound.
 * @param match_owner User whose items are to be matched, or <code>null</code>.
 * @param match_scope Scope value to match, or <code>null</code>.
 * @return The number of matching items.
 * @exception com.silverwrist.dynamo.except.IndexException If the query could not be parsed or executed.
 */
public int queryCount(String query_string, java.util.Date date_low, java.util.Date date_high, DynamoUser match_owner,
|
||||||
|
String match_scope) throws IndexException;
|
||||||
|
|
||||||
} // end interface IndexService
|
} // end interface IndexService
|
||||||
|
|
|
@ -22,3 +22,5 @@ analyzer.noCreate=Unable to create an instance of the analyzer class {0}.
|
||||||
analyzer.badType=The specified analyzer class {0} is of the wrong type.
|
analyzer.badType=The specified analyzer class {0} is of the wrong type.
|
||||||
addItem.fail=Unable to add a new item (namespace {0}, name {1}) to index {2}.
|
addItem.fail=Unable to add a new item (namespace {0}, name {1}) to index {2}.
|
||||||
deleteItem.fail=Unable to remove an item (namespace {0}, name {1}) from index {2}.
|
deleteItem.fail=Unable to remove an item (namespace {0}, name {1}) from index {2}.
|
||||||
|
query.syntax=Parse error in query string: {0}
|
||||||
|
query.fail=Unable to execute search query.
|
||||||
|
|
|
@ -19,16 +19,156 @@ package com.silverwrist.dynamo.index;
|
||||||
|
|
||||||
import java.io.*;
|
import java.io.*;
|
||||||
import java.lang.ref.*;
|
import java.lang.ref.*;
|
||||||
|
import java.util.*;
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.document.*;
|
import org.apache.lucene.document.*;
|
||||||
import org.apache.lucene.index.*;
|
import org.apache.lucene.index.*;
|
||||||
|
import org.apache.lucene.search.*;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
|
import com.silverwrist.util.*;
|
||||||
import com.silverwrist.dynamo.except.*;
|
import com.silverwrist.dynamo.except.*;
|
||||||
import com.silverwrist.dynamo.iface.*;
|
import com.silverwrist.dynamo.iface.*;
|
||||||
import com.silverwrist.dynamo.util.*;
|
import com.silverwrist.dynamo.util.*;
|
||||||
|
|
||||||
class IndexServiceImpl implements IndexService
|
class IndexServiceImpl implements IndexService
|
||||||
{
|
{
|
||||||
|
/*--------------------------------------------------------------------------------
|
||||||
|
* Internal counting HitCollector
|
||||||
|
*--------------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
private static class CountingCollector extends HitCollector
|
||||||
|
{
|
||||||
|
/*====================================================================
|
||||||
|
* Attributes
|
||||||
|
*====================================================================
|
||||||
|
*/
|
||||||
|
|
||||||
|
private int m_count = 0;
|
||||||
|
|
||||||
|
/*====================================================================
|
||||||
|
* Constructor
|
||||||
|
*====================================================================
|
||||||
|
*/
|
||||||
|
|
||||||
|
CountingCollector()
|
||||||
|
{ // do nothing
|
||||||
|
} // end constructor
|
||||||
|
|
||||||
|
/*====================================================================
|
||||||
|
* Abstract implementations from class HitCollector
|
||||||
|
*====================================================================
|
||||||
|
*/
|
||||||
|
|
||||||
|
public void collect(int doc, float score)
|
||||||
|
{
|
||||||
|
m_count++;
|
||||||
|
|
||||||
|
} // end collect
|
||||||
|
|
||||||
|
/*====================================================================
|
||||||
|
* External operations
|
||||||
|
*====================================================================
|
||||||
|
*/
|
||||||
|
|
||||||
|
int getCount()
|
||||||
|
{
|
||||||
|
return m_count;
|
||||||
|
|
||||||
|
} // end getCount
|
||||||
|
|
||||||
|
} // end class CountingCollector
|
||||||
|
|
||||||
|
/*--------------------------------------------------------------------------------
|
||||||
|
* Internal HitCollector that gathers a request subset
|
||||||
|
*--------------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
private class SubsetCollector extends HitCollector
|
||||||
|
{
|
||||||
|
/*====================================================================
|
||||||
|
* Attributes
|
||||||
|
*====================================================================
|
||||||
|
*/
|
||||||
|
|
||||||
|
private int[] m_docs;
|
||||||
|
private float[] m_scores;
|
||||||
|
private int m_offset;
|
||||||
|
private int m_size = 0;
|
||||||
|
|
||||||
|
/*====================================================================
|
||||||
|
* Constructor
|
||||||
|
*====================================================================
|
||||||
|
*/
|
||||||
|
|
||||||
|
SubsetCollector(int offset, int count)
|
||||||
|
{
|
||||||
|
m_docs = new int[count];
|
||||||
|
m_scores = new float[count];
|
||||||
|
m_offset = offset;
|
||||||
|
|
||||||
|
} // end constructor
|
||||||
|
|
||||||
|
/*====================================================================
|
||||||
|
* Abstract implementations from class HitCollector
|
||||||
|
*====================================================================
|
||||||
|
*/
|
||||||
|
|
||||||
|
public void collect(int doc, float score)
|
||||||
|
{
|
||||||
|
if (m_offset>0)
|
||||||
|
{ // skip documents at beginning of list
|
||||||
|
m_offset--;
|
||||||
|
return;
|
||||||
|
|
||||||
|
} // end if
|
||||||
|
|
||||||
|
if (m_size<m_docs.length)
|
||||||
|
{ // add document index and score to the list
|
||||||
|
m_docs[m_size] = doc;
|
||||||
|
m_scores[m_size++] = score;
|
||||||
|
|
||||||
|
} // end if
|
||||||
|
|
||||||
|
} // end collect
|
||||||
|
|
||||||
|
/*====================================================================
|
||||||
|
* External operations
|
||||||
|
*====================================================================
|
||||||
|
*/
|
||||||
|
|
||||||
|
public List outputItems(IndexReader irdr) throws IOException, IndexException
|
||||||
|
{
|
||||||
|
if (m_size==0)
|
||||||
|
return Collections.EMPTY_LIST;
|
||||||
|
ArrayList rc = new ArrayList(m_size);
|
||||||
|
for (int i=0; i<m_size; i++)
|
||||||
|
{ // get the document and retrieve its "id" field, then use that to get the object
|
||||||
|
Document doc = irdr.document(m_docs[i]);
|
||||||
|
Field id_field = doc.getField("id");
|
||||||
|
String fulltag = id_field.stringValue();
|
||||||
|
if (fulltag==null)
|
||||||
|
fulltag = IOUtils.load(id_field.readerValue()).toString();
|
||||||
|
String[] elts = StringUtils.split1(fulltag,'|',3);
|
||||||
|
Object value = m_base.resolveObjectReference(elts[0],elts[1],elts[2]);
|
||||||
|
rc.add(new ItemAndScore(value,m_scores[i]));
|
||||||
|
|
||||||
|
} // end for
|
||||||
|
|
||||||
|
return Collections.unmodifiableList(rc);
|
||||||
|
|
||||||
|
} // end outputItems
|
||||||
|
|
||||||
|
} // end class SubsetCollector
|
||||||
|
|
||||||
|
/*--------------------------------------------------------------------------------
|
||||||
|
* Static data members
|
||||||
|
*--------------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
private static Logger logger = Logger.getLogger(IndexServiceImpl.class);
|
||||||
|
|
||||||
/*--------------------------------------------------------------------------------
|
/*--------------------------------------------------------------------------------
|
||||||
* Attributes
|
* Attributes
|
||||||
*--------------------------------------------------------------------------------
|
*--------------------------------------------------------------------------------
|
||||||
|
@ -103,6 +243,76 @@ class IndexServiceImpl implements IndexService
|
||||||
|
|
||||||
} // end createTag
|
} // end createTag
|
||||||
|
|
||||||
|
private final Query compileQuery(String query_string, java.util.Date date_low, java.util.Date date_high,
|
||||||
|
DynamoUser match_owner, String match_scope) throws IndexException
|
||||||
|
{
|
||||||
|
ArrayList queries = new ArrayList();
|
||||||
|
if (query_string!=null)
|
||||||
|
{ // we have a query language string...
|
||||||
|
try
|
||||||
|
{ // parse the query string, which matches on the "text" field only
|
||||||
|
queries.add(Parser.parse(query_string));
|
||||||
|
|
||||||
|
} // end try
|
||||||
|
catch (ParseException pe)
|
||||||
|
{ // parse error in the query
|
||||||
|
IndexException ie = new IndexException(IndexServiceImpl.class,"IndexMessages","query.syntax",pe);
|
||||||
|
ie.setParameter(0,pe.getMessage());
|
||||||
|
throw ie;
|
||||||
|
|
||||||
|
} // end catch
|
||||||
|
|
||||||
|
} // end if
|
||||||
|
|
||||||
|
if ((date_low!=null) || (date_high!=null))
|
||||||
|
{ // add an inclusive range of dates
|
||||||
|
Term term_low = null, term_high = null;
|
||||||
|
if (date_low!=null)
|
||||||
|
term_low = new Term("date",DateField.dateToString(date_low));
|
||||||
|
if (date_high!=null)
|
||||||
|
term_high = new Term("date",DateField.dateToString(date_high));
|
||||||
|
queries.add(new RangeQuery(term_low,term_high,true));
|
||||||
|
|
||||||
|
} // end if
|
||||||
|
|
||||||
|
if (match_owner!=null)
|
||||||
|
queries.add(new TermQuery(new Term("owner",match_owner.getName())));
|
||||||
|
if (match_scope!=null)
|
||||||
|
{ // treat "scope" as a possible wildcard match and create it
|
||||||
|
if (match_scope.indexOf('?')>=0)
|
||||||
|
queries.add(new WildcardQuery(new Term("scope",match_scope)));
|
||||||
|
else if (match_scope.indexOf('*')>=0)
|
||||||
|
{ // append another query
|
||||||
|
String s = match_scope.substring(0,match_scope.length()-1);
|
||||||
|
if (s.indexOf('*')<0)
|
||||||
|
queries.add(new PrefixQuery(new Term("scope",s)));
|
||||||
|
else
|
||||||
|
queries.add(new WildcardQuery(new Term("scope",match_scope)));
|
||||||
|
|
||||||
|
} // end else if
|
||||||
|
else // match the scope directly
|
||||||
|
queries.add(new TermQuery(new Term("scope",match_scope)));
|
||||||
|
|
||||||
|
} // end if
|
||||||
|
|
||||||
|
// Boil down all the queries for me.
|
||||||
|
if (queries.size()==0)
|
||||||
|
return null;
|
||||||
|
if (queries.size()==1)
|
||||||
|
return (Query)(queries.get(0));
|
||||||
|
BooleanQuery rc = new BooleanQuery();
|
||||||
|
for (int i=0; i<queries.size(); i++)
|
||||||
|
rc.add((Query)(queries.get(i)),true,false);
|
||||||
|
return rc;
|
||||||
|
|
||||||
|
} // end compileQuery
|
||||||
|
|
||||||
|
/**
 * Placeholder for a shared "compile the criteria and run the search into a collector" helper.
 * The body is currently empty, so calling this method does nothing: the criteria are ignored and
 * <code>output</code> receives no hits.  query() and queryCount() do not call it; each of them
 * runs its search directly.
 *
 * @param query_string Query-language string, or <code>null</code> (currently unused).
 * @param date_low Lower bound of the date range, or <code>null</code> (currently unused).
 * @param date_high Upper bound of the date range, or <code>null</code> (currently unused).
 * @param match_owner Owner to match, or <code>null</code> (currently unused).
 * @param match_scope Scope to match, or <code>null</code> (currently unused).
 * @param output Collector intended to receive the hits (currently unused).
 * @exception com.silverwrist.dynamo.except.IndexException Declared, but never thrown by the empty body.
 */
private final void doQuery(String query_string, java.util.Date date_low, java.util.Date date_high,
|
||||||
|
DynamoUser match_owner, String match_scope, HitCollector output) throws IndexException
|
||||||
|
{
|
||||||
|
// TODO: not implemented yet
|
||||||
|
} // end doQuery
|
||||||
|
|
||||||
/*--------------------------------------------------------------------------------
|
/*--------------------------------------------------------------------------------
|
||||||
* Implementations from interface IndexService
|
* Implementations from interface IndexService
|
||||||
*--------------------------------------------------------------------------------
|
*--------------------------------------------------------------------------------
|
||||||
|
@ -163,4 +373,85 @@ class IndexServiceImpl implements IndexService
|
||||||
|
|
||||||
} // end deleteItem
|
} // end deleteItem
|
||||||
|
|
||||||
|
public List query(String query_string, java.util.Date date_low, java.util.Date date_high, DynamoUser match_owner,
|
||||||
|
String match_scope, int offset, int count) throws IndexException
|
||||||
|
{
|
||||||
|
Query query = compileQuery(query_string,date_low,date_high,match_owner,match_scope);
|
||||||
|
SubsetCollector subc = new SubsetCollector(offset,count);
|
||||||
|
|
||||||
|
List rc = null;
|
||||||
|
IndexReader irdr = null;
|
||||||
|
IndexSearcher srch = null;
|
||||||
|
try
|
||||||
|
{ // run that puppy!
|
||||||
|
irdr = IndexReader.open(m_directory);
|
||||||
|
srch = new IndexSearcher(irdr);
|
||||||
|
srch.search(query,subc);
|
||||||
|
rc = subc.outputItems(irdr);
|
||||||
|
|
||||||
|
} // end try
|
||||||
|
catch (IOException e)
|
||||||
|
{ // the query failed somehow - throw an error
|
||||||
|
throw new IndexException(IndexServiceImpl.class,"IndexMessages","query.fail",e);
|
||||||
|
|
||||||
|
} // end catch
|
||||||
|
finally
|
||||||
|
{ // make sure we close down OK
|
||||||
|
try
|
||||||
|
{ // close the search and index reader
|
||||||
|
if (srch!=null)
|
||||||
|
srch.close();
|
||||||
|
if (irdr!=null)
|
||||||
|
irdr.close();
|
||||||
|
|
||||||
|
} // end try
|
||||||
|
catch (IOException e)
|
||||||
|
{ // shouldn't happen
|
||||||
|
logger.warn("query(): error closing stuff",e);
|
||||||
|
|
||||||
|
} // end catch
|
||||||
|
|
||||||
|
} // end finally
|
||||||
|
|
||||||
|
return rc;
|
||||||
|
|
||||||
|
} // end query
|
||||||
|
|
||||||
|
public int queryCount(String query_string, java.util.Date date_low, java.util.Date date_high, DynamoUser match_owner,
|
||||||
|
String match_scope) throws IndexException
|
||||||
|
{
|
||||||
|
Query query = compileQuery(query_string,date_low,date_high,match_owner,match_scope);
|
||||||
|
CountingCollector cc = new CountingCollector();
|
||||||
|
IndexSearcher srch = null;
|
||||||
|
try
|
||||||
|
{ // run that puppy!
|
||||||
|
srch = new IndexSearcher(m_directory);
|
||||||
|
srch.search(query,cc);
|
||||||
|
|
||||||
|
} // end try
|
||||||
|
catch (IOException e)
|
||||||
|
{ // the query failed somehow - throw an error
|
||||||
|
throw new IndexException(IndexServiceImpl.class,"IndexMessages","query.fail",e);
|
||||||
|
|
||||||
|
} // end catch
|
||||||
|
finally
|
||||||
|
{ // make sure we close down OK
|
||||||
|
try
|
||||||
|
{ // close the search and index reader
|
||||||
|
if (srch!=null)
|
||||||
|
srch.close();
|
||||||
|
|
||||||
|
} // end try
|
||||||
|
catch (IOException e)
|
||||||
|
{ // shouldn't happen
|
||||||
|
logger.warn("queryCount(): error closing stuff",e);
|
||||||
|
|
||||||
|
} // end catch
|
||||||
|
|
||||||
|
} // end finally
|
||||||
|
|
||||||
|
return cc.getCount();
|
||||||
|
|
||||||
|
} // end queryCount
|
||||||
|
|
||||||
} // end class IndexServiceImpl
|
} // end class IndexServiceImpl
|
||||||
|
|
|
@ -0,0 +1,59 @@
|
||||||
|
/*
|
||||||
|
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
||||||
|
* (the "License"); you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at <http://www.mozilla.org/MPL/>.
|
||||||
|
*
|
||||||
|
* Software distributed under the License is distributed on an "AS IS" basis, WITHOUT
|
||||||
|
* WARRANTY OF ANY KIND, either express or implied. See the License for the specific
|
||||||
|
* language governing rights and limitations under the License.
|
||||||
|
*
|
||||||
|
* The Original Code is the Venice Web Communities System.
|
||||||
|
*
|
||||||
|
* The Initial Developer of the Original Code is Eric J. Bowersox <erbo@silcom.com>,
|
||||||
|
* for Silverwrist Design Studios. Portions created by Eric J. Bowersox are
|
||||||
|
* Copyright (C) 2003 Eric J. Bowersox/Silverwrist Design Studios. All Rights Reserved.
|
||||||
|
*
|
||||||
|
* Contributor(s):
|
||||||
|
*/
|
||||||
|
package com.silverwrist.dynamo.index;
|
||||||
|
|
||||||
|
/**
 * An immutable pairing of an object found by an index query with the score the search engine
 * assigned to it.
 */
public final class ItemAndScore
{
  /*--------------------------------------------------------------------------------
   * Attributes
   *--------------------------------------------------------------------------------
   */

  private final Object m_item;   // the object that matched (may be null)
  private final float m_score;   // the score assigned to the match

  /*--------------------------------------------------------------------------------
   * Constructor
   *--------------------------------------------------------------------------------
   */

  /**
   * Creates a new item/score pair.
   *
   * @param item The object that matched the query.
   * @param score The score assigned to the match.
   */
  public ItemAndScore(Object item, float score)
  {
    m_item = item;
    m_score = score;

  } // end constructor

  /*--------------------------------------------------------------------------------
   * External getters
   *--------------------------------------------------------------------------------
   */

  /**
   * @return The object that matched the query.
   */
  public Object getItem()
  {
    return m_item;

  } // end getItem

  /**
   * @return The score assigned to the match.
   */
  public float getScore()
  {
    return m_score;

  } // end getScore

} // end class ItemAndScore
|
350
src/dynamo-framework/com/silverwrist/dynamo/index/Parser.jj
Normal file
350
src/dynamo-framework/com/silverwrist/dynamo/index/Parser.jj
Normal file
|
@ -0,0 +1,350 @@
|
||||||
|
/*
|
||||||
|
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
||||||
|
* (the "License"); you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at <http://www.mozilla.org/MPL/>.
|
||||||
|
*
|
||||||
|
* Software distributed under the License is distributed on an "AS IS" basis, WITHOUT
|
||||||
|
* WARRANTY OF ANY KIND, either express or implied. See the License for the specific
|
||||||
|
* language governing rights and limitations under the License.
|
||||||
|
*
|
||||||
|
* The Original Code is the Venice Web Communities System.
|
||||||
|
*
|
||||||
|
* The Initial Developer of the Original Code is Eric J. Bowersox <erbo@silcom.com>,
|
||||||
|
* for Silverwrist Design Studios. Portions created by Eric J. Bowersox are
|
||||||
|
* Copyright (C) 2003 Eric J. Bowersox/Silverwrist Design Studios. All Rights Reserved.
|
||||||
|
*
|
||||||
|
* Contributor(s):
|
||||||
|
*/
|
||||||
|
|
||||||
|
options
|
||||||
|
{
|
||||||
|
STATIC = false;
|
||||||
|
JAVA_UNICODE_ESCAPE = true;
|
||||||
|
USER_CHAR_STREAM = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
PARSER_BEGIN(Parser)
|
||||||
|
|
||||||
|
package com.silverwrist.dynamo.index;
|
||||||
|
|
||||||
|
import java.io.*;
|
||||||
|
import java.util.*;
|
||||||
|
import org.apache.lucene.index.Term;
|
||||||
|
import org.apache.lucene.analysis.*;
|
||||||
|
import org.apache.lucene.document.*;
|
||||||
|
import org.apache.lucene.search.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* N.B.: A lot of this is based on the Lucene QueryParser code, but streamlined to fit the needs of the Dynamo
|
||||||
|
* indexing system.
|
||||||
|
*/
|
||||||
|
class Parser
|
||||||
|
{
|
||||||
|
/*--------------------------------------------------------------------------------
|
||||||
|
* Attributes
|
||||||
|
*--------------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
private Analyzer m_analyzer;
|
||||||
|
|
||||||
|
/*--------------------------------------------------------------------------------
|
||||||
|
* Internal operations
|
||||||
|
*--------------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
private static final Query createWildcardQuery(String data)
|
||||||
|
{
|
||||||
|
Term t = new Term("text",data.toLowerCase());
|
||||||
|
return new WildcardQuery(t);
|
||||||
|
|
||||||
|
} // end createWildcardQuery
|
||||||
|
|
||||||
|
private static final Query createPrefixQuery(String data)
|
||||||
|
{
|
||||||
|
Term t = new Term("text",data.toLowerCase());
|
||||||
|
return new PrefixQuery(t);
|
||||||
|
|
||||||
|
} // end createPrefixQuery
|
||||||
|
|
||||||
|
private static final Query createFuzzyQuery(String data)
|
||||||
|
{
|
||||||
|
Term t = new Term("text",data);
|
||||||
|
return new FuzzyQuery(t);
|
||||||
|
|
||||||
|
} // end createFuzzyQuery
|
||||||
|
|
||||||
|
private final Query createNormalQuery(String data)
|
||||||
|
{
|
||||||
|
TokenStream tstm = m_analyzer.tokenStream("text",new StringReader(data));
|
||||||
|
ArrayList toks = new ArrayList();
|
||||||
|
org.apache.lucene.analysis.Token t = null;
|
||||||
|
|
||||||
|
for (;;)
|
||||||
|
{ // use the Lucene TokenStream to find all the tokens and eliminate stopwords
|
||||||
|
try
|
||||||
|
{ // get the next token from the input
|
||||||
|
t = tstm.next();
|
||||||
|
|
||||||
|
} // end try
|
||||||
|
catch (IOException e)
|
||||||
|
{ // whoops!
|
||||||
|
t = null;
|
||||||
|
|
||||||
|
} // end catch
|
||||||
|
|
||||||
|
if (t==null)
|
||||||
|
break; // done scanning the string
|
||||||
|
|
||||||
|
toks.add(t.termText());
|
||||||
|
|
||||||
|
} // end for (ever)
|
||||||
|
|
||||||
|
if (toks.size()==0)
|
||||||
|
return null; // no query
|
||||||
|
if (toks.size()==1) // single term query
|
||||||
|
return new TermQuery(new Term("text",(String)(toks.get(0))));
|
||||||
|
|
||||||
|
// Build a PhraseQuery and return that.
|
||||||
|
PhraseQuery rc = new PhraseQuery();
|
||||||
|
rc.setSlop(0);
|
||||||
|
for (int i=0; i<toks.size(); i++)
|
||||||
|
rc.add(new Term("text",(String)(toks.get(i))));
|
||||||
|
return rc;
|
||||||
|
|
||||||
|
} // end createNormalQuery
|
||||||
|
|
||||||
|
/*--------------------------------------------------------------------------------
|
||||||
|
* External static operations
|
||||||
|
*--------------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
public static Query parse(String s) throws ParseException
|
||||||
|
{
|
||||||
|
try
|
||||||
|
{ // quickie parse
|
||||||
|
Parser p = new Parser(new StaticCharStream(s));
|
||||||
|
return p.search();
|
||||||
|
|
||||||
|
} // end try
|
||||||
|
catch (TokenMgrError tme)
|
||||||
|
{ // translate into a ParseException
|
||||||
|
throw new ParseException(tme.getMessage());
|
||||||
|
|
||||||
|
} // end catch
|
||||||
|
|
||||||
|
} // end parse
|
||||||
|
|
||||||
|
} // end class Parser
|
||||||
|
|
||||||
|
PARSER_END(Parser)
|
||||||
|
|
||||||
|
/*--------------------------------------------------------------------------------
 * Token (lexer) definitions
 *--------------------------------------------------------------------------------
 */

// Private (#) helper expressions.  These never become tokens themselves; they are building
// blocks for the real tokens below and are visible in every lexical state.
<*> TOKEN:
{
  <#_DIGIT: ["0"-"9"]>
| <#_WHITESPACE: [" ", "\t"]>
| <#_RESERVED: ["+", "-", "(", ")", "~", "^", "\"", "*", "?"]>
| <#_ESCAPED: "\\" ( <_RESERVED> | "\\" )>                                   // backslash + reserved char or backslash
| <#_VALID: ~["+", "-", "(", ")", "~", "^", "\"", "*", "?", " ", "\t"]>      // anything not reserved or blank
| <#_TERMCHAR: ( <_VALID> | <_ESCAPED> ) >
}

// Blanks and tabs between tokens are ignored.
<DEFAULT> SKIP:
{
  < <_WHITESPACE> >
}

// The tokens of the query language proper.  The order matters: when two expressions match the
// same text, the one listed first wins (so "AND" is the operator, not a term).
<DEFAULT> TOKEN:
{
  <AND: "AND" | "&&">                                         // AND query
| <OR: "OR" | "||">                                           // OR query
| <PLUS: "+">                                                 // plus sign ("required")
| <MINUS: "-">                                                // minus sign ("prohibited")
| <LPAREN: "(">                                               // left parenthesis
| <RPAREN: ")">                                               // right parenthesis
| <FUZZY: "~">                                                // "fuzzy" operator
| <WEIGHT: "^"> : Weight                                      // "weighting" operator; a number must follow
| <QSTRING: "\"" (~["\""])+ "\"">                             // quoted string (non-empty)
| <TERM: (<_TERMCHAR>)+ >                                     // simple term
| <PREFIXTERM: (<_TERMCHAR>)+ "*">                            // prefix term
| <WILDTERM: <_TERMCHAR> ( <_TERMCHAR> | "?" | "*" )* >       // term with wildcards
}

// After a "^", the only thing recognized is the weight value; then it's back to normal.
<Weight> TOKEN:
{
  <WEIGHTVAL: (<_DIGIT>)+ ( "." (<_DIGIT>)+ )? > : DEFAULT
}
|
||||||
|
|
||||||
|
/*--------------------------------------------------------------------------------
|
||||||
|
* BNF (parser) definitions
|
||||||
|
*--------------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
 * Top-level rule: an entire query string.  An empty string is legal and yields no query.
 *
 * @return The query for the whole string, or null if there is none.
 */
Query search():
{
  Query result = null;

} // end search declarations
{
  [ result=search_expression() ] <EOF>
  { return result; }

} // end search
|
||||||
|
|
||||||
|
/**
 * A sequence of OR-expressions, optionally separated by AND; every one of them that is not
 * prohibited must match.
 *
 * @return null if no sub-expression produced a query; the bare query of a lone, non-prohibited
 *         sub-expression; otherwise a BooleanQuery over all the sub-expressions.
 */
Query search_expression():
{
  ArrayList parts = new ArrayList();
  BooleanClause part = null;

} // end search_expression declarations
{
  part=or_expression() { if (part!=null) parts.add(part); }
  ( [ <AND> ] part=or_expression() { if (part!=null) parts.add(part); } )*
  {
    if (parts.isEmpty())
      return null;

    if (parts.size()==1)
    { // a lone clause needs no wrapper unless it is prohibited
      BooleanClause only = (BooleanClause)(parts.get(0));
      if (!(only.prohibited))
        return only.query;

    } // end if

    // Combine the clauses; every clause that isn't prohibited becomes required.
    BooleanQuery conj = new BooleanQuery();
    for (Iterator it=parts.iterator(); it.hasNext(); )
    { // add each clause in turn
      BooleanClause bc = (BooleanClause)(it.next());
      if (!(bc.prohibited))
        bc.required = true;
      conj.add(bc);

    } // end for

    return conj;

  } // end block

} // end search_expression rule
|
||||||
|
|
||||||
|
/**
 * One or more boolean expressions separated by OR; any one of them may match.
 *
 * @return null if no alternative produced a query; the lone alternative's own clause if there
 *         is only one; otherwise a new optional clause wrapping a BooleanQuery of the alternatives.
 */
BooleanClause or_expression():
{
  ArrayList alts = new ArrayList();
  BooleanClause alt = null;

} // end or_expression declarations
{
  alt=boolean_expression() { if (alt!=null) alts.add(alt); }
  ( <OR> alt=boolean_expression() { if (alt!=null) alts.add(alt); } )*
  {
    switch (alts.size())
    {
      case 0:
        return null;

      case 1:
        return (BooleanClause)(alts.get(0));

      default:
        break;

    } // end switch

    // Combine the alternatives; every alternative that isn't prohibited becomes optional.
    BooleanQuery disj = new BooleanQuery();
    for (Iterator it=alts.iterator(); it.hasNext(); )
    { // add each alternative in turn
      BooleanClause bc = (BooleanClause)(it.next());
      if (!(bc.prohibited))
        bc.required = false;
      disj.add(bc);

    } // end for

    return new BooleanClause(disj,false,false);

  } // end block

} // end or_expression
|
||||||
|
|
||||||
|
/**
 * A simple expression with an optional "+" (required) or "-" (prohibited) marker in front.
 *
 * @return A clause carrying the expression's query and the marker, or null if the expression
 *         produced no query.
 */
BooleanClause boolean_expression():
{
  Query inner = null;
  boolean is_required = false;
  boolean is_prohibited = false;

} // end boolean_expression declarations
{
  (
    <PLUS> { is_required = true; }
  | <MINUS> { is_prohibited = true; }
  )?
  inner=simple_expression()
  {
    if (inner==null)
      return null;
    return new BooleanClause(inner,is_required,is_prohibited);

  } // end block

} // end boolean_expression
|
||||||
|
|
||||||
|
/**
 * The basic unit of a query: a term (plain, prefix, wildcard or fuzzy), a quoted string, or a
 * parenthesized sub-query, each optionally followed by "^" and a weight.
 *
 * @return The query for this unit with the weight (if any) applied as its boost, or null if the
 *         unit produced no query.
 */
Query simple_expression():
{
  Query result = null;
  Token boost = null;      // the weight value, if one was given
  Token text = null;       // the term or quoted string
  boolean prefix = false;
  boolean wildcard = false;
  boolean fuzzy = false;

} // end simple_expression declarations
{
  (
    (
      text=<TERM>
    | text=<PREFIXTERM> { prefix = true; }
    | text=<WILDTERM> { wildcard = true; }
    | text=<WEIGHTVAL>
    )
    [ <FUZZY> { fuzzy = true; } ] [ <WEIGHT> boost=<WEIGHTVAL> [ <FUZZY> { fuzzy = true; } ] ]
    { // wildcard takes precedence over prefix, which takes precedence over fuzzy
      String image = text.image;
      if (wildcard)
        result = createWildcardQuery(image);
      else if (prefix)  // drop the trailing '*'
        result = createPrefixQuery(image.substring(0,image.length()-1));
      else if (fuzzy)
        result = createFuzzyQuery(image);
      else
        result = createNormalQuery(image);

    } // end block

  | text=<QSTRING> [ <WEIGHT> boost=<WEIGHTVAL> ]
    { // strip the surrounding quotes and treat the contents as ordinary text
      String quoted = text.image;
      result = createNormalQuery(quoted.substring(1,quoted.length()-1));

    } // end block

  | <LPAREN> result=search_expression() <RPAREN> [ <WEIGHT> boost=<WEIGHTVAL> ]
  )
  {
    if ((boost!=null) && (result!=null))
    { // apply the weight; a value that won't parse counts as the neutral weight
      float factor;
      try
      { // parse the float value
        factor = Float.parseFloat(boost.image);

      } // end try
      catch (Exception e)
      { // ignore exceptions
        factor = 1.0F;

      } // end catch

      result.setBoost(factor);

    } // end if

    return result;

  } // end block

} // end simple_expression
|
|
@ -0,0 +1,189 @@
|
||||||
|
/*
|
||||||
|
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
||||||
|
* (the "License"); you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at <http://www.mozilla.org/MPL/>.
|
||||||
|
*
|
||||||
|
* Software distributed under the License is distributed on an "AS IS" basis, WITHOUT
|
||||||
|
* WARRANTY OF ANY KIND, either express or implied. See the License for the specific
|
||||||
|
* language governing rights and limitations under the License.
|
||||||
|
*
|
||||||
|
* The Original Code is the Venice Web Communities System.
|
||||||
|
*
|
||||||
|
* The Initial Developer of the Original Code is Eric J. Bowersox <erbo@silcom.com>,
|
||||||
|
* for Silverwrist Design Studios. Portions created by Eric J. Bowersox are
|
||||||
|
* Copyright (C) 2003 Eric J. Bowersox/Silverwrist Design Studios. All Rights Reserved.
|
||||||
|
*
|
||||||
|
* Contributor(s):
|
||||||
|
*/
|
||||||
|
package com.silverwrist.dynamo.index;
|
||||||
|
|
||||||
|
class StaticCharStream implements CharStream
|
||||||
|
{
|
||||||
|
/*--------------------------------------------------------------------------------
|
||||||
|
* Attributes
|
||||||
|
*--------------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
private char[] m_array; // array full of characters to be read
|
||||||
|
int m_pos = 0; // index of next character to be read
|
||||||
|
int m_tokenstart = 0; // index of start of token
|
||||||
|
|
||||||
|
/*--------------------------------------------------------------------------------
|
||||||
|
* Constructor
|
||||||
|
*--------------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
StaticCharStream(String s)
|
||||||
|
{
|
||||||
|
m_array = s.toCharArray();
|
||||||
|
|
||||||
|
} // end constructor
|
||||||
|
|
||||||
|
/*--------------------------------------------------------------------------------
|
||||||
|
* Implementations from interface CharStream
|
||||||
|
*--------------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the next character from the selected input. The method
|
||||||
|
* of selecting the input is the responsibility of the class
|
||||||
|
* implementing this interface. Can throw any java.io.IOException.
|
||||||
|
*/
|
||||||
|
public char readChar() throws java.io.IOException
|
||||||
|
{
|
||||||
|
if (m_pos==m_array.length)
|
||||||
|
throw new java.io.IOException("read past EOF");
|
||||||
|
return m_array[m_pos++];
|
||||||
|
|
||||||
|
} // end readChar
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the column position of the character last read.
|
||||||
|
* @deprecated
|
||||||
|
* @see #getEndColumn
|
||||||
|
*/
|
||||||
|
public int getColumn()
|
||||||
|
{
|
||||||
|
return m_pos;
|
||||||
|
|
||||||
|
} // end getColumn
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the line number of the character last read.
|
||||||
|
* @deprecated
|
||||||
|
* @see #getEndLine
|
||||||
|
*/
|
||||||
|
public int getLine()
|
||||||
|
{
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
} // end getLine
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the column number of the last character for current token (being
|
||||||
|
* matched after the last call to BeginTOken).
|
||||||
|
*/
|
||||||
|
public int getEndColumn()
|
||||||
|
{
|
||||||
|
return m_pos;
|
||||||
|
|
||||||
|
} // end getEndColumn
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the line number of the last character for current token (being
|
||||||
|
* matched after the last call to BeginTOken).
|
||||||
|
*/
|
||||||
|
public int getEndLine()
|
||||||
|
{
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
} // end getLine
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the column number of the first character for current token (being
|
||||||
|
* matched after the last call to BeginTOken).
|
||||||
|
*/
|
||||||
|
public int getBeginColumn()
|
||||||
|
{
|
||||||
|
return m_tokenstart;
|
||||||
|
|
||||||
|
} // end getBeginColumn
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the line number of the first character for current token (being
|
||||||
|
* matched after the last call to BeginTOken).
|
||||||
|
*/
|
||||||
|
public int getBeginLine()
|
||||||
|
{
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
} // end getBeginLine
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Backs up the input stream by amount steps. Lexer calls this method if it
|
||||||
|
* had already read some characters, but could not use them to match a
|
||||||
|
* (longer) token. So, they will be used again as the prefix of the next
|
||||||
|
* token and it is the implemetation's responsibility to do this right.
|
||||||
|
*/
|
||||||
|
public void backup(int amount)
|
||||||
|
{
|
||||||
|
m_pos -= amount;
|
||||||
|
|
||||||
|
} // end backup
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the next character that marks the beginning of the next token.
|
||||||
|
* All characters must remain in the buffer between two successive calls
|
||||||
|
* to this method to implement backup correctly.
|
||||||
|
*/
|
||||||
|
public char BeginToken() throws java.io.IOException
|
||||||
|
{
|
||||||
|
m_tokenstart = m_pos;
|
||||||
|
return this.readChar();
|
||||||
|
|
||||||
|
} // end BeginToken
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a string made up of characters from the marked token beginning
|
||||||
|
* to the current buffer position. Implementations have the choice of returning
|
||||||
|
* anything that they want to. For example, for efficiency, one might decide
|
||||||
|
* to just return null, which is a valid implementation.
|
||||||
|
*/
|
||||||
|
public String GetImage()
|
||||||
|
{
|
||||||
|
return new String(m_array,m_tokenstart,m_pos - m_tokenstart);
|
||||||
|
|
||||||
|
} // end GetImage
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns an array of characters that make up the suffix of length 'len' for
|
||||||
|
* the currently matched token. This is used to build up the matched string
|
||||||
|
* for use in actions in the case of MORE. A simple and inefficient
|
||||||
|
* implementation of this is as follows :
|
||||||
|
*
|
||||||
|
* {
|
||||||
|
* String t = GetImage();
|
||||||
|
* return t.substring(t.length() - len, t.length()).toCharArray();
|
||||||
|
* }
|
||||||
|
*/
|
||||||
|
public char[] GetSuffix(int len)
|
||||||
|
{
|
||||||
|
char[] rc = new char[len];
|
||||||
|
System.arraycopy(m_array,m_pos - len,rc,0,len);
|
||||||
|
return rc;
|
||||||
|
|
||||||
|
} // end getSuffix
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The lexer calls this function to indicate that it is done with the stream
|
||||||
|
* and hence implementations can free any resources held by this class.
|
||||||
|
* Again, the body of this function can be just empty and it will not
|
||||||
|
* affect the lexer's operation.
|
||||||
|
*/
|
||||||
|
public void Done()
|
||||||
|
{
|
||||||
|
m_array = null;
|
||||||
|
|
||||||
|
} // end Done
|
||||||
|
|
||||||
|
} // end class StaticCharStream
|
Loading…
Reference in New Issue
Block a user