added a simple query language (with a JavaCC-compiled parser) and the
appropriate query methods in IndexService
This commit is contained in:
parent
60f24d8412
commit
7da8104954
2
INSTALL
2
INSTALL
|
@ -24,12 +24,14 @@ Venice.
|
|||
PACKAGES REQUIRED FOR VENICE
|
||||
----------------------------
|
||||
The following packages must be referenced from within build.properties:
|
||||
- JavaCC 3.0 (parser generator tool)
|
||||
- Java Servlet API 2.3 (use the servlet.jar file from Tomcat)
|
||||
- Jakarta Bean Scripting Framework 2.3
|
||||
- Jakarta Commons Collections Library, 2.1
|
||||
- Jakarta Commons Lang Library, 1.0.1
|
||||
- Mozilla.org Rhino, 1.5R3
|
||||
- Jakarta Log4J, 1.2.7
|
||||
- Jakarta Lucene, 1.3RC1
|
||||
- Jakarta Velocity, 1.3.1
|
||||
|
||||
Optionally:
|
||||
|
|
|
@ -24,6 +24,11 @@
|
|||
# [Logging directory]
|
||||
# logfile.dir=${user.home}
|
||||
|
||||
# [Location of JavaCC 3.0]
|
||||
javacc.base=/usr/local/java/javacc-3.0
|
||||
# javacc.lib=${javacc.base}/bin/lib
|
||||
# javacc.jarfile=javacc.jar
|
||||
|
||||
# [Location of Servlet API 2.3]
|
||||
servlet.base=/usr/local/jakarta/jakarta-tomcat-4.1.24-LE-jdk14
|
||||
servlet.lib=${servlet.base}/common/lib
|
||||
|
|
22
build.xml
22
build.xml
|
@ -37,6 +37,11 @@
|
|||
<!-- [Logging directory] -->
|
||||
<property name="logfile.dir" value="${user.home}"/>
|
||||
|
||||
<!-- [Location of JavaCC 3.0] -->
|
||||
<property name="javacc.base" value="../javacc"/>
|
||||
<property name="javacc.lib" value="${javacc.base}/bin/lib"/>
|
||||
<property name="javacc.jarfile" value="javacc.jar"/>
|
||||
|
||||
<!-- [Location of Servlet API 2.3] -->
|
||||
<property name="servlet.base" value="../servletapi"/>
|
||||
<property name="servlet.lib" value="${servlet.base}/lib"/>
|
||||
|
@ -140,9 +145,22 @@
|
|||
"build-dynamo" - Builds the Dynamo framework classes (com.silverwrist.dynamo.*).
|
||||
============================================================================ -->
|
||||
<target name="build-dynamo" depends="jar-baseutil">
|
||||
<mkdir dir="workingarea/src-dynamo-framework/com/silverwrist/dynamo/index"/>
|
||||
<!-- This rule doesn't work with JavaCC 3.0
|
||||
<javacc target="src/dynamo-framework/com/silverwrist/dynamo/index/Parser.jj"
|
||||
javacchome="/usr/local/java/javacc/bin/lib"
|
||||
outputdirectory="workingarea/src-dynamo-framework/com/silverwrist/dynamo/index"/>
|
||||
-->
|
||||
<java fork="true" classname="javacc">
|
||||
<arg value="-output_directory:workingarea/src-dynamo-framework/com/silverwrist/dynamo/index"/>
|
||||
<arg value="src/dynamo-framework/com/silverwrist/dynamo/index/Parser.jj"/>
|
||||
<classpath>
|
||||
<pathelement location="${javacc.lib}/${javacc.jarfile}"/>
|
||||
</classpath>
|
||||
</java>
|
||||
<mkdir dir="workingarea/dynamo-framework"/>
|
||||
<javac srcdir="src/dynamo-framework" destdir="workingarea/dynamo-framework" source="1.4"
|
||||
debug="${compile.debug}" optimize="${compile.optimize}" deprecation="${compile.deprecation}">
|
||||
<javac srcdir="src/dynamo-framework:workingarea/src-dynamo-framework" destdir="workingarea/dynamo-framework"
|
||||
source="1.4" debug="${compile.debug}" optimize="${compile.optimize}" deprecation="off">
|
||||
<classpath>
|
||||
<filelist dir="jars" files="baseutil.jar"/>
|
||||
<path refid="base.build.path"/>
|
||||
|
|
|
@ -17,6 +17,8 @@
|
|||
*/
|
||||
package com.silverwrist.dynamo.iface;
|
||||
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
import com.silverwrist.dynamo.except.IndexException;
|
||||
|
||||
public interface IndexService
|
||||
|
@ -26,4 +28,10 @@ public interface IndexService
|
|||
|
||||
public boolean deleteItem(String item_namespace, String item_name, Object item) throws IndexException;
|
||||
|
||||
/**
 * Searches the index and returns one page of results.
 *
 * As implemented by IndexServiceImpl in this change, each criterion that is passed as
 * null is simply left out of the search.
 *
 * @param query_string Query-language string matched against the "text" field, or null.
 * @param date_low Inclusive lower bound on the "date" field, or null for no lower bound.
 * @param date_high Inclusive upper bound on the "date" field, or null for no upper bound.
 * @param match_owner User whose name must match the "owner" field, or null.
 * @param match_scope Value for the "scope" field; may contain '?' and '*' wildcards, or be null.
 * @param offset Number of leading hits to skip.
 * @param count Maximum number of hits to return.
 * @return List of ItemAndScore objects for the selected hits.
 * @exception IndexException If the query string cannot be parsed or the search fails.
 */
public List query(String query_string, java.util.Date date_low, java.util.Date date_high, DynamoUser match_owner,
|
||||
String match_scope, int offset, int count) throws IndexException;
|
||||
|
||||
/**
 * Counts the hits that a search with the given criteria would produce.
 *
 * The criteria have the same meaning as in query(); as implemented by IndexServiceImpl
 * in this change, each criterion that is passed as null is left out of the search.
 *
 * @param query_string Query-language string matched against the "text" field, or null.
 * @param date_low Inclusive lower bound on the "date" field, or null for no lower bound.
 * @param date_high Inclusive upper bound on the "date" field, or null for no upper bound.
 * @param match_owner User whose name must match the "owner" field, or null.
 * @param match_scope Value for the "scope" field; may contain '?' and '*' wildcards, or be null.
 * @return The total number of matching index entries.
 * @exception IndexException If the query string cannot be parsed or the search fails.
 */
public int queryCount(String query_string, java.util.Date date_low, java.util.Date date_high, DynamoUser match_owner,
|
||||
String match_scope) throws IndexException;
|
||||
|
||||
} // end interface IndexService
|
||||
|
|
|
@ -22,3 +22,5 @@ analyzer.noCreate=Unable to create an instance of the analyzer class {0}.
|
|||
analyzer.badType=The specified analyzer class {0} is of the wrong type.
|
||||
addItem.fail=Unable to add a new item (namespace {0}, name {1}) to index {2}.
|
||||
deleteItem.fail=Unable to remove an item (namespace {0}, name {1}) from index {2}.
|
||||
query.syntax=Parse error in query string: {0}
|
||||
query.fail=Unable to execute search query.
|
||||
|
|
|
@ -19,16 +19,156 @@ package com.silverwrist.dynamo.index;
|
|||
|
||||
import java.io.*;
|
||||
import java.lang.ref.*;
|
||||
import java.util.*;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.document.*;
|
||||
import org.apache.lucene.index.*;
|
||||
import org.apache.lucene.search.*;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import com.silverwrist.util.*;
|
||||
import com.silverwrist.dynamo.except.*;
|
||||
import com.silverwrist.dynamo.iface.*;
|
||||
import com.silverwrist.dynamo.util.*;
|
||||
|
||||
class IndexServiceImpl implements IndexService
|
||||
{
|
||||
/*--------------------------------------------------------------------------------
|
||||
* Internal counting HitCollector
|
||||
*--------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
private static class CountingCollector extends HitCollector
|
||||
{
|
||||
/*====================================================================
|
||||
* Attributes
|
||||
*====================================================================
|
||||
*/
|
||||
|
||||
private int m_count = 0;
|
||||
|
||||
/*====================================================================
|
||||
* Constructor
|
||||
*====================================================================
|
||||
*/
|
||||
|
||||
CountingCollector()
|
||||
{ // do nothing
|
||||
} // end constructor
|
||||
|
||||
/*====================================================================
|
||||
* Abstract implementations from class HitCollector
|
||||
*====================================================================
|
||||
*/
|
||||
|
||||
public void collect(int doc, float score)
|
||||
{
|
||||
m_count++;
|
||||
|
||||
} // end collect
|
||||
|
||||
/*====================================================================
|
||||
* External operations
|
||||
*====================================================================
|
||||
*/
|
||||
|
||||
int getCount()
|
||||
{
|
||||
return m_count;
|
||||
|
||||
} // end getCount
|
||||
|
||||
} // end class CountingCollector
|
||||
|
||||
/*--------------------------------------------------------------------------------
|
||||
* Internal HitCollector that gathers a request subset
|
||||
*--------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
private class SubsetCollector extends HitCollector
|
||||
{
|
||||
/*====================================================================
|
||||
* Attributes
|
||||
*====================================================================
|
||||
*/
|
||||
|
||||
private int[] m_docs;
|
||||
private float[] m_scores;
|
||||
private int m_offset;
|
||||
private int m_size = 0;
|
||||
|
||||
/*====================================================================
|
||||
* Constructor
|
||||
*====================================================================
|
||||
*/
|
||||
|
||||
SubsetCollector(int offset, int count)
|
||||
{
|
||||
m_docs = new int[count];
|
||||
m_scores = new float[count];
|
||||
m_offset = offset;
|
||||
|
||||
} // end constructor
|
||||
|
||||
/*====================================================================
|
||||
* Abstract implementations from class HitCollector
|
||||
*====================================================================
|
||||
*/
|
||||
|
||||
public void collect(int doc, float score)
|
||||
{
|
||||
if (m_offset>0)
|
||||
{ // skip documents at beginning of list
|
||||
m_offset--;
|
||||
return;
|
||||
|
||||
} // end if
|
||||
|
||||
if (m_size<m_docs.length)
|
||||
{ // add document index and score to the list
|
||||
m_docs[m_size] = doc;
|
||||
m_scores[m_size++] = score;
|
||||
|
||||
} // end if
|
||||
|
||||
} // end collect
|
||||
|
||||
/*====================================================================
|
||||
* External operations
|
||||
*====================================================================
|
||||
*/
|
||||
|
||||
public List outputItems(IndexReader irdr) throws IOException, IndexException
|
||||
{
|
||||
if (m_size==0)
|
||||
return Collections.EMPTY_LIST;
|
||||
ArrayList rc = new ArrayList(m_size);
|
||||
for (int i=0; i<m_size; i++)
|
||||
{ // get the document and retrieve its "id" field, then use that to get the object
|
||||
Document doc = irdr.document(m_docs[i]);
|
||||
Field id_field = doc.getField("id");
|
||||
String fulltag = id_field.stringValue();
|
||||
if (fulltag==null)
|
||||
fulltag = IOUtils.load(id_field.readerValue()).toString();
|
||||
String[] elts = StringUtils.split1(fulltag,'|',3);
|
||||
Object value = m_base.resolveObjectReference(elts[0],elts[1],elts[2]);
|
||||
rc.add(new ItemAndScore(value,m_scores[i]));
|
||||
|
||||
} // end for
|
||||
|
||||
return Collections.unmodifiableList(rc);
|
||||
|
||||
} // end outputItems
|
||||
|
||||
} // end class SubsetCollector
|
||||
|
||||
/*--------------------------------------------------------------------------------
|
||||
* Static data members
|
||||
*--------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
private static Logger logger = Logger.getLogger(IndexServiceImpl.class);
|
||||
|
||||
/*--------------------------------------------------------------------------------
|
||||
* Attributes
|
||||
*--------------------------------------------------------------------------------
|
||||
|
@ -103,6 +243,76 @@ class IndexServiceImpl implements IndexService
|
|||
|
||||
} // end createTag
|
||||
|
||||
private final Query compileQuery(String query_string, java.util.Date date_low, java.util.Date date_high,
|
||||
DynamoUser match_owner, String match_scope) throws IndexException
|
||||
{
|
||||
ArrayList queries = new ArrayList();
|
||||
if (query_string!=null)
|
||||
{ // we have a query language string...
|
||||
try
|
||||
{ // parse the query string, which matches on the "text" field only
|
||||
queries.add(Parser.parse(query_string));
|
||||
|
||||
} // end try
|
||||
catch (ParseException pe)
|
||||
{ // parse error in the query
|
||||
IndexException ie = new IndexException(IndexServiceImpl.class,"IndexMessages","query.syntax",pe);
|
||||
ie.setParameter(0,pe.getMessage());
|
||||
throw ie;
|
||||
|
||||
} // end catch
|
||||
|
||||
} // end if
|
||||
|
||||
if ((date_low!=null) || (date_high!=null))
|
||||
{ // add an inclusive range of dates
|
||||
Term term_low = null, term_high = null;
|
||||
if (date_low!=null)
|
||||
term_low = new Term("date",DateField.dateToString(date_low));
|
||||
if (date_high!=null)
|
||||
term_high = new Term("date",DateField.dateToString(date_high));
|
||||
queries.add(new RangeQuery(term_low,term_high,true));
|
||||
|
||||
} // end if
|
||||
|
||||
if (match_owner!=null)
|
||||
queries.add(new TermQuery(new Term("owner",match_owner.getName())));
|
||||
if (match_scope!=null)
|
||||
{ // treat "scope" as a possible wildcard match and create it
|
||||
if (match_scope.indexOf('?')>=0)
|
||||
queries.add(new WildcardQuery(new Term("scope",match_scope)));
|
||||
else if (match_scope.indexOf('*')>=0)
|
||||
{ // append another query
|
||||
String s = match_scope.substring(0,match_scope.length()-1);
|
||||
if (s.indexOf('*')<0)
|
||||
queries.add(new PrefixQuery(new Term("scope",s)));
|
||||
else
|
||||
queries.add(new WildcardQuery(new Term("scope",match_scope)));
|
||||
|
||||
} // end else if
|
||||
else // match the scope directly
|
||||
queries.add(new TermQuery(new Term("scope",match_scope)));
|
||||
|
||||
} // end if
|
||||
|
||||
// Boil down all the queries for me.
|
||||
if (queries.size()==0)
|
||||
return null;
|
||||
if (queries.size()==1)
|
||||
return (Query)(queries.get(0));
|
||||
BooleanQuery rc = new BooleanQuery();
|
||||
for (int i=0; i<queries.size(); i++)
|
||||
rc.add((Query)(queries.get(i)),true,false);
|
||||
return rc;
|
||||
|
||||
} // end compileQuery
|
||||
|
||||
/**
 * Placeholder: the body is empty, so calling this method currently does nothing.  The
 * parameters mirror those of compileQuery() plus a HitCollector for the results.
 * NOTE(review): nothing in the visible code calls this method - confirm whether it is
 * meant to become the shared search routine for query() and queryCount().
 */
private final void doQuery(String query_string, java.util.Date date_low, java.util.Date date_high,
|
||||
DynamoUser match_owner, String match_scope, HitCollector output) throws IndexException
|
||||
{
|
||||
|
||||
} // end doQuery
|
||||
|
||||
/*--------------------------------------------------------------------------------
|
||||
* Implementations from interface IndexService
|
||||
*--------------------------------------------------------------------------------
|
||||
|
@ -163,4 +373,85 @@ class IndexServiceImpl implements IndexService
|
|||
|
||||
} // end deleteItem
|
||||
|
||||
public List query(String query_string, java.util.Date date_low, java.util.Date date_high, DynamoUser match_owner,
|
||||
String match_scope, int offset, int count) throws IndexException
|
||||
{
|
||||
Query query = compileQuery(query_string,date_low,date_high,match_owner,match_scope);
|
||||
SubsetCollector subc = new SubsetCollector(offset,count);
|
||||
|
||||
List rc = null;
|
||||
IndexReader irdr = null;
|
||||
IndexSearcher srch = null;
|
||||
try
|
||||
{ // run that puppy!
|
||||
irdr = IndexReader.open(m_directory);
|
||||
srch = new IndexSearcher(irdr);
|
||||
srch.search(query,subc);
|
||||
rc = subc.outputItems(irdr);
|
||||
|
||||
} // end try
|
||||
catch (IOException e)
|
||||
{ // the query failed somehow - throw an error
|
||||
throw new IndexException(IndexServiceImpl.class,"IndexMessages","query.fail",e);
|
||||
|
||||
} // end catch
|
||||
finally
|
||||
{ // make sure we close down OK
|
||||
try
|
||||
{ // close the search and index reader
|
||||
if (srch!=null)
|
||||
srch.close();
|
||||
if (irdr!=null)
|
||||
irdr.close();
|
||||
|
||||
} // end try
|
||||
catch (IOException e)
|
||||
{ // shouldn't happen
|
||||
logger.warn("query(): error closing stuff",e);
|
||||
|
||||
} // end catch
|
||||
|
||||
} // end finally
|
||||
|
||||
return rc;
|
||||
|
||||
} // end query
|
||||
|
||||
public int queryCount(String query_string, java.util.Date date_low, java.util.Date date_high, DynamoUser match_owner,
|
||||
String match_scope) throws IndexException
|
||||
{
|
||||
Query query = compileQuery(query_string,date_low,date_high,match_owner,match_scope);
|
||||
CountingCollector cc = new CountingCollector();
|
||||
IndexSearcher srch = null;
|
||||
try
|
||||
{ // run that puppy!
|
||||
srch = new IndexSearcher(m_directory);
|
||||
srch.search(query,cc);
|
||||
|
||||
} // end try
|
||||
catch (IOException e)
|
||||
{ // the query failed somehow - throw an error
|
||||
throw new IndexException(IndexServiceImpl.class,"IndexMessages","query.fail",e);
|
||||
|
||||
} // end catch
|
||||
finally
|
||||
{ // make sure we close down OK
|
||||
try
|
||||
{ // close the search and index reader
|
||||
if (srch!=null)
|
||||
srch.close();
|
||||
|
||||
} // end try
|
||||
catch (IOException e)
|
||||
{ // shouldn't happen
|
||||
logger.warn("queryCount(): error closing stuff",e);
|
||||
|
||||
} // end catch
|
||||
|
||||
} // end finally
|
||||
|
||||
return cc.getCount();
|
||||
|
||||
} // end queryCount
|
||||
|
||||
} // end class IndexServiceImpl
|
||||
|
|
|
@ -0,0 +1,59 @@
|
|||
/*
|
||||
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
||||
* (the "License"); you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at <http://www.mozilla.org/MPL/>.
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis, WITHOUT
|
||||
* WARRANTY OF ANY KIND, either express or implied. See the License for the specific
|
||||
* language governing rights and limitations under the License.
|
||||
*
|
||||
* The Original Code is the Venice Web Communities System.
|
||||
*
|
||||
* The Initial Developer of the Original Code is Eric J. Bowersox <erbo@silcom.com>,
|
||||
* for Silverwrist Design Studios. Portions created by Eric J. Bowersox are
|
||||
* Copyright (C) 2003 Eric J. Bowersox/Silverwrist Design Studios. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s):
|
||||
*/
|
||||
package com.silverwrist.dynamo.index;
|
||||
|
||||
public final class ItemAndScore
|
||||
{
|
||||
/*--------------------------------------------------------------------------------
|
||||
* Attributes
|
||||
*--------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
private Object m_item;
|
||||
private float m_score;
|
||||
|
||||
/*--------------------------------------------------------------------------------
|
||||
* Constructor
|
||||
*--------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
public ItemAndScore(Object item, float score)
|
||||
{
|
||||
m_item = item;
|
||||
m_score = score;
|
||||
|
||||
} // end constructor
|
||||
|
||||
/*--------------------------------------------------------------------------------
|
||||
* External getters
|
||||
*--------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
public Object getItem()
|
||||
{
|
||||
return m_item;
|
||||
|
||||
} // end getItem
|
||||
|
||||
public float getScore()
|
||||
{
|
||||
return m_score;
|
||||
|
||||
} // end getScore
|
||||
|
||||
} // end class ItemAndScore
|
350
src/dynamo-framework/com/silverwrist/dynamo/index/Parser.jj
Normal file
350
src/dynamo-framework/com/silverwrist/dynamo/index/Parser.jj
Normal file
|
@ -0,0 +1,350 @@
|
|||
/*
|
||||
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
||||
* (the "License"); you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at <http://www.mozilla.org/MPL/>.
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis, WITHOUT
|
||||
* WARRANTY OF ANY KIND, either express or implied. See the License for the specific
|
||||
* language governing rights and limitations under the License.
|
||||
*
|
||||
* The Original Code is the Venice Web Communities System.
|
||||
*
|
||||
* The Initial Developer of the Original Code is Eric J. Bowersox <erbo@silcom.com>,
|
||||
* for Silverwrist Design Studios. Portions created by Eric J. Bowersox are
|
||||
* Copyright (C) 2003 Eric J. Bowersox/Silverwrist Design Studios. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s):
|
||||
*/
|
||||
|
||||
options
|
||||
{
|
||||
STATIC = false;
|
||||
JAVA_UNICODE_ESCAPE = true;
|
||||
USER_CHAR_STREAM = true;
|
||||
}
|
||||
|
||||
PARSER_BEGIN(Parser)
|
||||
|
||||
package com.silverwrist.dynamo.index;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.*;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.analysis.*;
|
||||
import org.apache.lucene.document.*;
|
||||
import org.apache.lucene.search.*;
|
||||
|
||||
/**
|
||||
* N.B.: A lot of this is based on the Lucene QueryParser code, but streamlined to fit the needs of the Dynamo
|
||||
* indexing system.
|
||||
*/
|
||||
class Parser
|
||||
{
|
||||
/*--------------------------------------------------------------------------------
|
||||
* Attributes
|
||||
*--------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
private Analyzer m_analyzer;
|
||||
|
||||
/*--------------------------------------------------------------------------------
|
||||
* Internal operations
|
||||
*--------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
private static final Query createWildcardQuery(String data)
|
||||
{
|
||||
Term t = new Term("text",data.toLowerCase());
|
||||
return new WildcardQuery(t);
|
||||
|
||||
} // end createWildcardQuery
|
||||
|
||||
private static final Query createPrefixQuery(String data)
|
||||
{
|
||||
Term t = new Term("text",data.toLowerCase());
|
||||
return new PrefixQuery(t);
|
||||
|
||||
} // end createPrefixQuery
|
||||
|
||||
private static final Query createFuzzyQuery(String data)
|
||||
{
|
||||
Term t = new Term("text",data);
|
||||
return new FuzzyQuery(t);
|
||||
|
||||
} // end createFuzzyQuery
|
||||
|
||||
private final Query createNormalQuery(String data)
|
||||
{
|
||||
TokenStream tstm = m_analyzer.tokenStream("text",new StringReader(data));
|
||||
ArrayList toks = new ArrayList();
|
||||
org.apache.lucene.analysis.Token t = null;
|
||||
|
||||
for (;;)
|
||||
{ // use the Lucene TokenStream to find all the tokens and eliminate stopwords
|
||||
try
|
||||
{ // get the next token from the input
|
||||
t = tstm.next();
|
||||
|
||||
} // end try
|
||||
catch (IOException e)
|
||||
{ // whoops!
|
||||
t = null;
|
||||
|
||||
} // end catch
|
||||
|
||||
if (t==null)
|
||||
break; // done scanning the string
|
||||
|
||||
toks.add(t.termText());
|
||||
|
||||
} // end for (ever)
|
||||
|
||||
if (toks.size()==0)
|
||||
return null; // no query
|
||||
if (toks.size()==1) // single term query
|
||||
return new TermQuery(new Term("text",(String)(toks.get(0))));
|
||||
|
||||
// Build a PhraseQuery and return that.
|
||||
PhraseQuery rc = new PhraseQuery();
|
||||
rc.setSlop(0);
|
||||
for (int i=0; i<toks.size(); i++)
|
||||
rc.add(new Term("text",(String)(toks.get(i))));
|
||||
return rc;
|
||||
|
||||
} // end createNormalQuery
|
||||
|
||||
/*--------------------------------------------------------------------------------
|
||||
* External static operations
|
||||
*--------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
public static Query parse(String s) throws ParseException
|
||||
{
|
||||
try
|
||||
{ // quickie parse
|
||||
Parser p = new Parser(new StaticCharStream(s));
|
||||
return p.search();
|
||||
|
||||
} // end try
|
||||
catch (TokenMgrError tme)
|
||||
{ // translate into a ParseException
|
||||
throw new ParseException(tme.getMessage());
|
||||
|
||||
} // end catch
|
||||
|
||||
} // end parse
|
||||
|
||||
} // end class Parser
|
||||
|
||||
PARSER_END(Parser)
|
||||
|
||||
/*--------------------------------------------------------------------------------
|
||||
* Token (lexer) definitions
|
||||
*--------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
// Private ("#") building blocks, declared for every lexical state.  They are only usable
// inside other token definitions.
<*> TOKEN:
|
||||
{
|
||||
// a single decimal digit
<#_DIGIT: ["0"-"9"]>
|
||||
// a space or a tab
| <#_WHITESPACE: [" ", "\t"]>
|
||||
// characters that have a special meaning in the query language
| <#_RESERVED: ["+", "-", "(", ")", "~", "^", "\"", "*", "?"]>
|
||||
// a backslash followed by a reserved character or another backslash
| <#_ESCAPED: "\\" ( <_RESERVED> | "\\" )>
|
||||
// any character that is neither reserved nor whitespace
| <#_VALID: ~["+", "-", "(", ")", "~", "^", "\"", "*", "?", " ", "\t"]>
|
||||
// one character of a search term: either a valid character or an escape sequence
| <#_TERMCHAR: <_VALID> | <_ESCAPED>>
|
||||
|
||||
} // end token definitions
|
||||
|
||||
// In the DEFAULT state, spaces and tabs between tokens are skipped.
<DEFAULT> SKIP:
|
||||
{
|
||||
<<_WHITESPACE>>
|
||||
|
||||
} // end skip definition
|
||||
|
||||
// Tokens recognized in the DEFAULT state.  Matching "^" (WEIGHT) switches the lexer to the
// Weight state, where the weight value is read.  Every TERM or PREFIXTERM image also fits
// the WILDTERM pattern, so the order of these three definitions is significant.
<DEFAULT> TOKEN:
|
||||
{
|
||||
<AND: ("AND" | "&&")> // AND query
|
||||
| <OR: ("OR" | "||")> // OR query
|
||||
| <PLUS: "+"> // plus sign
|
||||
| <MINUS: "-"> // minus sign
|
||||
| <LPAREN: "("> // left parenthesis
|
||||
| <RPAREN: ")"> // right parenthesis
|
||||
| <FUZZY: "~"> // "fuzzy" operator
|
||||
| <WEIGHT: "^"> : Weight // "weighting" operator
|
||||
| <QSTRING: "\"" (~["\""])+ "\""> // quoted string
|
||||
| <TERM: <_TERMCHAR> (<_TERMCHAR>)*> // simple term
|
||||
| <PREFIXTERM: <_TERMCHAR> (<_TERMCHAR>)* "*"> // prefix term
|
||||
| <WILDTERM: <_TERMCHAR> (<_TERMCHAR> | "?" | "*")*> // term with wildcards
|
||||
|
||||
} // end default token definitions
|
||||
|
||||
// In the Weight state (entered after "^"), the only token is a decimal number with an
// optional fractional part; matching it returns the lexer to the DEFAULT state.
<Weight> TOKEN:
|
||||
{
|
||||
<WEIGHTVAL: (<_DIGIT>)+ ( "." (<_DIGIT>)+ )?> : DEFAULT
|
||||
}
|
||||
|
||||
/*--------------------------------------------------------------------------------
|
||||
* BNF (parser) definitions
|
||||
*--------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/* Top-level rule: an entire query string.  Yields null when the input is empty. */
Query search():
{
  Query result = null;

} // end search declarations
{
  (
     <EOF>
   | result=search_expression() <EOF>
  )
  { return result; }

} // end search
|
||||
|
||||
/* A sequence of OR-expressions joined by an optional AND.  With more than one clause, every
 * clause that is not prohibited becomes required. */
Query search_expression():
{
  ArrayList parts = new ArrayList();
  BooleanClause cur = null;

} // end search_expression declarations
{
  cur=or_expression() { if (cur!=null) parts.add(cur); }
  ( (<AND>)? cur=or_expression() { if (cur!=null) parts.add(cur); } )*
  {
    if (parts.isEmpty())
      return null;

    if (parts.size()==1)
    { // a lone clause is returned as a bare query unless it is prohibited
      BooleanClause only = (BooleanClause)(parts.get(0));
      if (only.prohibited)
      { // a prohibited clause has to stay inside a BooleanQuery
        BooleanQuery wrapper = new BooleanQuery();
        wrapper.add(only);
        return wrapper;

      } // end if

      return only.query;

    } // end if

    BooleanQuery conjunction = new BooleanQuery();
    Iterator it = parts.iterator();
    while (it.hasNext())
    { // mark each non-prohibited clause as required and add it
      BooleanClause clause = (BooleanClause)(it.next());
      if (!(clause.prohibited))
        clause.required = true;
      conjunction.add(clause);

    } // end while

    return conjunction;

  } // end block

} // end search_expression rule
|
||||
|
||||
/* One or more boolean expressions joined by OR.  With more than one clause, every clause
 * that is not prohibited becomes optional, and the group is returned as one optional clause. */
BooleanClause or_expression():
{
  ArrayList parts = new ArrayList();
  BooleanClause cur = null;

} // end or_expression declarations
{
  cur=boolean_expression() { if (cur!=null) parts.add(cur); }
  ( <OR> cur=boolean_expression() { if (cur!=null) parts.add(cur); } )*
  {
    if (parts.isEmpty())
      return null;
    if (parts.size()==1)
      return (BooleanClause)(parts.get(0));

    BooleanQuery disjunction = new BooleanQuery();
    Iterator it = parts.iterator();
    while (it.hasNext())
    { // mark each non-prohibited clause as optional and add it
      BooleanClause clause = (BooleanClause)(it.next());
      if (!(clause.prohibited))
        clause.required = false;
      disjunction.add(clause);

    } // end while

    return new BooleanClause(disjunction,false,false);

  } // end block

} // end or_expression
|
||||
|
||||
/* A simple expression with an optional "+" (required) or "-" (prohibited) prefix.  Yields
 * null when the simple expression produced no query. */
BooleanClause boolean_expression():
{
  Query inner = null;
  boolean must_not = false;
  boolean must = false;

} // end boolean_expression declarations
{
  [ <PLUS> { must = true; } | <MINUS> { must_not = true; } ] inner=simple_expression()
  {
    if (inner==null)
      return null;
    return new BooleanClause(inner,must,must_not);

  } // end block

} // end boolean_expression
|
||||
|
||||
/* A single term (plain, prefix, wildcard, optionally fuzzy), a quoted string, or a
 * parenthesized sub-expression, each optionally followed by "^" and a weight. */
Query simple_expression():
{
  Query result = null;
  Token boost_tok = null;
  Token term_tok = null;
  boolean prefix_form = false;
  boolean wildcard_form = false;
  boolean fuzzy_form = false;

} // end simple_expression declarations
{
  (
    (
       term_tok=<TERM>
     | term_tok=<PREFIXTERM> { prefix_form = true; }
     | term_tok=<WILDTERM> { wildcard_form = true; }
     | term_tok=<WEIGHTVAL>
    )
    [ <FUZZY> { fuzzy_form = true; } ] [ <WEIGHT> boost_tok=<WEIGHTVAL> [ <FUZZY> { fuzzy_form = true; } ] ]
    { // the token image is the search term value; wildcard and prefix forms win over fuzzy
      String text = term_tok.image;
      if (wildcard_form)
        result = createWildcardQuery(text);
      else if (prefix_form)
        result = createPrefixQuery(text.substring(0,text.length()-1));  // drop the trailing '*'
      else if (fuzzy_form)
        result = createFuzzyQuery(text);
      else
        result = createNormalQuery(text);

    } // end block

  | term_tok=<QSTRING> [ <WEIGHT> boost_tok=<WEIGHTVAL> ]
    { // strip the surrounding quotes before building the query
      String quoted = term_tok.image;
      result = createNormalQuery(quoted.substring(1,quoted.length()-1));

    } // end block

  | <LPAREN> result=search_expression() <RPAREN> [ <WEIGHT> boost_tok=<WEIGHTVAL> ]
  )
  {
    if ((boost_tok!=null) && (result!=null))
    { // set the weight of this query
      float factor = 1.0F;
      try
      { // parse the float value
        factor = Float.parseFloat(boost_tok.image);

      } // end try
      catch (Exception e)
      { // ignore exceptions and keep the default weight
      } // end catch

      result.setBoost(factor);

    } // end if

    return result;

  } // end block

} // end simple_expression
|
|
@ -0,0 +1,189 @@
|
|||
/*
|
||||
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
||||
* (the "License"); you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at <http://www.mozilla.org/MPL/>.
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis, WITHOUT
|
||||
* WARRANTY OF ANY KIND, either express or implied. See the License for the specific
|
||||
* language governing rights and limitations under the License.
|
||||
*
|
||||
* The Original Code is the Venice Web Communities System.
|
||||
*
|
||||
* The Initial Developer of the Original Code is Eric J. Bowersox <erbo@silcom.com>,
|
||||
* for Silverwrist Design Studios. Portions created by Eric J. Bowersox are
|
||||
* Copyright (C) 2003 Eric J. Bowersox/Silverwrist Design Studios. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s):
|
||||
*/
|
||||
package com.silverwrist.dynamo.index;
|
||||
|
||||
class StaticCharStream implements CharStream
|
||||
{
|
||||
/*--------------------------------------------------------------------------------
|
||||
* Attributes
|
||||
*--------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
private char[] m_array; // array full of characters to be read
|
||||
int m_pos = 0; // index of next character to be read
|
||||
int m_tokenstart = 0; // index of start of token
|
||||
|
||||
/*--------------------------------------------------------------------------------
|
||||
* Constructor
|
||||
*--------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
StaticCharStream(String s)
|
||||
{
|
||||
m_array = s.toCharArray();
|
||||
|
||||
} // end constructor
|
||||
|
||||
/*--------------------------------------------------------------------------------
|
||||
* Implementations from interface CharStream
|
||||
*--------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/**
|
||||
* Returns the next character from the selected input. The method
|
||||
* of selecting the input is the responsibility of the class
|
||||
* implementing this interface. Can throw any java.io.IOException.
|
||||
*/
|
||||
public char readChar() throws java.io.IOException
|
||||
{
|
||||
if (m_pos==m_array.length)
|
||||
throw new java.io.IOException("read past EOF");
|
||||
return m_array[m_pos++];
|
||||
|
||||
} // end readChar
|
||||
|
||||
/**
|
||||
* Returns the column position of the character last read.
|
||||
* @deprecated
|
||||
* @see #getEndColumn
|
||||
*/
|
||||
public int getColumn()
|
||||
{
|
||||
return m_pos;
|
||||
|
||||
} // end getColumn
|
||||
|
||||
/**
|
||||
* Returns the line number of the character last read.
|
||||
* @deprecated
|
||||
* @see #getEndLine
|
||||
*/
|
||||
public int getLine()
|
||||
{
|
||||
return 1;
|
||||
|
||||
} // end getLine
|
||||
|
||||
/**
|
||||
* Returns the column number of the last character for current token (being
|
||||
* matched after the last call to BeginTOken).
|
||||
*/
|
||||
public int getEndColumn()
|
||||
{
|
||||
return m_pos;
|
||||
|
||||
} // end getEndColumn
|
||||
|
||||
/**
|
||||
* Returns the line number of the last character for current token (being
|
||||
* matched after the last call to BeginTOken).
|
||||
*/
|
||||
public int getEndLine()
|
||||
{
|
||||
return 1;
|
||||
|
||||
} // end getLine
|
||||
|
||||
/**
|
||||
* Returns the column number of the first character for current token (being
|
||||
* matched after the last call to BeginTOken).
|
||||
*/
|
||||
public int getBeginColumn()
|
||||
{
|
||||
return m_tokenstart;
|
||||
|
||||
} // end getBeginColumn
|
||||
|
||||
/**
|
||||
* Returns the line number of the first character for current token (being
|
||||
* matched after the last call to BeginTOken).
|
||||
*/
|
||||
public int getBeginLine()
|
||||
{
|
||||
return 1;
|
||||
|
||||
} // end getBeginLine
|
||||
|
||||
/**
|
||||
* Backs up the input stream by amount steps. Lexer calls this method if it
|
||||
* had already read some characters, but could not use them to match a
|
||||
* (longer) token. So, they will be used again as the prefix of the next
|
||||
* token and it is the implemetation's responsibility to do this right.
|
||||
*/
|
||||
public void backup(int amount)
|
||||
{
|
||||
m_pos -= amount;
|
||||
|
||||
} // end backup
|
||||
|
||||
/**
|
||||
* Returns the next character that marks the beginning of the next token.
|
||||
* All characters must remain in the buffer between two successive calls
|
||||
* to this method to implement backup correctly.
|
||||
*/
|
||||
public char BeginToken() throws java.io.IOException
|
||||
{
|
||||
m_tokenstart = m_pos;
|
||||
return this.readChar();
|
||||
|
||||
} // end BeginToken
|
||||
|
||||
/**
|
||||
* Returns a string made up of characters from the marked token beginning
|
||||
* to the current buffer position. Implementations have the choice of returning
|
||||
* anything that they want to. For example, for efficiency, one might decide
|
||||
* to just return null, which is a valid implementation.
|
||||
*/
|
||||
public String GetImage()
|
||||
{
|
||||
return new String(m_array,m_tokenstart,m_pos - m_tokenstart);
|
||||
|
||||
} // end GetImage
|
||||
|
||||
/**
|
||||
* Returns an array of characters that make up the suffix of length 'len' for
|
||||
* the currently matched token. This is used to build up the matched string
|
||||
* for use in actions in the case of MORE. A simple and inefficient
|
||||
* implementation of this is as follows :
|
||||
*
|
||||
* {
|
||||
* String t = GetImage();
|
||||
* return t.substring(t.length() - len, t.length()).toCharArray();
|
||||
* }
|
||||
*/
|
||||
public char[] GetSuffix(int len)
|
||||
{
|
||||
char[] rc = new char[len];
|
||||
System.arraycopy(m_array,m_pos - len,rc,0,len);
|
||||
return rc;
|
||||
|
||||
} // end getSuffix
|
||||
|
||||
/**
|
||||
* The lexer calls this function to indicate that it is done with the stream
|
||||
* and hence implementations can free any resources held by this class.
|
||||
* Again, the body of this function can be just empty and it will not
|
||||
* affect the lexer's operation.
|
||||
*/
|
||||
public void Done()
|
||||
{
|
||||
m_array = null;
|
||||
|
||||
} // end Done
|
||||
|
||||
} // end class StaticCharStream
|
Loading…
Reference in New Issue
Block a user