added the ability for the HTML checker to keep track of internal and external

references in any post, so we can do trackbacks
This commit is contained in:
Eric J. Bowersox 2004-12-30 08:08:13 +00:00
parent 7e72ec21d0
commit 4c5c7ffe85
8 changed files with 341 additions and 151 deletions

View File

@ -9,9 +9,9 @@
* *
* The Original Code is the Venice Web Communities System. * The Original Code is the Venice Web Communities System.
* *
* The Initial Developer of the Original Code is Eric J. Bowersox <erbo@silcom.com>, * The Initial Developer of the Original Code is Eric J. Bowersox <erbo@ricochet.com>,
* for Silverwrist Design Studios. Portions created by Eric J. Bowersox are * for Silverwrist Design Studios. Portions created by Eric J. Bowersox are
* Copyright (C) 2001-02 Eric J. Bowersox/Silverwrist Design Studios. All Rights Reserved. * Copyright (C) 2001-2004 Eric J. Bowersox/Silverwrist Design Studios. All Rights Reserved.
* *
* Contributor(s): * Contributor(s):
*/ */
@ -40,7 +40,7 @@ public class PostLinkRewriter implements Rewriter
*-------------------------------------------------------------------------------- *--------------------------------------------------------------------------------
*/ */
private GlobalSite globalsite; // global site containing utilities private final GlobalSite globalsite; // global site containing utilities
/*-------------------------------------------------------------------------------- /*--------------------------------------------------------------------------------
* Constructor * Constructor
@ -60,7 +60,7 @@ public class PostLinkRewriter implements Rewriter
private static final String buildPostLink(PostLinkDecoder pl, PostLinkDecoderContext ctxt) private static final String buildPostLink(PostLinkDecoder pl, PostLinkDecoderContext ctxt)
{ {
StringBuffer b = new StringBuffer(URI_PREFIX); StringBuffer b = new StringBuffer();
boolean started = false; boolean started = false;
if (pl.getCommunity()==null) if (pl.getCommunity()==null)
b.append(ctxt.getCommunityName()); b.append(ctxt.getCommunityName());
@ -180,14 +180,18 @@ public class PostLinkRewriter implements Rewriter
} // end catch } // end catch
// build the post link and add it as an internal reference
String link = buildPostLink(pl,ctxt);
svc.addInternalReference(link);
// build the necessary markup and return it // build the necessary markup and return it
StringBuffer open_a = new StringBuffer("<A HREF=\""); StringBuffer open_a = new StringBuffer("<a href=\"");
open_a.append(buildPostLink(pl,ctxt)).append("\""); open_a.append(URI_PREFIX).append(link).append("\"");
String catenate = svc.getRewriterAttrValue("ANCHORTAIL"); String catenate = svc.getRewriterAttrValue("ANCHORTAIL");
if (!(StringUtil.isStringEmpty(catenate))) if (!(StringUtil.isStringEmpty(catenate)))
open_a.append(' ').append(catenate); open_a.append(' ').append(catenate);
open_a.append('>'); open_a.append('>');
return new MarkupData(open_a.toString(),data,"</A>"); return new MarkupData(open_a.toString(),data,"</a>");
} // end rewrite } // end rewrite

View File

@ -17,6 +17,8 @@
*/ */
package com.silverwrist.venice.htmlcheck; package com.silverwrist.venice.htmlcheck;
import java.util.Set;
public interface HTMLChecker public interface HTMLChecker
{ {
public void append(String str) throws AlreadyFinishedException; public void append(String str) throws AlreadyFinishedException;
@ -37,4 +39,8 @@ public interface HTMLChecker
public void setContextValue(String name, Object val); public void setContextValue(String name, Object val);
public Set getExternalReferences() throws NotYetFinishedException;
public Set getInternalReferences() throws NotYetFinishedException;
} // end interface HTMLChecker } // end interface HTMLChecker

View File

@ -9,18 +9,24 @@
* *
* The Original Code is the Venice Web Community System. * The Original Code is the Venice Web Community System.
* *
* The Initial Developer of the Original Code is Eric J. Bowersox <erbo@silcom.com>, * The Initial Developer of the Original Code is Eric J. Bowersox <erbo@ricochet.com>,
* for Silverwrist Design Studios. Portions created by Eric J. Bowersox are * for Silverwrist Design Studios. Portions created by Eric J. Bowersox are
* Copyright (C) 2001 Eric J. Bowersox/Silverwrist Design Studios. All Rights Reserved. * Copyright (C) 2001-2004 Eric J. Bowersox/Silverwrist Design Studios. All Rights Reserved.
* *
* Contributor(s): * Contributor(s):
*/ */
package com.silverwrist.venice.htmlcheck; package com.silverwrist.venice.htmlcheck;
import java.net.URL;
public interface RewriterServices public interface RewriterServices
{ {
public abstract String getRewriterAttrValue(String name); public String getRewriterAttrValue(String name);
public abstract Object getRewriterContextValue(String name); public Object getRewriterContextValue(String name);
public void addExternalReference(URL ref);
public void addInternalReference(String ref);
} // end interface RewriterServices } // end interface RewriterServices

View File

@ -9,21 +9,33 @@
* *
* The Original Code is the Venice Web Community System. * The Original Code is the Venice Web Community System.
* *
* The Initial Developer of the Original Code is Eric J. Bowersox <erbo@silcom.com>, * The Initial Developer of the Original Code is Eric J. Bowersox <erbo@ricochet.com>,
* for Silverwrist Design Studios. Portions created by Eric J. Bowersox are * for Silverwrist Design Studios. Portions created by Eric J. Bowersox are
* Copyright (C) 2001 Eric J. Bowersox/Silverwrist Design Studios. All Rights Reserved. * Copyright (C) 2001-2004 Eric J. Bowersox/Silverwrist Design Studios. All Rights Reserved.
* *
* Contributor(s): * Contributor(s):
*/ */
package com.silverwrist.venice.htmlcheck.filters; package com.silverwrist.venice.htmlcheck.filters;
import java.util.*; import java.util.*;
import org.apache.log4j.Logger;
import org.apache.regexp.*;
import com.silverwrist.util.*;
import com.silverwrist.venice.htmlcheck.Rewriter; import com.silverwrist.venice.htmlcheck.Rewriter;
import com.silverwrist.venice.htmlcheck.RewriterServices; import com.silverwrist.venice.htmlcheck.RewriterServices;
import com.silverwrist.venice.htmlcheck.MarkupData; import com.silverwrist.venice.htmlcheck.MarkupData;
public class EmailRewriter implements Rewriter public class EmailRewriter implements Rewriter
{ {
/*--------------------------------------------------------------------------------
* Static data members
*--------------------------------------------------------------------------------
*/
private static Logger logger = Logger.getLogger(EmailRewriter.class);
private static REProgram s_match = null;
/*-------------------------------------------------------------------------------- /*--------------------------------------------------------------------------------
* Constructor * Constructor
*-------------------------------------------------------------------------------- *--------------------------------------------------------------------------------
@ -46,23 +58,42 @@ public class EmailRewriter implements Rewriter
public MarkupData rewrite(String data, RewriterServices svc) public MarkupData rewrite(String data, RewriterServices svc)
{ {
int at_pos = data.indexOf('@'); RE m = new RE(s_match,RE.MATCH_CASEINDEPENDENT);
if ((at_pos<=0) || (at_pos==(data.length()-1))) if (!(m.match(data)))
return null; return null; // not a valid E-mail address
// TODO: put in more validation checking
// build the <A> tag (the gnarliest part) // build the <A> tag (the gnarliest part)
StringBuffer open_a = new StringBuffer("<A HREF=\"mailto:"); StringBuffer open_a = new StringBuffer("<a href=\"mailto:");
open_a.append(data).append("\""); open_a.append(data).append("\"");
String catenate = svc.getRewriterAttrValue("ANCHORTAIL"); String catenate = svc.getRewriterAttrValue("ANCHORTAIL");
if ((catenate!=null) && (catenate.length()>0)) if (!(StringUtil.isStringEmpty(catenate)))
open_a.append(' ').append(catenate); open_a.append(' ').append(catenate);
open_a.append('>'); open_a.append('>');
// return the markup data back to the checker // return the markup data back to the checker
return new MarkupData(open_a.toString(),data,"</A>"); return new MarkupData(open_a.toString(),data,"</a>");
} // end rewrite } // end rewrite
/*--------------------------------------------------------------------------------
* Static initializer
*--------------------------------------------------------------------------------
*/
static
{
  // Compile the address pattern once, at class-load time; rewrite() wraps the compiled
  // program in a fresh RE object for each call.
  // NOTE(review): inside the first character class, "+-/" reads as a range ('+' through '/'),
  // which would also admit ',' - confirm whether that is intended.
  REProgram compiled = null;
  try
  { // the pattern is a constant, so a syntax error here indicates a coding mistake
    compiled = new RECompiler().compile(
        "^[A-Za-z0-9!#$%*+-/=?^_`{|}~.]+@[A-Za-z0-9_-]+(?:\\.[A-Za-z0-9_-]+)+$");

  } // end try
  catch (RESyntaxException e)
  { // shouldn't happen - the failure is only logged, and s_match stays null
    logger.fatal("caught RESyntaxException in EmailRewriter initializer",e);

  } // end catch

  s_match = compiled;

} // end static initializer
} // end class EmailRewriter } // end class EmailRewriter

View File

@ -9,29 +9,87 @@
* *
* The Original Code is the Venice Web Community System. * The Original Code is the Venice Web Community System.
* *
* The Initial Developer of the Original Code is Eric J. Bowersox <erbo@silcom.com>, * The Initial Developer of the Original Code is Eric J. Bowersox <erbo@ricochet.com>,
* for Silverwrist Design Studios. Portions created by Eric J. Bowersox are * for Silverwrist Design Studios. Portions created by Eric J. Bowersox are
* Copyright (C) 2001 Eric J. Bowersox/Silverwrist Design Studios. All Rights Reserved. * Copyright (C) 2001-2004 Eric J. Bowersox/Silverwrist Design Studios. All Rights Reserved.
* *
* Contributor(s): * Contributor(s):
*/ */
package com.silverwrist.venice.htmlcheck.filters; package com.silverwrist.venice.htmlcheck.filters;
import java.net.*;
import java.util.*; import java.util.*;
import org.apache.log4j.Logger;
import org.apache.regexp.*;
import com.silverwrist.util.*;
import com.silverwrist.venice.htmlcheck.Rewriter; import com.silverwrist.venice.htmlcheck.Rewriter;
import com.silverwrist.venice.htmlcheck.RewriterServices; import com.silverwrist.venice.htmlcheck.RewriterServices;
import com.silverwrist.venice.htmlcheck.MarkupData; import com.silverwrist.venice.htmlcheck.MarkupData;
public class URLRewriter implements Rewriter public class URLRewriter implements Rewriter
{ {
/*--------------------------------------------------------------------------------
* Internal class containing URL elements.
*--------------------------------------------------------------------------------
*/
private static class URLElement
{
  /** Compiled form of the recognition pattern; left null if the pattern failed to compile. */
  private REProgram m_match;

  /** Text placed in front of a matching input; only set when the pattern compiled. */
  private String m_prefix;

  /**
   * Creates a new URL element.
   *
   * @param pattern Regular expression recognizing this kind of URL.
   * @param prefix Text to prepend to matching input when building the result of
   *               {@link #eval(String)}.
   */
  URLElement(String pattern, String prefix)
  {
    try
    { // compile first; the prefix is only recorded if compilation succeeded
      REProgram program = COMPILER.compile(pattern);
      m_match = program;
      m_prefix = prefix;

    } // end try
    catch (RESyntaxException e)
    { // shouldn't happen - the patterns are constants
      logger.fatal("got RESyntaxException in URLElement",e);

    } // end catch

  } // end constructor

  /**
   * Tests the input against this element's pattern, ignoring case.
   *
   * @param input The text to be tested.
   * @return The prefix followed by the input if the pattern matches, or <code>null</code> if not.
   */
  String eval(String input)
  {
    RE matcher = new RE(m_match,RE.MATCH_CASEINDEPENDENT);
    return matcher.match(input) ? (m_prefix + input) : null;

  } // end eval

} // end class URLElement
/*-------------------------------------------------------------------------------- /*--------------------------------------------------------------------------------
* Static data members * Static data members
*-------------------------------------------------------------------------------- *--------------------------------------------------------------------------------
*/ */
private static final String NULLSTRING = ""; private static Logger logger = Logger.getLogger(URLRewriter.class);
private static Hashtable prefix_list = null;
private static boolean set_up = true; private static final RECompiler COMPILER = new RECompiler();
// Table of recognized URL forms, stored as consecutive (pattern, prefix) pairs: each
// even-indexed entry is a regular expression and the entry after it is the text prepended
// to matching input. The static initializer reads the table two entries at a time, so the
// number of entries must stay even. An empty prefix is used where the pattern already
// begins with a scheme; the "www.", "ftp." and "gopher." forms have a scheme prepended.
// NOTE(review): in the mailto: pattern, "+-/" inside the character class reads as a range
// ('+' through '/'), which would also admit ',' - confirm whether that is intended.
private static final String[] SETUP_DATA =
{
"^http://[A-Za-z0-9_-]+(?:\\.[A-Za-z0-9_-]+)+", "",
"^ftp://[A-Za-z0-9_-]+(?:\\.[A-Za-z0-9_-]+)+", "",
"^gopher://[A-Za-z0-9_-]+(?:\\.[A-Za-z0-9_-]+)+", "",
"^mailto:[A-Za-z0-9!#$%*+-/=?^_`{|}~.]+@[A-Za-z0-9_-]+(?:\\.[A-Za-z0-9_-]+)+$", "",
"^news:[A-Za-z0-9_-]+(?:\\.[A-Za-z0-9_-]+)+$", "",
"^nntp://[A-Za-z0-9_-]+(?:\\.[A-Za-z0-9_-]+)+", "",
"^telnet://[A-Za-z0-9_-]+(?:\\.[A-Za-z0-9_-]+)+", "",
"^tn3270://[A-Za-z0-9_-]+(?:\\.[A-Za-z0-9_-]+)+", "",
"^www\\.[A-Za-z0-9_-]+(?:\\.[A-Za-z0-9_-]+)*", "http://",
"^ftp\\.[A-Za-z0-9_-]+(?:\\.[A-Za-z0-9_-]+)*", "ftp://",
"^gopher\\.[A-Za-z0-9_-]+(?:\\.[A-Za-z0-9_-]+)*", "gopher://"
};
private static final List KNOWN_ELEMENTS;
/*-------------------------------------------------------------------------------- /*--------------------------------------------------------------------------------
* Constructor * Constructor
@ -39,40 +97,9 @@ public class URLRewriter implements Rewriter
*/ */
public URLRewriter() public URLRewriter()
{ { // do nothing
setUpPrefixes(); // make sure the prefix data is set up
} // end constructor } // end constructor
/*--------------------------------------------------------------------------------
* Internal functions
*--------------------------------------------------------------------------------
*/
private static void setUpPrefixes()
{
if (set_up)
{ // allocate the hash table
set_up = false;
prefix_list = new Hashtable(10,0.9F);
// fill it with the proper URL prefixes
prefix_list.put("http:",NULLSTRING);
prefix_list.put("ftp:",NULLSTRING);
prefix_list.put("gopher:",NULLSTRING);
prefix_list.put("mailto:",NULLSTRING);
prefix_list.put("news:",NULLSTRING);
prefix_list.put("nntp:",NULLSTRING);
prefix_list.put("telnet:",NULLSTRING);
prefix_list.put("tn3270:",NULLSTRING);
prefix_list.put("www.",new String("http://"));
prefix_list.put("ftp.",new String("ftp://"));
prefix_list.put("gopher.",new String("gopher://"));
} // end if
} // end setUpPrefixes
/*-------------------------------------------------------------------------------- /*--------------------------------------------------------------------------------
* Implementations from interface Rewriter * Implementations from interface Rewriter
*-------------------------------------------------------------------------------- *--------------------------------------------------------------------------------
@ -86,29 +113,53 @@ public class URLRewriter implements Rewriter
public MarkupData rewrite(String data, RewriterServices svc) public MarkupData rewrite(String data, RewriterServices svc)
{ {
Enumeration prefixes = prefix_list.keys(); for (Iterator it=KNOWN_ELEMENTS.iterator(); it.hasNext(); )
while (prefixes.hasMoreElements()) { // test each element in turn
{ // get the next prefix and compare against the beginning of the string URLElement ue = (URLElement)(it.next());
String pfx = (String)(prefixes.nextElement()); String s = ue.eval(data);
if (data.regionMatches(true,0,pfx,0,pfx.length())) if (s!=null)
{ // good enough! build the open <A> tag (the gnarliest part of the markup) { // got a match! record the external reference and build the open <A> tag
StringBuffer open_a = new StringBuffer("<A HREF=\""); try
String catenate = (String)(prefix_list.get(pfx)); { // create URL and add it
open_a.append(catenate).append(data).append("\""); if (s.toLowerCase().startsWith("http:"))
catenate = svc.getRewriterAttrValue("ANCHORTAIL"); svc.addExternalReference(new URL(s));
if ((catenate!=null) && (catenate.length()>0))
} // end try
catch (MalformedURLException e)
{ // forget it
} // end catch
StringBuffer open_a = new StringBuffer("<a href=\"");
open_a.append(s).append("\"");
String catenate = svc.getRewriterAttrValue("ANCHORTAIL");
if (!(StringUtil.isStringEmpty(catenate)))
open_a.append(' ').append(catenate); open_a.append(' ').append(catenate);
open_a.append('>'); open_a.append('>');
// here's how you mark it up! // here's how you mark it up!
return new MarkupData(open_a.toString(),data,"</A>"); return new MarkupData(open_a.toString(),data,"</a>");
} // end if } // end if
} // end while } // end for
return null; // sorry, no can do return null; // sorry, no can do
} // end rewrite } // end rewrite
/*--------------------------------------------------------------------------------
* Static initializer
*--------------------------------------------------------------------------------
*/
static
{
ArrayList tmp = new ArrayList();
for (int i=0; i<SETUP_DATA.length; i+=2)
tmp.add(new URLElement(SETUP_DATA[i],SETUP_DATA[i + 1]));
tmp.trimToSize();
KNOWN_ELEMENTS = Collections.unmodifiableList(tmp);
} // end static initializer
} // end class URLRewriter } // end class URLRewriter

View File

@ -9,20 +9,26 @@
* *
* The Original Code is the Venice Web Community System. * The Original Code is the Venice Web Community System.
* *
* The Initial Developer of the Original Code is Eric J. Bowersox <erbo@silcom.com>, * The Initial Developer of the Original Code is Eric J. Bowersox <erbo@ricochet.com>,
* for Silverwrist Design Studios. Portions created by Eric J. Bowersox are * for Silverwrist Design Studios. Portions created by Eric J. Bowersox are
* Copyright (C) 2001 Eric J. Bowersox/Silverwrist Design Studios. All Rights Reserved. * Copyright (C) 2001-2004 Eric J. Bowersox/Silverwrist Design Studios. All Rights Reserved.
* *
* Contributor(s): * Contributor(s):
*/ */
package com.silverwrist.venice.htmlcheck.impl; package com.silverwrist.venice.htmlcheck.impl;
import java.net.URL;
public interface HTMLCheckerBackend public interface HTMLCheckerBackend
{ {
public abstract String getCheckerAttrValue(String name); public String getCheckerAttrValue(String name);
public abstract void sendTagMessage(String msg); public void sendTagMessage(String msg);
public abstract Object getCheckerContextValue(String name); public Object getCheckerContextValue(String name);
public void addExternalReference(URL ref);
public void addInternalReference(String ref);
} // end interface HTMLCheckerBackend } // end interface HTMLCheckerBackend

View File

@ -17,6 +17,7 @@
*/ */
package com.silverwrist.venice.htmlcheck.impl; package com.silverwrist.venice.htmlcheck.impl;
import java.net.URL;
import java.util.*; import java.util.*;
import org.apache.log4j.*; import org.apache.log4j.*;
import com.silverwrist.venice.htmlcheck.*; import com.silverwrist.venice.htmlcheck.*;
@ -139,6 +140,8 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic
private ArrayList m_tag_rewriters = new ArrayList(); // tag rewriter instances private ArrayList m_tag_rewriters = new ArrayList(); // tag rewriter instances
private ArrayList m_paren_rewriters = new ArrayList(); // paren rewriter instances private ArrayList m_paren_rewriters = new ArrayList(); // paren rewriter instances
private HashMap m_context_data = new HashMap(); // context variables private HashMap m_context_data = new HashMap(); // context variables
private HashSet m_external_references = new HashSet(); // list of external references
private HashSet m_internal_references = new HashSet(); // list of internal references
/*-------------------------------------------------------------------------------- /*--------------------------------------------------------------------------------
* Constructor * Constructor
@ -161,7 +164,7 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic
*/ */
/** /**
* Returns <CODE>true</CODE> if this character belongs as part of a word, <CODE>false</CODE> if not. * Returns <code>true</code> if this character belongs as part of a word, <code>false</code> if not.
* *
* @param ch Character to be tested. * @param ch Character to be tested.
* @return See above. * @return See above.
@ -210,8 +213,8 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic
} // end getRunLength } // end getRunLength
/** /**
* Copies the <CODE>Rewriter</CODE> objects from an outside list to an internal list, wrapping * Copies the <code>Rewriter</code> objects from an outside list to an internal list, wrapping
* named rewriters in <CODE>CountingRewriter</CODE> objects as appropriate. * named rewriters in <code>CountingRewriter</code> objects as appropriate.
* *
* @param dest Destination to copy rewriters to. * @param dest Destination to copy rewriters to.
* @param source List to copy rewriters from. * @param source List to copy rewriters from.
@ -290,8 +293,8 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic
* *
* @param ch Character to output. * @param ch Character to output.
* @param filters List of filters to use to attempt to process the character. * @param filters List of filters to use to attempt to process the character.
* @param count_cols <CODE>true</CODE> if the character output adds to the column counter, * @param count_cols <code>true</code> if the character output adds to the column counter,
* <CODE>false</CODE> if not. * <code>false</code> if not.
*/ */
private final void emitChar(char ch, List filters, boolean count_cols) private final void emitChar(char ch, List filters, boolean count_cols)
{ {
@ -841,7 +844,7 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic
} // end handleAsHTML } // end handleAsHTML
/** /**
* Returns <CODE>true</CODE> if the temporary buffer contains the start of an HTML comment. (The * Returns <code>true</code> if the temporary buffer contains the start of an HTML comment. (The
* leading and trailing angle brackets are assumed.) * leading and trailing angle brackets are assumed.)
* *
* @return See above. * @return See above.
@ -853,7 +856,7 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic
} // end containsHTMLComment } // end containsHTMLComment
/** /**
* Returns <CODE>true</CODE> if the temporary buffer contains a complete HTML comment. (The leading * Returns <code>true</code> if the temporary buffer contains a complete HTML comment. (The leading
* and trailing angle brackets are assumed.) * and trailing angle brackets are assumed.)
* *
* @return See above. * @return See above.
@ -869,7 +872,7 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic
} // end containsCompleteHTMLComment } // end containsCompleteHTMLComment
/** /**
* Returns <CODE>true</CODE> if the temporary buffer contains an XML construct, i.e. a tag that * Returns <code>true</code> if the temporary buffer contains an XML construct, i.e. a tag that
* contains a ':', and may or may not have a leading '/'. (The leading and trailing angle brackets * contains a ':', and may or may not have a leading '/'. (The leading and trailing angle brackets
* are assumed.) * are assumed.)
* *
@ -1381,6 +1384,8 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic
m_lines = 0; m_lines = 0;
m_paren_level = 0; m_paren_level = 0;
m_output_buffer = null; m_output_buffer = null;
m_external_references.clear();
m_internal_references.clear();
killState(); killState();
// Also reset all the counters. // Also reset all the counters.
@ -1441,6 +1446,28 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic
} // end setContextValue } // end setContextValue
/**
 * Returns the external references recorded by this checker.
 *
 * @return An unmodifiable set holding a copy of the recorded external references; the
 *         shared empty set if none were recorded.
 * @exception NotYetFinishedException If the checker has not been finished yet.
 */
public Set getExternalReferences() throws NotYetFinishedException
{
  if (m_finished)
  { // hand back a read-only copy that is independent of the checker's own set
    return m_external_references.isEmpty()
           ? Collections.EMPTY_SET
           : Collections.unmodifiableSet(new HashSet(m_external_references));

  } // end if

  throw new NotYetFinishedException();

} // end getExternalReferences
/**
 * Returns the internal references recorded by this checker.
 *
 * @return An unmodifiable set holding a copy of the recorded internal references; the
 *         shared empty set if none were recorded.
 * @exception NotYetFinishedException If the checker has not been finished yet.
 */
public Set getInternalReferences() throws NotYetFinishedException
{
  if (m_finished)
  { // hand back a read-only copy that is independent of the checker's own set
    return m_internal_references.isEmpty()
           ? Collections.EMPTY_SET
           : Collections.unmodifiableSet(new HashSet(m_internal_references));

  } // end if

  throw new NotYetFinishedException();

} // end getInternalReferences
/*-------------------------------------------------------------------------------- /*--------------------------------------------------------------------------------
* Implementations from interface HTMLCheckerBackend * Implementations from interface HTMLCheckerBackend
*-------------------------------------------------------------------------------- *--------------------------------------------------------------------------------
@ -1488,6 +1515,18 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic
} // end getCheckerContextValue } // end getCheckerContextValue
/**
 * Records an external reference reported by a rewriter or tag handler. References are kept
 * in a set, so adding a URL equal to one already recorded has no further effect.
 *
 * @param ref The URL to be recorded.
 */
// NOTE(review): the backing collection is a HashSet, and java.net.URL's equals()/hashCode()
// may resolve host names, so this call can block on name lookup - confirm that is acceptable.
public void addExternalReference(URL ref)
{
m_external_references.add(ref);
} // end addExternalReference
/**
 * Records an internal reference reported by a rewriter. References are kept in a set, so
 * adding a string equal to one already recorded has no further effect.
 *
 * @param ref The internal reference to be recorded (PostLinkRewriter passes the post link
 *            it builds, without the URI prefix).
 */
public void addInternalReference(String ref)
{
m_internal_references.add(ref);
} // end addInternalReference
/*-------------------------------------------------------------------------------- /*--------------------------------------------------------------------------------
* Implementations from interface RewriterServices * Implementations from interface RewriterServices
*-------------------------------------------------------------------------------- *--------------------------------------------------------------------------------
@ -1505,4 +1544,6 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic
} // end getRewriterContextValue } // end getRewriterContextValue
// addExternalReference and addInternalReference are implemented as part of HTMLCheckerBackend
} // end class HTMLCheckerImpl } // end class HTMLCheckerImpl

View File

@ -9,14 +9,19 @@
* *
* The Original Code is the Venice Web Community System. * The Original Code is the Venice Web Community System.
* *
* The Initial Developer of the Original Code is Eric J. Bowersox <erbo@silcom.com>, * The Initial Developer of the Original Code is Eric J. Bowersox <erbo@ricochet.com>,
* for Silverwrist Design Studios. Portions created by Eric J. Bowersox are * for Silverwrist Design Studios. Portions created by Eric J. Bowersox are
* Copyright (C) 2001 Eric J. Bowersox/Silverwrist Design Studios. All Rights Reserved. * Copyright (C) 2001-2004 Eric J. Bowersox/Silverwrist Design Studios. All Rights Reserved.
* *
* Contributor(s): * Contributor(s):
*/ */
package com.silverwrist.venice.htmlcheck.impl; package com.silverwrist.venice.htmlcheck.impl;
import java.net.*;
import org.apache.log4j.Logger;
import org.apache.regexp.*;
import com.silverwrist.util.*;
class TagA extends BalancedTag class TagA extends BalancedTag
{ {
/*-------------------------------------------------------------------------------- /*--------------------------------------------------------------------------------
@ -24,21 +29,94 @@ class TagA extends BalancedTag
*-------------------------------------------------------------------------------- *--------------------------------------------------------------------------------
*/ */
private static final String TARGET_ATTR = "TARGET"; /** The instance of {@link org.apache.log4j.Logger Logger} for use by this class. */
private static Logger logger = Logger.getLogger(TagA.class);
/*--------------------------------------------------------------------------------
* Attributes
*--------------------------------------------------------------------------------
*/
/** Regular expression program to look for "HREF=" attribute. */
private REProgram m_href = null;
/** Regular expression program to look for "TARGET=" attribute. */
private REProgram m_target = null;
/*-------------------------------------------------------------------------------- /*--------------------------------------------------------------------------------
* Constructor * Constructor
*-------------------------------------------------------------------------------- *--------------------------------------------------------------------------------
*/ */
/**
* Creates a new instance of <code>TagA</code>.
*/
TagA() TagA()
{ {
super("A",false); super("A",false);
try
{ // compile regular expressions
RECompiler compiler = new RECompiler();
m_href = compiler.compile("href\\s*=");
m_target = compiler.compile("target\\s*=");
} // end try
catch (RESyntaxException e)
{ // shouldn't happen
logger.fatal("got RESyntaxException in TagA",e);
} // end catch
} // end constructor } // end constructor
/*-------------------------------------------------------------------------------- /*--------------------------------------------------------------------------------
* External operations * Internal operations
*--------------------------------------------------------------------------------
*/
/**
 * Pulls the attribute value off the front of a string. Leading whitespace is skipped; the
 * value is then either the text between a pair of matching single or double quotes, or, if
 * it does not start with a quote, the run of characters up to the next whitespace (or the
 * end of the string).
 *
 * @param s The string to extract the attribute value from, positioned just after the
 *          attribute's "=" sign.
 * @return The attribute value, without any enclosing quotes. An empty string is returned if
 *         the input is empty or all whitespace, or if an opening quote is never closed.
 */
private static final String extractAttribute(String s)
{
  final int len = s.length();

  // step past any whitespace in front of the value
  int start = 0;
  while ((start<len) && Character.isWhitespace(s.charAt(start)))
    start++;
  if (start>=len)
    return "";  // nothing but whitespace

  final char first = s.charAt(start);
  if ((first=='\"') || (first=='\''))
  { // quoted value: it runs up to the next occurrence of the same quote character
    int close = s.indexOf(first,start + 1);
    return (close<0) ? "" : s.substring(start + 1,close);

  } // end if

  // bare value: it runs up to the next whitespace, or to the end of the string
  int end = start;
  while ((end<len) && !(Character.isWhitespace(s.charAt(end))))
    end++;
  return s.substring(start,end);

} // end extractAttribute
/*--------------------------------------------------------------------------------
* Overrides from class SimpleTag
*-------------------------------------------------------------------------------- *--------------------------------------------------------------------------------
*/ */
@ -47,77 +125,44 @@ class TagA extends BalancedTag
if (is_closing) if (is_closing)
return contents; // don't bother checking close tags return contents; // don't bother checking close tags
// Skip over the initial word of the tag data, as that's the tag name. // Pull out the HREF= attribute, as that's an "external reference" we need to keep track of.
int i = 0; RE m = new RE(m_href,RE.MATCH_CASEINDEPENDENT);
while ((i!=contents.length()) && !(Character.isWhitespace(contents.charAt(i)))) if (m.match(contents))
i++; { // get the attribute value
try
{ // turn it into a URL and add it as an external reference
String s = extractAttribute(contents.substring(m.getParenEnd(0)));
if (!(StringUtil.isStringEmpty(s)) && s.toLowerCase().startsWith("http:"))
{ // turn it into the URL and add it
URL ref = new URL(s);
context.addExternalReference(ref);
// Search for the TARGET= attribute in the tag data. } // end if
// else ignore me
} // end try
catch (MalformedURLException e)
{ // ignore this reference
} // end catch
} // end if
// Look for the TARGET= attribute.
boolean target_seen = false; boolean target_seen = false;
while (i!=contents.length()) m = new RE(m_target,RE.MATCH_CASEINDEPENDENT);
{ // skip over any whitespace between one attribute (or the name) and the next one if (m.match(contents))
while ((i!=contents.length()) && Character.isWhitespace(contents.charAt(i))) { // get the attribute value
i++; String s = extractAttribute(contents.substring(m.getParenEnd(0)));
if (i==contents.length()) if (!(StringUtil.isStringEmpty(s)))
break; // reached end of string, all done searching
// Mark the start of this attribute name and start skipping over it.
int start_name = i;
while ((i!=contents.length()) && !(Character.isWhitespace(contents.charAt(i)))
&& (contents.charAt(i)!='='))
i++;
// We now know where the attribute name is, see if it's "TARGET".
if ((i-start_name)==TARGET_ATTR.length())
{ // compare the substring to see if it's right
String name = contents.substring(start_name,i);
if (name.equalsIgnoreCase(TARGET_ATTR))
{ // OK, we saw the TARGET tag in the list! Bail out!
target_seen = true; target_seen = true;
break;
} // end if } // end if
} // end if
while ((i!=contents.length()) && Character.isWhitespace(contents.charAt(i)))
i++; // skip over whitespace at end of name but before the = sign
if ((i<contents.length()) && (contents.charAt(i)=='='))
{ // skip over the = sign first
i++;
while ((i!=contents.length()) && Character.isWhitespace(contents.charAt(i)))
i++; // skip over whitespace after the = sign
if (i==contents.length())
break; // reached end of string, all done searching
if ((contents.charAt(i)=='\'') || (contents.charAt(i)=='\"'))
{ // this is a quoted string - swallow it
char quote_char = contents.charAt(i++); // skip the quote part
while ((i!=contents.length()) && (contents.charAt(i)!=quote_char))
i++; // skip over data between quotes
if (i!=contents.length())
i++; // skip over last quote
} // end if
else
{ // skip over a single word
while ((i!=contents.length()) && !(Character.isWhitespace(contents.charAt(i))))
i++;
} // end else
} // end if
// else this tag had no value - just go on to the next one
} // end while
if (target_seen) if (target_seen)
return contents; // no need to alter the string return contents; // no need to alter the string
String tail = (String)(context.getCheckerAttrValue("ANCHORTAIL")); String tail = (String)(context.getCheckerAttrValue("ANCHORTAIL"));
return new String(contents + " " + tail); return contents + " " + tail;
} // end rewriteTagContents } // end rewriteTagContents