Added the ability for the HTML checker to keep track of internal and external
references in any post, so we can do trackbacks.
2004-12-30 08:08:13 +00:00 · 2004-12-30 08:08:13 +00:00 · 4c5c7ffe85
commit 4c5c7ffe85
parent 7e72ec21d0
8 changed files with 341 additions and 151 deletions
--- a/src/com/silverwrist/venice/db/PostLinkRewriter.java
+++ b/src/com/silverwrist/venice/db/PostLinkRewriter.java
@ -9,9 +9,9 @@
 * 
 * The Original Code is the Venice Web Communities System.
 * 
- * The Initial Developer of the Original Code is Eric J. Bowersox <erbo@silcom.com>,
+ * The Initial Developer of the Original Code is Eric J. Bowersox <erbo@ricochet.com>,
 * for Silverwrist Design Studios.  Portions created by Eric J. Bowersox are
- * Copyright (C) 2001-02 Eric J. Bowersox/Silverwrist Design Studios.  All Rights Reserved.
+ * Copyright (C) 2001-2004 Eric J. Bowersox/Silverwrist Design Studios.  All Rights Reserved.
 * 
 * Contributor(s): 
 */
@ -40,7 +40,7 @@ public class PostLinkRewriter implements Rewriter
   *--------------------------------------------------------------------------------
   */

-  private GlobalSite globalsite;         // global site containing utilities
+  private final GlobalSite globalsite;         // global site containing utilities

  /*--------------------------------------------------------------------------------
   * Constructor
@ -60,7 +60,7 @@ public class PostLinkRewriter implements Rewriter

  private static final String buildPostLink(PostLinkDecoder pl, PostLinkDecoderContext ctxt)
  {
-    StringBuffer b = new StringBuffer(URI_PREFIX);
+    StringBuffer b = new StringBuffer();
    boolean started = false;
    if (pl.getCommunity()==null)
      b.append(ctxt.getCommunityName());
@ -180,14 +180,18 @@ public class PostLinkRewriter implements Rewriter

    } // end catch

+    // build the post link and add it as an internal reference
+    String link = buildPostLink(pl,ctxt);
+    svc.addInternalReference(link);
+
    // build the necessary markup and return it
-    StringBuffer open_a = new StringBuffer("<A HREF=\"");
-    open_a.append(buildPostLink(pl,ctxt)).append("\"");
+    StringBuffer open_a = new StringBuffer("<a href=\"");
+    open_a.append(URI_PREFIX).append(link).append("\"");
    String catenate = svc.getRewriterAttrValue("ANCHORTAIL");
    if (!(StringUtil.isStringEmpty(catenate)))
      open_a.append(' ').append(catenate);
    open_a.append('>');
-    return new MarkupData(open_a.toString(),data,"</A>");
+    return new MarkupData(open_a.toString(),data,"</a>");

  } // end rewrite

--- a/src/com/silverwrist/venice/htmlcheck/HTMLChecker.java
+++ b/src/com/silverwrist/venice/htmlcheck/HTMLChecker.java
@ -17,6 +17,8 @@
 */
 package com.silverwrist.venice.htmlcheck;

+import java.util.Set;
+
 public interface HTMLChecker
 {
  public void append(String str) throws AlreadyFinishedException;
@ -37,4 +39,8 @@ public interface HTMLChecker

  public void setContextValue(String name, Object val);

+  public Set getExternalReferences() throws NotYetFinishedException;
+
+  public Set getInternalReferences() throws NotYetFinishedException;
+
 } // end interface HTMLChecker
--- a/src/com/silverwrist/venice/htmlcheck/RewriterServices.java
+++ b/src/com/silverwrist/venice/htmlcheck/RewriterServices.java
@ -9,18 +9,24 @@
 * 
 * The Original Code is the Venice Web Community System.
 * 
- * The Initial Developer of the Original Code is Eric J. Bowersox <erbo@silcom.com>,
+ * The Initial Developer of the Original Code is Eric J. Bowersox <erbo@ricochet.com>,
 * for Silverwrist Design Studios.  Portions created by Eric J. Bowersox are
- * Copyright (C) 2001 Eric J. Bowersox/Silverwrist Design Studios.  All Rights Reserved.
+ * Copyright (C) 2001-2004 Eric J. Bowersox/Silverwrist Design Studios.  All Rights Reserved.
 * 
 * Contributor(s): 
 */
 package com.silverwrist.venice.htmlcheck;

+import java.net.URL;
+
 public interface RewriterServices
 {
-  public abstract String getRewriterAttrValue(String name);
+  public String getRewriterAttrValue(String name);

-  public abstract Object getRewriterContextValue(String name);
+  public Object getRewriterContextValue(String name);
+
+  public void addExternalReference(URL ref);
+
+  public void addInternalReference(String ref);

 } // end interface RewriterServices
--- a/src/com/silverwrist/venice/htmlcheck/filters/EmailRewriter.java
+++ b/src/com/silverwrist/venice/htmlcheck/filters/EmailRewriter.java
@ -9,21 +9,33 @@
 * 
 * The Original Code is the Venice Web Community System.
 * 
- * The Initial Developer of the Original Code is Eric J. Bowersox <erbo@silcom.com>,
+ * The Initial Developer of the Original Code is Eric J. Bowersox <erbo@ricochet.com>,
 * for Silverwrist Design Studios.  Portions created by Eric J. Bowersox are
- * Copyright (C) 2001 Eric J. Bowersox/Silverwrist Design Studios.  All Rights Reserved.
+ * Copyright (C) 2001-2004 Eric J. Bowersox/Silverwrist Design Studios.  All Rights Reserved.
 * 
 * Contributor(s): 
 */
 package com.silverwrist.venice.htmlcheck.filters;

 import java.util.*;
+import org.apache.log4j.Logger;
+import org.apache.regexp.*;
+import com.silverwrist.util.*;
 import com.silverwrist.venice.htmlcheck.Rewriter;
 import com.silverwrist.venice.htmlcheck.RewriterServices;
 import com.silverwrist.venice.htmlcheck.MarkupData;

 public class EmailRewriter implements Rewriter
 {
+  /*--------------------------------------------------------------------------------
+   * Static data members
+   *--------------------------------------------------------------------------------
+   */
+
+  private static Logger logger = Logger.getLogger(EmailRewriter.class);
+
+  private static REProgram s_match = null;
+
  /*--------------------------------------------------------------------------------
   * Constructor
   *--------------------------------------------------------------------------------
@ -46,23 +58,42 @@ public class EmailRewriter implements Rewriter

  public MarkupData rewrite(String data, RewriterServices svc)
  {
-    int at_pos = data.indexOf('@');
-    if ((at_pos<=0) || (at_pos==(data.length()-1)))
-      return null;
-
-    // TODO: put in more validation checking
+    RE m = new RE(s_match,RE.MATCH_CASEINDEPENDENT);
+    if (!(m.match(data)))
+      return null;  // not a valid E-mail address

    // build the <A> tag (the gnarliest part)
-    StringBuffer open_a = new StringBuffer("<A HREF=\"mailto:");
+    StringBuffer open_a = new StringBuffer("<a href=\"mailto:");
    open_a.append(data).append("\"");
    String catenate = svc.getRewriterAttrValue("ANCHORTAIL");
-    if ((catenate!=null) && (catenate.length()>0))
+    if (!(StringUtil.isStringEmpty(catenate)))
      open_a.append(' ').append(catenate);
    open_a.append('>');

    // return the markup data back to the checker
-    return new MarkupData(open_a.toString(),data,"</A>");
+    return new MarkupData(open_a.toString(),data,"</a>");

  } // end rewrite

+  /*--------------------------------------------------------------------------------
+   * Static initializer
+   *--------------------------------------------------------------------------------
+   */
+
+  static
+  {
+    // The pattern is compiled once when the class is loaded; rewrite() wraps the compiled
+    // program in a fresh RE matcher on every call.
+    try
+    { // compile our regular expression
+      RECompiler compiler = new RECompiler();
+      // The '-' in the local-part class is escaped on purpose: written bare between '+' and '/'
+      // it defines the range '+'..'/', which also admits ',' as a legal address character.
+      s_match = compiler.compile("^[A-Za-z0-9!#$%*+\\-/=?^_`{|}~.]+@[A-Za-z0-9_-]+(?:\\.[A-Za-z0-9_-]+)+$");
+
+    } // end try
+    catch (RESyntaxException e)
+    { // shouldn't happen; note that s_match is left null if it does
+      logger.fatal("caught RESyntaxException in EmailRewriter initializer",e);
+
+    } // end catch
+
+  } // end static initializer
+
 } // end class EmailRewriter
--- a/src/com/silverwrist/venice/htmlcheck/filters/URLRewriter.java
+++ b/src/com/silverwrist/venice/htmlcheck/filters/URLRewriter.java
@ -9,29 +9,87 @@
 * 
 * The Original Code is the Venice Web Community System.
 * 
- * The Initial Developer of the Original Code is Eric J. Bowersox <erbo@silcom.com>,
+ * The Initial Developer of the Original Code is Eric J. Bowersox <erbo@ricochet.com>,
 * for Silverwrist Design Studios.  Portions created by Eric J. Bowersox are
- * Copyright (C) 2001 Eric J. Bowersox/Silverwrist Design Studios.  All Rights Reserved.
+ * Copyright (C) 2001-2004 Eric J. Bowersox/Silverwrist Design Studios.  All Rights Reserved.
 * 
 * Contributor(s): 
 */
 package com.silverwrist.venice.htmlcheck.filters;

+import java.net.*;
 import java.util.*;
+import org.apache.log4j.Logger;
+import org.apache.regexp.*;
+import com.silverwrist.util.*;
 import com.silverwrist.venice.htmlcheck.Rewriter;
 import com.silverwrist.venice.htmlcheck.RewriterServices;
 import com.silverwrist.venice.htmlcheck.MarkupData;

 public class URLRewriter implements Rewriter
 {
+  /*--------------------------------------------------------------------------------
+   * Internal class containing URL elements.
+   *--------------------------------------------------------------------------------
+   */
+
+  private static class URLElement
+  {
+    private REProgram m_program;  // compiled pattern; stays null if compilation failed
+    private String m_scheme;      // text prepended to any input that matches the pattern
+
+    /**
+     * Compiles the pattern and remembers the prefix that goes with it.
+     *
+     * @param pattern Regular expression identifying this kind of URL.
+     * @param prefix String to be prepended to input that matches the pattern.
+     */
+    URLElement(String pattern, String prefix)
+    {
+      try
+      { // compile first; the prefix is only stored once that has succeeded
+        m_program = COMPILER.compile(pattern);
+        m_scheme = prefix;
+
+      } // end try
+      catch (RESyntaxException e)
+      { // shouldn't happen
+        logger.fatal("got RESyntaxException in URLElement",e);
+
+      } // end catch
+
+    } // end constructor
+
+    /**
+     * Tests the input against this element's pattern, ignoring case.
+     *
+     * @param input The text to be tested.
+     * @return The prefix followed by the input if the pattern matches, <code>null</code> otherwise.
+     */
+    String eval(String input)
+    {
+      RE matcher = new RE(m_program,RE.MATCH_CASEINDEPENDENT);
+      return matcher.match(input) ? (m_scheme + input) : null;
+
+    } // end eval
+
+  } // end class URLElement
+
  /*--------------------------------------------------------------------------------
   * Static data members
   *--------------------------------------------------------------------------------
   */

-  private static final String NULLSTRING = "";
-  private static Hashtable prefix_list = null;
-  private static boolean set_up = true;
+  private static Logger logger = Logger.getLogger(URLRewriter.class);
+
+  private static final RECompiler COMPILER = new RECompiler();
+
+  // Flat list of (pattern, prefix) pairs: each pattern recognizes one kind of URL, and the
+  // prefix is what gets prepended to matching text to turn it into a complete URL.
+  // In the mailto: pattern the '-' of the local-part class is escaped; written bare between
+  // '+' and '/' it defines the range '+'..'/', which also admits ','.
+  private static final String[] SETUP_DATA =
+    {
+      "^http://[A-Za-z0-9_-]+(?:\\.[A-Za-z0-9_-]+)+",                                   "",
+      "^ftp://[A-Za-z0-9_-]+(?:\\.[A-Za-z0-9_-]+)+",                                    "",
+      "^gopher://[A-Za-z0-9_-]+(?:\\.[A-Za-z0-9_-]+)+",                                 "",
+      "^mailto:[A-Za-z0-9!#$%*+\\-/=?^_`{|}~.]+@[A-Za-z0-9_-]+(?:\\.[A-Za-z0-9_-]+)+$", "",
+      "^news:[A-Za-z0-9_-]+(?:\\.[A-Za-z0-9_-]+)+$",                                    "",
+      "^nntp://[A-Za-z0-9_-]+(?:\\.[A-Za-z0-9_-]+)+",                                   "",
+      "^telnet://[A-Za-z0-9_-]+(?:\\.[A-Za-z0-9_-]+)+",                                 "",
+      "^tn3270://[A-Za-z0-9_-]+(?:\\.[A-Za-z0-9_-]+)+",                                 "",
+      "^www\\.[A-Za-z0-9_-]+(?:\\.[A-Za-z0-9_-]+)*",                                    "http://",
+      "^ftp\\.[A-Za-z0-9_-]+(?:\\.[A-Za-z0-9_-]+)*",                                    "ftp://",
+      "^gopher\\.[A-Za-z0-9_-]+(?:\\.[A-Za-z0-9_-]+)*",                                 "gopher://"
+    };
+
+  private static final List KNOWN_ELEMENTS;

  /*--------------------------------------------------------------------------------
   * Constructor
@ -39,40 +97,9 @@ public class URLRewriter implements Rewriter
   */

  public URLRewriter()
-  {
-    setUpPrefixes();  // make sure the prefix data is set up
-
+  { // do nothing
  } // end constructor

-  /*--------------------------------------------------------------------------------
-   * Internal functions
-   *--------------------------------------------------------------------------------
-   */
-
-  private static void setUpPrefixes()
-  {
-    if (set_up)
-    { // allocate the hash table
-      set_up = false;
-      prefix_list = new Hashtable(10,0.9F);
-
-      // fill it with the proper URL prefixes
-      prefix_list.put("http:",NULLSTRING);
-      prefix_list.put("ftp:",NULLSTRING);
-      prefix_list.put("gopher:",NULLSTRING);
-      prefix_list.put("mailto:",NULLSTRING);
-      prefix_list.put("news:",NULLSTRING);
-      prefix_list.put("nntp:",NULLSTRING);
-      prefix_list.put("telnet:",NULLSTRING);
-      prefix_list.put("tn3270:",NULLSTRING);
-      prefix_list.put("www.",new String("http://"));
-      prefix_list.put("ftp.",new String("ftp://"));
-      prefix_list.put("gopher.",new String("gopher://"));
-
-    } // end if
-
-  } // end setUpPrefixes
-
  /*--------------------------------------------------------------------------------
   * Implementations from interface Rewriter
   *--------------------------------------------------------------------------------
@ -86,29 +113,53 @@ public class URLRewriter implements Rewriter

  public MarkupData rewrite(String data, RewriterServices svc)
  {
-    Enumeration prefixes = prefix_list.keys();
-    while (prefixes.hasMoreElements())
-    { // get the next prefix and compare against the beginning of the string
-      String pfx = (String)(prefixes.nextElement());
-      if (data.regionMatches(true,0,pfx,0,pfx.length()))
-      { // good enough!  build the open <A> tag (the gnarliest part of the markup)
-	StringBuffer open_a = new StringBuffer("<A HREF=\"");
-	String catenate = (String)(prefix_list.get(pfx));
-	open_a.append(catenate).append(data).append("\"");
-	catenate = svc.getRewriterAttrValue("ANCHORTAIL");
-	if ((catenate!=null) && (catenate.length()>0))
+    for (Iterator it=KNOWN_ELEMENTS.iterator(); it.hasNext(); )
+    { // test each element in turn
+      URLElement ue = (URLElement)(it.next());
+      String s = ue.eval(data);
+      if (s!=null)
+      { // got a match!  record the external reference and build the open <A> tag
+	try
+	{ // create URL and add it
+	  if (s.toLowerCase().startsWith("http:"))
+	    svc.addExternalReference(new URL(s));
+
+	} // end try
+	catch (MalformedURLException e)
+	{ // forget it
+	} // end catch
+
+	StringBuffer open_a = new StringBuffer("<a href=\"");
+	open_a.append(s).append("\"");
+	String catenate = svc.getRewriterAttrValue("ANCHORTAIL");
+	if (!(StringUtil.isStringEmpty(catenate)))
 	  open_a.append(' ').append(catenate);
 	open_a.append('>');

 	// here's how you mark it up!
-	return new MarkupData(open_a.toString(),data,"</A>");
+	return new MarkupData(open_a.toString(),data,"</a>");

      } // end if

-    } // end while
+    } // end for

    return null;  // sorry, no can do

  } // end rewrite

+  /*--------------------------------------------------------------------------------
+   * Static initializer
+   *--------------------------------------------------------------------------------
+   */
+
+  static
+  {
+    // SETUP_DATA is laid out flat as (pattern, prefix) pairs, so two entries are consumed
+    // for every URLElement that gets built.
+    ArrayList elements = new ArrayList();
+    int ndx = 0;
+    while (ndx<SETUP_DATA.length)
+    { // build the element for this pair
+      String pattern = SETUP_DATA[ndx++];
+      String prefix = SETUP_DATA[ndx++];
+      elements.add(new URLElement(pattern,prefix));
+
+    } // end while
+
+    elements.trimToSize();
+    KNOWN_ELEMENTS = Collections.unmodifiableList(elements);
+
+  } // end static initializer
+
 } // end class URLRewriter
--- a/src/com/silverwrist/venice/htmlcheck/impl/HTMLCheckerBackend.java
+++ b/src/com/silverwrist/venice/htmlcheck/impl/HTMLCheckerBackend.java
@ -9,20 +9,26 @@
 * 
 * The Original Code is the Venice Web Community System.
 * 
- * The Initial Developer of the Original Code is Eric J. Bowersox <erbo@silcom.com>,
+ * The Initial Developer of the Original Code is Eric J. Bowersox <erbo@ricochet.com>,
 * for Silverwrist Design Studios.  Portions created by Eric J. Bowersox are
- * Copyright (C) 2001 Eric J. Bowersox/Silverwrist Design Studios.  All Rights Reserved.
+ * Copyright (C) 2001-2004 Eric J. Bowersox/Silverwrist Design Studios.  All Rights Reserved.
 * 
 * Contributor(s): 
 */
 package com.silverwrist.venice.htmlcheck.impl;

+import java.net.URL;
+
 public interface HTMLCheckerBackend
 {
-  public abstract String getCheckerAttrValue(String name);
+  public String getCheckerAttrValue(String name);

-  public abstract void sendTagMessage(String msg);
+  public void sendTagMessage(String msg);

-  public abstract Object getCheckerContextValue(String name);
+  public Object getCheckerContextValue(String name);
+
+  public void addExternalReference(URL ref);
+
+  public void addInternalReference(String ref);

 } // end interface HTMLCheckerBackend
--- a/src/com/silverwrist/venice/htmlcheck/impl/HTMLCheckerImpl.java
+++ b/src/com/silverwrist/venice/htmlcheck/impl/HTMLCheckerImpl.java
@ -17,6 +17,7 @@
 */
 package com.silverwrist.venice.htmlcheck.impl;

+import java.net.URL;
 import java.util.*;
 import org.apache.log4j.*;
 import com.silverwrist.venice.htmlcheck.*;
@ -139,6 +140,8 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic
  private ArrayList m_tag_rewriters = new ArrayList();      // tag rewriter instances
  private ArrayList m_paren_rewriters = new ArrayList();    // paren rewriter instances
  private HashMap m_context_data = new HashMap();   // context variables
+  private HashSet m_external_references = new HashSet();  // list of external references
+  private HashSet m_internal_references = new HashSet();  // list of internal references

  /*--------------------------------------------------------------------------------
   * Constructor
@ -161,7 +164,7 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic
   */

  /**
-   * Returns <CODE>true</CODE> if this character belongs as part of a word, <CODE>false</CODE> if not.
+   * Returns <code>true</code> if this character belongs as part of a word, <code>false</code> if not.
   *
   * @param ch Character to be tested.
   * @return See above.
@ -210,8 +213,8 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic
  } // end getRunLength

  /**
-   * Copies the <CODE>Rewriter</CODE> objects from an outside list to an internal list, wrapping
-   * named rewriters in <CODE>CountingRewriter</CODE> objects as appropriate.
+   * Copies the <code>Rewriter</code> objects from an outside list to an internal list, wrapping
+   * named rewriters in <code>CountingRewriter</code> objects as appropriate.
   *
   * @param dest Destination to copy rewriters to.
   * @param source List to copy rewriters from.
@ -290,8 +293,8 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic
   *
   * @param ch Character to output.
   * @param filters List of filters to use to attempt to process the character.
-   * @param count_cols <CODE>true</CODE> if the character output adds to the column counter,
-   *                   <CODE>false</CODE> if not.
+   * @param count_cols <code>true</code> if the character output adds to the column counter,
+   *                   <code>false</code> if not.
   */
  private final void emitChar(char ch, List filters, boolean count_cols)
  {
@ -841,7 +844,7 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic
  } // end handleAsHTML

  /**
-   * Returns <CODE>true</CODE> if the temporary buffer contains the start of an HTML comment.  (The
+   * Returns <code>true</code> if the temporary buffer contains the start of an HTML comment.  (The
   * leading and trailing angle brackets are assumed.)
   *
   * @return See above.
@ -853,7 +856,7 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic
  } // end containsHTMLComment

  /**
-   * Returns <CODE>true</CODE> if the temporary buffer contains a complete HTML comment.  (The leading
+   * Returns <code>true</code> if the temporary buffer contains a complete HTML comment.  (The leading
   * and trailing angle brackets are assumed.)
   *
   * @return See above.
@ -869,7 +872,7 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic
  } // end containsCompleteHTMLComment

  /**
-   * Returns <CODE>true</CODE> if the temporary buffer contains an XML construct, i.e. a tag that
+   * Returns <code>true</code> if the temporary buffer contains an XML construct, i.e. a tag that
   * contains a ':', and may or may not have a leading '/'.  (The leading and trailing angle brackets
   * are assumed.)
   *
@ -1381,6 +1384,8 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic
    m_lines = 0;
    m_paren_level = 0;
    m_output_buffer = null;
+    m_external_references.clear();
+    m_internal_references.clear();
    killState();

    // Also reset all the counters.
@ -1441,6 +1446,28 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic

  } // end setContextValue

+  /**
+   * Returns the contents of the external reference set.
+   *
+   * @return An unmodifiable set holding a copy of the external references, or the shared
+   *         empty set if there are none.
+   * @exception NotYetFinishedException If the <code>m_finished</code> flag is not set.
+   */
+  public Set getExternalReferences() throws NotYetFinishedException
+  {
+    if (m_finished)
+    { // hand back a read-only snapshot rather than the live internal set
+      if (m_external_references.isEmpty())
+        return Collections.EMPTY_SET;
+      return Collections.unmodifiableSet(new HashSet(m_external_references));
+
+    } // end if
+
+    throw new NotYetFinishedException();
+
+  } // end getExternalReferences
+
+  /**
+   * Returns the contents of the internal reference set.
+   *
+   * @return An unmodifiable set holding a copy of the internal references, or the shared
+   *         empty set if there are none.
+   * @exception NotYetFinishedException If the <code>m_finished</code> flag is not set.
+   */
+  public Set getInternalReferences() throws NotYetFinishedException
+  {
+    if (m_finished)
+    { // hand back a read-only snapshot rather than the live internal set
+      if (m_internal_references.isEmpty())
+        return Collections.EMPTY_SET;
+      return Collections.unmodifiableSet(new HashSet(m_internal_references));
+
+    } // end if
+
+    throw new NotYetFinishedException();
+
+  } // end getInternalReferences
+
  /*--------------------------------------------------------------------------------
   * Implementations from interface HTMLCheckerBackend
   *--------------------------------------------------------------------------------
@ -1488,6 +1515,18 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic

  } // end getCheckerContextValue

+  /**
+   * Records an external reference by adding it to the set of external references.
+   * NOTE(review): that set is a <code>HashSet</code> of <code>java.net.URL</code> objects, and
+   * <code>URL.equals()</code>/<code>hashCode()</code> are documented as resolving host names;
+   * confirm that this is acceptable here, or consider keying on the string form instead.
+   *
+   * @param ref The URL to be recorded.
+   */
+  public void addExternalReference(URL ref)
+  {
+    m_external_references.add(ref);
+
+  } // end addExternalReference
+
+  /**
+   * Records an internal reference by adding it to the set of internal references.  Adding
+   * a string that is already in the set has no further effect.
+   *
+   * @param ref The reference string to be recorded.
+   */
+  public void addInternalReference(String ref)
+  {
+    m_internal_references.add(ref);
+
+  } // end addInternalReference
+
  /*--------------------------------------------------------------------------------
   * Implementations from interface RewriterServices
   *--------------------------------------------------------------------------------
@ -1505,4 +1544,6 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic

  } // end getRewriterContextValue

+  // addExternalReference is implemented as part of HTMLCheckerBackend
+
 } // end class HTMLCheckerImpl
--- a/src/com/silverwrist/venice/htmlcheck/impl/TagA.java
+++ b/src/com/silverwrist/venice/htmlcheck/impl/TagA.java
@ -9,14 +9,19 @@
 * 
 * The Original Code is the Venice Web Community System.
 * 
- * The Initial Developer of the Original Code is Eric J. Bowersox <erbo@silcom.com>,
+ * The Initial Developer of the Original Code is Eric J. Bowersox <erbo@ricochet.com>,
 * for Silverwrist Design Studios.  Portions created by Eric J. Bowersox are
- * Copyright (C) 2001 Eric J. Bowersox/Silverwrist Design Studios.  All Rights Reserved.
+ * Copyright (C) 2001-2004 Eric J. Bowersox/Silverwrist Design Studios.  All Rights Reserved.
 * 
 * Contributor(s): 
 */
 package com.silverwrist.venice.htmlcheck.impl;

+import java.net.*;
+import org.apache.log4j.Logger;
+import org.apache.regexp.*;
+import com.silverwrist.util.*;
+
 class TagA extends BalancedTag
 {
  /*--------------------------------------------------------------------------------
@ -24,21 +29,94 @@ class TagA extends BalancedTag
   *--------------------------------------------------------------------------------
   */

-  private static final String TARGET_ATTR = "TARGET";
+  /** The instance of {@link org.apache.log4j.Logger Logger} for use by this class. */
+  private static Logger logger = Logger.getLogger(TagA.class);
+
+  /*--------------------------------------------------------------------------------
+   * Attributes
+   *--------------------------------------------------------------------------------
+   */
+
+  /** Regular expression program to look for "HREF=" attribute. */
+  private REProgram m_href = null;
+
+  /** Regular expression program to look for "TARGET=" attribute. */
+  private REProgram m_target = null;

  /*--------------------------------------------------------------------------------
   * Constructor
   *--------------------------------------------------------------------------------
   */

+  /**
+   * Creates a new instance of <code>TagA</code>, compiling the regular expressions used to
+   * locate the HREF= and TARGET= attributes within the tag contents.  If compilation fails,
+   * the failure is logged and the corresponding program fields are left <code>null</code>.
+   */
  TagA()
  {
    super("A",false);
+    try
+    { // compile regular expressions
+      RECompiler compiler = new RECompiler();
+      // Each attribute name must be preceded by whitespace.  The tag contents start with the
+      // tag name, so a real attribute always has whitespace in front of it, whereas a bare
+      // "href=" or "target=" pattern would also fire inside a longer attribute name or in
+      // the middle of a value (e.g. HREF="http://host/?target=1").
+      m_href = compiler.compile("\\shref\\s*=");
+      m_target = compiler.compile("\\starget\\s*=");
+
+    } // end try
+    catch (RESyntaxException e)
+    { // shouldn't happen
+      logger.fatal("got RESyntaxException in TagA",e);
+
+    } // end catch

  } // end constructor

  /*--------------------------------------------------------------------------------
-   * External operations
+   * Internal operations
+   *--------------------------------------------------------------------------------
+   */
+
+  /**
+   * Pulls the leading attribute value out of a string.  Leading whitespace is skipped; the
+   * value is then either a quoted string (single or double quotes, returned without the
+   * quotes) or a run of non-whitespace characters.
+   *
+   * @param s The string to extract the attribute value from.
+   * @return The attribute value extracted.  This is the empty string if the input contains
+   *         nothing but whitespace, or if a quoted value has no closing quote.
+   */
+  private static final String extractAttribute(String s)
+  {
+    final int len = s.length();
+
+    // find the first character that isn't whitespace
+    int start = 0;
+    while ((start<len) && Character.isWhitespace(s.charAt(start)))
+      start++;
+    if (start==len)
+      return "";  // nothing there to extract
+
+    char first = s.charAt(start);
+    if ((first=='\'') || (first=='\"'))
+    { // quoted value - runs up to the next occurrence of the same quote character
+      int close = s.indexOf(first,start + 1);
+      if (close<0)
+        return "";  // unterminated quoted string
+      return s.substring(start + 1,close);
+
+    } // end if
+
+    // unquoted value - runs up to the next whitespace, or to the end of the string
+    int end = start;
+    while ((end<len) && !(Character.isWhitespace(s.charAt(end))))
+      end++;
+    return s.substring(start,end);
+
+  } // end extractAttribute
+
+  /*--------------------------------------------------------------------------------
+   * Overrides from class SimpleTag
   *--------------------------------------------------------------------------------
   */

@ -47,77 +125,44 @@ class TagA extends BalancedTag
    if (is_closing)
      return contents;  // don't bother checking close tags

-    // Skip over the initial word of the tag data, as that's the tag name.
-    int i = 0;
-    while ((i!=contents.length()) && !(Character.isWhitespace(contents.charAt(i))))
-      i++;
+    // Pull out the HREF= attribute, as that's an "external reference" we need to keep track of.
+    RE m = new RE(m_href,RE.MATCH_CASEINDEPENDENT);
+    if (m.match(contents))
+    { // get the attribute value
+      try
+      { // turn it into a URL and add it as an external reference
+	String s = extractAttribute(contents.substring(m.getParenEnd(0)));
+	if (!(StringUtil.isStringEmpty(s)) && s.toLowerCase().startsWith("http:"))
+	{ // turn it into the URL and add it
+	  URL ref = new URL(s);
+	  context.addExternalReference(ref);

-    // Search for the TARGET= attribute in the tag data.
+	} // end if
+	// else ignore me
+
+      } // end try
+      catch (MalformedURLException e)
+      { // ignore this reference
+      } // end catch
+
+    } // end if
+
+    // Look for the TARGET= attribute.
    boolean target_seen = false;
-    while (i!=contents.length())
-    { // skip over any whitespace between one attribute (or the name) and the next one
-      while ((i!=contents.length()) && Character.isWhitespace(contents.charAt(i)))
-	i++;
-      if (i==contents.length())
-	break;  // reached end of string, all done searching
+    m = new RE(m_target,RE.MATCH_CASEINDEPENDENT);
+    if (m.match(contents))
+    { // get the attribute value
+      String s = extractAttribute(contents.substring(m.getParenEnd(0)));
+      if (!(StringUtil.isStringEmpty(s)))
+	target_seen = true;

-      // Mark the start of this attribute name and start skipping over it.
-      int start_name = i;
-      while ((i!=contents.length()) && !(Character.isWhitespace(contents.charAt(i)))
-	     && (contents.charAt(i)!='='))
-	i++;
-
-      // We now know where the attribute name is, see if it's "TARGET".
-      if ((i-start_name)==TARGET_ATTR.length())
-      { // compare the substring to see if it's right
-	String name = contents.substring(start_name,i);
-	if (name.equalsIgnoreCase(TARGET_ATTR))
-	{ // OK, we saw the TARGET tag in the list!  Bail out!
-	  target_seen = true;
-	  break;
-
-	} // end if
-
-      } // end if
-
-      while ((i!=contents.length()) && Character.isWhitespace(contents.charAt(i)))
-	i++;  // skip over whitespace at end of name but before the = sign
-
-      if ((i<contents.length()) && (contents.charAt(i)=='='))
-      { // skip over the = sign first
-	i++;
-	while ((i!=contents.length()) && Character.isWhitespace(contents.charAt(i)))
-	  i++;  // skip over whitespace after the = sign
-
-	if (i==contents.length())
-	  break;  // reached end of string, all done searching
-
-	if ((contents.charAt(i)=='\'') || (contents.charAt(i)=='\"'))
-	{ // this is a quoted string - swallow it
-	  char quote_char = contents.charAt(i++); // skip the quote part
-	  while ((i!=contents.length()) && (contents.charAt(i)!=quote_char))
-	    i++;  // skip over data between quotes
-	  if (i!=contents.length())
-	    i++;  // skip over last quote
-
-	} // end if
-	else
-	{ // skip over a single word
-	  while ((i!=contents.length()) && !(Character.isWhitespace(contents.charAt(i))))
-	    i++;
-
-	} // end else
-
-      } // end if
-      // else this tag had no value - just go on to the next one
-
-    } // end while
+    } // end if

    if (target_seen)
      return contents;  // no need to alter the string

    String tail = (String)(context.getCheckerAttrValue("ANCHORTAIL"));
-    return new String(contents + " " + tail);
+    return contents + " " + tail;

  } // end rewriteTagContents