Added the ability for the HTML checker to keep track of internal and external
references in any post, so we can do trackbacks.
This commit is contained in:
parent
7e72ec21d0
commit
4c5c7ffe85
|
@ -9,9 +9,9 @@
|
|||
*
|
||||
* The Original Code is the Venice Web Communities System.
|
||||
*
|
||||
* The Initial Developer of the Original Code is Eric J. Bowersox <erbo@silcom.com>,
|
||||
* The Initial Developer of the Original Code is Eric J. Bowersox <erbo@ricochet.com>,
|
||||
* for Silverwrist Design Studios. Portions created by Eric J. Bowersox are
|
||||
* Copyright (C) 2001-02 Eric J. Bowersox/Silverwrist Design Studios. All Rights Reserved.
|
||||
* Copyright (C) 2001-2004 Eric J. Bowersox/Silverwrist Design Studios. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s):
|
||||
*/
|
||||
|
@ -40,7 +40,7 @@ public class PostLinkRewriter implements Rewriter
|
|||
*--------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
private GlobalSite globalsite; // global site containing utilities
|
||||
private final GlobalSite globalsite; // global site containing utilities
|
||||
|
||||
/*--------------------------------------------------------------------------------
|
||||
* Constructor
|
||||
|
@ -60,7 +60,7 @@ public class PostLinkRewriter implements Rewriter
|
|||
|
||||
private static final String buildPostLink(PostLinkDecoder pl, PostLinkDecoderContext ctxt)
|
||||
{
|
||||
StringBuffer b = new StringBuffer(URI_PREFIX);
|
||||
StringBuffer b = new StringBuffer();
|
||||
boolean started = false;
|
||||
if (pl.getCommunity()==null)
|
||||
b.append(ctxt.getCommunityName());
|
||||
|
@ -180,14 +180,18 @@ public class PostLinkRewriter implements Rewriter
|
|||
|
||||
} // end catch
|
||||
|
||||
// build the post link and add it as an internal reference
|
||||
String link = buildPostLink(pl,ctxt);
|
||||
svc.addInternalReference(link);
|
||||
|
||||
// build the necessary markup and return it
|
||||
StringBuffer open_a = new StringBuffer("<A HREF=\"");
|
||||
open_a.append(buildPostLink(pl,ctxt)).append("\"");
|
||||
StringBuffer open_a = new StringBuffer("<a href=\"");
|
||||
open_a.append(URI_PREFIX).append(link).append("\"");
|
||||
String catenate = svc.getRewriterAttrValue("ANCHORTAIL");
|
||||
if (!(StringUtil.isStringEmpty(catenate)))
|
||||
open_a.append(' ').append(catenate);
|
||||
open_a.append('>');
|
||||
return new MarkupData(open_a.toString(),data,"</A>");
|
||||
return new MarkupData(open_a.toString(),data,"</a>");
|
||||
|
||||
} // end rewrite
|
||||
|
||||
|
|
|
@ -17,6 +17,8 @@
|
|||
*/
|
||||
package com.silverwrist.venice.htmlcheck;
|
||||
|
||||
import java.util.Set;
|
||||
|
||||
public interface HTMLChecker
|
||||
{
|
||||
public void append(String str) throws AlreadyFinishedException;
|
||||
|
@ -37,4 +39,8 @@ public interface HTMLChecker
|
|||
|
||||
public void setContextValue(String name, Object val);
|
||||
|
||||
public Set getExternalReferences() throws NotYetFinishedException;
|
||||
|
||||
public Set getInternalReferences() throws NotYetFinishedException;
|
||||
|
||||
} // end interface HTMLChecker
|
||||
|
|
|
@ -9,18 +9,24 @@
|
|||
*
|
||||
* The Original Code is the Venice Web Community System.
|
||||
*
|
||||
* The Initial Developer of the Original Code is Eric J. Bowersox <erbo@silcom.com>,
|
||||
* The Initial Developer of the Original Code is Eric J. Bowersox <erbo@ricochet.com>,
|
||||
* for Silverwrist Design Studios. Portions created by Eric J. Bowersox are
|
||||
* Copyright (C) 2001 Eric J. Bowersox/Silverwrist Design Studios. All Rights Reserved.
|
||||
* Copyright (C) 2001-2004 Eric J. Bowersox/Silverwrist Design Studios. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s):
|
||||
*/
|
||||
package com.silverwrist.venice.htmlcheck;
|
||||
|
||||
import java.net.URL;
|
||||
|
||||
public interface RewriterServices
|
||||
{
|
||||
public abstract String getRewriterAttrValue(String name);
|
||||
public String getRewriterAttrValue(String name);
|
||||
|
||||
public abstract Object getRewriterContextValue(String name);
|
||||
public Object getRewriterContextValue(String name);
|
||||
|
||||
public void addExternalReference(URL ref);
|
||||
|
||||
public void addInternalReference(String ref);
|
||||
|
||||
} // end interface RewriterServices
|
||||
|
|
|
@ -9,21 +9,33 @@
|
|||
*
|
||||
* The Original Code is the Venice Web Community System.
|
||||
*
|
||||
* The Initial Developer of the Original Code is Eric J. Bowersox <erbo@silcom.com>,
|
||||
* The Initial Developer of the Original Code is Eric J. Bowersox <erbo@ricochet.com>,
|
||||
* for Silverwrist Design Studios. Portions created by Eric J. Bowersox are
|
||||
* Copyright (C) 2001 Eric J. Bowersox/Silverwrist Design Studios. All Rights Reserved.
|
||||
* Copyright (C) 2001-2004 Eric J. Bowersox/Silverwrist Design Studios. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s):
|
||||
*/
|
||||
package com.silverwrist.venice.htmlcheck.filters;
|
||||
|
||||
import java.util.*;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.apache.regexp.*;
|
||||
import com.silverwrist.util.*;
|
||||
import com.silverwrist.venice.htmlcheck.Rewriter;
|
||||
import com.silverwrist.venice.htmlcheck.RewriterServices;
|
||||
import com.silverwrist.venice.htmlcheck.MarkupData;
|
||||
|
||||
public class EmailRewriter implements Rewriter
|
||||
{
|
||||
/*--------------------------------------------------------------------------------
|
||||
* Static data members
|
||||
*--------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
private static Logger logger = Logger.getLogger(EmailRewriter.class);
|
||||
|
||||
private static REProgram s_match = null;
|
||||
|
||||
/*--------------------------------------------------------------------------------
|
||||
* Constructor
|
||||
*--------------------------------------------------------------------------------
|
||||
|
@ -46,23 +58,42 @@ public class EmailRewriter implements Rewriter
|
|||
|
||||
public MarkupData rewrite(String data, RewriterServices svc)
|
||||
{
|
||||
int at_pos = data.indexOf('@');
|
||||
if ((at_pos<=0) || (at_pos==(data.length()-1)))
|
||||
return null;
|
||||
|
||||
// TODO: put in more validation checking
|
||||
RE m = new RE(s_match,RE.MATCH_CASEINDEPENDENT);
|
||||
if (!(m.match(data)))
|
||||
return null; // not a valid E-mail address
|
||||
|
||||
// build the <A> tag (the gnarliest part)
|
||||
StringBuffer open_a = new StringBuffer("<A HREF=\"mailto:");
|
||||
StringBuffer open_a = new StringBuffer("<a href=\"mailto:");
|
||||
open_a.append(data).append("\"");
|
||||
String catenate = svc.getRewriterAttrValue("ANCHORTAIL");
|
||||
if ((catenate!=null) && (catenate.length()>0))
|
||||
if (!(StringUtil.isStringEmpty(catenate)))
|
||||
open_a.append(' ').append(catenate);
|
||||
open_a.append('>');
|
||||
|
||||
// return the markup data back to the checker
|
||||
return new MarkupData(open_a.toString(),data,"</A>");
|
||||
return new MarkupData(open_a.toString(),data,"</a>");
|
||||
|
||||
} // end rewrite
|
||||
|
||||
/*--------------------------------------------------------------------------------
|
||||
* Static initializer
|
||||
*--------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
static
{
  // Compile the E-mail address pattern once for the whole class; s_match stays
  // null if compilation fails, and the failure is logged.
  REProgram compiled = null;
  try
  { // build the program into a local first, then publish it
    compiled = new RECompiler().compile("^[A-Za-z0-9!#$%*+-/=?^_`{|}~.]+@[A-Za-z0-9_-]+(?:\\.[A-Za-z0-9_-]+)+$");

  } // end try
  catch (RESyntaxException e)
  { // the pattern is a constant, so this is not expected
    logger.fatal("caught RESyntaxException in EmailRewriter initializer",e);

  } // end catch

  if (compiled!=null)
    s_match = compiled;

} // end static initializer
|
||||
|
||||
} // end class EmailRewriter
|
||||
|
|
|
@ -9,29 +9,87 @@
|
|||
*
|
||||
* The Original Code is the Venice Web Community System.
|
||||
*
|
||||
* The Initial Developer of the Original Code is Eric J. Bowersox <erbo@silcom.com>,
|
||||
* The Initial Developer of the Original Code is Eric J. Bowersox <erbo@ricochet.com>,
|
||||
* for Silverwrist Design Studios. Portions created by Eric J. Bowersox are
|
||||
* Copyright (C) 2001 Eric J. Bowersox/Silverwrist Design Studios. All Rights Reserved.
|
||||
* Copyright (C) 2001-2004 Eric J. Bowersox/Silverwrist Design Studios. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s):
|
||||
*/
|
||||
package com.silverwrist.venice.htmlcheck.filters;
|
||||
|
||||
import java.net.*;
|
||||
import java.util.*;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.apache.regexp.*;
|
||||
import com.silverwrist.util.*;
|
||||
import com.silverwrist.venice.htmlcheck.Rewriter;
|
||||
import com.silverwrist.venice.htmlcheck.RewriterServices;
|
||||
import com.silverwrist.venice.htmlcheck.MarkupData;
|
||||
|
||||
public class URLRewriter implements Rewriter
|
||||
{
|
||||
/*--------------------------------------------------------------------------------
|
||||
* Internal class containing URL elements.
|
||||
*--------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
private static class URLElement
|
||||
{
|
||||
private REProgram m_match;
|
||||
private String m_prefix;
|
||||
|
||||
URLElement(String pattern, String prefix)
|
||||
{
|
||||
try
|
||||
{ // fill the classes
|
||||
m_match = COMPILER.compile(pattern);
|
||||
m_prefix = prefix;
|
||||
|
||||
} // end try
|
||||
catch (RESyntaxException e)
|
||||
{ // shouldn't happen
|
||||
logger.fatal("got RESyntaxException in URLElement",e);
|
||||
|
||||
} // end catch
|
||||
|
||||
} // end constructor
|
||||
|
||||
String eval(String input)
|
||||
{
|
||||
RE m = new RE(m_match,RE.MATCH_CASEINDEPENDENT);
|
||||
if (m.match(input))
|
||||
return m_prefix + input;
|
||||
else
|
||||
return null;
|
||||
|
||||
} // end eval
|
||||
|
||||
} // end class URLElement
|
||||
|
||||
/*--------------------------------------------------------------------------------
|
||||
* Static data members
|
||||
*--------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
private static final String NULLSTRING = "";
|
||||
private static Hashtable prefix_list = null;
|
||||
private static boolean set_up = true;
|
||||
private static Logger logger = Logger.getLogger(URLRewriter.class);
|
||||
|
||||
private static final RECompiler COMPILER = new RECompiler();
|
||||
private static final String[] SETUP_DATA =
|
||||
{
|
||||
"^http://[A-Za-z0-9_-]+(?:\\.[A-Za-z0-9_-]+)+", "",
|
||||
"^ftp://[A-Za-z0-9_-]+(?:\\.[A-Za-z0-9_-]+)+", "",
|
||||
"^gopher://[A-Za-z0-9_-]+(?:\\.[A-Za-z0-9_-]+)+", "",
|
||||
"^mailto:[A-Za-z0-9!#$%*+-/=?^_`{|}~.]+@[A-Za-z0-9_-]+(?:\\.[A-Za-z0-9_-]+)+$", "",
|
||||
"^news:[A-Za-z0-9_-]+(?:\\.[A-Za-z0-9_-]+)+$", "",
|
||||
"^nntp://[A-Za-z0-9_-]+(?:\\.[A-Za-z0-9_-]+)+", "",
|
||||
"^telnet://[A-Za-z0-9_-]+(?:\\.[A-Za-z0-9_-]+)+", "",
|
||||
"^tn3270://[A-Za-z0-9_-]+(?:\\.[A-Za-z0-9_-]+)+", "",
|
||||
"^www\\.[A-Za-z0-9_-]+(?:\\.[A-Za-z0-9_-]+)*", "http://",
|
||||
"^ftp\\.[A-Za-z0-9_-]+(?:\\.[A-Za-z0-9_-]+)*", "ftp://",
|
||||
"^gopher\\.[A-Za-z0-9_-]+(?:\\.[A-Za-z0-9_-]+)*", "gopher://"
|
||||
};
|
||||
|
||||
private static final List KNOWN_ELEMENTS;
|
||||
|
||||
/*--------------------------------------------------------------------------------
|
||||
* Constructor
|
||||
|
@ -39,40 +97,9 @@ public class URLRewriter implements Rewriter
|
|||
*/
|
||||
|
||||
public URLRewriter()
|
||||
{
|
||||
setUpPrefixes(); // make sure the prefix data is set up
|
||||
|
||||
{ // do nothing
|
||||
} // end constructor
|
||||
|
||||
/*--------------------------------------------------------------------------------
|
||||
* Internal functions
|
||||
*--------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
private static void setUpPrefixes()
|
||||
{
|
||||
if (set_up)
|
||||
{ // allocate the hash table
|
||||
set_up = false;
|
||||
prefix_list = new Hashtable(10,0.9F);
|
||||
|
||||
// fill it with the proper URL prefixes
|
||||
prefix_list.put("http:",NULLSTRING);
|
||||
prefix_list.put("ftp:",NULLSTRING);
|
||||
prefix_list.put("gopher:",NULLSTRING);
|
||||
prefix_list.put("mailto:",NULLSTRING);
|
||||
prefix_list.put("news:",NULLSTRING);
|
||||
prefix_list.put("nntp:",NULLSTRING);
|
||||
prefix_list.put("telnet:",NULLSTRING);
|
||||
prefix_list.put("tn3270:",NULLSTRING);
|
||||
prefix_list.put("www.",new String("http://"));
|
||||
prefix_list.put("ftp.",new String("ftp://"));
|
||||
prefix_list.put("gopher.",new String("gopher://"));
|
||||
|
||||
} // end if
|
||||
|
||||
} // end setUpPrefixes
|
||||
|
||||
/*--------------------------------------------------------------------------------
|
||||
* Implementations from interface Rewriter
|
||||
*--------------------------------------------------------------------------------
|
||||
|
@ -86,29 +113,53 @@ public class URLRewriter implements Rewriter
|
|||
|
||||
public MarkupData rewrite(String data, RewriterServices svc)
|
||||
{
|
||||
Enumeration prefixes = prefix_list.keys();
|
||||
while (prefixes.hasMoreElements())
|
||||
{ // get the next prefix and compare against the beginning of the string
|
||||
String pfx = (String)(prefixes.nextElement());
|
||||
if (data.regionMatches(true,0,pfx,0,pfx.length()))
|
||||
{ // good enough! build the open <A> tag (the gnarliest part of the markup)
|
||||
StringBuffer open_a = new StringBuffer("<A HREF=\"");
|
||||
String catenate = (String)(prefix_list.get(pfx));
|
||||
open_a.append(catenate).append(data).append("\"");
|
||||
catenate = svc.getRewriterAttrValue("ANCHORTAIL");
|
||||
if ((catenate!=null) && (catenate.length()>0))
|
||||
for (Iterator it=KNOWN_ELEMENTS.iterator(); it.hasNext(); )
|
||||
{ // test each element in turn
|
||||
URLElement ue = (URLElement)(it.next());
|
||||
String s = ue.eval(data);
|
||||
if (s!=null)
|
||||
{ // got a match! record the external reference and build the open <A> tag
|
||||
try
|
||||
{ // create URL and add it
|
||||
if (s.toLowerCase().startsWith("http:"))
|
||||
svc.addExternalReference(new URL(s));
|
||||
|
||||
} // end try
|
||||
catch (MalformedURLException e)
|
||||
{ // forget it
|
||||
} // end catch
|
||||
|
||||
StringBuffer open_a = new StringBuffer("<a href=\"");
|
||||
open_a.append(s).append("\"");
|
||||
String catenate = svc.getRewriterAttrValue("ANCHORTAIL");
|
||||
if (!(StringUtil.isStringEmpty(catenate)))
|
||||
open_a.append(' ').append(catenate);
|
||||
open_a.append('>');
|
||||
|
||||
// here's how you mark it up!
|
||||
return new MarkupData(open_a.toString(),data,"</A>");
|
||||
return new MarkupData(open_a.toString(),data,"</a>");
|
||||
|
||||
} // end if
|
||||
|
||||
} // end while
|
||||
} // end for
|
||||
|
||||
return null; // sorry, no can do
|
||||
|
||||
} // end rewrite
|
||||
|
||||
/*--------------------------------------------------------------------------------
|
||||
* Static initializer
|
||||
*--------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
static
|
||||
{
|
||||
ArrayList tmp = new ArrayList();
|
||||
for (int i=0; i<SETUP_DATA.length; i+=2)
|
||||
tmp.add(new URLElement(SETUP_DATA[i],SETUP_DATA[i + 1]));
|
||||
tmp.trimToSize();
|
||||
KNOWN_ELEMENTS = Collections.unmodifiableList(tmp);
|
||||
|
||||
} // end static initializer
|
||||
|
||||
} // end class URLRewriter
|
||||
|
|
|
@ -9,20 +9,26 @@
|
|||
*
|
||||
* The Original Code is the Venice Web Community System.
|
||||
*
|
||||
* The Initial Developer of the Original Code is Eric J. Bowersox <erbo@silcom.com>,
|
||||
* The Initial Developer of the Original Code is Eric J. Bowersox <erbo@ricochet.com>,
|
||||
* for Silverwrist Design Studios. Portions created by Eric J. Bowersox are
|
||||
* Copyright (C) 2001 Eric J. Bowersox/Silverwrist Design Studios. All Rights Reserved.
|
||||
* Copyright (C) 2001-2004 Eric J. Bowersox/Silverwrist Design Studios. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s):
|
||||
*/
|
||||
package com.silverwrist.venice.htmlcheck.impl;
|
||||
|
||||
import java.net.URL;
|
||||
|
||||
public interface HTMLCheckerBackend
|
||||
{
|
||||
public abstract String getCheckerAttrValue(String name);
|
||||
public String getCheckerAttrValue(String name);
|
||||
|
||||
public abstract void sendTagMessage(String msg);
|
||||
public void sendTagMessage(String msg);
|
||||
|
||||
public abstract Object getCheckerContextValue(String name);
|
||||
public Object getCheckerContextValue(String name);
|
||||
|
||||
public void addExternalReference(URL ref);
|
||||
|
||||
public void addInternalReference(String ref);
|
||||
|
||||
} // end interface HTMLCheckerBackend
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
*/
|
||||
package com.silverwrist.venice.htmlcheck.impl;
|
||||
|
||||
import java.net.URL;
|
||||
import java.util.*;
|
||||
import org.apache.log4j.*;
|
||||
import com.silverwrist.venice.htmlcheck.*;
|
||||
|
@ -139,6 +140,8 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic
|
|||
private ArrayList m_tag_rewriters = new ArrayList(); // tag rewriter instances
|
||||
private ArrayList m_paren_rewriters = new ArrayList(); // paren rewriter instances
|
||||
private HashMap m_context_data = new HashMap(); // context variables
|
||||
private HashSet m_external_references = new HashSet(); // list of external references
|
||||
private HashSet m_internal_references = new HashSet(); // list of internal references
|
||||
|
||||
/*--------------------------------------------------------------------------------
|
||||
* Constructor
|
||||
|
@ -161,7 +164,7 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic
|
|||
*/
|
||||
|
||||
/**
|
||||
* Returns <CODE>true</CODE> if this character belongs as part of a word, <CODE>false</CODE> if not.
|
||||
* Returns <code>true</code> if this character belongs as part of a word, <code>false</code> if not.
|
||||
*
|
||||
* @param ch Character to be tested.
|
||||
* @return See above.
|
||||
|
@ -210,8 +213,8 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic
|
|||
} // end getRunLength
|
||||
|
||||
/**
|
||||
* Copies the <CODE>Rewriter</CODE> objects from an outside list to an internal list, wrapping
|
||||
* named rewriters in <CODE>CountingRewriter</CODE> objects as appropriate.
|
||||
* Copies the <code>Rewriter</code> objects from an outside list to an internal list, wrapping
|
||||
* named rewriters in <code>CountingRewriter</code> objects as appropriate.
|
||||
*
|
||||
* @param dest Destination to copy rewriters to.
|
||||
* @param source List to copy rewriters from.
|
||||
|
@ -290,8 +293,8 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic
|
|||
*
|
||||
* @param ch Character to output.
|
||||
* @param filters List of filters to use to attempt to process the character.
|
||||
* @param count_cols <CODE>true</CODE> if the character output adds to the column counter,
|
||||
* <CODE>false</CODE> if not.
|
||||
* @param count_cols <code>true</code> if the character output adds to the column counter,
|
||||
* <code>false</code> if not.
|
||||
*/
|
||||
private final void emitChar(char ch, List filters, boolean count_cols)
|
||||
{
|
||||
|
@ -841,7 +844,7 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic
|
|||
} // end handleAsHTML
|
||||
|
||||
/**
|
||||
* Returns <CODE>true</CODE> if the temporary buffer contains the start of an HTML comment. (The
|
||||
* Returns <code>true</code> if the temporary buffer contains the start of an HTML comment. (The
|
||||
* leading and trailing angle brackets are assumed.)
|
||||
*
|
||||
* @return See above.
|
||||
|
@ -853,7 +856,7 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic
|
|||
} // end containsHTMLComment
|
||||
|
||||
/**
|
||||
* Returns <CODE>true</CODE> if the temporary buffer contains a complete HTML comment. (The leading
|
||||
* Returns <code>true</code> if the temporary buffer contains a complete HTML comment. (The leading
|
||||
* and trailing angle brackets are assumed.)
|
||||
*
|
||||
* @return See above.
|
||||
|
@ -869,7 +872,7 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic
|
|||
} // end containsCompleteHTMLComment
|
||||
|
||||
/**
|
||||
* Returns <CODE>true</CODE> if the temporary buffer contains an XML construct, i.e. a tag that
|
||||
* Returns <code>true</code> if the temporary buffer contains an XML construct, i.e. a tag that
|
||||
* contains a ':', and may or may not have a leading '/'. (The leading and trailing angle brackets
|
||||
* are assumed.)
|
||||
*
|
||||
|
@ -1381,6 +1384,8 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic
|
|||
m_lines = 0;
|
||||
m_paren_level = 0;
|
||||
m_output_buffer = null;
|
||||
m_external_references.clear();
|
||||
m_internal_references.clear();
|
||||
killState();
|
||||
|
||||
// Also reset all the counters.
|
||||
|
@ -1441,6 +1446,28 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic
|
|||
|
||||
} // end setContextValue
|
||||
|
||||
public Set getExternalReferences() throws NotYetFinishedException
|
||||
{
|
||||
if (!m_finished)
|
||||
throw new NotYetFinishedException();
|
||||
if (m_external_references.isEmpty())
|
||||
return Collections.EMPTY_SET;
|
||||
HashSet rc = new HashSet(m_external_references);
|
||||
return Collections.unmodifiableSet(rc);
|
||||
|
||||
} // end getExternalReferences
|
||||
|
||||
public Set getInternalReferences() throws NotYetFinishedException
|
||||
{
|
||||
if (!m_finished)
|
||||
throw new NotYetFinishedException();
|
||||
if (m_internal_references.isEmpty())
|
||||
return Collections.EMPTY_SET;
|
||||
HashSet rc = new HashSet(m_internal_references);
|
||||
return Collections.unmodifiableSet(rc);
|
||||
|
||||
} // end getInternalReferences
|
||||
|
||||
/*--------------------------------------------------------------------------------
|
||||
* Implementations from interface HTMLCheckerBackend
|
||||
*--------------------------------------------------------------------------------
|
||||
|
@ -1488,6 +1515,18 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic
|
|||
|
||||
} // end getCheckerContextValue
|
||||
|
||||
public void addExternalReference(URL ref)
|
||||
{
|
||||
m_external_references.add(ref);
|
||||
|
||||
} // end addExternalReference
|
||||
|
||||
public void addInternalReference(String ref)
|
||||
{
|
||||
m_internal_references.add(ref);
|
||||
|
||||
} // end addInternalReference
|
||||
|
||||
/*--------------------------------------------------------------------------------
|
||||
* Implementations from interface RewriterServices
|
||||
*--------------------------------------------------------------------------------
|
||||
|
@ -1505,4 +1544,6 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic
|
|||
|
||||
} // end getRewriterContextValue
|
||||
|
||||
// addExternalReference and addInternalReference are implemented as part of HTMLCheckerBackend
|
||||
|
||||
} // end class HTMLCheckerImpl
|
||||
|
|
|
@ -9,14 +9,19 @@
|
|||
*
|
||||
* The Original Code is the Venice Web Community System.
|
||||
*
|
||||
* The Initial Developer of the Original Code is Eric J. Bowersox <erbo@silcom.com>,
|
||||
* The Initial Developer of the Original Code is Eric J. Bowersox <erbo@ricochet.com>,
|
||||
* for Silverwrist Design Studios. Portions created by Eric J. Bowersox are
|
||||
* Copyright (C) 2001 Eric J. Bowersox/Silverwrist Design Studios. All Rights Reserved.
|
||||
* Copyright (C) 2001-2004 Eric J. Bowersox/Silverwrist Design Studios. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s):
|
||||
*/
|
||||
package com.silverwrist.venice.htmlcheck.impl;
|
||||
|
||||
import java.net.*;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.apache.regexp.*;
|
||||
import com.silverwrist.util.*;
|
||||
|
||||
class TagA extends BalancedTag
|
||||
{
|
||||
/*--------------------------------------------------------------------------------
|
||||
|
@ -24,21 +29,94 @@ class TagA extends BalancedTag
|
|||
*--------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
private static final String TARGET_ATTR = "TARGET";
|
||||
/** The instance of {@link org.apache.log4j.Logger Logger} for use by this class. */
|
||||
private static Logger logger = Logger.getLogger(TagA.class);
|
||||
|
||||
/*--------------------------------------------------------------------------------
|
||||
* Attributes
|
||||
*--------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/** Regular expression program to look for "HREF=" attribute. */
|
||||
private REProgram m_href = null;
|
||||
|
||||
/** Regular expression program to look for "TARGET=" attribute. */
|
||||
private REProgram m_target = null;
|
||||
|
||||
/*--------------------------------------------------------------------------------
|
||||
* Constructor
|
||||
*--------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/**
|
||||
* Creates a new instance of <code>TagA</code>.
|
||||
*/
|
||||
TagA()
|
||||
{
|
||||
super("A",false);
|
||||
try
|
||||
{ // compile regular expressions
|
||||
RECompiler compiler = new RECompiler();
|
||||
m_href = compiler.compile("href\\s*=");
|
||||
m_target = compiler.compile("target\\s*=");
|
||||
|
||||
} // end try
|
||||
catch (RESyntaxException e)
|
||||
{ // shouldn't happen
|
||||
logger.fatal("got RESyntaxException in TagA",e);
|
||||
|
||||
} // end catch
|
||||
|
||||
} // end constructor
|
||||
|
||||
/*--------------------------------------------------------------------------------
|
||||
* External operations
|
||||
* Internal operations
|
||||
*--------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/**
|
||||
* Extracts an attribute value from the start of the string. The attribute value may be enclosed
|
||||
* in quotes, or may simply be a series of nonblank characters delimited by blanks.
|
||||
*
|
||||
* @param s The string to extract the attribute value from.
|
||||
* @return The attribute value extracted.
|
||||
*/
|
||||
private static final String extractAttribute(String s)
|
||||
{
|
||||
char[] a = s.toCharArray();
|
||||
int i = 0;
|
||||
while ((i<a.length) && Character.isWhitespace(a[i]))
|
||||
i++;
|
||||
if (i==a.length)
|
||||
return "";
|
||||
int st = i;
|
||||
if ((a[st]=='\'') || (a[st]=='\"'))
|
||||
{ // find quoted string boundaries
|
||||
i++;
|
||||
while ((i<a.length) && (a[i]!=a[st]))
|
||||
i++;
|
||||
if (i==a.length)
|
||||
return "";
|
||||
st++;
|
||||
|
||||
} // end if
|
||||
else
|
||||
{ // skip over non-whitespace
|
||||
while ((i<a.length) && !(Character.isWhitespace(a[i])))
|
||||
i++;
|
||||
// if i==a.length, just take the "rest"
|
||||
|
||||
} // end else
|
||||
|
||||
if (i==a.length)
|
||||
return s.substring(st);
|
||||
else
|
||||
return s.substring(st,i);
|
||||
|
||||
} // end extractAttribute
|
||||
|
||||
/*--------------------------------------------------------------------------------
|
||||
* Overrides from class SimpleTag
|
||||
*--------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
@ -47,77 +125,44 @@ class TagA extends BalancedTag
|
|||
if (is_closing)
|
||||
return contents; // don't bother checking close tags
|
||||
|
||||
// Skip over the initial word of the tag data, as that's the tag name.
|
||||
int i = 0;
|
||||
while ((i!=contents.length()) && !(Character.isWhitespace(contents.charAt(i))))
|
||||
i++;
|
||||
// Pull out the HREF= attribute, as that's an "external reference" we need to keep track of.
|
||||
RE m = new RE(m_href,RE.MATCH_CASEINDEPENDENT);
|
||||
if (m.match(contents))
|
||||
{ // get the attribute value
|
||||
try
|
||||
{ // turn it into a URL and add it as an external reference
|
||||
String s = extractAttribute(contents.substring(m.getParenEnd(0)));
|
||||
if (!(StringUtil.isStringEmpty(s)) && s.toLowerCase().startsWith("http:"))
|
||||
{ // turn it into the URL and add it
|
||||
URL ref = new URL(s);
|
||||
context.addExternalReference(ref);
|
||||
|
||||
// Search for the TARGET= attribute in the tag data.
|
||||
} // end if
|
||||
// else ignore me
|
||||
|
||||
} // end try
|
||||
catch (MalformedURLException e)
|
||||
{ // ignore this reference
|
||||
} // end catch
|
||||
|
||||
} // end if
|
||||
|
||||
// Look for the TARGET= attribute.
|
||||
boolean target_seen = false;
|
||||
while (i!=contents.length())
|
||||
{ // skip over any whitespace between one attribute (or the name) and the next one
|
||||
while ((i!=contents.length()) && Character.isWhitespace(contents.charAt(i)))
|
||||
i++;
|
||||
if (i==contents.length())
|
||||
break; // reached end of string, all done searching
|
||||
|
||||
// Mark the start of this attribute name and start skipping over it.
|
||||
int start_name = i;
|
||||
while ((i!=contents.length()) && !(Character.isWhitespace(contents.charAt(i)))
|
||||
&& (contents.charAt(i)!='='))
|
||||
i++;
|
||||
|
||||
// We now know where the attribute name is, see if it's "TARGET".
|
||||
if ((i-start_name)==TARGET_ATTR.length())
|
||||
{ // compare the substring to see if it's right
|
||||
String name = contents.substring(start_name,i);
|
||||
if (name.equalsIgnoreCase(TARGET_ATTR))
|
||||
{ // OK, we saw the TARGET tag in the list! Bail out!
|
||||
m = new RE(m_target,RE.MATCH_CASEINDEPENDENT);
|
||||
if (m.match(contents))
|
||||
{ // get the attribute value
|
||||
String s = extractAttribute(contents.substring(m.getParenEnd(0)));
|
||||
if (!(StringUtil.isStringEmpty(s)))
|
||||
target_seen = true;
|
||||
break;
|
||||
|
||||
} // end if
|
||||
|
||||
} // end if
|
||||
|
||||
while ((i!=contents.length()) && Character.isWhitespace(contents.charAt(i)))
|
||||
i++; // skip over whitespace at end of name but before the = sign
|
||||
|
||||
if ((i<contents.length()) && (contents.charAt(i)=='='))
|
||||
{ // skip over the = sign first
|
||||
i++;
|
||||
while ((i!=contents.length()) && Character.isWhitespace(contents.charAt(i)))
|
||||
i++; // skip over whitespace after the = sign
|
||||
|
||||
if (i==contents.length())
|
||||
break; // reached end of string, all done searching
|
||||
|
||||
if ((contents.charAt(i)=='\'') || (contents.charAt(i)=='\"'))
|
||||
{ // this is a quoted string - swallow it
|
||||
char quote_char = contents.charAt(i++); // skip the quote part
|
||||
while ((i!=contents.length()) && (contents.charAt(i)!=quote_char))
|
||||
i++; // skip over data between quotes
|
||||
if (i!=contents.length())
|
||||
i++; // skip over last quote
|
||||
|
||||
} // end if
|
||||
else
|
||||
{ // skip over a single word
|
||||
while ((i!=contents.length()) && !(Character.isWhitespace(contents.charAt(i))))
|
||||
i++;
|
||||
|
||||
} // end else
|
||||
|
||||
} // end if
|
||||
// else this tag had no value - just go on to the next one
|
||||
|
||||
} // end while
|
||||
|
||||
if (target_seen)
|
||||
return contents; // no need to alter the string
|
||||
|
||||
String tail = (String)(context.getCheckerAttrValue("ANCHORTAIL"));
|
||||
return new String(contents + " " + tail);
|
||||
return contents + " " + tail;
|
||||
|
||||
} // end rewriteTagContents
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user