Added the ability for the HTML checker to keep track of internal and external
references in any post, so we can do trackbacks.
This commit is contained in:
parent
7e72ec21d0
commit
4c5c7ffe85
|
@ -9,9 +9,9 @@
|
||||||
*
|
*
|
||||||
* The Original Code is the Venice Web Communities System.
|
* The Original Code is the Venice Web Communities System.
|
||||||
*
|
*
|
||||||
* The Initial Developer of the Original Code is Eric J. Bowersox <erbo@silcom.com>,
|
* The Initial Developer of the Original Code is Eric J. Bowersox <erbo@ricochet.com>,
|
||||||
* for Silverwrist Design Studios. Portions created by Eric J. Bowersox are
|
* for Silverwrist Design Studios. Portions created by Eric J. Bowersox are
|
||||||
* Copyright (C) 2001-02 Eric J. Bowersox/Silverwrist Design Studios. All Rights Reserved.
|
* Copyright (C) 2001-2004 Eric J. Bowersox/Silverwrist Design Studios. All Rights Reserved.
|
||||||
*
|
*
|
||||||
* Contributor(s):
|
* Contributor(s):
|
||||||
*/
|
*/
|
||||||
|
@ -40,7 +40,7 @@ public class PostLinkRewriter implements Rewriter
|
||||||
*--------------------------------------------------------------------------------
|
*--------------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
|
|
||||||
private GlobalSite globalsite; // global site containing utilities
|
private final GlobalSite globalsite; // global site containing utilities
|
||||||
|
|
||||||
/*--------------------------------------------------------------------------------
|
/*--------------------------------------------------------------------------------
|
||||||
* Constructor
|
* Constructor
|
||||||
|
@ -60,7 +60,7 @@ public class PostLinkRewriter implements Rewriter
|
||||||
|
|
||||||
private static final String buildPostLink(PostLinkDecoder pl, PostLinkDecoderContext ctxt)
|
private static final String buildPostLink(PostLinkDecoder pl, PostLinkDecoderContext ctxt)
|
||||||
{
|
{
|
||||||
StringBuffer b = new StringBuffer(URI_PREFIX);
|
StringBuffer b = new StringBuffer();
|
||||||
boolean started = false;
|
boolean started = false;
|
||||||
if (pl.getCommunity()==null)
|
if (pl.getCommunity()==null)
|
||||||
b.append(ctxt.getCommunityName());
|
b.append(ctxt.getCommunityName());
|
||||||
|
@ -180,14 +180,18 @@ public class PostLinkRewriter implements Rewriter
|
||||||
|
|
||||||
} // end catch
|
} // end catch
|
||||||
|
|
||||||
|
// build the post link and add it as an internal reference
|
||||||
|
String link = buildPostLink(pl,ctxt);
|
||||||
|
svc.addInternalReference(link);
|
||||||
|
|
||||||
// build the necessary markup and return it
|
// build the necessary markup and return it
|
||||||
StringBuffer open_a = new StringBuffer("<A HREF=\"");
|
StringBuffer open_a = new StringBuffer("<a href=\"");
|
||||||
open_a.append(buildPostLink(pl,ctxt)).append("\"");
|
open_a.append(URI_PREFIX).append(link).append("\"");
|
||||||
String catenate = svc.getRewriterAttrValue("ANCHORTAIL");
|
String catenate = svc.getRewriterAttrValue("ANCHORTAIL");
|
||||||
if (!(StringUtil.isStringEmpty(catenate)))
|
if (!(StringUtil.isStringEmpty(catenate)))
|
||||||
open_a.append(' ').append(catenate);
|
open_a.append(' ').append(catenate);
|
||||||
open_a.append('>');
|
open_a.append('>');
|
||||||
return new MarkupData(open_a.toString(),data,"</A>");
|
return new MarkupData(open_a.toString(),data,"</a>");
|
||||||
|
|
||||||
} // end rewrite
|
} // end rewrite
|
||||||
|
|
||||||
|
|
|
@ -17,6 +17,8 @@
|
||||||
*/
|
*/
|
||||||
package com.silverwrist.venice.htmlcheck;
|
package com.silverwrist.venice.htmlcheck;
|
||||||
|
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
public interface HTMLChecker
|
public interface HTMLChecker
|
||||||
{
|
{
|
||||||
public void append(String str) throws AlreadyFinishedException;
|
public void append(String str) throws AlreadyFinishedException;
|
||||||
|
@ -37,4 +39,8 @@ public interface HTMLChecker
|
||||||
|
|
||||||
public void setContextValue(String name, Object val);
|
public void setContextValue(String name, Object val);
|
||||||
|
|
||||||
|
public Set getExternalReferences() throws NotYetFinishedException;
|
||||||
|
|
||||||
|
public Set getInternalReferences() throws NotYetFinishedException;
|
||||||
|
|
||||||
} // end interface HTMLChecker
|
} // end interface HTMLChecker
|
||||||
|
|
|
@ -9,18 +9,24 @@
|
||||||
*
|
*
|
||||||
* The Original Code is the Venice Web Community System.
|
* The Original Code is the Venice Web Community System.
|
||||||
*
|
*
|
||||||
* The Initial Developer of the Original Code is Eric J. Bowersox <erbo@silcom.com>,
|
* The Initial Developer of the Original Code is Eric J. Bowersox <erbo@ricochet.com>,
|
||||||
* for Silverwrist Design Studios. Portions created by Eric J. Bowersox are
|
* for Silverwrist Design Studios. Portions created by Eric J. Bowersox are
|
||||||
* Copyright (C) 2001 Eric J. Bowersox/Silverwrist Design Studios. All Rights Reserved.
|
* Copyright (C) 2001-2004 Eric J. Bowersox/Silverwrist Design Studios. All Rights Reserved.
|
||||||
*
|
*
|
||||||
* Contributor(s):
|
* Contributor(s):
|
||||||
*/
|
*/
|
||||||
package com.silverwrist.venice.htmlcheck;
|
package com.silverwrist.venice.htmlcheck;
|
||||||
|
|
||||||
|
import java.net.URL;
|
||||||
|
|
||||||
public interface RewriterServices
|
public interface RewriterServices
|
||||||
{
|
{
|
||||||
public abstract String getRewriterAttrValue(String name);
|
public String getRewriterAttrValue(String name);
|
||||||
|
|
||||||
public abstract Object getRewriterContextValue(String name);
|
public Object getRewriterContextValue(String name);
|
||||||
|
|
||||||
|
public void addExternalReference(URL ref);
|
||||||
|
|
||||||
|
public void addInternalReference(String ref);
|
||||||
|
|
||||||
} // end interface RewriterServices
|
} // end interface RewriterServices
|
||||||
|
|
|
@ -9,21 +9,33 @@
|
||||||
*
|
*
|
||||||
* The Original Code is the Venice Web Community System.
|
* The Original Code is the Venice Web Community System.
|
||||||
*
|
*
|
||||||
* The Initial Developer of the Original Code is Eric J. Bowersox <erbo@silcom.com>,
|
* The Initial Developer of the Original Code is Eric J. Bowersox <erbo@ricochet.com>,
|
||||||
* for Silverwrist Design Studios. Portions created by Eric J. Bowersox are
|
* for Silverwrist Design Studios. Portions created by Eric J. Bowersox are
|
||||||
* Copyright (C) 2001 Eric J. Bowersox/Silverwrist Design Studios. All Rights Reserved.
|
* Copyright (C) 2001-2004 Eric J. Bowersox/Silverwrist Design Studios. All Rights Reserved.
|
||||||
*
|
*
|
||||||
* Contributor(s):
|
* Contributor(s):
|
||||||
*/
|
*/
|
||||||
package com.silverwrist.venice.htmlcheck.filters;
|
package com.silverwrist.venice.htmlcheck.filters;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
|
import org.apache.regexp.*;
|
||||||
|
import com.silverwrist.util.*;
|
||||||
import com.silverwrist.venice.htmlcheck.Rewriter;
|
import com.silverwrist.venice.htmlcheck.Rewriter;
|
||||||
import com.silverwrist.venice.htmlcheck.RewriterServices;
|
import com.silverwrist.venice.htmlcheck.RewriterServices;
|
||||||
import com.silverwrist.venice.htmlcheck.MarkupData;
|
import com.silverwrist.venice.htmlcheck.MarkupData;
|
||||||
|
|
||||||
public class EmailRewriter implements Rewriter
|
public class EmailRewriter implements Rewriter
|
||||||
{
|
{
|
||||||
|
/*--------------------------------------------------------------------------------
|
||||||
|
* Static data members
|
||||||
|
*--------------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
private static Logger logger = Logger.getLogger(EmailRewriter.class);
|
||||||
|
|
||||||
|
private static REProgram s_match = null;
|
||||||
|
|
||||||
/*--------------------------------------------------------------------------------
|
/*--------------------------------------------------------------------------------
|
||||||
* Constructor
|
* Constructor
|
||||||
*--------------------------------------------------------------------------------
|
*--------------------------------------------------------------------------------
|
||||||
|
@ -46,23 +58,42 @@ public class EmailRewriter implements Rewriter
|
||||||
|
|
||||||
public MarkupData rewrite(String data, RewriterServices svc)
|
public MarkupData rewrite(String data, RewriterServices svc)
|
||||||
{
|
{
|
||||||
int at_pos = data.indexOf('@');
|
RE m = new RE(s_match,RE.MATCH_CASEINDEPENDENT);
|
||||||
if ((at_pos<=0) || (at_pos==(data.length()-1)))
|
if (!(m.match(data)))
|
||||||
return null;
|
return null; // not a valid E-mail address
|
||||||
|
|
||||||
// TODO: put in more validation checking
|
|
||||||
|
|
||||||
// build the <A> tag (the gnarliest part)
|
// build the <A> tag (the gnarliest part)
|
||||||
StringBuffer open_a = new StringBuffer("<A HREF=\"mailto:");
|
StringBuffer open_a = new StringBuffer("<a href=\"mailto:");
|
||||||
open_a.append(data).append("\"");
|
open_a.append(data).append("\"");
|
||||||
String catenate = svc.getRewriterAttrValue("ANCHORTAIL");
|
String catenate = svc.getRewriterAttrValue("ANCHORTAIL");
|
||||||
if ((catenate!=null) && (catenate.length()>0))
|
if (!(StringUtil.isStringEmpty(catenate)))
|
||||||
open_a.append(' ').append(catenate);
|
open_a.append(' ').append(catenate);
|
||||||
open_a.append('>');
|
open_a.append('>');
|
||||||
|
|
||||||
// return the markup data back to the checker
|
// return the markup data back to the checker
|
||||||
return new MarkupData(open_a.toString(),data,"</A>");
|
return new MarkupData(open_a.toString(),data,"</a>");
|
||||||
|
|
||||||
} // end rewrite
|
} // end rewrite
|
||||||
|
|
||||||
|
/*--------------------------------------------------------------------------------
|
||||||
|
* Static initializer
|
||||||
|
*--------------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
static
|
||||||
|
{
|
||||||
|
try
|
||||||
|
{ // compile our regular expression
|
||||||
|
RECompiler compiler = new RECompiler();
|
||||||
|
s_match = compiler.compile("^[A-Za-z0-9!#$%*+-/=?^_`{|}~.]+@[A-Za-z0-9_-]+(?:\\.[A-Za-z0-9_-]+)+$");
|
||||||
|
|
||||||
|
} // end try
|
||||||
|
catch (RESyntaxException e)
|
||||||
|
{ // shouldn't happen
|
||||||
|
logger.fatal("caught RESyntaxException in EmailRewriter initializer",e);
|
||||||
|
|
||||||
|
} // end catch
|
||||||
|
|
||||||
|
} // end static initializer
|
||||||
|
|
||||||
} // end class EmailRewriter
|
} // end class EmailRewriter
|
||||||
|
|
|
@ -9,29 +9,87 @@
|
||||||
*
|
*
|
||||||
* The Original Code is the Venice Web Community System.
|
* The Original Code is the Venice Web Community System.
|
||||||
*
|
*
|
||||||
* The Initial Developer of the Original Code is Eric J. Bowersox <erbo@silcom.com>,
|
* The Initial Developer of the Original Code is Eric J. Bowersox <erbo@ricochet.com>,
|
||||||
* for Silverwrist Design Studios. Portions created by Eric J. Bowersox are
|
* for Silverwrist Design Studios. Portions created by Eric J. Bowersox are
|
||||||
* Copyright (C) 2001 Eric J. Bowersox/Silverwrist Design Studios. All Rights Reserved.
|
* Copyright (C) 2001-2004 Eric J. Bowersox/Silverwrist Design Studios. All Rights Reserved.
|
||||||
*
|
*
|
||||||
* Contributor(s):
|
* Contributor(s):
|
||||||
*/
|
*/
|
||||||
package com.silverwrist.venice.htmlcheck.filters;
|
package com.silverwrist.venice.htmlcheck.filters;
|
||||||
|
|
||||||
|
import java.net.*;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
|
import org.apache.regexp.*;
|
||||||
|
import com.silverwrist.util.*;
|
||||||
import com.silverwrist.venice.htmlcheck.Rewriter;
|
import com.silverwrist.venice.htmlcheck.Rewriter;
|
||||||
import com.silverwrist.venice.htmlcheck.RewriterServices;
|
import com.silverwrist.venice.htmlcheck.RewriterServices;
|
||||||
import com.silverwrist.venice.htmlcheck.MarkupData;
|
import com.silverwrist.venice.htmlcheck.MarkupData;
|
||||||
|
|
||||||
public class URLRewriter implements Rewriter
|
public class URLRewriter implements Rewriter
|
||||||
{
|
{
|
||||||
|
/*--------------------------------------------------------------------------------
|
||||||
|
* Internal class pairing a compiled URL-matching pattern with the prefix to prepend on a match.
|
||||||
|
*--------------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
private static class URLElement
|
||||||
|
{
|
||||||
|
private REProgram m_match;
|
||||||
|
private String m_prefix;
|
||||||
|
|
||||||
|
URLElement(String pattern, String prefix)
|
||||||
|
{
|
||||||
|
try
|
||||||
|
{ // fill the classes
|
||||||
|
m_match = COMPILER.compile(pattern);
|
||||||
|
m_prefix = prefix;
|
||||||
|
|
||||||
|
} // end try
|
||||||
|
catch (RESyntaxException e)
|
||||||
|
{ // shouldn't happen
|
||||||
|
logger.fatal("got RESyntaxException in URLElement",e);
|
||||||
|
|
||||||
|
} // end catch
|
||||||
|
|
||||||
|
} // end constructor
|
||||||
|
|
||||||
|
String eval(String input)
|
||||||
|
{
|
||||||
|
RE m = new RE(m_match,RE.MATCH_CASEINDEPENDENT);
|
||||||
|
if (m.match(input))
|
||||||
|
return m_prefix + input;
|
||||||
|
else
|
||||||
|
return null;
|
||||||
|
|
||||||
|
} // end eval
|
||||||
|
|
||||||
|
} // end class URLElement
|
||||||
|
|
||||||
/*--------------------------------------------------------------------------------
|
/*--------------------------------------------------------------------------------
|
||||||
* Static data members
|
* Static data members
|
||||||
*--------------------------------------------------------------------------------
|
*--------------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
|
|
||||||
private static final String NULLSTRING = "";
|
private static Logger logger = Logger.getLogger(URLRewriter.class);
|
||||||
private static Hashtable prefix_list = null;
|
|
||||||
private static boolean set_up = true;
|
private static final RECompiler COMPILER = new RECompiler();
|
||||||
|
private static final String[] SETUP_DATA =
|
||||||
|
{
|
||||||
|
"^http://[A-Za-z0-9_-]+(?:\\.[A-Za-z0-9_-]+)+", "",
|
||||||
|
"^ftp://[A-Za-z0-9_-]+(?:\\.[A-Za-z0-9_-]+)+", "",
|
||||||
|
"^gopher://[A-Za-z0-9_-]+(?:\\.[A-Za-z0-9_-]+)+", "",
|
||||||
|
"^mailto:[A-Za-z0-9!#$%*+-/=?^_`{|}~.]+@[A-Za-z0-9_-]+(?:\\.[A-Za-z0-9_-]+)+$", "",
|
||||||
|
"^news:[A-Za-z0-9_-]+(?:\\.[A-Za-z0-9_-]+)+$", "",
|
||||||
|
"^nntp://[A-Za-z0-9_-]+(?:\\.[A-Za-z0-9_-]+)+", "",
|
||||||
|
"^telnet://[A-Za-z0-9_-]+(?:\\.[A-Za-z0-9_-]+)+", "",
|
||||||
|
"^tn3270://[A-Za-z0-9_-]+(?:\\.[A-Za-z0-9_-]+)+", "",
|
||||||
|
"^www\\.[A-Za-z0-9_-]+(?:\\.[A-Za-z0-9_-]+)*", "http://",
|
||||||
|
"^ftp\\.[A-Za-z0-9_-]+(?:\\.[A-Za-z0-9_-]+)*", "ftp://",
|
||||||
|
"^gopher\\.[A-Za-z0-9_-]+(?:\\.[A-Za-z0-9_-]+)*", "gopher://"
|
||||||
|
};
|
||||||
|
|
||||||
|
private static final List KNOWN_ELEMENTS;
|
||||||
|
|
||||||
/*--------------------------------------------------------------------------------
|
/*--------------------------------------------------------------------------------
|
||||||
* Constructor
|
* Constructor
|
||||||
|
@ -39,40 +97,9 @@ public class URLRewriter implements Rewriter
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public URLRewriter()
|
public URLRewriter()
|
||||||
{
|
{ // do nothing
|
||||||
setUpPrefixes(); // make sure the prefix data is set up
|
|
||||||
|
|
||||||
} // end constructor
|
} // end constructor
|
||||||
|
|
||||||
/*--------------------------------------------------------------------------------
|
|
||||||
* Internal functions
|
|
||||||
*--------------------------------------------------------------------------------
|
|
||||||
*/
|
|
||||||
|
|
||||||
private static void setUpPrefixes()
|
|
||||||
{
|
|
||||||
if (set_up)
|
|
||||||
{ // allocate the hash table
|
|
||||||
set_up = false;
|
|
||||||
prefix_list = new Hashtable(10,0.9F);
|
|
||||||
|
|
||||||
// fill it with the proper URL prefixes
|
|
||||||
prefix_list.put("http:",NULLSTRING);
|
|
||||||
prefix_list.put("ftp:",NULLSTRING);
|
|
||||||
prefix_list.put("gopher:",NULLSTRING);
|
|
||||||
prefix_list.put("mailto:",NULLSTRING);
|
|
||||||
prefix_list.put("news:",NULLSTRING);
|
|
||||||
prefix_list.put("nntp:",NULLSTRING);
|
|
||||||
prefix_list.put("telnet:",NULLSTRING);
|
|
||||||
prefix_list.put("tn3270:",NULLSTRING);
|
|
||||||
prefix_list.put("www.",new String("http://"));
|
|
||||||
prefix_list.put("ftp.",new String("ftp://"));
|
|
||||||
prefix_list.put("gopher.",new String("gopher://"));
|
|
||||||
|
|
||||||
} // end if
|
|
||||||
|
|
||||||
} // end setUpPrefixes
|
|
||||||
|
|
||||||
/*--------------------------------------------------------------------------------
|
/*--------------------------------------------------------------------------------
|
||||||
* Implementations from interface Rewriter
|
* Implementations from interface Rewriter
|
||||||
*--------------------------------------------------------------------------------
|
*--------------------------------------------------------------------------------
|
||||||
|
@ -86,29 +113,53 @@ public class URLRewriter implements Rewriter
|
||||||
|
|
||||||
public MarkupData rewrite(String data, RewriterServices svc)
|
public MarkupData rewrite(String data, RewriterServices svc)
|
||||||
{
|
{
|
||||||
Enumeration prefixes = prefix_list.keys();
|
for (Iterator it=KNOWN_ELEMENTS.iterator(); it.hasNext(); )
|
||||||
while (prefixes.hasMoreElements())
|
{ // test each element in turn
|
||||||
{ // get the next prefix and compare against the beginning of the string
|
URLElement ue = (URLElement)(it.next());
|
||||||
String pfx = (String)(prefixes.nextElement());
|
String s = ue.eval(data);
|
||||||
if (data.regionMatches(true,0,pfx,0,pfx.length()))
|
if (s!=null)
|
||||||
{ // good enough! build the open <A> tag (the gnarliest part of the markup)
|
{ // got a match! record the external reference and build the open <A> tag
|
||||||
StringBuffer open_a = new StringBuffer("<A HREF=\"");
|
try
|
||||||
String catenate = (String)(prefix_list.get(pfx));
|
{ // create URL and add it
|
||||||
open_a.append(catenate).append(data).append("\"");
|
if (s.toLowerCase().startsWith("http:"))
|
||||||
catenate = svc.getRewriterAttrValue("ANCHORTAIL");
|
svc.addExternalReference(new URL(s));
|
||||||
if ((catenate!=null) && (catenate.length()>0))
|
|
||||||
|
} // end try
|
||||||
|
catch (MalformedURLException e)
|
||||||
|
{ // forget it
|
||||||
|
} // end catch
|
||||||
|
|
||||||
|
StringBuffer open_a = new StringBuffer("<a href=\"");
|
||||||
|
open_a.append(s).append("\"");
|
||||||
|
String catenate = svc.getRewriterAttrValue("ANCHORTAIL");
|
||||||
|
if (!(StringUtil.isStringEmpty(catenate)))
|
||||||
open_a.append(' ').append(catenate);
|
open_a.append(' ').append(catenate);
|
||||||
open_a.append('>');
|
open_a.append('>');
|
||||||
|
|
||||||
// here's how you mark it up!
|
// here's how you mark it up!
|
||||||
return new MarkupData(open_a.toString(),data,"</A>");
|
return new MarkupData(open_a.toString(),data,"</a>");
|
||||||
|
|
||||||
} // end if
|
} // end if
|
||||||
|
|
||||||
} // end while
|
} // end for
|
||||||
|
|
||||||
return null; // sorry, no can do
|
return null; // sorry, no can do
|
||||||
|
|
||||||
} // end rewrite
|
} // end rewrite
|
||||||
|
|
||||||
|
/*--------------------------------------------------------------------------------
|
||||||
|
* Static initializer
|
||||||
|
*--------------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
static
|
||||||
|
{
|
||||||
|
ArrayList tmp = new ArrayList();
|
||||||
|
for (int i=0; i<SETUP_DATA.length; i+=2)
|
||||||
|
tmp.add(new URLElement(SETUP_DATA[i],SETUP_DATA[i + 1]));
|
||||||
|
tmp.trimToSize();
|
||||||
|
KNOWN_ELEMENTS = Collections.unmodifiableList(tmp);
|
||||||
|
|
||||||
|
} // end static initializer
|
||||||
|
|
||||||
} // end class URLRewriter
|
} // end class URLRewriter
|
||||||
|
|
|
@ -9,20 +9,26 @@
|
||||||
*
|
*
|
||||||
* The Original Code is the Venice Web Community System.
|
* The Original Code is the Venice Web Community System.
|
||||||
*
|
*
|
||||||
* The Initial Developer of the Original Code is Eric J. Bowersox <erbo@silcom.com>,
|
* The Initial Developer of the Original Code is Eric J. Bowersox <erbo@ricochet.com>,
|
||||||
* for Silverwrist Design Studios. Portions created by Eric J. Bowersox are
|
* for Silverwrist Design Studios. Portions created by Eric J. Bowersox are
|
||||||
* Copyright (C) 2001 Eric J. Bowersox/Silverwrist Design Studios. All Rights Reserved.
|
* Copyright (C) 2001-2004 Eric J. Bowersox/Silverwrist Design Studios. All Rights Reserved.
|
||||||
*
|
*
|
||||||
* Contributor(s):
|
* Contributor(s):
|
||||||
*/
|
*/
|
||||||
package com.silverwrist.venice.htmlcheck.impl;
|
package com.silverwrist.venice.htmlcheck.impl;
|
||||||
|
|
||||||
|
import java.net.URL;
|
||||||
|
|
||||||
public interface HTMLCheckerBackend
|
public interface HTMLCheckerBackend
|
||||||
{
|
{
|
||||||
public abstract String getCheckerAttrValue(String name);
|
public String getCheckerAttrValue(String name);
|
||||||
|
|
||||||
public abstract void sendTagMessage(String msg);
|
public void sendTagMessage(String msg);
|
||||||
|
|
||||||
public abstract Object getCheckerContextValue(String name);
|
public Object getCheckerContextValue(String name);
|
||||||
|
|
||||||
|
public void addExternalReference(URL ref);
|
||||||
|
|
||||||
|
public void addInternalReference(String ref);
|
||||||
|
|
||||||
} // end interface HTMLCheckerBackend
|
} // end interface HTMLCheckerBackend
|
||||||
|
|
|
@ -17,6 +17,7 @@
|
||||||
*/
|
*/
|
||||||
package com.silverwrist.venice.htmlcheck.impl;
|
package com.silverwrist.venice.htmlcheck.impl;
|
||||||
|
|
||||||
|
import java.net.URL;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import org.apache.log4j.*;
|
import org.apache.log4j.*;
|
||||||
import com.silverwrist.venice.htmlcheck.*;
|
import com.silverwrist.venice.htmlcheck.*;
|
||||||
|
@ -139,6 +140,8 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic
|
||||||
private ArrayList m_tag_rewriters = new ArrayList(); // tag rewriter instances
|
private ArrayList m_tag_rewriters = new ArrayList(); // tag rewriter instances
|
||||||
private ArrayList m_paren_rewriters = new ArrayList(); // paren rewriter instances
|
private ArrayList m_paren_rewriters = new ArrayList(); // paren rewriter instances
|
||||||
private HashMap m_context_data = new HashMap(); // context variables
|
private HashMap m_context_data = new HashMap(); // context variables
|
||||||
|
private HashSet m_external_references = new HashSet(); // list of external references
|
||||||
|
private HashSet m_internal_references = new HashSet(); // list of internal references
|
||||||
|
|
||||||
/*--------------------------------------------------------------------------------
|
/*--------------------------------------------------------------------------------
|
||||||
* Constructor
|
* Constructor
|
||||||
|
@ -161,7 +164,7 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns <CODE>true</CODE> if this character belongs as part of a word, <CODE>false</CODE> if not.
|
* Returns <code>true</code> if this character belongs as part of a word, <code>false</code> if not.
|
||||||
*
|
*
|
||||||
* @param ch Character to be tested.
|
* @param ch Character to be tested.
|
||||||
* @return See above.
|
* @return See above.
|
||||||
|
@ -210,8 +213,8 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic
|
||||||
} // end getRunLength
|
} // end getRunLength
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Copies the <CODE>Rewriter</CODE> objects from an outside list to an internal list, wrapping
|
* Copies the <code>Rewriter</code> objects from an outside list to an internal list, wrapping
|
||||||
* named rewriters in <CODE>CountingRewriter</CODE> objects as appropriate.
|
* named rewriters in <code>CountingRewriter</code> objects as appropriate.
|
||||||
*
|
*
|
||||||
* @param dest Destination to copy rewriters to.
|
* @param dest Destination to copy rewriters to.
|
||||||
* @param source List to copy rewriters from.
|
* @param source List to copy rewriters from.
|
||||||
|
@ -290,8 +293,8 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic
|
||||||
*
|
*
|
||||||
* @param ch Character to output.
|
* @param ch Character to output.
|
||||||
* @param filters List of filters to use to attempt to process the character.
|
* @param filters List of filters to use to attempt to process the character.
|
||||||
* @param count_cols <CODE>true</CODE> if the character output adds to the column counter,
|
* @param count_cols <code>true</code> if the character output adds to the column counter,
|
||||||
* <CODE>false</CODE> if not.
|
* <code>false</code> if not.
|
||||||
*/
|
*/
|
||||||
private final void emitChar(char ch, List filters, boolean count_cols)
|
private final void emitChar(char ch, List filters, boolean count_cols)
|
||||||
{
|
{
|
||||||
|
@ -841,7 +844,7 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic
|
||||||
} // end handleAsHTML
|
} // end handleAsHTML
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns <CODE>true</CODE> if the temporary buffer contains the start of an HTML comment. (The
|
* Returns <code>true</code> if the temporary buffer contains the start of an HTML comment. (The
|
||||||
* leading and trailing angle brackets are assumed.)
|
* leading and trailing angle brackets are assumed.)
|
||||||
*
|
*
|
||||||
* @return See above.
|
* @return See above.
|
||||||
|
@ -853,7 +856,7 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic
|
||||||
} // end containsHTMLComment
|
} // end containsHTMLComment
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns <CODE>true</CODE> if the temporary buffer contains a complete HTML comment. (The leading
|
* Returns <code>true</code> if the temporary buffer contains a complete HTML comment. (The leading
|
||||||
* and trailing angle brackets are assumed.)
|
* and trailing angle brackets are assumed.)
|
||||||
*
|
*
|
||||||
* @return See above.
|
* @return See above.
|
||||||
|
@ -869,7 +872,7 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic
|
||||||
} // end containsCompleteHTMLComment
|
} // end containsCompleteHTMLComment
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns <CODE>true</CODE> if the temporary buffer contains an XML construct, i.e. a tag that
|
* Returns <code>true</code> if the temporary buffer contains an XML construct, i.e. a tag that
|
||||||
* contains a ':', and may or may not have a leading '/'. (The leading and trailing angle brackets
|
* contains a ':', and may or may not have a leading '/'. (The leading and trailing angle brackets
|
||||||
* are assumed.)
|
* are assumed.)
|
||||||
*
|
*
|
||||||
|
@ -1381,6 +1384,8 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic
|
||||||
m_lines = 0;
|
m_lines = 0;
|
||||||
m_paren_level = 0;
|
m_paren_level = 0;
|
||||||
m_output_buffer = null;
|
m_output_buffer = null;
|
||||||
|
m_external_references.clear();
|
||||||
|
m_internal_references.clear();
|
||||||
killState();
|
killState();
|
||||||
|
|
||||||
// Also reset all the counters.
|
// Also reset all the counters.
|
||||||
|
@ -1441,6 +1446,28 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic
|
||||||
|
|
||||||
} // end setContextValue
|
} // end setContextValue
|
||||||
|
|
||||||
|
public Set getExternalReferences() throws NotYetFinishedException
|
||||||
|
{
|
||||||
|
if (!m_finished)
|
||||||
|
throw new NotYetFinishedException();
|
||||||
|
if (m_external_references.isEmpty())
|
||||||
|
return Collections.EMPTY_SET;
|
||||||
|
HashSet rc = new HashSet(m_external_references);
|
||||||
|
return Collections.unmodifiableSet(rc);
|
||||||
|
|
||||||
|
} // end getExternalReferences
|
||||||
|
|
||||||
|
public Set getInternalReferences() throws NotYetFinishedException
|
||||||
|
{
|
||||||
|
if (!m_finished)
|
||||||
|
throw new NotYetFinishedException();
|
||||||
|
if (m_internal_references.isEmpty())
|
||||||
|
return Collections.EMPTY_SET;
|
||||||
|
HashSet rc = new HashSet(m_internal_references);
|
||||||
|
return Collections.unmodifiableSet(rc);
|
||||||
|
|
||||||
|
} // end getInternalReferences
|
||||||
|
|
||||||
/*--------------------------------------------------------------------------------
|
/*--------------------------------------------------------------------------------
|
||||||
* Implementations from interface HTMLCheckerBackend
|
* Implementations from interface HTMLCheckerBackend
|
||||||
*--------------------------------------------------------------------------------
|
*--------------------------------------------------------------------------------
|
||||||
|
@ -1488,6 +1515,18 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic
|
||||||
|
|
||||||
} // end getCheckerContextValue
|
} // end getCheckerContextValue
|
||||||
|
|
||||||
|
public void addExternalReference(URL ref)
|
||||||
|
{
|
||||||
|
m_external_references.add(ref);
|
||||||
|
|
||||||
|
} // end addExternalReference
|
||||||
|
|
||||||
|
public void addInternalReference(String ref)
|
||||||
|
{
|
||||||
|
m_internal_references.add(ref);
|
||||||
|
|
||||||
|
} // end addInternalReference
|
||||||
|
|
||||||
/*--------------------------------------------------------------------------------
|
/*--------------------------------------------------------------------------------
|
||||||
* Implementations from interface RewriterServices
|
* Implementations from interface RewriterServices
|
||||||
*--------------------------------------------------------------------------------
|
*--------------------------------------------------------------------------------
|
||||||
|
@ -1505,4 +1544,6 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic
|
||||||
|
|
||||||
} // end getRewriterContextValue
|
} // end getRewriterContextValue
|
||||||
|
|
||||||
|
// addExternalReference and addInternalReference are implemented as part of HTMLCheckerBackend
|
||||||
|
|
||||||
} // end class HTMLCheckerImpl
|
} // end class HTMLCheckerImpl
|
||||||
|
|
|
@ -9,14 +9,19 @@
|
||||||
*
|
*
|
||||||
* The Original Code is the Venice Web Community System.
|
* The Original Code is the Venice Web Community System.
|
||||||
*
|
*
|
||||||
* The Initial Developer of the Original Code is Eric J. Bowersox <erbo@silcom.com>,
|
* The Initial Developer of the Original Code is Eric J. Bowersox <erbo@ricochet.com>,
|
||||||
* for Silverwrist Design Studios. Portions created by Eric J. Bowersox are
|
* for Silverwrist Design Studios. Portions created by Eric J. Bowersox are
|
||||||
* Copyright (C) 2001 Eric J. Bowersox/Silverwrist Design Studios. All Rights Reserved.
|
* Copyright (C) 2001-2004 Eric J. Bowersox/Silverwrist Design Studios. All Rights Reserved.
|
||||||
*
|
*
|
||||||
* Contributor(s):
|
* Contributor(s):
|
||||||
*/
|
*/
|
||||||
package com.silverwrist.venice.htmlcheck.impl;
|
package com.silverwrist.venice.htmlcheck.impl;
|
||||||
|
|
||||||
|
import java.net.*;
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
|
import org.apache.regexp.*;
|
||||||
|
import com.silverwrist.util.*;
|
||||||
|
|
||||||
class TagA extends BalancedTag
|
class TagA extends BalancedTag
|
||||||
{
|
{
|
||||||
/*--------------------------------------------------------------------------------
|
/*--------------------------------------------------------------------------------
|
||||||
|
@ -24,21 +29,94 @@ class TagA extends BalancedTag
|
||||||
*--------------------------------------------------------------------------------
|
*--------------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
|
|
||||||
private static final String TARGET_ATTR = "TARGET";
|
/** The instance of {@link org.apache.log4j.Logger Logger} for use by this class. */
|
||||||
|
private static Logger logger = Logger.getLogger(TagA.class);
|
||||||
|
|
||||||
|
/*--------------------------------------------------------------------------------
|
||||||
|
* Attributes
|
||||||
|
*--------------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
/** Regular expression program to look for "HREF=" attribute. */
|
||||||
|
private REProgram m_href = null;
|
||||||
|
|
||||||
|
/** Regular expression program to look for "TARGET=" attribute. */
|
||||||
|
private REProgram m_target = null;
|
||||||
|
|
||||||
/*--------------------------------------------------------------------------------
|
/*--------------------------------------------------------------------------------
|
||||||
* Constructor
|
* Constructor
|
||||||
*--------------------------------------------------------------------------------
|
*--------------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a new instance of <code>TagA</code>.
|
||||||
|
*/
|
||||||
TagA()
|
TagA()
|
||||||
{
|
{
|
||||||
super("A",false);
|
super("A",false);
|
||||||
|
try
|
||||||
|
{ // compile regular expressions
|
||||||
|
RECompiler compiler = new RECompiler();
|
||||||
|
m_href = compiler.compile("href\\s*=");
|
||||||
|
m_target = compiler.compile("target\\s*=");
|
||||||
|
|
||||||
|
} // end try
|
||||||
|
catch (RESyntaxException e)
|
||||||
|
{ // shouldn't happen
|
||||||
|
logger.fatal("got RESyntaxException in TagA",e);
|
||||||
|
|
||||||
|
} // end catch
|
||||||
|
|
||||||
} // end constructor
|
} // end constructor
|
||||||
|
|
||||||
/*--------------------------------------------------------------------------------
|
/*--------------------------------------------------------------------------------
|
||||||
* External operations
|
* Internal operations
|
||||||
|
*--------------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Extracts an attribute value from the start of the string. The attribute value may be enclosed
|
||||||
|
* in quotes, or may simply be a series of nonblank characters delimited by blanks.
|
||||||
|
*
|
||||||
|
* @param s The string to extract the attribute value from.
|
||||||
|
* @return The attribute value extracted.
|
||||||
|
*/
|
||||||
|
private static final String extractAttribute(String s)
|
||||||
|
{
|
||||||
|
char[] a = s.toCharArray();
|
||||||
|
int i = 0;
|
||||||
|
while ((i<a.length) && Character.isWhitespace(a[i]))
|
||||||
|
i++;
|
||||||
|
if (i==a.length)
|
||||||
|
return "";
|
||||||
|
int st = i;
|
||||||
|
if ((a[st]=='\'') || (a[st]=='\"'))
|
||||||
|
{ // find quoted string boundaries
|
||||||
|
i++;
|
||||||
|
while ((i<a.length) && (a[i]!=a[st]))
|
||||||
|
i++;
|
||||||
|
if (i==a.length)
|
||||||
|
return "";
|
||||||
|
st++;
|
||||||
|
|
||||||
|
} // end if
|
||||||
|
else
|
||||||
|
{ // skip over non-whitespace
|
||||||
|
while ((i<a.length) && !(Character.isWhitespace(a[i])))
|
||||||
|
i++;
|
||||||
|
// if i==a.length, just take the "rest"
|
||||||
|
|
||||||
|
} // end else
|
||||||
|
|
||||||
|
if (i==a.length)
|
||||||
|
return s.substring(st);
|
||||||
|
else
|
||||||
|
return s.substring(st,i);
|
||||||
|
|
||||||
|
} // end extractAttribute
|
||||||
|
|
||||||
|
/*--------------------------------------------------------------------------------
|
||||||
|
* Overrides from class SimpleTag
|
||||||
*--------------------------------------------------------------------------------
|
*--------------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
@ -47,77 +125,44 @@ class TagA extends BalancedTag
|
||||||
if (is_closing)
|
if (is_closing)
|
||||||
return contents; // don't bother checking close tags
|
return contents; // don't bother checking close tags
|
||||||
|
|
||||||
// Skip over the initial word of the tag data, as that's the tag name.
|
// Pull out the HREF= attribute, as that's an "external reference" we need to keep track of.
|
||||||
int i = 0;
|
RE m = new RE(m_href,RE.MATCH_CASEINDEPENDENT);
|
||||||
while ((i!=contents.length()) && !(Character.isWhitespace(contents.charAt(i))))
|
if (m.match(contents))
|
||||||
i++;
|
{ // get the attribute value
|
||||||
|
try
|
||||||
|
{ // turn it into a URL and add it as an external reference
|
||||||
|
String s = extractAttribute(contents.substring(m.getParenEnd(0)));
|
||||||
|
if (!(StringUtil.isStringEmpty(s)) && s.toLowerCase().startsWith("http:"))
|
||||||
|
{ // turn it into the URL and add it
|
||||||
|
URL ref = new URL(s);
|
||||||
|
context.addExternalReference(ref);
|
||||||
|
|
||||||
// Search for the TARGET= attribute in the tag data.
|
} // end if
|
||||||
|
// else ignore me
|
||||||
|
|
||||||
|
} // end try
|
||||||
|
catch (MalformedURLException e)
|
||||||
|
{ // ignore this reference
|
||||||
|
} // end catch
|
||||||
|
|
||||||
|
} // end if
|
||||||
|
|
||||||
|
// Look for the TARGET= attribute.
|
||||||
boolean target_seen = false;
|
boolean target_seen = false;
|
||||||
while (i!=contents.length())
|
m = new RE(m_target,RE.MATCH_CASEINDEPENDENT);
|
||||||
{ // skip over any whitespace between one attribute (or the name) and the next one
|
if (m.match(contents))
|
||||||
while ((i!=contents.length()) && Character.isWhitespace(contents.charAt(i)))
|
{ // get the attribute value
|
||||||
i++;
|
String s = extractAttribute(contents.substring(m.getParenEnd(0)));
|
||||||
if (i==contents.length())
|
if (!(StringUtil.isStringEmpty(s)))
|
||||||
break; // reached end of string, all done searching
|
target_seen = true;
|
||||||
|
|
||||||
// Mark the start of this attribute name and start skipping over it.
|
} // end if
|
||||||
int start_name = i;
|
|
||||||
while ((i!=contents.length()) && !(Character.isWhitespace(contents.charAt(i)))
|
|
||||||
&& (contents.charAt(i)!='='))
|
|
||||||
i++;
|
|
||||||
|
|
||||||
// We now know where the attribute name is, see if it's "TARGET".
|
|
||||||
if ((i-start_name)==TARGET_ATTR.length())
|
|
||||||
{ // compare the substring to see if it's right
|
|
||||||
String name = contents.substring(start_name,i);
|
|
||||||
if (name.equalsIgnoreCase(TARGET_ATTR))
|
|
||||||
{ // OK, we saw the TARGET tag in the list! Bail out!
|
|
||||||
target_seen = true;
|
|
||||||
break;
|
|
||||||
|
|
||||||
} // end if
|
|
||||||
|
|
||||||
} // end if
|
|
||||||
|
|
||||||
while ((i!=contents.length()) && Character.isWhitespace(contents.charAt(i)))
|
|
||||||
i++; // skip over whitespace at end of name but before the = sign
|
|
||||||
|
|
||||||
if ((i<contents.length()) && (contents.charAt(i)=='='))
|
|
||||||
{ // skip over the = sign first
|
|
||||||
i++;
|
|
||||||
while ((i!=contents.length()) && Character.isWhitespace(contents.charAt(i)))
|
|
||||||
i++; // skip over whitespace after the = sign
|
|
||||||
|
|
||||||
if (i==contents.length())
|
|
||||||
break; // reached end of string, all done searching
|
|
||||||
|
|
||||||
if ((contents.charAt(i)=='\'') || (contents.charAt(i)=='\"'))
|
|
||||||
{ // this is a quoted string - swallow it
|
|
||||||
char quote_char = contents.charAt(i++); // skip the quote part
|
|
||||||
while ((i!=contents.length()) && (contents.charAt(i)!=quote_char))
|
|
||||||
i++; // skip over data between quotes
|
|
||||||
if (i!=contents.length())
|
|
||||||
i++; // skip over last quote
|
|
||||||
|
|
||||||
} // end if
|
|
||||||
else
|
|
||||||
{ // skip over a single word
|
|
||||||
while ((i!=contents.length()) && !(Character.isWhitespace(contents.charAt(i))))
|
|
||||||
i++;
|
|
||||||
|
|
||||||
} // end else
|
|
||||||
|
|
||||||
} // end if
|
|
||||||
// else this tag had no value - just go on to the next one
|
|
||||||
|
|
||||||
} // end while
|
|
||||||
|
|
||||||
if (target_seen)
|
if (target_seen)
|
||||||
return contents; // no need to alter the string
|
return contents; // no need to alter the string
|
||||||
|
|
||||||
String tail = (String)(context.getCheckerAttrValue("ANCHORTAIL"));
|
String tail = (String)(context.getCheckerAttrValue("ANCHORTAIL"));
|
||||||
return new String(contents + " " + tail);
|
return contents + " " + tail;
|
||||||
|
|
||||||
} // end rewriteTagContents
|
} // end rewriteTagContents
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user