venice-main-classic/src/com/silverwrist/venice/htmlcheck/impl/HTMLCheckerImpl.java

/*
 * The contents of this file are subject to the Mozilla Public License Version 1.1
 * (the "License"); you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at <http://www.mozilla.org/MPL/>.
 *
 * Software distributed under the License is distributed on an "AS IS" basis, WITHOUT
 * WARRANTY OF ANY KIND, either express or implied. See the License for the specific
 * language governing rights and limitations under the License.
 *
 * The Original Code is the Venice Web Community System.
 *
 * The Initial Developer of the Original Code is Eric J. Bowersox <erbo@silcom.com>,
 * for Silverwrist Design Studios.  Portions created by Eric J. Bowersox are
 * Copyright (C) 2001 Eric J. Bowersox/Silverwrist Design Studios.  All Rights Reserved.
 *
 * Contributor(s):
 */
package com.silverwrist.venice.htmlcheck.impl;

import java.util.*;
import com.silverwrist.venice.htmlcheck.*;

class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServices
{
  /*--------------------------------------------------------------------------------
   * Wrapper for Rewriters to implement counting where necessary
   *--------------------------------------------------------------------------------
   */

  /**
   * Decorator around a Rewriter that tallies how many times the wrapped
   * rewriter actually produced markup (i.e. returned a non-null result).
   */
  static class CountingRewriter implements Rewriter
  {
    private Rewriter inner;   // the rewriter all calls are delegated to
    private int count;        // successful rewrites since construction or the last reset()

    /**
     * Wraps the given rewriter with a counter that starts at zero.
     *
     * @param inner the rewriter to delegate to
     */
    public CountingRewriter(Rewriter inner)
    {
      this.inner = inner;
      this.count = 0;

    } // end constructor

    /** Returns whatever name the wrapped rewriter reports. */
    public String getName()
    {
      return inner.getName();

    } // end getName

    /**
     * Delegates to the wrapped rewriter and bumps the counter whenever the
     * delegate hands back a non-null result.
     *
     * @param data text offered to the rewriter
     * @param svc  services object passed through to the wrapped rewriter
     * @return the wrapped rewriter's result, unchanged (may be null)
     */
    public MarkupData rewrite(String data, RewriterServices svc)
    {
      final MarkupData result = inner.rewrite(data,svc);
      if (result==null)
        return null;  // nothing was rewritten, so there is nothing to count
      count++;
      return result;

    } // end rewrite

    /** Returns the number of non-null results seen since the last reset. */
    public int getCount()
    {
      return count;

    } // end getCount

    /** Sets the counter back to zero. */
    public void reset()
    {
      count = 0;

    } // end reset

  } // end class CountingRewriter

  /*--------------------------------------------------------------------------------
   * State machine constants
   *--------------------------------------------------------------------------------
   */

  // Values taken by the "state" field; parse() and finish() switch on these.
  private static final short ST_WHITESPACE = 0;   // accumulating whitespace in the temp buffer
  private static final short ST_CHARS = 1;        // accumulating non-whitespace character data
  private static final short ST_LEFTANGLE = 2;    // a '<' was just seen (angle processing enabled)
  private static final short ST_TAG = 3;          // collecting tag contents, waiting for '>'
  private static final short ST_PAREN = 4;        // collecting parenthesized text, waiting for ')'
  private static final short ST_TAGQUOTE = 5;     // inside a quoted string within a tag

  /*--------------------------------------------------------------------------------
   * Attributes
   *--------------------------------------------------------------------------------
   */

  // Per-instance parser state.  The buffers and tag stack are created by initState()
  // and released by killState(); the rewriter vectors are filled in by the constructor.
  private HTMLCheckerConfigImpl config;             // the configuration we're working from
  private boolean started = false;                  // have we started parsing?
  private boolean finished = false;                 // have we finished parsing?
  private short state = ST_WHITESPACE;              // current parser state
  private char quote_char;                          // quote character to match
  private int paren_level = 0;                      // parenthesization level
  private short columns = 0;                        // current number of columns
  private int lines = 0;                            // current number of lines
  private int nobreak_count = 0;                    // "no-break" count (NOBR increments, /NOBR decrements)
  private boolean trigger_WBR = false;              // word break trigger (set by the WBR tag message)
  private StringBuffer output_buffer = null;        // output buffer for characters
  private StringBuffer temp_buffer = null;          // temporary buffer used within one state
  private Vector tag_stack = null;                  // stack of tags that have been opened
  private Hashtable counters = new Hashtable();     // the CountingRewriter instances, keyed by rewriter name
  private Vector string_rewriters = new Vector();   // string rewriter instances
  private Vector word_rewriters = new Vector();     // word rewriter instances
  private Vector tag_rewriters = new Vector();      // tag rewriter instances
  private Vector paren_rewriters = new Vector();    // paren rewriter instances
  private Hashtable context_data = new Hashtable(); // context variables

  /*--------------------------------------------------------------------------------
   * Constructor
   *--------------------------------------------------------------------------------
   */

  HTMLCheckerImpl(HTMLCheckerConfigImpl config)
  {
    TagRepository.init();
    this.config = config;
    copyRewriters(string_rewriters,config.getStringRewriters());
    copyRewriters(word_rewriters,config.getWordRewriters());
    copyRewriters(tag_rewriters,config.getTagRewriters());
    copyRewriters(paren_rewriters,config.getParenRewriters());

  } // end constructor

  /*--------------------------------------------------------------------------------
   * Internal functions
   *--------------------------------------------------------------------------------
   */

  /**
   * Tells whether a character counts as part of a "word": any upper- or
   * lower-case letter, or a hyphen.
   *
   * @param ch the character to classify
   * @return true if the character is a word character
   */
  private static final boolean isWordChar(char ch)
  {
    if (ch=='-')
      return true;  // hyphens are treated as word characters
    return Character.isLowerCase(ch) || Character.isUpperCase(ch);

  } // end isWordChar

  /**
   * Measures the leading run of the buffer: the number of characters, starting
   * at index 0, that share the first character's classification (word
   * character vs. non-word character).
   *
   * @param buf the buffer to examine; it must contain at least one character,
   *            since the first character is read unconditionally
   * @return the length of the leading run (at least 1)
   */
  private static final int getRunLength(StringBuffer buf)
  {
    final boolean first_is_word = isWordChar(buf.charAt(0));
    final int total = buf.length();
    for (int pos=1; pos<total; pos++)
    { // stop at the first character whose classification differs from the first one
      if (isWordChar(buf.charAt(pos))!=first_is_word)
        return pos;

    } // end for

    return total;  // the whole buffer is a single run

  } // end getRunLength

  /**
   * Copies the rewriters from a configuration list into one of this checker's
   * own vectors.  A rewriter that reports a name is wrapped in a
   * CountingRewriter, which is also registered in the counters table under
   * that name; an unnamed rewriter is copied as-is.
   *
   * @param dest   the vector receiving the (possibly wrapped) rewriters
   * @param source the list of Rewriter objects to copy from
   */
  private void copyRewriters(Vector dest, List source)
  {
    Iterator it = source.iterator();
    while (it.hasNext())
    { // get each rewriter, and wrap it if it has a name
      Rewriter r = (Rewriter)(it.next());
      String name = r.getName();
      if (name!=null)
      { // wrap it in a CountingRewriter and hash it (a Hashtable cannot take a null key,
        // which is why unnamed rewriters must skip this step)
        CountingRewriter cr = new CountingRewriter(r);
        counters.put(name,cr);
        r = cr;

      } // end if

      dest.add(r);

    } // end while

    dest.trimToSize();

  } // end copyRewriters

  /**
   * Offers the text to each rewriter in list order and returns the first
   * non-null result; rewriters after the first success are not consulted.
   *
   * @param rewriters list of Rewriter objects to try
   * @param data      the text to offer to each rewriter
   * @return the first markup produced, or null if no rewriter accepted the text
   */
  private MarkupData attemptRewrite(List rewriters, String data)
  {
    for (Iterator it = rewriters.iterator(); it.hasNext(); )
    { // the first rewriter that yields markup wins
      MarkupData result = ((Rewriter)(it.next())).rewrite(data,this);
      if (result!=null)
        return result;

    } // end for

    return null;  // nobody wanted it

  } // end attemptRewrite

  /**
   * Allocates fresh working storage for a parse: an empty tag stack, a small
   * temporary buffer, and the output buffer.
   */
  private void initState()
  {
    tag_stack = new Vector();
    temp_buffer = new StringBuffer(64);
    output_buffer = new StringBuffer(1024);

  } // end initState

  /**
   * Releases the working storage used during parsing: the temporary buffer
   * and the tag stack.  The output buffer is left alone.
   */
  private void killState()
  {
    temp_buffer = null;
    if (tag_stack==null)
      return;  // no tag stack was ever allocated

    tag_stack.removeAllElements();
    tag_stack = null;

  } // end killState

  /**
   * Writes one character to the output buffer.  Each output filter is given a
   * chance, in list order, to write the character itself; if none does, the
   * character is appended unchanged.
   *
   * @param ch         the character to emit
   * @param filters    list of OutputFilter objects to consult
   * @param count_cols whether the character advances the column counter (only
   *                   honored when the configured word-wrap length is positive)
   */
  private void emitChar(char ch, List filters, boolean count_cols)
  {
    boolean consumed = false;
    for (Iterator it = filters.iterator(); !consumed && it.hasNext(); )
    { // stop at the first filter that takes responsibility for the character
      OutputFilter of = (OutputFilter)(it.next());
      consumed = of.tryOutputCharacter(output_buffer,ch);

    } // end for

    if (!consumed)
      output_buffer.append(ch);  // no filter claimed it - emit it verbatim

    if (count_cols && (config.getWordWrapLength()>0))
      columns++;  // one more column used on this line

  } // end emitChar

  /**
   * Writes a string to the output buffer, letting the output filters intervene.
   * With no filters the string is appended in one piece.  Otherwise the string is
   * consumed in chunks: the longest prefix that no filter objects to is appended
   * verbatim, then the single character that stopped it is handed to the filter
   * that reported it, and the process repeats on the remainder.
   *
   * @param str        the text to emit
   * @param filters    list of OutputFilter objects to consult
   * @param count_cols whether emitted characters advance the column counter (only
   *                   honored when the configured word-wrap length is positive)
   */
  private void emitString(String str, List filters, boolean count_cols)
  {
    boolean real_count_cols = count_cols && (config.getWordWrapLength()>0);

    if (filters.size()==0)
    { // if there are no filters, just append the entire string directly
      output_buffer.append(str);
      if (real_count_cols)
	columns += (short)(str.length());
      return;

    } // end if

    String temp = str;
    while ((temp!=null) && (temp.length()>0))
    { // We output as much of the string as we possibly can at once.  For starters,
      // assume we're going to output the whole thing.
      int output_len = temp.length();

      // Now look at each of the output filters to see if we should try outputting
      // a lesser amount (i.e., does the string contain a "stopper" that one of the
      // output filters would like to mogrify?)
      Iterator it = filters.iterator();
      OutputFilter stopper = null;
      while (it.hasNext() && (output_len>0))
      { // look to find the length of characters that doesn't match this filter
	OutputFilter of = (OutputFilter)(it.next());
	int lnm = of.lengthNoMatch(temp);
	// a negative value is ignored here; only a strictly shorter length replaces the stopper
	if ((lnm>=0) && (lnm<output_len))
	{ // we've found a new stopper - record the length and the filter
	  output_len = lnm;
	  stopper = of;

	} // end if

      } // end while (looking through filters)

      if (output_len>0)
      { // At least this many unaltered characters can be output, so copy them.
	output_buffer.append(temp.substring(0,output_len));
	if (real_count_cols)
	  columns += (short)output_len;

      } // end if

      if (stopper!=null)
      { // one of the output filters stopped us, so invoke it
	// (output_len is advanced past the stopper character so it is not emitted twice)
	char tmpch = temp.charAt(output_len++);
	if (!(stopper.tryOutputCharacter(output_buffer,tmpch)))
	  output_buffer.append(tmpch);
	if (real_count_cols)
	  columns++;

      } // end if

      // Chop the string down the middle and go around again.
      if (output_len==temp.length())
	temp = null;
      else if (output_len>0)
	temp = temp.substring(output_len);

    } // end while (still data left to append)

  } // end emitString

  /**
   * Emits a CR/LF pair through the raw output filters, bumps the line count,
   * and (when word wrapping is enabled) returns the column counter to zero.
   */
  private void emitLineBreak()
  {
    emitString("\r\n",config.getRawOutputFilters(),false);
    lines++;
    if (config.getWordWrapLength()>0)
      columns = 0;  // we're at the start of a fresh line

  } // end emitLineBreak

  /**
   * Emits a line break only if word wrapping is enabled, no no-break region is
   * active, and the current line has already reached the wrap length.
   */
  private void emitPossibleLineBreak()
  {
    if (config.getWordWrapLength()<=0)
      return;  // word wrapping is turned off
    if (nobreak_count>0)
      return;  // line breaks are currently suppressed
    if (columns<config.getWordWrapLength())
      return;  // still room left on this line

    emitLineBreak();

  } // end emitPossibleLineBreak

  /**
   * Makes sure the current line can take the given number of characters,
   * emitting a line break first if it cannot.  Does nothing when word wrapping
   * is disabled or a no-break region is active.
   *
   * @param nchars number of characters about to be written
   */
  private void ensureSpaceOnLine(int nchars)
  {
    if ((config.getWordWrapLength()<=0) || (nobreak_count>0))
      return;  // wrapping is off or suppressed - never break here

    int room = (int)(config.getWordWrapLength() - columns);
    if (nchars>room)
      emitLineBreak();

  } // end ensureSpaceOnLine

  /**
   * Emits a piece of rewritten markup: the begin markup, the visible text, then
   * the end markup.  Only the visible text counts toward the column total and
   * goes through the normal output filters; the markup goes through the raw ones.
   *
   * @param md the markup to emit
   */
  private void emitMarkupData(MarkupData md)
  {
    // Break the line first if the visible text would not fit on it.
    final int visible_len = md.getText().length();
    ensureSpaceOnLine(visible_len);

    final String opening = md.getBeginMarkup();
    emitString(opening,config.getRawOutputFilters(),false);

    final String body = md.getText();
    emitString(body,config.getOutputFilters(),true);

    final String closing = md.getEndMarkup();
    emitString(closing,config.getRawOutputFilters(),false);

  } // end emitMarkupData

  /**
   * Emits a piece of rewritten markup surrounded by a prefix and suffix
   * character (for example the angle brackets or parentheses that enclosed the
   * original text).  The prefix, visible text and suffix all count toward the
   * column total; the begin/end markup does not.
   *
   * @param md     the markup to emit
   * @param prefix character written before the begin markup
   * @param suffix character written after the end markup
   */
  private void emitMarkupData(MarkupData md, char prefix, char suffix)
  {
    // The two extra columns account for the prefix and suffix characters.
    final int visible_len = md.getText().length() + 2;
    ensureSpaceOnLine(visible_len);

    emitChar(prefix,config.getOutputFilters(),true);

    final String opening = md.getBeginMarkup();
    emitString(opening,config.getRawOutputFilters(),false);

    final String body = md.getText();
    emitString(body,config.getOutputFilters(),true);

    final String closing = md.getEndMarkup();
    emitString(closing,config.getRawOutputFilters(),false);

    emitChar(suffix,config.getOutputFilters(),true);

  } // end emitMarkupData

  /**
   * Drains the whitespace held in the temporary buffer to the output, one
   * logical line at a time.  Whitespace before each newline is emitted (cut
   * short to the room left on the line when word wrapping is active), then a
   * line break is emitted in place of the newline.  The buffer is empty on return.
   */
  private void doFlushWhitespace()
  {
    while (temp_buffer.length()>0)
    { // locate the next newline, if there is one
      final int newline_pos = temp_buffer.toString().indexOf('\n');

      // emit everything before the newline, or the whole buffer if there is none
      int emit_len = (newline_pos>=0) ? newline_pos : temp_buffer.length();

      if ((config.getWordWrapLength()>0) && (nobreak_count<=0))
      { // never emit more whitespace than the current line still has room for
        int room = (int)(config.getWordWrapLength() - columns);
        emit_len = Math.min(emit_len,room);

      } // end if

      if (emit_len>0)
        emitString(temp_buffer.substring(0,emit_len),config.getOutputFilters(),true);

      if (newline_pos<0)
      { // no newline left - whatever remains is dropped and we're done
        temp_buffer.setLength(0);
        return;

      } // end if

      emitLineBreak();  // stands in for the newline character

      // discard everything up to and including the newline
      final int consumed = newline_pos + 1;
      if (consumed<temp_buffer.length())
        temp_buffer.delete(0,consumed);
      else
        temp_buffer.setLength(0);

    } // end while (still data in temp buffer)

  } // end doFlushWhitespace

  /**
   * Emits the first nchars characters of the temporary buffer and removes them
   * from it.  When word wrapping is active (and not suppressed by a no-break
   * region) the run is split across lines wherever the wrap length is reached.
   *
   * The current line may already be at or beyond the wrap length on entry (for
   * example right after a single word longer than the line was emitted), so the
   * remaining space is checked for being non-positive before it is used as a
   * substring length.
   *
   * @param nchars number of leading characters to emit; values of zero or less
   *               do nothing
   */
  private void emitFromStartOfTempBuffer(int nchars)
  {
    if (nchars<=0)
      return;

    if ((config.getWordWrapLength()>0) && (nobreak_count<=0))
    { // we can output the line break anywhere in the subsequence...
      while (nchars>0)
      { // figure out how many characters we can output WITHOUT causing a line break
        int remaining_space = (int)(config.getWordWrapLength() - columns);
        if (remaining_space<=0)
        { // the line is already full (or overfull) - start a new one before emitting;
          // emitLineBreak() zeroes the column count, so the next pass has room
          emitLineBreak();
          continue;

        } // end if

        int curlen = Math.min(nchars,remaining_space);

        // output those characters
        emitString(temp_buffer.substring(0,curlen),config.getOutputFilters(),true);
        temp_buffer.delete(0,curlen);
        nchars -= curlen;

        if (columns>=config.getWordWrapLength())
          emitLineBreak();  // and line break us to get to the next line

      } // end while

    } // end if
    else
    { // just output the run of characters straight
      emitString(temp_buffer.substring(0,nchars),config.getOutputFilters(),true);
      temp_buffer.delete(0,nchars);

    } // end else

  } // end emitFromStartOfTempBuffer

  /**
   * Flushes the run of non-whitespace characters held in the temporary buffer.
   * The whole string is first offered to the string rewriters; if one of them
   * produces markup, that markup is emitted and the buffer is cleared.  Otherwise
   * the buffer is consumed run by run: runs of word characters have their leading
   * and trailing hyphens emitted verbatim and the word in between offered to the
   * word rewriters, while runs of non-word characters are emitted verbatim.
   * The temporary buffer is empty on return.
   */
  private void doFlushString()
  {
    MarkupData md = attemptRewrite(string_rewriters,temp_buffer.toString());
    if (md!=null)
    { // we've got something marked up!  output it...
      emitMarkupData(md);
      temp_buffer.setLength(0);
      return;

    } // end if

    while (temp_buffer.length()>0)
    { // find the length of the initial string of word or non-word characters
      int sublen = getRunLength(temp_buffer);

      if (isWordChar(temp_buffer.charAt(0)))
      { // we need to check the word...but first, we must eliminate leading hyphens
	int hyph_count = 0;
	while ((hyph_count<sublen) && (temp_buffer.charAt(hyph_count)=='-'))
	  hyph_count++;
	// emitting also deletes those characters, so index 0 is now the first non-hyphen
	emitFromStartOfTempBuffer(hyph_count);
	sublen -= hyph_count;

	// now determine how many hyphens there are at the end of the word...
	int word_len = sublen;
	hyph_count = 0;
	while ((word_len>0) && (temp_buffer.charAt(word_len-1)=='-'))
	{ // decrement word length, increment hyphen count
	  hyph_count++;
	  word_len--;

	} // end while

	if (word_len>0)
	{ // extract the word from the start of the buffer
	  String word = temp_buffer.substring(0,word_len);
	  temp_buffer.delete(0,word_len);

	  // try to rewrite this word...
	  md = attemptRewrite(word_rewriters,word);
	  if (md!=null)
	    emitMarkupData(md);
	  else
	  { // just output the word normally
	    ensureSpaceOnLine(word.length());
	    emitString(word,config.getOutputFilters(),true);

	  } // end else

	} // end if

	// now emit the rest of the hyphens
	emitFromStartOfTempBuffer(hyph_count);

      } // end if
      else // just emit this many characters, line-breaking where required
	emitFromStartOfTempBuffer(sublen);

    } // end while

  } // end doFlushString

  /**
   * Tries to treat the contents of the temporary buffer (the text between '<'
   * and '>') as a known HTML tag.  The tag name is looked up in the
   * TagRepository, checked against the configuration's allowed tag sets, and,
   * for tags that require balancing, matched against the stack of open tags.
   * An accepted tag is either emitted to the output or silently dropped.
   *
   * @return true if the tag was handled (emitted or discarded); false if it is
   *         not acceptable as HTML and the caller must deal with it another way
   */
  private boolean handleAsHTML()
  {
    trigger_WBR = false;  // initialize

    // Figure out the place in the buffer where the command word starts.
    int start_cmd = 0;
    boolean closing_tag = false;
    if ((start_cmd<temp_buffer.length()) && (temp_buffer.charAt(start_cmd)=='/'))
    { // this is a closing tag - move the command start pointer...
      start_cmd++;
      closing_tag = true;

    } // end if

    // Now figure out where it ends.
    int end_cmd = start_cmd;
    while ((end_cmd<temp_buffer.length()) && !(Character.isWhitespace(temp_buffer.charAt(end_cmd))))
      end_cmd++;

    if ((end_cmd==start_cmd) || ((end_cmd-start_cmd)>TagRepository.getMaxTagLength()))
      return false;  // the command word is empty or is just too long to be an HTML tag

    // Look up the tag name to get a tag index from the repository.
    int tag_index = TagRepository.tagNameToIndex(temp_buffer.substring(start_cmd,end_cmd));
    if (tag_index<0)
      return false;  // not a known HTML tag

    // Look up the tag object that corresponds to the tag index.
    SimpleTag tagobj = TagRepository.tagIndexToObject(tag_index);
    if (closing_tag && !(tagobj.allowClose()))
      return false;  // this is a closing tag, and this tag doesn't permit the "close" form

    // Get the HTML tag set index for this tag, and see if we allow that set.
    int tag_set_id = TagRepository.tagIndexToSet(tag_index);
    if (!(config.isTagSetAllowed(tag_set_id)) && !(config.getDiscardHTMLTags()))
      return false;  // we're not allowing it, we're not discarding it, so punt!

    boolean valid = false;
    if (!(config.getDiscardHTMLTags()) && tagobj.balanceTags())
    { // this tag needs to be balanced - here is where we manipulate the stack
      if (closing_tag)
      { // hunt through the list to find the most recently-opened tag of this type,
        // walking from the top of the stack down to the bottom
        for (int i=(tag_stack.size()-1); i>=0; i--)
        { // look through the stack...
          Integer foo = (Integer)(tag_stack.get(i));
          if (foo.intValue()==tag_index)
          { // found it - remove it from the tag stack
            tag_stack.remove(i);
            valid = true;
            break;

          } // end if

        } // end for

      } // end if
      else
      { // push a new opening tag!
        tag_stack.add(new Integer(tag_index));
        valid = true;

      } // end else

    } // end if
    // else tag doesn't need to be auto-balanced, or is being discarded

    if (!valid && !(config.getDiscardHTMLTags()))
      return false;  // not validated by the stack code, and not being discarded

    // Give the tag object one last chance to dictate what we do with the tag.
    String real_tag_data = tagobj.rewriteTagContents(temp_buffer.toString(),closing_tag,this);
    if ((real_tag_data==null) || config.getDiscardHTMLTags())
      return true;  // tag is being erased by rewriter, or is being discarded anyway

    // Emit the tag to the output.
    emitChar('<',config.getRawOutputFilters(),false);
    emitString(real_tag_data,config.getRawOutputFilters(),false);
    emitChar('>',config.getRawOutputFilters(),false);

    // Determine whether this tag causes a "logical line break."
    boolean logical_line_break = false;
    if (trigger_WBR && !closing_tag && (nobreak_count>0))
      logical_line_break = true;
    else
      logical_line_break = tagobj.causeLineBreak(closing_tag);
    if (logical_line_break)
      columns = 0;

    return true;  // handled!

  } // end handleAsHTML()

  /**
   * Called when the closing '>' of a tag is seen.  The tag contents in the
   * temporary buffer are tried first as real HTML and then against the tag
   * rewriters.  If neither accepts them, the text is pushed back through the
   * parser as ordinary character data, angle brackets included.
   */
  private void finishTag()
  {
    boolean accepted = handleAsHTML();
    if (!accepted)
    { // not usable as HTML - give the tag rewriters a shot at it
      MarkupData md = attemptRewrite(tag_rewriters,temp_buffer.toString());
      if (md!=null)
      { // a rewriter marked it up, so emit that inside the angle brackets
        emitMarkupData(md,'<','>');
        accepted = true;

      } // end if

    } // end if

    if (accepted)
    { // the tag has been dealt with - clear the buffer and resume in whitespace mode
      temp_buffer.setLength(0);
      state = ST_WHITESPACE;
      return;

    } // end if

    // Rejected on both counts: put the '<' back as character data, re-parse the
    // contents in ST_CHARS state, then feed the '>' through as well.
    String rejected = temp_buffer.toString();
    temp_buffer.setLength(0);
    temp_buffer.append('<');
    state = ST_CHARS;
    if (rejected.length()>0)
      parse(rejected);
    parse(">");

  } // end finishTag

  /**
   * Called when the outermost closing parenthesis is seen.  The parenthesized
   * text in the temporary buffer is offered to the paren rewriters; if none
   * accepts it, the text is pushed back through the parser as ordinary
   * character data, parentheses included.
   */
  private void finishParen()
  {
    final String contents = temp_buffer.toString();
    final MarkupData md = attemptRewrite(paren_rewriters,contents);

    // Either way the buffered text has been consumed and nesting starts over.
    temp_buffer.setLength(0);
    paren_level = 0;

    if (md!=null)
    { // a rewriter marked it up - emit it inside the parentheses
      emitMarkupData(md,'(',')');
      state = ST_WHITESPACE;
      return;

    } // end if

    // Rejected: put the '(' back as character data, re-parse the contents in
    // ST_CHARS state, then feed the ')' through as well.
    temp_buffer.append('(');
    state = ST_CHARS;
    if (contents.length()>0)
      parse(contents);
    parse(")");

  } // end finishParen

  /**
   * Runs a chunk of input through the parser state machine, one character at a
   * time.  The parser state and the temporary buffer are instance fields, so
   * they carry over between calls; this method is also re-entered (via
   * finishTag(), finishParen() and finish()) to re-process rejected text.
   *
   * Not every branch consumes a character: some only switch state and leave the
   * index where it is, so that the same character is looked at again under the
   * new state.
   *
   * @param str the text to parse
   * @throws IllegalStateException if the state field holds an unknown value
   */
  private void parse(String str)
  {
    int i = 0;
    while (i<str.length())
    { // get the character from the string
      char ch = str.charAt(i);

      // now process differently based on the current parser state
      switch (state)
      {
	case ST_WHITESPACE:
	  { // Whitespace handling - look at the character
	    switch (ch)
	    {
	      case ' ':     // append spaces, tabs, and newlines verbatim to the temp buffer
	      case '\t':
	      case '\n':
		temp_buffer.append(ch);
		i++;
		break;

	      case '\r':    // compress 1 or more \r's followed by optional \n to a single \n
		// only the last \r of a run (one not followed by \r or \n) contributes the \n
		if (   (i==(str.length()-1))
		    || ((str.charAt(i+1)!='\r') && (str.charAt(i+1)!='\n')))
		  temp_buffer.append('\n');
		i++;
		break;

	      case '<':
		doFlushWhitespace();  // flush the whitespace we've already got
		if (config.getProcessAngles())
		  state = ST_LEFTANGLE;
		else
		{ // just process as an ordinary character
		  state = ST_CHARS;
		  temp_buffer.append(ch);

		} // end else
		i++;
		break;

	      case '(':
		doFlushWhitespace();  // flush the whitespace we've already got
		if (config.getProcessParens())
		  state = ST_PAREN;
		else
		{ // just process as an ordinary character
		  state = ST_CHARS;
		  temp_buffer.append(ch);

		} // end else
		i++;
		break;

	      case '\\':  // backslash processing is complex - shift to ST_CHARS state to handle it
		// (i is deliberately not advanced, so ST_CHARS sees the backslash itself)
		doFlushWhitespace();
		state = ST_CHARS;
		break;

	      default:
		doFlushWhitespace();  // flush the whitespace we've already got
		state = ST_CHARS;
		temp_buffer.append(ch);
		i++;
		break;

	    } // end switch

	  } // end case
	  break;

	case ST_CHARS:
	  { // Character data handling - look at the character
	    switch (ch)
	    {
	      case ' ':   // whitespace - drop back to whitespace mode
	      case '\t':
	      case '\n':
		doFlushString();
		state = ST_WHITESPACE;
		temp_buffer.append(ch);
		i++;
		break;

	      case '\r':  // handle \r processing in ST_WHITESPACE 'cause it's complicated
		// (i is deliberately not advanced, so ST_WHITESPACE sees the \r itself)
		doFlushString();
		state = ST_WHITESPACE;
		break;

	      case '<':   // left angle bracket - may be a start-of-tag
		if (config.getProcessAngles())
		{ // this is a tag start - go to LEFTANGLE state
		  doFlushString();
		  state = ST_LEFTANGLE;

		} // end if
		else  // just handle it normally
		  temp_buffer.append(ch);
		i++;
		break;

	      case '(':   // left parenthesis - may be a start-of-paren
		if (config.getProcessParens())
		{ // we're going into Parens mode...
		  doFlushString();
		  state = ST_PAREN;

		} // end if
		else  // just handle it normally
		  temp_buffer.append(ch);
		i++;
		break;

	      case '\\':
		if (i<(str.length()-1))
		{ // look at the character following the backslash
		  ch = str.charAt(++i);
		  if (((ch=='(') && config.getProcessParens()) || ((ch=='<') && config.getProcessAngles()))
		  { // append the escaped character, omitting the backslash
		    temp_buffer.append(ch);
		    i++;

		  } // end if
		  else // append the backslash and hit the new character
		    // (i now points at that character; it is processed on the next pass)
		    temp_buffer.append('\\');

		} // end if
		else
		{ // just append the backslash normally
		  temp_buffer.append('\\');
		  i++;

		} // end else
		break;

	      default:  // just append the next non-white character
		temp_buffer.append(ch);
		i++;
		break;

	    } // end switch

	  } // end case
	  break;

	case ST_LEFTANGLE:
	  { // Left Angle processing - this isn't very difficult
	    switch (ch)
	    {
	      case ' ':
	      case '\t':
	      case '\r':
	      case '\n':  // output the < and click back to whitespace mode
		// (i is not advanced; the whitespace character is handled by ST_WHITESPACE)
		emitChar('<',config.getOutputFilters(),true);
		state = ST_WHITESPACE;
		break;

	      case '<':   // output the < and stay in this mode
		emitChar('<',config.getOutputFilters(),true);
		i++;
		break;

	      default:    // click over to TAG mode
		state = ST_TAG;
		temp_buffer.append(ch);
		i++;
		break;

	    } // end switch

	  } // end case
	  break;

	case ST_TAG:
	  { // inside a tag - process the character here
	    switch (ch)
	    {
	      case '>':  // end tag
		finishTag();  // this changes the state, and maybe calls parse() recursively
		i++;
		break;

	      case '\'':  // go into "quote string" mode inside tag
	      case '\"':
		temp_buffer.append(ch);
		state = ST_TAGQUOTE;
		quote_char = ch;
		i++;
		break;

	      default:   // just append more data to the temp buffer
		temp_buffer.append(ch);
		i++;
		break;

	    } // end switch

	  } // end case
	  break;

	case ST_PAREN:
	  { // inside parentheses - try to build something up
	    switch (ch)
	    {
	      case '(':  // append the open parenthesis and kick it up a notch!
		temp_buffer.append(ch);
		paren_level++;
		i++;
		break;

	      case ')':
		// only the close parenthesis matching the outermost open one ends the element
		if (paren_level==0)
		  finishParen();  // will change the parser state
		else
		{ // append the close parenthesis and kick it DOWN a notch
		  temp_buffer.append(ch);
		  paren_level--;

		} // end else
		i++;
		break;

	      default:
		temp_buffer.append(ch);
		i++;
		break;

	    } // end switch

	  } // end case
	  break;

	case ST_TAGQUOTE:
	  // inside a quoted string within a tag - everything is literal until the matching quote
	  temp_buffer.append(ch);
	  if (ch==quote_char) // close quote seen - go back to ST_TAG state
	    state = ST_TAG;
	  i++;
	  break;

	default:
	  throw new IllegalStateException("invalid parser state value");

      } // end switch

    } // end while (looking through string)

  } // end parse

  /*--------------------------------------------------------------------------------
   * Implementations from interface HTMLChecker
   *--------------------------------------------------------------------------------
   */

  /**
   * Feeds more text to the checker.  The first call after construction or
   * reset() allocates the parser's working state.
   *
   * @param str the text to parse
   * @throws AlreadyFinishedException if finish() has already been called
   */
  public void append(String str) throws AlreadyFinishedException
  {
    if (finished)
      throw new AlreadyFinishedException();

    if (started)
    { // the parser is already set up - just feed it
      parse(str);
      return;

    } // end if

    // first use: set up the parser state before parsing anything
    initState();
    started = true;
    parse(str);

  } // end append

  /**
   * Ends the parse.  Whatever the parser still holds is resolved according to
   * its state: pending character data is flushed, a dangling '<' is emitted, and
   * an unterminated tag or parenthesized element is rejected and re-parsed as
   * ordinary text.  Closing tags are then appended for every tag still on the
   * tag stack, the working buffers are released, and the checker is marked
   * finished so that the result accessors may be used.
   *
   * @throws AlreadyFinishedException if finish() has already been called
   */
  public void finish() throws AlreadyFinishedException
  {
    if (finished)
      throw new AlreadyFinishedException();
    if (!started)
    { // set up the initial parser state (so we don't kill ourselves later)
      initState();
      started = true;

    } // end if

    // This is the "end parse" loop, in which we resolve any funny state the parser has
    // found itself in and clear out the internal buffers.
    boolean running;
    do
    { // Assume this pass is the last one; only the rejection cases below ask for
      // another pass.  Without this reset, a single rejection would leave the flag
      // set for good and the loop would never terminate.
      running = false;

      // what we do depends on the parser state...
      switch (state)
      {
        case ST_WHITESPACE:
          break;    // discard any whitespace at the end of output

        case ST_CHARS:
          doFlushString();  // flush out the temporary buffer
          break;

        case ST_LEFTANGLE:  // just emit a '<' character
          emitPossibleLineBreak();
          emitChar('<',config.getOutputFilters(),true);
          break;

        case ST_TAG:
        case ST_TAGQUOTE:
          { // we won't finish this tag, so it's automagically rejected
            String rejection = temp_buffer.toString();
            temp_buffer.setLength(0);
            temp_buffer.append('<');
            state = ST_CHARS;

            // now parse the tag contents again
            if (rejection.length()>0)
              parse(rejection);

            running = true;  // go back around to resolve whatever state that left us in

          } // end case
          break;

        case ST_PAREN:
          { // we won't finish this paren tag, so it's automagically rejected
            String rejection = temp_buffer.toString();
            temp_buffer.setLength(0);
            temp_buffer.append('(');
            state = ST_CHARS;
            paren_level = 0;

            // now parse the parenthesis contents again
            if (rejection.length()>0)
              parse(rejection);

            running = true;  // go back around to resolve whatever state that left us in

          } // end case
          break;

      } // end switch

    } while (running);  // end do

    // Now close all the HTML tags that were left open, most recently opened first.
    for (int i=(tag_stack.size()-1); i>=0; i--)
    { // get each element in the tag stack and append the appropriate closing tag
      Integer foo = (Integer)(tag_stack.get(i));
      SimpleTag tagobj = TagRepository.tagIndexToObject(foo.intValue());
      output_buffer.append(tagobj.makeClosingTag());

    } // end for

    // release the working buffers and mark the object as finished
    killState();
    lines++;  // there's one extra line at the end
    finished = true;

  } // end finish

  /**
   * Returns the checker to its just-constructed parsing state so it can be
   * reused: flags, parser state, counts, buffers and the tag stack are cleared,
   * and every rewriter counter is zeroed.  The no-break count is cleared as
   * well, so an unbalanced NOBR in one run cannot suppress word wrapping in the
   * next.  Context values set through setContextValue() are left in place.
   */
  public void reset()
  {
    started = false;
    finished = false;
    trigger_WBR = false;
    state = ST_WHITESPACE;
    quote_char = '\0';
    columns = 0;
    lines = 0;
    paren_level = 0;
    nobreak_count = 0;
    output_buffer = null;
    killState();

    // Also reset all the counters.
    Iterator it = counters.values().iterator();
    while (it.hasNext())
    { // reset each counter in turn
      CountingRewriter cr = (CountingRewriter)(it.next());
      cr.reset();

    } // end while

  } // end reset

  /**
   * Returns the checked output text.
   *
   * @return the contents of the output buffer as a string
   * @throws NotYetFinishedException if finish() has not been called yet
   */
  public String getValue() throws NotYetFinishedException
  {
    if (finished)
      return output_buffer.toString();

    throw new NotYetFinishedException();

  } // end getValue

  /**
   * Returns the length, in characters, of the checked output text.
   *
   * @return the length of the output buffer
   * @throws NotYetFinishedException if finish() has not been called yet
   */
  public int getLength() throws NotYetFinishedException
  {
    if (finished)
      return output_buffer.length();

    throw new NotYetFinishedException();

  } // end getLength

  /**
   * Returns the number of lines counted in the output.
   *
   * @return the line count
   * @throws NotYetFinishedException if finish() has not been called yet
   */
  public int getLines() throws NotYetFinishedException
  {
    if (finished)
      return lines;

    throw new NotYetFinishedException();

  } // end getLines

  /**
   * Returns how many times the named rewriter produced markup during this run.
   *
   * @param name the rewriter name to look up
   * @return the rewriter's count, or 0 if no counter is registered under that name
   * @throws NotYetFinishedException if finish() has not been called yet
   */
  public int getCounter(String name) throws NotYetFinishedException
  {
    if (!finished)
      throw new NotYetFinishedException();

    Object entry = counters.get(name);
    return (entry==null) ? 0 : ((CountingRewriter)entry).getCount();

  } // end getCounter

  /**
   * Looks up a context variable.
   *
   * @param name the variable name
   * @return the stored value, or null if nothing is stored under that name
   */
  public Object getContextValue(String name)
  {
    final Object stored = context_data.get(name);
    return stored;

  } // end getContextValue

  /**
   * Stores a context variable, replacing any value already held under the same
   * name.  The values live in a Hashtable, which accepts neither a null name
   * nor a null value.
   *
   * @param name the variable name
   * @param val  the value to store
   */
  public void setContextValue(String name, Object val)
  {
    context_data.put(name,val);

  } // end setContextValue

  /*--------------------------------------------------------------------------------
   * Implementations from interface HTMLCheckerBackend
   *--------------------------------------------------------------------------------
   */

  /**
   * Returns the value of a named checker attribute.  The only attribute
   * defined is "ANCHORTAIL", which maps to the configuration's anchor tail.
   *
   * The name is matched by content rather than by reference, so it is
   * recognized however the caller built the string.
   *
   * @param name the attribute name
   * @return the attribute's value
   * @throws IllegalArgumentException if the name is null or not a defined attribute
   */
  public String getCheckerAttrValue(String name)
  {
    if ("ANCHORTAIL".equals(name))
      return config.getAnchorTail();

    throw new IllegalArgumentException("attribute \"" + name + "\" is not defined");

  } // end getCheckerAttrValue

  /**
   * Receives a message from a tag object.  Three messages are defined:
   * "NOBR" increments the no-break count, "/NOBR" decrements it, and "WBR"
   * sets the word-break trigger.
   *
   * Messages are matched by content rather than by reference, so they are
   * recognized however the caller built the string.
   *
   * @param msg the message name
   * @throws IllegalArgumentException if the message is null or not a defined message
   */
  public void sendTagMessage(String msg)
  {
    if ("NOBR".equals(msg))
    { // increment the no-break count
      nobreak_count++;
      return;

    } // end if

    if ("/NOBR".equals(msg))
    { // decrement the no-break count
      nobreak_count--;
      return;

    } // end if

    if ("WBR".equals(msg))
    { // trigger a word break
      trigger_WBR = true;
      return;

    } // end if

    throw new IllegalArgumentException("message \"" + msg + "\" is not defined");

  } // end sendTagMessage

  /**
   * Looks up a context variable on behalf of a tag object.
   *
   * @param name the variable name
   * @return the stored value, or null if nothing is stored under that name
   */
  public Object getCheckerContextValue(String name)
  {
    final Object stored = context_data.get(name);
    return stored;

  } // end getCheckerContextValue

  /*--------------------------------------------------------------------------------
   * Implementations from interface RewriterServices
   *--------------------------------------------------------------------------------
   */

  /**
   * Returns the value of a named attribute on behalf of a rewriter.  This is
   * the same lookup that getCheckerAttrValue() performs, including its
   * exception for undefined names.
   *
   * @param name the attribute name
   * @return the attribute's value
   */
  public String getRewriterAttrValue(String name)
  {
    final String value = getCheckerAttrValue(name);
    return value;

  } // end getRewriterAttrValue

  /**
   * Looks up a context variable on behalf of a rewriter.
   *
   * @param name the variable name
   * @return the stored value, or null if nothing is stored under that name
   */
  public Object getRewriterContextValue(String name)
  {
    final Object stored = context_data.get(name);
    return stored;

  } // end getRewriterContextValue

} // end class HTMLCheckerImpl