From a459e02e79d8d30e41a0e4105050bc906ee37421 Mon Sep 17 00:00:00 2001 From: "Eric J. Bowersox" Date: Mon, 11 Mar 2002 06:47:01 +0000 Subject: [PATCH] added a mode for posting messages via XML-RPC that works better with HTML E-mail messages --- rpcscripts/conf/topic.js | 26 +- .../silverwrist/venice/core/TopicContext.java | 9 +- .../core/impl/PublishedMessageTopicImpl.java | 7 + .../core/impl/TopicUserContextImpl.java | 36 +- .../venice/core/impl/VeniceEngineImpl.java | 27 +- .../venice/core/internals/EngineBackend.java | 1 + .../venice/htmlcheck/HTMLCheckerConfig.java | 20 +- .../venice/htmlcheck/HTMLTagSets.java | 8 +- .../htmlcheck/impl/HTMLCheckerConfigImpl.java | 56 ++- .../htmlcheck/impl/HTMLCheckerImpl.java | 384 ++++++++++++++---- .../venice/htmlcheck/impl/TagRepository.java | 5 +- 11 files changed, 481 insertions(+), 98 deletions(-) diff --git a/rpcscripts/conf/topic.js b/rpcscripts/conf/topic.js index 4cf792b..22871d7 100644 --- a/rpcscripts/conf/topic.js +++ b/rpcscripts/conf/topic.js @@ -39,12 +39,32 @@ topic = xreq.getParamTopic(3,conf); if ("postMessage"==call_name) { // venice:conferencing.topic.postMessage + // [] // Posts a message, returns the message number within the topic - if (xreq.paramCount!=6) + if ((xreq.paramCount!=6) && (xreq.paramCount!=7)) vlib.output(new XmlRpcFault(XmlRpcFault.INVALID_PARAMS,"parameter count mismatch")); else - { // post the message - msg = topic.postNewMessage(0,xreq.getParamString(4),xreq.getParamString(5)); + { // determine the mode + mode = TopicContext.POST_MODE_NORMAL; + if (xreq.paramCount==7) + { // interpret the posting mode + s = xreq.getParamString(6); + if (s.equalsIgnoreCase("normal")) + mode = TopicContext.POST_MODE_NORMAL; + else if (s.equalsIgnoreCase("email")) + mode = TopicContext.POST_MODE_EMAIL; + else + { // invalid mode parameter + vlib.output(new XmlRpcFault(XmlRpcFault.INVALID_PARAMS,"invalid post mode")); + vlib.done(); + + } // end else + + } // end if + // else use the default of "normal" + + // post the message + msg = topic.postNewMessage(0,xreq.getParamString(4),xreq.getParamString(5),mode); vlib.output(vlib.createInteger(msg.postNumber)); } // end else diff --git a/src/com/silverwrist/venice/core/TopicContext.java b/src/com/silverwrist/venice/core/TopicContext.java index 1f62e87..5a9a00b 100644 --- a/src/com/silverwrist/venice/core/TopicContext.java +++ b/src/com/silverwrist/venice/core/TopicContext.java @@ -11,7 +11,7 @@ * * The Initial Developer of the Original Code is Eric J. Bowersox , * for Silverwrist Design Studios. Portions created by Eric J. Bowersox are - * Copyright (C) 2001 Eric J. Bowersox/Silverwrist Design Studios. All Rights Reserved. + * Copyright (C) 2001-02 Eric J. Bowersox/Silverwrist Design Studios. All Rights Reserved. * * Contributor(s): */ @@ -26,6 +26,10 @@ import com.silverwrist.venice.htmlcheck.HTMLChecker; public interface TopicContext { + // Modes for postNewMessage() + public static final int POST_MODE_NORMAL = 0; + public static final int POST_MODE_EMAIL = 1; + public abstract void refresh() throws DataException; public abstract int getTopicID(); @@ -72,6 +76,9 @@ public interface TopicContext public abstract TopicMessageContext getMessage(int number) throws DataException, AccessError; + public abstract TopicMessageContext postNewMessage(long parent, String pseud, String text, int mode) + throws DataException, AccessError; + public abstract TopicMessageContext postNewMessage(long parent, String pseud, String text) throws DataException, AccessError; diff --git a/src/com/silverwrist/venice/core/impl/PublishedMessageTopicImpl.java b/src/com/silverwrist/venice/core/impl/PublishedMessageTopicImpl.java index c8600a2..dcbe5c5 100644 --- a/src/com/silverwrist/venice/core/impl/PublishedMessageTopicImpl.java +++ b/src/com/silverwrist/venice/core/impl/PublishedMessageTopicImpl.java @@ -254,6 +254,13 @@ class PublishedMessageTopicImpl implements TopicContext } // end getMessage + public TopicMessageContext postNewMessage(long parent, String pseud, String text, int mode) + throws AccessError + { + throw new AccessError("cannot perform this function from a read-only topic view"); + + } // end postNewMessage + public TopicMessageContext postNewMessage(long parent, String pseud, String text) throws AccessError { throw new AccessError("cannot perform this function from a read-only topic view"); diff --git a/src/com/silverwrist/venice/core/impl/TopicUserContextImpl.java b/src/com/silverwrist/venice/core/impl/TopicUserContextImpl.java index 6e6b661..fe7ee81 100644 --- a/src/com/silverwrist/venice/core/impl/TopicUserContextImpl.java +++ b/src/com/silverwrist/venice/core/impl/TopicUserContextImpl.java @@ -631,11 +631,14 @@ class TopicUserContextImpl implements TopicContext } // end getMessage - public TopicMessageContext postNewMessage(long parent, String pseud, String text) + public TopicMessageContext postNewMessage(long parent, String pseud, String text, int mode) throws DataException, AccessError { if (logger.isInfoEnabled()) - logger.info("postNewMessage(" + parent + ", '" + pseud + "',) entry"); + logger.info("postNewMessage(" + parent + ", '" + pseud + "',," + mode + ") entry"); + + if ((mode!=POST_MODE_NORMAL) && (mode!=POST_MODE_EMAIL)) + throw new IllegalArgumentException("invalid mode parameter"); if (!(env.getConference().userCanPost())) { // they can't post in this topic! @@ -665,9 +668,25 @@ class TopicUserContextImpl implements TopicContext } // end if + // figure out which HTML checkers to use + int pseud_ch_index = EngineBackend.HTMLC_POST_PSEUD; + int body_ch_index = EngineBackend.HTMLC_POST_BODY; + if (mode==POST_MODE_NORMAL) + { // configure for normal posting + pseud_ch_index = EngineBackend.HTMLC_POST_PSEUD; + body_ch_index = EngineBackend.HTMLC_POST_BODY; + + } // end if + else if (mode==POST_MODE_EMAIL) + { // configure for E-mail posting + pseud_ch_index = EngineBackend.HTMLC_POST_PSEUD; + body_ch_index = EngineBackend.HTMLC_POST_BODY_EMAIL; + + } // end else if + // preprocess the two arguments through HTML checkers - HTMLChecker pseud_ch = env.getEngine().createCheckerObject(EngineBackend.HTMLC_POST_PSEUD); - HTMLChecker text_ch = env.getEngine().createCheckerObject(EngineBackend.HTMLC_POST_BODY); + HTMLChecker pseud_ch = env.getEngine().createCheckerObject(pseud_ch_index); + HTMLChecker text_ch = env.getEngine().createCheckerObject(body_ch_index); text_ch.setContextValue("PostLinkDecoderContext",env.getConference().createDecoderContext(topicnum)); try { // run both arguments through the HTML checker @@ -887,7 +906,14 @@ class TopicUserContextImpl implements TopicContext return new TopicMessageUserContextImpl(env,new_post_id,parent,topicid,new_post_num,text_linecount, env.getUserID(),posted_date,real_pseud); - } // end postMessage + } // end postNewMessage + + public TopicMessageContext postNewMessage(long parent, String pseud, String text) + throws DataException, AccessError + { + return this.postNewMessage(parent,pseud,text,POST_MODE_NORMAL); + + } // end postNewMessage public HTMLChecker getPreviewChecker() { diff --git a/src/com/silverwrist/venice/core/impl/VeniceEngineImpl.java b/src/com/silverwrist/venice/core/impl/VeniceEngineImpl.java index edc052b..d0b0c32 100644 --- a/src/com/silverwrist/venice/core/impl/VeniceEngineImpl.java +++ b/src/com/silverwrist/venice/core/impl/VeniceEngineImpl.java @@ -739,7 +739,7 @@ public class VeniceEngineImpl implements VeniceEngine, EngineBackend LazyTreeLexicon lex = new LazyTreeLexicon((String[])(dictionary_tmp.toArray(new String[0]))); spell_rewriter.addDictionary(lex); - html_configs = new HTMLCheckerConfig[5]; // create the array + html_configs = new HTMLCheckerConfig[6]; // create the array // Create the HTML checker config used to post body text to the database. HTMLCheckerConfig cfg = HTMLCheckerCreator.create(); @@ -805,9 +805,34 @@ public class VeniceEngineImpl implements VeniceEngine, EngineBackend cfg.setProcessAngles(true); cfg.setProcessParens(false); cfg.setDiscardHTMLTags(true); + cfg.setDiscardRejectedHTML(true); cfg.configureNormalTagSet(); html_configs[HTMLC_MAIL_POST] = cfg; + // Create the HTML checker config used to post body text to the database from an E-mail message. + cfg = HTMLCheckerCreator.create(); + cfg.setWordWrapLength((short)55); + cfg.setRewrapLines(true); + cfg.setProcessAngles(true); + cfg.setProcessParens(true); + cfg.setDiscardHTMLTags(false); + cfg.setDiscardRejectedHTML(true); + cfg.setDiscardHTMLComments(true); + cfg.setDiscardXMLConstructs(true); + cfg.addOutputFilter(html_filter); + cfg.addOutputFilter(sql_filter); + cfg.addRawOutputFilter(sql_filter); + cfg.addStringRewriter(email_rewriter); + cfg.addStringRewriter(url_rewriter); + cfg.addTagRewriter(postlink_rewriter); + cfg.addTagRewriter(username_rewriter); + cfg.addTagRewriter(email_rewriter); + cfg.addTagRewriter(url_rewriter); + cfg.addParenRewriter(username_rewriter); + cfg.configureNormalTagSet(); + cfg.disallowTagSet(HTMLTagSets.FONT_FORMAT); + html_configs[HTMLC_POST_BODY_EMAIL] = cfg; + if (logger.isDebugEnabled()) logger.debug("initialize() complete :-)"); diff --git a/src/com/silverwrist/venice/core/internals/EngineBackend.java b/src/com/silverwrist/venice/core/internals/EngineBackend.java index 8f45f58..e7962e7 100644 --- a/src/com/silverwrist/venice/core/internals/EngineBackend.java +++ b/src/com/silverwrist/venice/core/internals/EngineBackend.java @@ -37,6 +37,7 @@ public interface EngineBackend public static final int HTMLC_PREVIEW_BODY = 2; public static final int HTMLC_ESCAPE_BODY_PSEUD = 3; public static final int HTMLC_MAIL_POST = 4; + public static final int HTMLC_POST_BODY_EMAIL = 5; // Integer parameter indexes public static final int IP_POSTSPERPAGE = 0; diff --git a/src/com/silverwrist/venice/htmlcheck/HTMLCheckerConfig.java b/src/com/silverwrist/venice/htmlcheck/HTMLCheckerConfig.java index 5869d1b..ac712b6 100644 --- a/src/com/silverwrist/venice/htmlcheck/HTMLCheckerConfig.java +++ b/src/com/silverwrist/venice/htmlcheck/HTMLCheckerConfig.java @@ -7,11 +7,11 @@ * WARRANTY OF ANY KIND, either express or implied. See the License for the specific * language governing rights and limitations under the License. * - * The Original Code is the Venice Web Community System. + * The Original Code is the Venice Web Communities System. * * The Initial Developer of the Original Code is Eric J. Bowersox , * for Silverwrist Design Studios. Portions created by Eric J. Bowersox are - * Copyright (C) 2001 Eric J. Bowersox/Silverwrist Design Studios. All Rights Reserved. + * Copyright (C) 2001-02 Eric J. Bowersox/Silverwrist Design Studios. All Rights Reserved. * * Contributor(s): */ @@ -23,6 +23,10 @@ public interface HTMLCheckerConfig extends HTMLTagSets public abstract void setWordWrapLength(short val); + public abstract boolean getRewrapLines(); + + public abstract void setRewrapLines(boolean val); + public abstract boolean getProcessAngles(); public abstract void setProcessAngles(boolean val); @@ -35,6 +39,18 @@ public interface HTMLCheckerConfig extends HTMLTagSets public abstract void setDiscardHTMLTags(boolean val); + public abstract boolean getDiscardRejectedHTML(); + + public abstract void setDiscardRejectedHTML(boolean val); + + public abstract boolean getDiscardHTMLComments(); + + public abstract void setDiscardHTMLComments(boolean val); + + public abstract boolean getDiscardXMLConstructs(); + + public abstract void setDiscardXMLConstructs(boolean val); + public abstract String getAnchorTail(); public abstract void setAnchorTail(String s); diff --git a/src/com/silverwrist/venice/htmlcheck/HTMLTagSets.java b/src/com/silverwrist/venice/htmlcheck/HTMLTagSets.java index 30420e3..244d097 100644 --- a/src/com/silverwrist/venice/htmlcheck/HTMLTagSets.java +++ b/src/com/silverwrist/venice/htmlcheck/HTMLTagSets.java @@ -7,11 +7,11 @@ * WARRANTY OF ANY KIND, either express or implied. See the License for the specific * language governing rights and limitations under the License. * - * The Original Code is the Venice Web Community System. + * The Original Code is the Venice Web Communities System. * * The Initial Developer of the Original Code is Eric J. Bowersox , * for Silverwrist Design Studios. Portions created by Eric J. Bowersox are - * Copyright (C) 2001 Eric J. Bowersox/Silverwrist Design Studios. All Rights Reserved. + * Copyright (C) 2001-02 Eric J. Bowersox/Silverwrist Design Studios. All Rights Reserved. * * Contributor(s): */ @@ -134,5 +134,9 @@ public interface HTMLTagSets * including server-side components. These are generally not allowed. */ public static final int JAVA_SERVER = 24; + /** + * Denotes HTML comments. These are generally not allowed. + */ + public static final int COMMENT = 25; } // end interface HTMLTagSets diff --git a/src/com/silverwrist/venice/htmlcheck/impl/HTMLCheckerConfigImpl.java b/src/com/silverwrist/venice/htmlcheck/impl/HTMLCheckerConfigImpl.java index 3a1c51d..2ae89de 100644 --- a/src/com/silverwrist/venice/htmlcheck/impl/HTMLCheckerConfigImpl.java +++ b/src/com/silverwrist/venice/htmlcheck/impl/HTMLCheckerConfigImpl.java @@ -11,7 +11,7 @@ * * The Initial Developer of the Original Code is Eric J. Bowersox , * for Silverwrist Design Studios. Portions created by Eric J. Bowersox are - * Copyright (C) 2001 Eric J. Bowersox/Silverwrist Design Studios. All Rights Reserved. + * Copyright (C) 2001-02 Eric J. Bowersox/Silverwrist Design Studios. All Rights Reserved. * * Contributor(s): */ @@ -34,9 +34,13 @@ public class HTMLCheckerConfigImpl implements HTMLCheckerConfig *-------------------------------------------------------------------------------- */ + private boolean rewrap_lines = false; // re-word-wrap lines? private boolean process_angles = true; // process angle-bracketed strings? private boolean process_parens = true; // process parenthesized strings? private boolean discard_html_tags = false; // discard all HTML tags? + private boolean discard_rejected_html = false; // discard HTML tags that are rejected? + private boolean discard_html_comments = false; // discard HTML comments? + private boolean discard_xml_constructs = false; // discard XML constructs (namespaced tags)? private short word_wrap_length = 0; // word wrap length private String anchor_tail = DEFAULT_ANCHOR_TAIL; // the tail end of the anchor private BitSet allowed_tagsets = new BitSet(); // which tagsets are allowed? @@ -78,6 +82,18 @@ public class HTMLCheckerConfigImpl implements HTMLCheckerConfig } // end setWordWrapLength + public boolean getRewrapLines() + { + return rewrap_lines; + + } // end getRewrapLines + + public void setRewrapLines(boolean val) + { + rewrap_lines = val; + + } // end setRewrapLines + public boolean getProcessAngles() { return process_angles; @@ -114,6 +130,42 @@ public class HTMLCheckerConfigImpl implements HTMLCheckerConfig } // end setDiscardHTMLTags + public boolean getDiscardRejectedHTML() + { + return discard_rejected_html; + + } // end getDiscardRejectedHTML + + public void setDiscardRejectedHTML(boolean val) + { + discard_rejected_html = val; + + } // end setDiscardRejectedHTML + + public boolean getDiscardHTMLComments() + { + return discard_html_comments; + + } // end getDiscardHTMLComments + + public void setDiscardHTMLComments(boolean val) + { + discard_html_comments = val; + + } // end setDiscardHTMLComments + + public boolean getDiscardXMLConstructs() + { + return discard_xml_constructs; + + } // end getDiscardXMLConstructs + + public void setDiscardXMLConstructs(boolean val) + { + discard_xml_constructs = val; + + } // end setDiscardXMLConstructs + public String getAnchorTail() { return anchor_tail; @@ -158,6 +210,8 @@ public class HTMLCheckerConfigImpl implements HTMLCheckerConfig public void addParenRewriter(Rewriter rewriter) { + paren_rewriters.add(rewriter); + } // end addParenRewriter public boolean isTagSetAllowed(int setid) diff --git a/src/com/silverwrist/venice/htmlcheck/impl/HTMLCheckerImpl.java b/src/com/silverwrist/venice/htmlcheck/impl/HTMLCheckerImpl.java index 8edfa11..d763f6f 100644 --- a/src/com/silverwrist/venice/htmlcheck/impl/HTMLCheckerImpl.java +++ b/src/com/silverwrist/venice/htmlcheck/impl/HTMLCheckerImpl.java @@ -11,7 +11,7 @@ * * The Initial Developer of the Original Code is Eric J. Bowersox , * for Silverwrist Design Studios. Portions created by Eric J. Bowersox are - * Copyright (C) 2001 Eric J. Bowersox/Silverwrist Design Studios. All Rights Reserved. + * Copyright (C) 2001-02 Eric J. Bowersox/Silverwrist Design Studios. All Rights Reserved. * * Contributor(s): */ @@ -79,6 +79,7 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic private static final short ST_TAG = 3; private static final short ST_PAREN = 4; private static final short ST_TAGQUOTE = 5; + private static final short ST_NEWLINE = 6; /*-------------------------------------------------------------------------------- * Internal constants @@ -139,12 +140,26 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic *-------------------------------------------------------------------------------- */ + /** + * Returns true if this character belongs as part of a word, false if not. + * + * @param ch Character to be tested. + * @return See above. + */ private static final boolean isWordChar(char ch) { return (Character.isUpperCase(ch) || Character.isLowerCase(ch) || (ch=='-') || (ch=='\'')); } // end isWordChar + /** + * Returns the maximum length of a "run" of word characters or non-word characters in the + * buffer, beginning at the specified start point, before a character of the opposite classification. + * + * @param buf The buffer to look through. + * @param start The start position to look at. + * @return See above. + */ private static final int getRunLength(StringBuffer buf, int start) { boolean word_char = isWordChar(buf.charAt(start)); @@ -161,13 +176,27 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic } // end getRunLength + /** + * Returns the maximum length of a "run" of word characters or non-word characters in the + * buffer, counting from the start of the buffer, before a character of the opposite classification. + * + * @param buf The buffer to look through. + * @return See above. + */ private static final int getRunLength(StringBuffer buf) { return getRunLength(buf,0); } // end getRunLength - private void copyRewriters(ArrayList dest, List source) + /** + * Copies the Rewriter objects from an outside list to an internal list, wrapping + * named rewriters in CountingRewriter objects as appropriate. + * + * @param dest Destination to copy rewriters to. + * @param source List to copy rewriters from. + */ + private final void copyRewriters(ArrayList dest, List source) { Iterator it = source.iterator(); while (it.hasNext()) @@ -190,7 +219,17 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic } // end copyRewriters - private MarkupData attemptRewrite(List rewriters, String data) + /** + * Use the specified list of rewriters to attempt to rewrite the specified string data. + * The first rewriter in the list that returns a valid MarkupData object takes + * precedence. + * + * @param rewriters List of rewriters to try against the rewriter data. + * @param data String data to attempt to rewrite. + * @return A MarkupData object that contains the marked-up data to output, or + * null if no rewriter handled the data. + */ + private final MarkupData attemptRewrite(List rewriters, String data) { Iterator it = rewriters.iterator(); MarkupData rc = null; @@ -205,7 +244,10 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic } // end attemptRewrite - private void initState() + /** + * Initializes the internal state of the parser. + */ + private final void initState() { output_buffer = new StringBuffer(1024); temp_buffer = new StringBuffer(64); @@ -213,7 +255,10 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic } // end initState - private void killState() + /** + * Erases any temporary data that is no longer needed after the parser finishes. + */ + private final void killState() { temp_buffer = null; if (tag_stack!=null) @@ -222,7 +267,15 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic } // end killState - private void emitChar(char ch, List filters, boolean count_cols) + /** + * Emits one character to the output of the HTML checker, running it through a list of defined filters. + * + * @param ch Character to output. + * @param filters List of filters to use to attempt to process the character. + * @param count_cols true if the character output adds to the column counter, + * false if not. + */ + private final void emitChar(char ch, List filters, boolean count_cols) { boolean handled = false; if (filters.size()>0) @@ -244,7 +297,15 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic } // end emitChar - private void emitString(String str, List filters, boolean count_cols) + /** + * Emits a character string to the output of the HTML checker, running it through a list of defined filters. + * + * @param str String to output. + * @param filters List of filters to use to attempt to process the string. + * @param count_cols true if the characters output add to the column counter, + * false if not. + */ + private final void emitString(String str, List filters, boolean count_cols) { boolean real_count_cols = count_cols && (config.getWordWrapLength()>0); @@ -309,7 +370,11 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic } // end emitString - private void emitLineBreak() + /** + * Emits a line break to the output of the HTML checker, resetting the column counter and advancing + * the line counter. + */ + private final void emitLineBreak() { emitString("\r\n",config.getRawOutputFilters(),false); if (config.getWordWrapLength()>0) @@ -318,14 +383,24 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic } // end emitLineBreak - private void emitPossibleLineBreak() + /** + * If the current line of text contains more characters than will fit on the line, emit a line break. + * Disabled if the checker does not word-wrap. + */ + private final void emitPossibleLineBreak() { if ((config.getWordWrapLength()>0) && (nobreak_count<=0) && (columns>=config.getWordWrapLength())) emitLineBreak(); } // end emitPossibleLineBreak - private void ensureSpaceOnLine(int nchars) + /** + * Ensure that the current line has a certain number of characters of space left on it; if it does not, + * emit a line break. + * + * @param nchars Number of characters to reserve on the line. + */ + private final void ensureSpaceOnLine(int nchars) { if ((config.getWordWrapLength()>0) && (nobreak_count<=0)) { // line break might be required here @@ -337,71 +412,111 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic } // end ensureSpaceOnLine - private void emitMarkupData(MarkupData md) + private final void emitMarkupData(MarkupData md) { ensureSpaceOnLine(md.getText().length()); - emitString(md.getBeginMarkup(),config.getRawOutputFilters(),false); + List raw_filters = config.getRawOutputFilters(); + emitString(md.getBeginMarkup(),raw_filters,false); emitString(md.getText(),config.getOutputFilters(),true); - emitString(md.getEndMarkup(),config.getRawOutputFilters(),false); + emitString(md.getEndMarkup(),raw_filters,false); } // end emitMarkupData - private void emitMarkupData(MarkupData md, char prefix, char suffix) + private final void emitMarkupData(MarkupData md, char prefix, char suffix) { ensureSpaceOnLine(md.getText().length() + 2); - emitChar(prefix,config.getOutputFilters(),true); - emitString(md.getBeginMarkup(),config.getRawOutputFilters(),false); - emitString(md.getText(),config.getOutputFilters(),true); - emitString(md.getEndMarkup(),config.getRawOutputFilters(),false); - emitChar(suffix,config.getOutputFilters(),true); + List raw_filters = config.getRawOutputFilters(); + List cooked_filters = config.getOutputFilters(); + emitChar(prefix,cooked_filters,true); + emitString(md.getBeginMarkup(),raw_filters,false); + emitString(md.getText(),cooked_filters,true); + emitString(md.getEndMarkup(),raw_filters,false); + emitChar(suffix,cooked_filters,true); } // end emitMarkupData - private void doFlushWhitespace() + private final void doFlushWhitespace() { - while (temp_buffer.length()>0) - { // calculate where the next line break is - int line_break = temp_buffer.toString().indexOf('\n'); - int output_len = line_break; - boolean force_line_break = false; - if (output_len<0) - output_len = temp_buffer.length(); + int output_len = temp_buffer.length(); + if (output_len<=0) + return; + boolean force_line_break = false; - if ((config.getWordWrapLength()>0) && (nobreak_count<=0)) - { // adjust output if necessary for word wrapping - int remain_space = (int)(config.getWordWrapLength() - columns); - if (remain_space0) && (nobreak_count<=0)) + { // adjust output if necessary for word wrapping + int remain_space = (int)(config.getWordWrapLength() - columns); + if (remain_space0) - emitString(temp_buffer.substring(0,output_len),config.getOutputFilters(),true); + } // end if - if (line_break>=0) - { // there's a line break present - emit the line break - emitLineBreak(); // output the line break character - if (++line_break0) + emitString(temp_buffer.substring(0,output_len),config.getOutputFilters(),true); - } // end if - else - { // no more line breaks on this line - clear out the buffer - if (force_line_break) - emitLineBreak(); // notice we can only force a line break if we didn't have one in the text - temp_buffer.setLength(0); - - } // end else - - } // end while (still data in temp buffer) + // clear out the buffer + temp_buffer.setLength(0); } // end doFlushWhitespace - private void emitFromStartOfTempBuffer(int nchars) + private final void doFlushNewlines() + { + //logger.debug("FlushNewLines"); + int line_breaks = 0, crs = 0; + for (int i=0; i0) + line_breaks++; + //logger.debug("Total line breaks: " + line_breaks); + + if (config.getRewrapLines()) + { // rewrap lines forces a lot of adjustment in lines... + if (line_breaks<2) + { // convert a single line break to whitespace + temp_buffer.setLength(0); + temp_buffer.append(' '); + state = ST_WHITESPACE; + return; + + } // end if + else + line_breaks = 2; // compress out multiple blank lines + + } // end if + + while ((line_breaks--)>0) + emitLineBreak(); // emit the line breaks + + temp_buffer.setLength(0); // clear out the buffer + state = ST_WHITESPACE; + + } // end doFlushNewlines + + private final void emitFromStartOfTempBuffer(int nchars) { if (nchars<=0) return; // can't emit less than 1 character! @@ -439,7 +554,7 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic } // end emitFromStartOfTempBuffer - private void doFlushString() + private final void doFlushString() { MarkupData md = attemptRewrite(string_rewriters,temp_buffer.toString()); if (md!=null) @@ -529,7 +644,7 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic } // end doFlushString - private boolean handleAsHTML() + private final boolean handleAsHTML() { if (logger.isDebugEnabled()) logger.debug("handleAsHTML(): candidate buffer = [" + temp_buffer.toString() + "]"); @@ -583,13 +698,24 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic // Get the HTML tag set index for this tag, and see if we allow that set. int tag_set_id = TagRepository.tagIndexToSet(tag_index); - if (!(config.isTagSetAllowed(tag_set_id)) && !(config.getDiscardHTMLTags())) - { // we're not allowing it, we're not discarding it, so punt! - if (logger.isDebugEnabled()) - logger.debug("<" + poss_tag_name + "> is not allowed in this context"); - return false; + if (!(config.isTagSetAllowed(tag_set_id))) + { // the tag is not allowed - either discard it or leave it in verbatim + if (config.getDiscardHTMLTags() || config.getDiscardRejectedHTML()) + { // throw this tag the hell away! + if (logger.isDebugEnabled()) + logger.debug("<" + poss_tag_name + "> tag rejected and discarded"); + return true; - } // end if + } // end if + else + { // kick the tag out and let some other code deal with it + if (logger.isDebugEnabled()) + logger.debug("<" + poss_tag_name + "> is not allowed in this context"); + return false; + + } // end if + + } // end if (tag rejected by HTML rules) if (!(config.getDiscardHTMLTags()) && tagobj.balanceTags()) { // this tag needs to be balanced - here is where we manipulate the stack @@ -648,9 +774,10 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic logger.debug("real tag data = [" + real_tag_data + "]"); // Emit the tag to the output. - emitChar('<',config.getRawOutputFilters(),false); - emitString(real_tag_data,config.getRawOutputFilters(),false); - emitChar('>',config.getRawOutputFilters(),false); + List filters = config.getRawOutputFilters(); + emitChar('<',filters,false); + emitString(real_tag_data,filters,false); + emitChar('>',filters,false); // Determine whether this tag causes a "logical line break." boolean logical_line_break = false; @@ -673,10 +800,85 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic return true; // handled! - } // end handleAsHTML() + } // end handleAsHTML - private void finishTag() + /** + * Returns true if the temporary buffer contains the start of an HTML comment. (The + * leading and trailing angle brackets are assumed.) + * + * @return See above. + */ + private final boolean containsHTMLComment() { + return ((temp_buffer.length()>=3) && (temp_buffer.substring(0,3).equals("!--"))); + + } // end containsHTMLComment + + /** + * Returns true if the temporary buffer contains a complete HTML comment. (The leading + * and trailing angle brackets are assumed.) + * + * @return See above. + */ + private final boolean containsCompleteHTMLComment() + { + int l = temp_buffer.length(); + // note that a minimum HTML comment is , i.e. "" with no characters + // in between... + return ((l>=5) && (temp_buffer.substring(0,3).equals("!--")) + && (temp_buffer.substring(l-2,l).equals("--"))); + + } // end containsCompleteHTMLComment + + /** + * Returns true if the temporary buffer contains an XML construct, i.e. a tag that + * contains a ':', and may or may not have a leading '/'. (The leading and trailing angle brackets + * are assumed.) + * + * @return See above. + */ + private final boolean containsXMLConstruct() + { + int ptr = 0; + if ((temp_buffer.length()>1) && (temp_buffer.charAt(0)=='/')) + ptr++; + while (ptr',filters,false); + + } // end if + + // clear our state and return to parsing + temp_buffer.setLength(0); + state = ST_WHITESPACE; + return; + + } // end if + if (handleAsHTML()) { // the tag has been handled as an HTML tag - bail out immediately temp_buffer.setLength(0); @@ -696,6 +898,14 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic } // end if + if (config.getDiscardXMLConstructs() && containsXMLConstruct()) + { // the tag is an XML construct, and is to be discarded + temp_buffer.setLength(0); + state = ST_WHITESPACE; + return; + + } // end if + // This tag has been rejected! We need to process it normally, as character data. String rejection = temp_buffer.toString(); temp_buffer.setLength(0); @@ -707,7 +917,7 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic } // end finishTag - private void finishParen() + private final void finishParen() { // Try to handle the paren element using a paren rewriter. MarkupData md = attemptRewrite(paren_rewriters,temp_buffer.toString()); @@ -733,7 +943,7 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic } // end finishParen - private void parse(String str) + private final void parse(String str) { int i = 0; while (i, * for Silverwrist Design Studios. Portions created by Eric J. Bowersox are - * Copyright (C) 2001 Eric J. Bowersox/Silverwrist Design Studios. All Rights Reserved. + * Copyright (C) 2001-02 Eric J. Bowersox/Silverwrist Design Studios. All Rights Reserved. * * Contributor(s): */ @@ -124,8 +124,7 @@ class TagRepository implements HTMLTagSets */ static - { - // begin enshrining the tags! + { // begin enshrining the tags! enshrineTag(new SimpleTag("!DOCTYPE",false),DOC_FORMAT); enshrineTag(new SimpleTag("%",false),SERVER_PAGE); enshrineTag(new SimpleTag("%=",false),SERVER_PAGE);