org.apache.lenya.lucene.html
Class HTMLParser

java.lang.Object
  extended by org.apache.lenya.lucene.html.HTMLParser
All Implemented Interfaces:
HTMLParserConstants

public class HTMLParser
extends java.lang.Object
implements HTMLParserConstants

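HTML parser. Exposes the accessors getTitle(), getKeywords(), getSummary() and getReader() alongside the parsing methods HTMLDocument(), Tag(), ArgValue(), Decl() and CommentTag().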


Field Summary
 Token jj_nt
           
 boolean lookingAhead
           
static int SUMMARY_LENGTH
           
 Token token
           
 HTMLParserTokenManager token_source
           
 
Fields inherited from interface org.apache.lenya.lucene.html.HTMLParserConstants
AfterEquals, ArgEquals, ArgName, ArgQuote1, ArgQuote2, ArgValue, CloseQuote1, CloseQuote2, Comment1, Comment2, CommentEnd1, CommentEnd2, CommentText1, CommentText2, DeclName, DEFAULT, Entity, EOF, LET, NUM, Punct, Quote1Text, Quote2Text, SP, Space, TagEnd, TagName, tokenImage, WithinComment1, WithinComment2, WithinQuote1, WithinQuote2, WithinTag, Word
 
Constructor Summary
HTMLParser(java.io.File file)
          Creates a new HTMLParser object that reads from the given file.
HTMLParser(HTMLParserTokenManager tm)
          Creates a new HTMLParser object that uses the given token manager.
HTMLParser(java.io.InputStream stream)
          Creates a new HTMLParser object that reads from the given input stream.
HTMLParser(java.io.Reader stream)
          Creates a new HTMLParser object that reads from the given character stream (Reader).
 
Method Summary
 Token ArgValue()
          DOCUMENT ME!
 void CommentTag()
          DOCUMENT ME!
 Token Decl()
          DOCUMENT ME!
 void disable_tracing()
          DOCUMENT ME!
 void enable_tracing()
          DOCUMENT ME!
 ParseException generateParseException()
          DOCUMENT ME!
 java.lang.String getKeywords()
          Gets the keywords.
 Token getNextToken()
          DOCUMENT ME!
 java.io.Reader getReader()
          DOCUMENT ME!
 java.lang.String getSummary()
          DOCUMENT ME!
 java.lang.String getTitle()
          DOCUMENT ME!
 Token getToken(int index)
          DOCUMENT ME!
 void HTMLDocument()
          DOCUMENT ME!
 void ReInit(HTMLParserTokenManager tm)
          DOCUMENT ME!
 void ReInit(java.io.InputStream stream)
          DOCUMENT ME!
 void ReInit(java.io.Reader stream)
          DOCUMENT ME!
 void Tag()
          DOCUMENT ME!
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

SUMMARY_LENGTH

public static final int SUMMARY_LENGTH
See Also:
Constant Field Values

token_source

public HTMLParserTokenManager token_source

token

public Token token

jj_nt

public Token jj_nt

lookingAhead

public boolean lookingAhead
Constructor Detail

HTMLParser

public HTMLParser(java.io.File file)
           throws java.io.FileNotFoundException
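Creates a new HTMLParser object that reads from the given file.

Parameters:
file - the file to read from
Throws:
java.io.FileNotFoundException - presumably if the file cannot be opened for reading (not documented in the source; confirm against the implementation)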

HTMLParser

public HTMLParser(java.io.InputStream stream)
Creates a new HTMLParser object that reads from the given input stream.

Parameters:
stream - the input stream to read from

HTMLParser

public HTMLParser(java.io.Reader stream)
Creates a new HTMLParser object that reads from the given character stream.

Parameters:
stream - the Reader to read from

HTMLParser

public HTMLParser(HTMLParserTokenManager tm)
Creates a new HTMLParser object that uses the given token manager.

Parameters:
tm - the HTMLParserTokenManager to use as the token source
Method Detail

getTitle

public java.lang.String getTitle()
                          throws java.io.IOException,
                                 java.lang.InterruptedException
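Gets the title (presumably that of the parsed HTML document; not documented in the source).

Returns:
the title
Throws:
java.io.IOException - not documented in the source; presumably if reading the input fails
java.lang.InterruptedException - not documented in the source; presumably if the calling thread is interrupted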

getKeywords

public java.lang.String getKeywords()
                             throws java.io.IOException,
                                    java.lang.InterruptedException
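Gets the keywords.

Returns:
the keywords
Throws:
java.io.IOException - not documented in the source; presumably if reading the input fails
java.lang.InterruptedException - not documented in the source; presumably if the calling thread is interrupted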

getSummary

public java.lang.String getSummary()
                            throws java.io.IOException,
                                   java.lang.InterruptedException
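Gets the summary (not documented in the source; the SUMMARY_LENGTH constant presumably bounds its size, to be confirmed against the implementation).

Returns:
the summary
Throws:
java.io.IOException - not documented in the source; presumably if reading the input fails
java.lang.InterruptedException - not documented in the source; presumably if the calling thread is interrupted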

getReader

public java.io.Reader getReader()
                         throws java.io.IOException
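Gets a Reader (not documented in the source; what content the Reader supplies must be confirmed against the implementation).

Returns:
a java.io.Reader
Throws:
java.io.IOException - not documented in the source; presumably if an I/O error occurs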

HTMLDocument

public final void HTMLDocument()
                        throws ParseException,
                               java.io.IOException
DOCUMENT ME!

Throws:
ParseException - DOCUMENT ME!
java.io.IOException - DOCUMENT ME!

Tag

public final void Tag()
               throws ParseException,
                      java.io.IOException
DOCUMENT ME!

Throws:
ParseException - DOCUMENT ME!
java.io.IOException - DOCUMENT ME!

ArgValue

public final Token ArgValue()
                     throws ParseException
DOCUMENT ME!

Returns:
DOCUMENT ME!
Throws:
ParseException - DOCUMENT ME!
java.lang.Error - DOCUMENT ME!

Decl

public final Token Decl()
                 throws ParseException
DOCUMENT ME!

Returns:
DOCUMENT ME!
Throws:
ParseException - DOCUMENT ME!
java.lang.Error - DOCUMENT ME!

CommentTag

public final void CommentTag()
                      throws ParseException
DOCUMENT ME!

Throws:
ParseException - DOCUMENT ME!

ReInit

public void ReInit(java.io.InputStream stream)
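Reinitializes the parser with the given input stream (presumed from the method name; not documented in the source).

Parameters:
stream - the input stream to read from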

ReInit

public void ReInit(java.io.Reader stream)
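Reinitializes the parser with the given character stream (presumed from the method name; not documented in the source).

Parameters:
stream - the Reader to read from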

ReInit

public void ReInit(HTMLParserTokenManager tm)
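Reinitializes the parser with the given token manager (presumed from the method name; not documented in the source).

Parameters:
tm - the HTMLParserTokenManager to use as the token source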

getNextToken

public final Token getNextToken()
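Gets the next token (presumably advancing the parser's position in the token stream; not documented in the source).

Returns:
the next Token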

getToken

public final Token getToken(int index)
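Gets the token at the given index (not documented in the source; the index is presumably relative to the current token, to be confirmed against the implementation).

Parameters:
index - the index of the token to return
Returns:
the Token at that index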

generateParseException

public final ParseException generateParseException()
DOCUMENT ME!

Returns:
DOCUMENT ME!

enable_tracing

public final void enable_tracing()
Enables tracing (presumed from the method name; not documented in the source).


disable_tracing

public final void disable_tracing()
Disables tracing (presumed from the method name; not documented in the source).



Copyright © 1999-2005 Apache Software Foundation. All Rights Reserved.