org.apache.lenya.search.crawler
Class HTMLHandler

java.lang.Object
  extended by javax.swing.text.html.HTMLEditorKit.ParserCallback
      extended by org.apache.lenya.search.crawler.HTMLHandler
All Implemented Interfaces:
ContentHandler

public final class HTMLHandler
extends javax.swing.text.html.HTMLEditorKit.ParserCallback
implements ContentHandler

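HTML parser callback that implements ContentHandler. It parses HTML content from an input stream and exposes the page title, contents, links, author, published date, META tag values (categories, description, keywords, HREF), and the robot index/follow flags.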


Field Summary
 
Fields inherited from class javax.swing.text.html.HTMLEditorKit.ParserCallback
IMPLIED
 
Constructor Summary
HTMLHandler()
          Constructor - initializes variables
 
Method Summary
 java.lang.String getAuthor()
          Return author
 java.lang.String getCategories()
          Return categories (from META tags)
 java.lang.String getContents()
          Return contents
 java.lang.String getDescription()
          Return description (from META tags)
 java.lang.String getHREF()
          Return META HREF
 java.lang.String getKeywords()
          Return keywords (from META tags)
 java.util.List getLinks()
          Return links
 long getPublished()
          Return published date (from META tag)
 boolean getRobotFollow()
          Return boolean true if links are to be followed
 boolean getRobotIndex()
          Return boolean true if this is to be indexed
 java.lang.String getTitle()
          Return page title
 void handleAnchor(javax.swing.text.MutableAttributeSet attribs)
          Handle Anchor tags
 void handleEndTag(javax.swing.text.html.HTML.Tag tag, int pos)
          Closing tag
 void handleMeta(javax.swing.text.MutableAttributeSet attribs)
          Handle META tags
 void handleSimpleTag(javax.swing.text.html.HTML.Tag tag, javax.swing.text.MutableAttributeSet attribs, int pos)
          Handle standalone tags
 void handleStartTag(javax.swing.text.html.HTML.Tag tag, javax.swing.text.MutableAttributeSet attribs, int pos)
          Opening tag
 void handleText(char[] text, int pos)
          Handle page text
 void parse(java.io.InputStream in)
          Parse Content.
 
Methods inherited from class javax.swing.text.html.HTMLEditorKit.ParserCallback
flush, handleComment, handleEndOfLineString, handleError
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Constructor Detail

HTMLHandler

public HTMLHandler()
Constructor - initializes variables

Method Detail

getAuthor

public java.lang.String getAuthor()
Return author

Specified by:
getAuthor in interface ContentHandler
Returns:
the author

getCategories

public java.lang.String getCategories()
Return categories (from META tags)

Specified by:
getCategories in interface ContentHandler
Returns:
the categories (from META tags)

getContents

public java.lang.String getContents()
Return contents

Specified by:
getContents in interface ContentHandler
Returns:
the contents

getDescription

public java.lang.String getDescription()
Return description (from META tags)

Specified by:
getDescription in interface ContentHandler
Returns:
the description (from META tags)

getHREF

public java.lang.String getHREF()
Return META HREF

Specified by:
getHREF in interface ContentHandler
Returns:
the META HREF

getKeywords

public java.lang.String getKeywords()
Return keywords (from META tags)

Specified by:
getKeywords in interface ContentHandler
Returns:
the keywords (from META tags)

getLinks

public java.util.List getLinks()
Return links

Specified by:
getLinks in interface ContentHandler
Returns:
the links

getPublished

public long getPublished()
Return published date (from META tag)

Specified by:
getPublished in interface ContentHandler
Returns:
the published date (from META tag)

getRobotFollow

public boolean getRobotFollow()
Return boolean true if links are to be followed

Specified by:
getRobotFollow in interface ContentHandler
Returns:
true if links are to be followed

getRobotIndex

public boolean getRobotIndex()
Return boolean true if this is to be indexed

Specified by:
getRobotIndex in interface ContentHandler
Returns:
true if this is to be indexed

getTitle

public java.lang.String getTitle()
Return page title

Specified by:
getTitle in interface ContentHandler
Returns:
the page title

handleAnchor

public void handleAnchor(javax.swing.text.MutableAttributeSet attribs)
Handle Anchor tags

Parameters:
attribs - the attributes of the anchor tag

handleEndTag

public void handleEndTag(javax.swing.text.html.HTML.Tag tag,
                         int pos)
Closing tag

Overrides:
handleEndTag in class javax.swing.text.html.HTMLEditorKit.ParserCallback
Parameters:
tag - the closing tag
pos - the position reported by the parser for this tag

handleMeta

public void handleMeta(javax.swing.text.MutableAttributeSet attribs)
Handle META tags

Parameters:
attribs - the attributes of the META tag

handleSimpleTag

public void handleSimpleTag(javax.swing.text.html.HTML.Tag tag,
                            javax.swing.text.MutableAttributeSet attribs,
                            int pos)
Handle standalone tags

Overrides:
handleSimpleTag in class javax.swing.text.html.HTMLEditorKit.ParserCallback
Parameters:
tag - the standalone tag
attribs - the attributes of the tag
pos - the position reported by the parser for this tag

handleStartTag

public void handleStartTag(javax.swing.text.html.HTML.Tag tag,
                           javax.swing.text.MutableAttributeSet attribs,
                           int pos)
Opening tag

Overrides:
handleStartTag in class javax.swing.text.html.HTMLEditorKit.ParserCallback
Parameters:
tag - the opening tag
attribs - the attributes of the tag
pos - the position reported by the parser for this tag

handleText

public void handleText(char[] text,
                       int pos)
Handle page text

Overrides:
handleText in class javax.swing.text.html.HTMLEditorKit.ParserCallback
Parameters:
text - the characters of the page text
pos - the position reported by the parser for this text

parse

public void parse(java.io.InputStream in)
Parse Content.

Specified by:
parse in interface ContentHandler
Parameters:
in - the input stream containing the content to parse


Copyright © 1999-2005 Apache Software Foundation. All Rights Reserved.