org.apache.lenya.search.crawler
Class CrawlerConfiguration

java.lang.Object
  extended by org.apache.lenya.search.crawler.CrawlerConfiguration

public class CrawlerConfiguration
extends java.lang.Object

Configuration for the web crawler (it might make sense to replace this crawler with Nutch)


Constructor Summary
CrawlerConfiguration(java.lang.String configurationFilePath)
          Creates a new CrawlerConfiguration object.
 
Method Summary
 void configure(org.w3c.dom.Element root)
          Extract parameters from configuration
 java.lang.String getBaseURL()
          Get base URL
 java.lang.String getHTDocsDumpDir()
          Get htdocs-dump-dir/@src
 java.lang.String getHTDocsDumpDirResolved()
          Get htdocs-dump-dir/@src as absolute path
 java.lang.String getRobotsDomain()
          Get robots/@domain
 java.lang.String getRobotsFile()
          Get robots/@src
 java.lang.String getRobotsFileResolved()
          Get robots/@src as absolute path
 java.lang.String getScopeURL()
          Get scope URL
 java.lang.String getURIList()
          Get URI list path
 java.lang.String getURIListResolved()
          Get URI list path as absolute path
 java.lang.String getUserAgent()
          Get user agent
static void main(java.lang.String[] args)
          Command-line entry point
 java.lang.String resolvePath(java.lang.String path)
          Resolve path
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Constructor Detail

CrawlerConfiguration

public CrawlerConfiguration(java.lang.String configurationFilePath)
Creates a new CrawlerConfiguration object.

Parameters:
configurationFilePath - Path of the configuration file
Method Detail

main

public static void main(java.lang.String[] args)
Command-line entry point

Parameters:
args - Command-line arguments

configure

public void configure(org.w3c.dom.Element root)
               throws java.lang.Exception
Extract parameters from configuration

Parameters:
root - Root element of the configuration from which the parameters are extracted
Throws:
java.lang.Exception - if the parameters cannot be extracted from the configuration

getBaseURL

public java.lang.String getBaseURL()
Get base URL

Returns:
base URL

getScopeURL

public java.lang.String getScopeURL()
Get scope URL

Returns:
scope URL

getUserAgent

public java.lang.String getUserAgent()
Get user agent

Returns:
user agent

getURIList

public java.lang.String getURIList()
Get URI list path

Returns:
URI list path

getURIListResolved

public java.lang.String getURIListResolved()
Get URI list path as absolute path

Returns:
URI list path as absolute path

getHTDocsDumpDir

public java.lang.String getHTDocsDumpDir()
Get htdocs-dump-dir/@src

Returns:
htdocs-dump-dir/@src

getHTDocsDumpDirResolved

public java.lang.String getHTDocsDumpDirResolved()
Get htdocs-dump-dir/@src as absolute path

Returns:
htdocs-dump-dir/@src as absolute path

getRobotsFile

public java.lang.String getRobotsFile()
Get robots/@src

Returns:
robots/@src

getRobotsFileResolved

public java.lang.String getRobotsFileResolved()
Get robots/@src as absolute path

Returns:
robots/@src as absolute path

getRobotsDomain

public java.lang.String getRobotsDomain()
Get robots/@domain

Returns:
robots/@domain

resolvePath

public java.lang.String resolvePath(java.lang.String path)
Resolve path

Parameters:
path - Original path
Returns:
Resolved path


Copyright © 1999-2005 Apache Software Foundation. All Rights Reserved.