auto-proxy
|
Specifies the proxy setting for the robot. It can be a proxy server
or a JavaScript file for automatically configuring the proxy.
|
auto-proxy="http://proxy_server/proxy.pac"
|
bindir
|
Specifies whether the robot will add a bind directory to the PATH environment.
This is an extra PATH for users to run an external program in a robot, such
as those specified by cmd-hook parameter.
|
bindir=path
|
cmd-hook
|
Specifies an external completion script to run after the robot completes
one run. This must be a full path to the command name. The robot will execute
this script from the /var/opt/SUNWportal/ directory. There
is no default.
There must be at least one RD registered for the command to run.
|
cmd-hook=”command-string”
|
command-port
|
Specifies the socket that the robot listens to in order to accept commands
from other programs, such as the Administration Interface or robot control
panels.
For security reasons, the robot can accept commands only from the local
host unless remote-access is set to yes.
|
command-port=port_number
|
connect-timeout
|
Specifies the maximum time allowed for a network to respond to a connection
request. The default is 120 seconds.
|
connect-timeout=seconds
|
convert-timeout
|
Specifies the maximum time allowed for document conversion. The default
is 600 seconds.
|
convert-timeout=seconds
|
depth
|
Specifies the number of links from the seed URLs (also referred to as
starting point) that the robot will examine. This parameter sets the default
value for any seed URLs that do not specify a depth. The default is 10.
A value of negative one (depth=-1) indicates that the link depth is
infinite.
|
depth=integer
|
email
|
Specifies the email address of the person who runs the robot.
The email address is sent with the user-agent in the HTTP request header,
so that Web managers can contact the people who run robots at their sites.
The default is user@domain.
|
email=user@hostname
|
enable-ip
|
Generates an IP address for the URL for each RD that is created. The
default is true.
|
enable-ip=[true | yes | false | no]
|
enable-rdm-probe
|
Determines whether the robot queries each server it encounters to find
out if the server supports RDM. If the server supports RDM, the robot
will not attempt to enumerate the server's resources, since that server
is able to act as its own resource description server. The
default is false.
|
enable-rdm-probe=
[true | false | yes | no]
|
enable-robots-txt
|
Determines if the robot should check the robots.txt file at each site
it visits, if available. The default is yes.
|
enable-robots-txt=
[true | false | yes | no]
|
engine-concurrent
|
Specifies the number of pre-created threads for the robot to use. The
default is 10.
This parameter cannot be set interactively through the administration
console.
|
engine-concurrent=[1..100]
|
enumeration-filter
|
Specifies the enumeration filter that is used by the robot to determine
if a resource should be enumerated. The value must be the name of a filter
defined in the file filter.conf. The default is enumeration-default.
This parameter cannot be set interactively through the administration
console.
|
enumeration-filter=
enumfiltername
|
generation-filter
|
Specifies the generation filter that is used by the robot to determine
if a resource should be generated. The value must be the name of a filter
defined in the file filter.conf. The default is generation-default.
|
generation-filter=genfiltername
|
index-after-ngenerated
|
Specifies the number of minutes that the robot should collect RDs before
batching them for the Search Engine.
If you do not specify this parameter, it is set to 256 minutes.
|
index-after-ngenerated=30
|
loglevel
|
Specifies the levels of logging. The loglevel values are as follows:
-
Level 0: log nothing but serious errors
-
Level 1: also log RD generation (default)
-
Level 2: also log retrieval activity
-
Level 3: also log filtering activity
-
Level 4: also log spawning activity
-
Level 5: also log retrieval progress
The default
value is 1.
|
loglevel=[0...100]
|
max-connections
|
Specifies the maximum number of concurrent retrievals that a robot can
make. The default is 8.
|
max-connections=[1..100]
|
max-filesize-kb
|
Specifies the maximum file size in kilobytes for files retrieved by
the robot. The default is 10240.
|
max-filesize-kb=1024
|
max-memory-per-url / max-memory
|
Specifies the maximum memory in bytes used by each URL. If the URL needs
more memory, the RD is saved to disk. The default is 64000.
This parameter cannot be set interactively through the administration
console.
|
max-memory-per-url=n_bytes
|
max-working
|
Specifies the size of the robot working set, which is the maximum number
of URLs the robot can work on at one time.
This parameter cannot be set interactively through the administration
console.
|
max-working=1024
|
onCompletion
|
Determines what the robot does after it has completed a run. The robot
can either go into idle mode, loop back and start again, or quit. The default
is idle.
This parameter works with the cmd-hook parameter. When the robot is
done, it will do the action of onCompletion and then run the cmd-hook program.
|
OnCompletion=[idle | loop | quit]
|
password
|
Specifies the password used for httpd authentication and ftp connection.
|
password=string
|
referer
|
Specifies the referer parameter sent in the HTTP request, if it is set,
to identify the robot as the referer when accessing Web pages.
|
referer=string
|
remote-access
|
This parameter determines if the robot can accept commands from remote
hosts. The default is false.
|
remote-access=[true | false | yes | no]
|
robot-state-dir
|
Specifies the directory where the robot saves its state. In this working
directory, the robot can record the number of collected RDs and so on.
|
robot-state-dir="/var/opt/SUNWportal/instance/portal/robot"
|
server-delay
|
Specifies the time period between two visits to the same web site, thus
preventing the robot from accessing the same site too frequently.
|
server-delay=delay_in_seconds
|
site-max-connections
|
Indicates the maximum number of concurrent connections that a robot
can make to any one site. The default is 2.
|
site-max-connections=[1..100]
|
smart-host-heuristics
|
Enables the robot to change sites that are rotating their DNS canonical
host names. For example, www123.siroe.com is changed to www.siroe.com. The
default is false.
|
smart-host-heuristics=[true | false]
|
tmpdir
|
Specifies a place for the robot to create temporary files. Use this
value to set the environment variable TMPDIR.
|
tmpdir=path
|
user-agent
|
Specifies the parameter sent with the email address in the http-request
to the server.
|
user-agent=iPlanetRobot/4.0
|
username
|
Specifies the user name of the user who runs the robot; it is used for
httpd authentication and ftp connection. The default is anonymous.
|
username=string
|