|
||||||||||
PREV NEXT | FRAMES NO FRAMES |
org.apache.* |
---|
org.apache.hadoop.fs.Path | ||
---|---|---|
public static final String | SEPARATOR | "/" |
public static final char | SEPARATOR_CHAR | 47 (the character '/') |
org.apache.hadoop.io.MD5Hash | ||
---|---|---|
public static final int | MD5_LEN | 16 |
org.apache.nutch.crawl.CrawlDatum | ||
---|---|---|
public static final String | FETCH_DIR_NAME | "crawl_fetch" |
public static final String | GENERATE_DIR_NAME | "crawl_generate" |
public static final String | PARSE_DIR_NAME | "crawl_parse" |
public static final byte | STATUS_DB_FETCHED | 2 |
public static final byte | STATUS_DB_GONE | 3 |
public static final byte | STATUS_DB_MAX | 31 |
public static final byte | STATUS_DB_REDIR_PERM | 5 |
public static final byte | STATUS_DB_REDIR_TEMP | 4 |
public static final byte | STATUS_DB_UNFETCHED | 1 |
public static final byte | STATUS_FETCH_CONTENT_LIMIT_EXCEEDED | 38 |
public static final byte | STATUS_FETCH_GONE | 37 |
public static final byte | STATUS_FETCH_MAX | 63 |
public static final byte | STATUS_FETCH_REDIR_PERM | 36 |
public static final byte | STATUS_FETCH_REDIR_TEMP | 35 |
public static final byte | STATUS_FETCH_RETRY | 34 |
public static final byte | STATUS_FETCH_SUCCESS | 33 |
public static final byte | STATUS_INJECTED | 66 |
public static final byte | STATUS_LINKED | 67 |
public static final byte | STATUS_SIGNATURE | 65 |
org.apache.nutch.metadata.CreativeCommons | ||
---|---|---|
public static final String | LICENSE_LOCATION | "License-Location" |
public static final String | LICENSE_URL | "License-Url" |
public static final String | WORK_TYPE | "Work-Type" |
org.apache.nutch.metadata.DublinCore | ||
---|---|---|
public static final String | CONTRIBUTOR | "contributor" |
public static final String | COVERAGE | "coverage" |
public static final String | CREATOR | "creator" |
public static final String | DATE | "date" |
public static final String | DESCRIPTION | "description" |
public static final String | FORMAT | "format" |
public static final String | IDENTIFIER | "identifier" |
public static final String | LANGUAGE | "language" |
public static final String | MODIFIED | "modified" |
public static final String | PUBLISHER | "publisher" |
public static final String | RELATION | "relation" |
public static final String | RIGHTS | "rights" |
public static final String | SOURCE | "source" |
public static final String | SUBJECT | "subject" |
public static final String | TITLE | "title" |
public static final String | TYPE | "type" |
org.apache.nutch.metadata.HttpHeaders | ||
---|---|---|
public static final String | CONTENT_DISPOSITION | "Content-Disposition" |
public static final String | CONTENT_ENCODING | "Content-Encoding" |
public static final String | CONTENT_LANGUAGE | "Content-Language" |
public static final String | CONTENT_LENGTH | "Content-Length" |
public static final String | CONTENT_LOCATION | "Content-Location" |
public static final String | CONTENT_MD5 | "Content-MD5" |
public static final String | CONTENT_TYPE | "Content-Type" |
public static final String | LAST_MODIFIED | "Last-Modified" |
public static final String | LOCATION | "Location" |
org.apache.nutch.metadata.Nutch | ||
---|---|---|
public static final String | CACHING_FORBIDDEN_ALL | "all" |
public static final String | CACHING_FORBIDDEN_CONTENT | "content" |
public static final String | CACHING_FORBIDDEN_KEY | "caching.forbidden" |
public static final String | CACHING_FORBIDDEN_NONE | "none" |
public static final String | CHAR_ENCODING_FOR_CONVERSION | "CharEncodingForConversion" |
public static final String | GENERATE_TIME_KEY | `"_ngt_"` |
public static final String | ORIGINAL_CHAR_ENCODING | "OriginalCharEncoding" |
public static final String | PROTO_STATUS_KEY | `"_pst_"` |
public static final String | SCORE_KEY | "nutch.crawl.score" |
public static final String | SEGMENT_NAME_KEY | "nutch.segment.name" |
public static final String | SIGNATURE_KEY | "nutch.content.digest" |
org.apache.nutch.metadata.Office | ||
---|---|---|
public static final String | APPLICATION_NAME | "Application-Name" |
public static final String | AUTHOR | "Author" |
public static final String | CHARACTER_COUNT | "Character Count" |
public static final String | COMMENTS | "Comments" |
public static final String | KEYWORDS | "Keywords" |
public static final String | LAST_AUTHOR | "Last-Author" |
public static final String | LAST_PRINTED | "Last-Printed" |
public static final String | LAST_SAVED | "Last-Save-Date" |
public static final String | PAGE_COUNT | "Page-Count" |
public static final String | REVISION_NUMBER | "Revision-Number" |
public static final String | TEMPLATE | "Template" |
public static final String | WORD_COUNT | "Word-Count" |
org.apache.nutch.net.URLFilters | ||
---|---|---|
public static final String | URLFILTER_ORDER | "urlfilter.order" |
org.apache.nutch.net.URLNormalizers | ||
---|---|---|
public static final String | SCOPE_CRAWLDB | "crawldb" |
public static final String | SCOPE_DEFAULT | "default" |
public static final String | SCOPE_FETCHER | "fetcher" |
public static final String | SCOPE_GENERATE_HOST_COUNT | "generate_host_count" |
public static final String | SCOPE_INJECT | "inject" |
public static final String | SCOPE_LINKDB | "linkdb" |
public static final String | SCOPE_OUTLINK | "outlink" |
public static final String | SCOPE_PARTITION | "partition" |
org.apache.nutch.parse.HtmlParseFilters | ||
---|---|---|
public static final String | PARSEFILTER_ORDER | "parser.filters.order" |
org.apache.nutch.parse.ParseData | ||
---|---|---|
public static final String | DIR_NAME | "parse_data" |
org.apache.nutch.parse.ParserFactory | ||
---|---|---|
public static final String | DEFAULT_PLUGIN | "*" |
org.apache.nutch.parse.ParseStatus | ||
---|---|---|
public static final byte | FAILED | 2 |
public static final short | FAILED_EXCEPTION | 200 |
public static final short | FAILED_INVALID_FORMAT | 203 |
public static final short | FAILED_MISSING_CONTENT | 205 |
public static final short | FAILED_MISSING_PARTS | 204 |
public static final short | FAILED_TRUNCATED | 202 |
public static final byte | NOTPARSED | 0 |
public static final byte | SUCCESS | 1 |
public static final short | SUCCESS_REDIRECT | 100 |
org.apache.nutch.parse.ParseText | ||
---|---|---|
public static final String | DIR_NAME | "parse_text" |
org.apache.nutch.protocol.Content | ||
---|---|---|
public static final String | DIR_NAME | "content" |
org.apache.nutch.protocol.Protocol | ||
---|---|---|
public static final String | CHECK_BLOCKING | "protocol.plugin.check.blocking" |
public static final String | CHECK_ROBOTS | "protocol.plugin.check.robots" |
org.apache.nutch.protocol.ProtocolStatus | ||
---|---|---|
public static final int | ACCESS_DENIED | 17 |
public static final int | BLOCKED | 23 |
public static final int | EXCEPTION | 16 |
public static final int | FAILED | 2 |
public static final int | GONE | 11 |
public static final int | MOVED | 12 |
public static final int | NOTFETCHING | 20 |
public static final int | NOTFOUND | 14 |
public static final int | NOTMODIFIED | 21 |
public static final int | PROTO_NOT_FOUND | 10 |
public static final int | REDIR_EXCEEDED | 19 |
public static final int | RETRY | 15 |
public static final int | ROBOTS_DENIED | 18 |
public static final int | SUCCESS | 1 |
public static final int | TEMP_MOVED | 13 |
public static final int | WOULDBLOCK | 22 |
org.apache.nutch.util.NutchConfiguration | ||
---|---|---|
public static final String | CONFIG_DEFAULT | "default.xml" |
public static final String | CONFIG_SITE | "site.xml" |
org.apache.nutch.util.mime.MimeTypes | ||
---|---|---|
public static final String | DEFAULT | "application/octet-stream" |
|
||||||||||
PREV NEXT | FRAMES NO FRAMES |