|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object org.apache.nutch.crawl.CrawlDatum
public class CrawlDatum
Nested Class Summary | |
---|---|
static class |
CrawlDatum.Comparator
A Comparator optimized for CrawlDatum. |
Field Summary | |
---|---|
static String |
FETCH_DIR_NAME
|
static String |
GENERATE_DIR_NAME
|
static String |
PARSE_DIR_NAME
|
static HashMap<Byte,String> |
statNames
|
static byte |
STATUS_DB_FETCHED
Page was successfully fetched. |
static byte |
STATUS_DB_GONE
Page no longer exists. |
static byte |
STATUS_DB_MAX
Maximum value of DB-related status. |
static byte |
STATUS_DB_REDIR_PERM
Page permanently redirects to other page. |
static byte |
STATUS_DB_REDIR_TEMP
Page temporarily redirects to other page. |
static byte |
STATUS_DB_UNFETCHED
Page was not fetched yet. |
static byte |
STATUS_FETCH_CONTENT_LIMIT_EXCEEDED
Fetching was successful but content was truncated. |
static byte |
STATUS_FETCH_GONE
Fetching unsuccessful - page is gone. |
static byte |
STATUS_FETCH_MAX
Maximum value of fetch-related status. |
static byte |
STATUS_FETCH_REDIR_PERM
Fetching permanently redirected to other page. |
static byte |
STATUS_FETCH_REDIR_TEMP
Fetching temporarily redirected to other page. |
static byte |
STATUS_FETCH_RETRY
Fetching unsuccessful, needs to be retried (transient errors). |
static byte |
STATUS_FETCH_SUCCESS
Fetching was successful. |
static byte |
STATUS_INJECTED
Page was newly injected. |
static byte |
STATUS_LINKED
Page discovered through a link. |
static byte |
STATUS_SIGNATURE
Page signature. |
Constructor Summary | |
---|---|
CrawlDatum()
|
|
CrawlDatum(int status,
float fetchInterval)
|
|
CrawlDatum(int status,
float fetchInterval,
float score)
|
Method Summary | |
---|---|
Object |
clone()
|
int |
compareTo(Object o)
Sort by decreasing score. |
boolean |
equals(Object o)
|
float |
getFetchInterval()
|
long |
getFetchTime()
|
MapWritable |
getMetaData()
Returns the MapWritable if it was set or read in readFields(DataInput); returns an empty map if the CrawlDatum was freshly created (the map is lazily instantiated). |
long |
getModifiedTime()
|
long |
getResponseCode()
|
byte |
getRetriesSinceFetch()
|
long |
getRobotsDelay()
|
float |
getScore()
|
byte[] |
getSignature()
|
byte |
getStatus()
|
static String |
getStatusName(byte value)
|
static boolean |
hasDbStatus(CrawlDatum datum)
|
static boolean |
hasFetchStatus(CrawlDatum datum)
|
int |
hashCode()
|
static CrawlDatum |
read(DataInput in)
|
void |
readFields(DataInput in)
Reads the fields of this object from in. |
void |
set(CrawlDatum that)
Copy the contents of another instance into this instance. |
void |
setFetchInterval(float fetchInterval)
|
void |
setFetchTime(long fetchTime)
|
void |
setMetaData(MapWritable mapWritable)
|
void |
setModifiedTime(long modifiedTime)
|
void |
setNextFetchTime()
|
void |
setResponseCode(int responseCode)
|
void |
setRetriesSinceFetch(int retries)
|
void |
setRobotsDelay(long robotsDelay)
|
void |
setScore(float score)
|
void |
setSignature(byte[] signature)
|
void |
setStatus(int status)
|
String |
toString()
|
void |
write(DataOutput out)
Writes the fields of this object to out. |
Methods inherited from class java.lang.Object |
---|
finalize, getClass, notify, notifyAll, wait, wait, wait |
Field Detail |
---|
public static final String GENERATE_DIR_NAME
public static final String FETCH_DIR_NAME
public static final String PARSE_DIR_NAME
public static final byte STATUS_DB_UNFETCHED
public static final byte STATUS_DB_FETCHED
public static final byte STATUS_DB_GONE
public static final byte STATUS_DB_REDIR_TEMP
public static final byte STATUS_DB_REDIR_PERM
public static final byte STATUS_DB_MAX
public static final byte STATUS_FETCH_SUCCESS
public static final byte STATUS_FETCH_RETRY
public static final byte STATUS_FETCH_REDIR_TEMP
public static final byte STATUS_FETCH_REDIR_PERM
public static final byte STATUS_FETCH_GONE
public static final byte STATUS_FETCH_CONTENT_LIMIT_EXCEEDED
public static final byte STATUS_FETCH_MAX
public static final byte STATUS_SIGNATURE
public static final byte STATUS_INJECTED
public static final byte STATUS_LINKED
public static final HashMap<Byte,String> statNames
Constructor Detail |
---|
public CrawlDatum()
public CrawlDatum(int status, float fetchInterval)
public CrawlDatum(int status, float fetchInterval, float score)
Method Detail |
---|
public static boolean hasDbStatus(CrawlDatum datum)
public static boolean hasFetchStatus(CrawlDatum datum)
public byte getStatus()
public static String getStatusName(byte value)
public void setStatus(int status)
public long getFetchTime()
public void setFetchTime(long fetchTime)
public long getRobotsDelay()
public void setRobotsDelay(long robotsDelay)
public long getResponseCode()
public void setResponseCode(int responseCode)
public void setNextFetchTime()
public long getModifiedTime()
public void setModifiedTime(long modifiedTime)
public byte getRetriesSinceFetch()
public void setRetriesSinceFetch(int retries)
public float getFetchInterval()
public void setFetchInterval(float fetchInterval)
public float getScore()
public void setScore(float score)
public byte[] getSignature()
public void setSignature(byte[] signature)
public void setMetaData(MapWritable mapWritable)
public MapWritable getMetaData()
public static CrawlDatum read(DataInput in) throws IOException
IOException
public void readFields(DataInput in) throws IOException
Writable
Reads the fields of this object from in. For efficiency,
implementations should attempt to re-use storage in the existing object
where possible.
readFields
in interface Writable
IOException
public void write(DataOutput out) throws IOException
Writable
Writes the fields of this object to out.
write
in interface Writable
IOException
public void set(CrawlDatum that)
public int compareTo(Object o)
compareTo
in interface Comparable
public String toString()
toString
in class Object
public boolean equals(Object o)
equals
in class Object
public int hashCode()
hashCode
in class Object
public Object clone()
clone
in class Object
|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |