public class CrawlDatum extends Object implements WritableComparable, Cloneable
Modifier and Type | Class and Description |
---|---|
static class |
CrawlDatum.Comparator
A Comparator optimized for CrawlDatum.
|
Modifier and Type | Field and Description |
---|---|
static String |
FETCH_DIR_NAME |
static String |
GENERATE_DIR_NAME |
static String |
PARSE_DIR_NAME |
static HashMap<Byte,String> |
statNames |
static byte |
STATUS_DB_FETCHED
Page was successfully fetched.
|
static byte |
STATUS_DB_GONE
Page no longer exists.
|
static byte |
STATUS_DB_MAX
Maximum value of DB-related status.
|
static byte |
STATUS_DB_REDIR_PERM
Page permanently redirects to other page.
|
static byte |
STATUS_DB_REDIR_TEMP
Page temporarily redirects to other page.
|
static byte |
STATUS_DB_UNFETCHED
Page was not fetched yet.
|
static byte |
STATUS_FETCH_CONTENT_LIMIT_EXCEEDED
Fetching was successful but content was truncated.
|
static byte |
STATUS_FETCH_GONE
Fetching unsuccessful - page is gone.
|
static byte |
STATUS_FETCH_MAX
Maximum value of fetch-related status.
|
static byte |
STATUS_FETCH_REDIR_PERM
Fetching permanently redirected to other page.
|
static byte |
STATUS_FETCH_REDIR_TEMP
Fetching temporarily redirected to other page.
|
static byte |
STATUS_FETCH_RETRY
Fetching unsuccessful, needs to be retried (transient errors).
|
static byte |
STATUS_FETCH_SUCCESS
Fetching was successful.
|
static byte |
STATUS_INJECTED
Page was newly injected.
|
static byte |
STATUS_LINKED
Page discovered through a link.
|
static byte |
STATUS_SIGNATURE
Page signature.
|
Constructor and Description |
---|
CrawlDatum() |
CrawlDatum(int status,
float fetchInterval) |
CrawlDatum(int status,
float fetchInterval,
float score) |
Modifier and Type | Method and Description |
---|---|
Object |
clone() |
int |
compareTo(Object o)
Sort by decreasing score.
|
boolean |
equals(Object o) |
float |
getFetchInterval() |
long |
getFetchTime() |
MapWritable |
getMetaData()
Returns the MapWritable if it was set or read in readFields(DataInput);
returns an empty map if the CrawlDatum was freshly created (lazily instantiated).
|
long |
getModifiedTime() |
long |
getResponseCode() |
byte |
getRetriesSinceFetch() |
long |
getRobotsDelay() |
float |
getScore() |
byte[] |
getSignature() |
byte |
getStatus() |
static String |
getStatusName(byte value) |
static boolean |
hasDbStatus(CrawlDatum datum) |
static boolean |
hasFetchStatus(CrawlDatum datum) |
int |
hashCode() |
static CrawlDatum |
read(DataInput in) |
void |
readFields(DataInput in)
Reads the fields of this object from `in`. |
void |
set(CrawlDatum that)
Copy the contents of another instance into this instance.
|
void |
setFetchInterval(float fetchInterval) |
void |
setFetchTime(long fetchTime) |
void |
setMetaData(MapWritable mapWritable) |
void |
setModifiedTime(long modifiedTime) |
void |
setNextFetchTime() |
void |
setResponseCode(int responseCode) |
void |
setRetriesSinceFetch(int retries) |
void |
setRobotsDelay(long robotsDelay) |
void |
setScore(float score) |
void |
setSignature(byte[] signature) |
void |
setStatus(int status) |
String |
toString() |
void |
write(DataOutput out)
Writes the fields of this object to `out`. |
public static final String GENERATE_DIR_NAME
public static final String FETCH_DIR_NAME
public static final String PARSE_DIR_NAME
public static final byte STATUS_DB_UNFETCHED
public static final byte STATUS_DB_FETCHED
public static final byte STATUS_DB_GONE
public static final byte STATUS_DB_REDIR_TEMP
public static final byte STATUS_DB_REDIR_PERM
public static final byte STATUS_DB_MAX
public static final byte STATUS_FETCH_SUCCESS
public static final byte STATUS_FETCH_RETRY
public static final byte STATUS_FETCH_REDIR_TEMP
public static final byte STATUS_FETCH_REDIR_PERM
public static final byte STATUS_FETCH_GONE
public static final byte STATUS_FETCH_CONTENT_LIMIT_EXCEEDED
public static final byte STATUS_FETCH_MAX
public static final byte STATUS_SIGNATURE
public static final byte STATUS_INJECTED
public static final byte STATUS_LINKED
public CrawlDatum()
public CrawlDatum(int status, float fetchInterval)
public CrawlDatum(int status, float fetchInterval, float score)
public static boolean hasDbStatus(CrawlDatum datum)
public static boolean hasFetchStatus(CrawlDatum datum)
public byte getStatus()
public static String getStatusName(byte value)
public void setStatus(int status)
public long getFetchTime()
public void setFetchTime(long fetchTime)
public long getRobotsDelay()
public void setRobotsDelay(long robotsDelay)
public long getResponseCode()
public void setResponseCode(int responseCode)
public void setNextFetchTime()
public long getModifiedTime()
public void setModifiedTime(long modifiedTime)
public byte getRetriesSinceFetch()
public void setRetriesSinceFetch(int retries)
public float getFetchInterval()
public void setFetchInterval(float fetchInterval)
public float getScore()
public void setScore(float score)
public byte[] getSignature()
public void setSignature(byte[] signature)
public void setMetaData(MapWritable mapWritable)
public MapWritable getMetaData()
public static CrawlDatum read(DataInput in) throws IOException
Throws: IOException
public void readFields(DataInput in) throws IOException
Description copied from interface: Writable
Reads the fields of this object from `in`. For efficiency,
implementations should attempt to re-use storage in the existing object
where possible.
Specified by: readFields in interface Writable
Throws: IOException
public void write(DataOutput out) throws IOException
Description copied from interface: Writable
Writes the fields of this object to `out`.
Specified by: write in interface Writable
Throws: IOException
public void set(CrawlDatum that)
public int compareTo(Object o)
Specified by: compareTo in interface Comparable
Copyright © 2007, 2014, Oracle and/or its affiliates. All rights reserved.