|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object org.apache.hadoop.io.VersionedWritable org.apache.nutch.parse.ParseData
public final class ParseData
Data extracted from a page's content.
See also: Parse.getData()
Field Summary | |
---|---|
static String |
DIR_NAME
|
Constructor Summary | |
---|---|
ParseData()
|
|
ParseData(ParseStatus status,
String title,
Outlink[] outlinks,
Metadata contentMeta)
|
|
ParseData(ParseStatus status,
String title,
Outlink[] outlinks,
Metadata contentMeta,
Metadata parseMeta)
|
|
ParseData(ParseStatus status,
String title,
Outlink[] outlinks,
Metadata contentMeta,
Metadata parseMeta,
DocumentFragment root,
HTMLMetaTags metaTags)
|
Method Summary | |
---|---|
boolean |
equals(Object o)
|
Configuration |
getConf()
Return the configuration used by this object. |
Metadata |
getContentMeta()
The original Metadata retrieved from content |
DocumentFragment |
getDOMRoot()
Retrieve the DOM, if there is one. |
String |
getMeta(String name)
Get a metadata single value. |
HTMLMetaTags |
getMetaTag()
Returns the HTML meta tags which are populated by parsing the meta tags in the head of an HTML document. |
Outlink[] |
getOutlinks()
The outlinks of the page. |
Metadata |
getParseMeta()
Other content properties. |
ParseStatus |
getStatus()
The status of parsing the page. |
String |
getTitle()
The title of the page. |
byte |
getVersion()
Return the version number of the current implementation. |
int |
hashCode()
|
static ParseData |
read(DataInput in)
|
void |
readFields(DataInput in)
Reads the fields of this object from `in`. |
void |
setConf(Configuration conf)
Set the configuration to be used by this object. |
void |
setDOMRoot(DocumentFragment root)
Set the DOM. |
void |
setMetaTag(HTMLMetaTags metaTags)
|
void |
setParseMeta(Metadata parseMeta)
|
String |
toString()
|
void |
write(DataOutput out)
Writes the fields of this object to `out`. |
Methods inherited from class java.lang.Object |
---|
clone, finalize, getClass, notify, notifyAll, wait, wait, wait |
Field Detail |
---|
public static final String DIR_NAME
Constructor Detail |
---|
public ParseData()
public ParseData(ParseStatus status, String title, Outlink[] outlinks, Metadata contentMeta)
public ParseData(ParseStatus status, String title, Outlink[] outlinks, Metadata contentMeta, Metadata parseMeta)
public ParseData(ParseStatus status, String title, Outlink[] outlinks, Metadata contentMeta, Metadata parseMeta, DocumentFragment root, HTMLMetaTags metaTags)
Method Detail |
---|
public ParseStatus getStatus()
public String getTitle()
public Outlink[] getOutlinks()
public Metadata getContentMeta()
public Metadata getParseMeta()
public void setParseMeta(Metadata parseMeta)
public String getMeta(String name)
See also: getContentMeta(), getParseMeta()
public DocumentFragment getDOMRoot()
public void setDOMRoot(DocumentFragment root)
Parameters: root
public HTMLMetaTags getMetaTag()
public void setMetaTag(HTMLMetaTags metaTags)
public byte getVersion()
VersionedWritable
getVersion
in class VersionedWritable
public final void readFields(DataInput in) throws IOException
Description copied from interface: Writable
Reads the fields of this object from `in`. For efficiency, implementations should attempt to re-use storage in the existing object where possible.
readFields
in interface Writable
readFields
in class VersionedWritable
IOException
public final void write(DataOutput out) throws IOException
Description copied from interface: Writable
Writes the fields of this object to `out`.
write
in interface Writable
write
in class VersionedWritable
IOException
public static ParseData read(DataInput in) throws IOException
IOException
public boolean equals(Object o)
equals
in class Object
public int hashCode()
hashCode
in class Object
public String toString()
toString
in class Object
public void setConf(Configuration conf)
Configurable
setConf
in interface Configurable
public Configuration getConf()
Configurable
getConf
in interface Configurable
|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |