K - the type of the input keys
V - the type of the input values
public class KMeansClustering<K,V> extends BaseJob<K,V>
Modifier and Type | Field and Description |
---|---|
static java.lang.String |
ARG_CLUSTERS_POINTS |
static java.lang.String |
ARG_CRIT_FUN_CLASS |
static java.lang.String |
ARG_DELETE_CLUSTER_FILES |
static java.lang.String |
ARG_K |
static java.lang.String |
ARG_MAX_ITERATIONS |
static java.lang.String |
ARG_MAX_MEMBER_DISTANCE |
static java.lang.String |
ARG_SHAPE_GEN_CLASS |
Constructor and Description |
---|
KMeansClustering() |
Modifier and Type | Method and Description |
---|---|
void |
configure(JobConf conf)
Validates and adds the current parameters to the job configuration
|
double[] |
getClustersPoints()
Gets the initial clusters points
|
java.lang.String |
getCmdOptions()
Gets a description of the arguments expected from command line.
|
java.lang.Class<? extends CriterionFunction> |
getCritetionFunctionClass()
Gets the CriterionFunction subclass to be used
|
java.util.Map<java.lang.String,java.lang.Object> |
getCurrentCmdArgs(Configuration conf)
Returns the current driver properties in a map where each key-value is a name and value of a command line argument.
|
Path |
getInClustersPath()
Gets the path where the input cluster information is located
|
int |
getIteration()
Gets the current iteration number
|
int |
getK()
Gets the number of clusters
|
int |
getMaxIterations()
Gets the maximum number of iterations allowed
|
java.lang.Class<? extends ClusterShapeGenerator> |
getShapeGeneratorClass()
Gets the ClusterShapeGenerator subclass to be used
|
Path |
getWorkDirPath()
Gets the path where the mapreduce output is stored at each iteration
|
boolean |
isDeletePreviousClusterFiles()
Specifies whether the output generated by previous iterations should be removed or not
|
static <K,V> void |
main(java.lang.String[] args) |
void |
processArgs(java.lang.String[] args, Configuration conf)
Extracts and validates arguments from the command line
|
int |
run(java.lang.String[] args) |
KMeans.KMeansIterationResult |
runIteration(int iteration, JobConf baseConf)
Launches a mapreduce job to run a single iteration
|
boolean |
runIterations(int maxIterations, JobConf baseConf)
Runs the K Means clustering algorithm using the given configuration
|
void |
setClustersPoints(double[] clustersPoints)
Sets the initial clusters points
|
void |
setCritetionFunctionClass(java.lang.Class<? extends CriterionFunction> critetionFunctionClass)
Sets the CriterionFunction subclass to be used
|
void |
setDeletePreviousClusterFiles(boolean deletePreviousClusterFiles)
Specifies whether the output generated by previous iterations should be removed or not
|
void |
setInClustersPath(Path inClustersPath)
Sets the path where the input cluster information is located
|
void |
setIteration(int iteration)
Sets the current iteration number
|
void |
setK(int k)
Sets the number of clusters
|
void |
setMaxIterations(int maxIterations)
Sets the maximum number of iterations allowed
|
void |
setOutput(java.lang.String output)
Sets the job output
|
void |
setShapeGeneratorClass(java.lang.Class<? extends ClusterShapeGenerator> shapeGeneratorClass)
Sets the ClusterShapeGenerator subclass to be used
|
getCmdOptionsWithInputDataSets, getCurrentCmdArgsAsString, getInput, getInputDataSet, getInputFormatClass, getJarClass, getOutput, getRecordInfoProviderClass, getSpatialConfig, setInput, setInputDataSet, setInputFormatClass, setJarClass, setRecordInfoProviderClass, setSpatialConfig
public static final java.lang.String ARG_CLUSTERS_POINTS
public static final java.lang.String ARG_CRIT_FUN_CLASS
public static final java.lang.String ARG_DELETE_CLUSTER_FILES
public static final java.lang.String ARG_K
public static final java.lang.String ARG_MAX_ITERATIONS
public static final java.lang.String ARG_MAX_MEMBER_DISTANCE
public static final java.lang.String ARG_SHAPE_GEN_CLASS
public void configure(JobConf conf) throws java.lang.Exception
BaseJob
public double[] getClustersPoints()
public java.lang.String getCmdOptions()
BaseJob
getCmdOptions
in class BaseJob<K,V>
public java.lang.Class<? extends CriterionFunction> getCritetionFunctionClass()
Gets the CriterionFunction subclass to be used
Returns: a CriterionFunction subclass
public java.util.Map<java.lang.String,java.lang.Object> getCurrentCmdArgs(Configuration conf)
BaseJob
getCurrentCmdArgs
in class BaseJob<K,V>
conf - a job configuration
public Path getInClustersPath()
public int getIteration()
public int getK()
public int getMaxIterations()
public java.lang.Class<? extends ClusterShapeGenerator> getShapeGeneratorClass()
Gets the ClusterShapeGenerator subclass to be used
Returns: a ClusterShapeGenerator subclass
public Path getWorkDirPath()
public boolean isDeletePreviousClusterFiles()
public static <K,V> void main(java.lang.String[] args) throws java.lang.Exception
java.lang.Exception
public void processArgs(java.lang.String[] args, Configuration conf) throws java.lang.Exception
BaseJob
processArgs
in class BaseJob<K,V>
args - arguments from the command line
conf - the job configuration
java.lang.Exception
public int run(java.lang.String[] args) throws java.lang.Exception
java.lang.Exception
public KMeans.KMeansIterationResult runIteration(int iteration, JobConf baseConf) throws java.lang.Exception
iteration - the iteration number
baseConf - the job configuration
Returns: a KMeans.KMeansIterationResult containing the results of the iteration
java.lang.Exception
public boolean runIterations(int maxIterations, JobConf baseConf) throws java.lang.Exception
maxIterations - the maximum number of iterations. If zero is passed, a default value based on the number of clusters will be used
baseConf - the job configuration
java.lang.Exception
public void setClustersPoints(double[] clustersPoints)
clustersPoints - an array of point ordinates in the form x1,y1,x2,y2,...,xK,yK
public void setCritetionFunctionClass(java.lang.Class<? extends CriterionFunction> critetionFunctionClass)
Sets the CriterionFunction subclass to be used
critetionFunctionClass - a CriterionFunction subclass
public void setDeletePreviousClusterFiles(boolean deletePreviousClusterFiles)
deletePreviousClusterFiles - true if intermediate output should be removed
public void setInClustersPath(Path inClustersPath)
inClustersPath - a path
public void setIteration(int iteration)
iteration - the current iteration number
public void setK(int k)
k - the number of clusters
public void setMaxIterations(int maxIterations)
maxIterations - the maximum number of iterations allowed
public void setOutput(java.lang.String output)
BaseJob
public void setShapeGeneratorClass(java.lang.Class<? extends ClusterShapeGenerator> shapeGeneratorClass)
Sets the ClusterShapeGenerator subclass to be used
shapeGeneratorClass - a ClusterShapeGenerator subclass
Copyright © 2017 Oracle and/or its affiliates. All Rights Reserved.