public class Partitioning extends MultipleInputsJob
setSamplingRatio(double)
property (by default it is set to 0.1).
Modifier and Type | Field and Description |
---|---|
protected int |
defaultNumReduceTasks |
protected static java.lang.String |
HV_RESULT_FOLDER |
protected org.apache.hadoop.fs.Path |
hvResultPath |
static java.lang.String |
PARTITION_RESULT_FILE |
protected org.apache.hadoop.fs.Path |
partitionsPath |
protected static java.lang.String |
SAMPLE_FILE_NAME |
protected org.apache.hadoop.fs.Path |
samplePath |
protected double |
samplingRatio |
inputDataSets, miConf
argsp, inputDataSet, jarClass, jobRegEntryPath, proxyIDS
Constructor and Description |
---|
Partitioning() |
Modifier and Type | Method and Description |
---|---|
protected void |
clean(org.apache.hadoop.conf.Configuration conf) |
void |
configure(org.apache.hadoop.mapred.JobConf jobConf)
Validates and adds the current parameters to the job configuration
|
protected AbstractInputDataSet |
createSampleInputDataSet(org.apache.hadoop.conf.Configuration conf)
Samples the input data and creates a file containing the sampled geometries
|
protected void |
defineGlobalBounds()
Defines the dimension boundaries for the partitioning space based on the
SpatialOperationConfig and the SpatialConfig defined for each input data set. |
java.lang.String |
getCmdOptions()
Gets a description of the arguments expected from command line.
|
java.util.Map<java.lang.String,java.lang.Object> |
getCurrentCmdArgs(org.apache.hadoop.conf.Configuration conf)
Returns the current driver properties in a map where each key-value is a name and value of a command line argument.
|
protected void |
getCurrentCmdArgsAsString(java.util.Map<java.lang.String,java.lang.Object> cmdArgs, java.lang.StringBuilder buff) |
protected org.apache.hadoop.fs.Path |
getHVResultPartPath(org.apache.hadoop.fs.Path sampleDir, org.apache.hadoop.conf.Configuration conf) |
protected InputDataSetCmdArgsParserHandler |
getInputDataSetCmdParserHandler(org.apache.hadoop.conf.Configuration conf)
Gets the current instance of
InputDataSetCmdArgsParserHandler used to parse command line parameters for the input data set |
protected InputDataSetConfiguratorHandler |
getInputDataSetConfiguratorHandler(org.apache.hadoop.conf.Configuration conf)
Returns the current instance of
InputDataSetConfiguratorHandler used to configure the input data set |
org.apache.hadoop.fs.Path |
getPartitionsPath() |
protected long |
getPathsLength(org.apache.hadoop.fs.Path[] paths, org.apache.hadoop.conf.Configuration conf)
Deprecated.
|
double |
getSamplingRatio()
Gets the ratio of the sample size to the input data size
|
static void |
main(java.lang.String[] args) |
void |
processArgs(java.lang.String[] args, org.apache.hadoop.conf.Configuration conf)
Extracts and validates arguments from the command line
|
int |
run(java.lang.String[] args) |
boolean |
runFullPartitioningProcess(org.apache.hadoop.mapred.JobConf jobConf)
Runs the full partitioning process.
|
void |
setSamplingRatio(double samplingRatio)
Sets the ratio of the sample size to the input data size so only a fraction of the whole input data is used for partitioning.
|
addInputDataSet, asMultiInputDataSet, configureInputs, configureInputs, getInputListCmdOptions, getInputs, getMultipleInputDataSetsParams, removeInputDataSet, setInputDataSets, updateInputDataSet
addJobRegistryEntry, addJobRegistryEntry, addJobRegistryEntry, configure, createJob, createJob, createJob, createJob, createJobConf, createJobConf, createJobConf, getCmdOptionsWithInputDataSets, getCmdOptionsWithInputDataSets, getCurrentCmdArgsAsString, getInput, getInputDataSet, getInputFormatClass, getJarClass, getOutput, getRecordInfoProviderClass, getSpatialConfig, runJob, runJob, setInput, setInputDataSet, setInputFormatClass, setJarClass, setOutput, setRecordInfoProviderClass, setSpatialConfig
public static final java.lang.String PARTITION_RESULT_FILE
protected static final java.lang.String SAMPLE_FILE_NAME
protected static final java.lang.String HV_RESULT_FOLDER
protected double samplingRatio
protected int defaultNumReduceTasks
protected org.apache.hadoop.fs.Path samplePath
protected org.apache.hadoop.fs.Path hvResultPath
protected org.apache.hadoop.fs.Path partitionsPath
protected InputDataSetConfiguratorHandler getInputDataSetConfiguratorHandler(org.apache.hadoop.conf.Configuration conf)
BaseJob
InputDataSetConfiguratorHandler
used to configure the input data set
getInputDataSetConfiguratorHandler
in class BaseJob<java.lang.Object,java.lang.Object>
conf - a job configuration
InputDataSetConfiguratorHandler
protected InputDataSetCmdArgsParserHandler getInputDataSetCmdParserHandler(org.apache.hadoop.conf.Configuration conf)
BaseJob
InputDataSetCmdArgsParserHandler
used to parse command line parameters for the input data set
getInputDataSetCmdParserHandler
in class BaseJob<java.lang.Object,java.lang.Object>
conf - a job configuration
InputDataSetCmdArgsParserHandler
public org.apache.hadoop.fs.Path getPartitionsPath()
public void setSamplingRatio(double samplingRatio)
samplingRatio -
public double getSamplingRatio()
public void processArgs(java.lang.String[] args, org.apache.hadoop.conf.Configuration conf) throws java.lang.Exception
BaseJob
processArgs
in class MultipleInputsJob
args - arguments from the command line
conf - the job configuration
java.lang.Exception
protected void getCurrentCmdArgsAsString(java.util.Map<java.lang.String,java.lang.Object> cmdArgs, java.lang.StringBuilder buff)
getCurrentCmdArgsAsString
in class BaseJob<java.lang.Object,java.lang.Object>
public java.util.Map<java.lang.String,java.lang.Object> getCurrentCmdArgs(org.apache.hadoop.conf.Configuration conf)
BaseJob
getCurrentCmdArgs
in class BaseJob<java.lang.Object,java.lang.Object>
conf - a job configuration
public java.lang.String getCmdOptions()
BaseJob
getCmdOptions
in class BaseJob<java.lang.Object,java.lang.Object>
public void configure(org.apache.hadoop.mapred.JobConf jobConf) throws java.lang.Exception
BaseJob
protected void defineGlobalBounds()
SpatialOperationConfig
and the SpatialConfig
defined for each input data set.
protected AbstractInputDataSet createSampleInputDataSet(org.apache.hadoop.conf.Configuration conf) throws java.io.IOException
conf - the job configuration
java.io.IOException
public boolean runFullPartitioningProcess(org.apache.hadoop.mapred.JobConf jobConf) throws java.lang.Exception
jobConf -
java.lang.Exception
public int run(java.lang.String[] args) throws java.lang.Exception
java.lang.Exception
@Deprecated protected long getPathsLength(org.apache.hadoop.fs.Path[] paths, org.apache.hadoop.conf.Configuration conf) throws java.io.IOException
java.io.IOException
protected org.apache.hadoop.fs.Path getHVResultPartPath(org.apache.hadoop.fs.Path sampleDir, org.apache.hadoop.conf.Configuration conf) throws java.io.IOException
java.io.IOException
public static void main(java.lang.String[] args) throws java.lang.Exception
java.lang.Exception
protected void clean(org.apache.hadoop.conf.Configuration conf) throws java.io.IOException
java.io.IOException
Copyright © 2016 Oracle and/or its affiliates. All Rights Reserved.