public class SpatialJoin extends MultipleInputsJob
Partitioning is required to perform the spatial join. A spatial join can be executed as follows:
SpatialJoin instance
preprocess(JobConf)
configure(JobConf)
preprocess(JobConf).
Modifier and Type | Field and Description |
---|---|
protected static java.lang.String |
JOIN_FOLDER |
protected static java.lang.String |
PARTITIONING_FOLDER |
protected org.apache.hadoop.fs.Path |
partitioningResultPath |
protected double |
samplingRatio |
protected SpatialOperationConfig |
spatialOperationConfig |
inputDataSets, miConf
argsp, inputDataSet, jarClass, jobRegEntryPath, proxyIDS
Constructor and Description |
---|
SpatialJoin() |
Modifier and Type | Method and Description |
---|---|
void |
configure(org.apache.hadoop.mapred.JobConf jobConf)
Validates and adds the current parameters to the job configuration
|
protected void |
defineGlobaBounds() |
java.lang.String |
getCmdOptions()
Gets a description of the arguments expected from command line.
|
java.util.Map<java.lang.String,java.lang.Object> |
getCurrentCmdArgs(org.apache.hadoop.conf.Configuration conf)
Returns the current driver properties in a map where each key-value is a name and value of a command line argument.
|
protected InputDataSetCmdArgsParserHandler |
getInputDataSetCmdParserHandler(org.apache.hadoop.conf.Configuration conf)
Gets the current instance of
InputDataSetCmdArgsParserHandler used to parse command line parameters for the input data set |
protected InputDataSetConfiguratorHandler |
getInputDataSetConfiguratorHandler(org.apache.hadoop.conf.Configuration conf)
Returns the current instance of
InputDataSetConfiguratorHandler used to configure the input data set |
java.lang.String |
getOutput()
Gets the job output path
|
org.apache.hadoop.fs.Path |
getPartitioningResultPath()
Gets the location of a previously generated partitioning result file for the input data sets
|
protected java.lang.String |
getRootOutput() |
double |
getSamplingRatio()
Gets the ratio of the sample size to the input data size used to sample when a partitioning result file is not set
|
SpatialOperationConfig |
getSpatialOperationConfig()
Gets the spatial operation configuration used to perform the spatial join
|
protected boolean |
isPartitioningRequired(org.apache.hadoop.conf.Configuration conf) |
static void |
main(java.lang.String[] args) |
boolean |
preprocess(org.apache.hadoop.mapred.JobConf jobConf)
Checks whether partitioning is required and if so, it runs the partitioning process.
|
void |
processArgs(java.lang.String[] args, org.apache.hadoop.conf.Configuration conf)
Extracts and validates arguments from the command line
|
int |
run(java.lang.String[] args) |
void |
setPartitioningResultPath(org.apache.hadoop.fs.Path partitioningResultPath)
Sets the location of a previously generated partitioning result file for the input data sets
|
void |
setSamplingRatio(double samplingRatio)
Sets the ratio of the sample size to the input data size used to sample when a partitioning result file is not set
|
void |
setSpatialOperationConfig(SpatialOperationConfig spatialOperationConfig)
Sets the spatial operation configuration used to perform the spatial join
|
protected void |
setupPartitioningResult(org.apache.hadoop.fs.Path partitioningResultPath, org.apache.hadoop.conf.Configuration conf) |
addInputDataSet, asMultiInputDataSet, configureInputs, configureInputs, getInputListCmdOptions, getInputs, getMultipleInputDataSetsParams, removeInputDataSet, setInputDataSets, updateInputDataSet
addJobRegistryEntry, addJobRegistryEntry, addJobRegistryEntry, configure, createJob, createJob, createJob, createJob, createJobConf, createJobConf, createJobConf, getCmdOptionsWithInputDataSets, getCmdOptionsWithInputDataSets, getCurrentCmdArgsAsString, getCurrentCmdArgsAsString, getInput, getInputDataSet, getInputFormatClass, getJarClass, getRecordInfoProviderClass, getSpatialConfig, runJob, runJob, setInput, setInputDataSet, setInputFormatClass, setJarClass, setOutput, setRecordInfoProviderClass, setSpatialConfig
protected static final java.lang.String PARTITIONING_FOLDER
protected static final java.lang.String JOIN_FOLDER
protected org.apache.hadoop.fs.Path partitioningResultPath
protected SpatialOperationConfig spatialOperationConfig
protected double samplingRatio
protected InputDataSetConfiguratorHandler getInputDataSetConfiguratorHandler(org.apache.hadoop.conf.Configuration conf)
Description copied from class: BaseJob
Returns the current instance of InputDataSetConfiguratorHandler used to configure the input data set
Overrides: getInputDataSetConfiguratorHandler in class BaseJob<java.lang.Object,java.lang.Object>
Parameters: conf - a job configuration
Returns: InputDataSetConfiguratorHandler
protected InputDataSetCmdArgsParserHandler getInputDataSetCmdParserHandler(org.apache.hadoop.conf.Configuration conf)
Description copied from class: BaseJob
Gets the current instance of InputDataSetCmdArgsParserHandler used to parse command line parameters for the input data set
Overrides: getInputDataSetCmdParserHandler in class BaseJob<java.lang.Object,java.lang.Object>
Parameters: conf - a job configuration
Returns: InputDataSetCmdArgsParserHandler
public double getSamplingRatio()
public void setSamplingRatio(double samplingRatio)
Parameters: samplingRatio -
public org.apache.hadoop.fs.Path getPartitioningResultPath()
public void setPartitioningResultPath(org.apache.hadoop.fs.Path partitioningResultPath)
Parameters: partitioningResultPath -
public SpatialOperationConfig getSpatialOperationConfig()
public void setSpatialOperationConfig(SpatialOperationConfig spatialOperationConfig)
Parameters: spatialOperationConfig -
public java.lang.String getOutput()
BaseJob
protected java.lang.String getRootOutput()
public void processArgs(java.lang.String[] args, org.apache.hadoop.conf.Configuration conf) throws java.lang.Exception
BaseJob
Overrides: processArgs in class MultipleInputsJob
Parameters: args - arguments from the command line
conf - the job configuration
Throws: java.lang.Exception
public java.util.Map<java.lang.String,java.lang.Object> getCurrentCmdArgs(org.apache.hadoop.conf.Configuration conf)
BaseJob
Overrides: getCurrentCmdArgs in class BaseJob<java.lang.Object,java.lang.Object>
Parameters: conf - a job configuration
public java.lang.String getCmdOptions()
BaseJob
getCmdOptions
in class BaseJob<java.lang.Object,java.lang.Object>
public void configure(org.apache.hadoop.mapred.JobConf jobConf) throws java.lang.Exception
BaseJob
protected void setupPartitioningResult(org.apache.hadoop.fs.Path partitioningResultPath, org.apache.hadoop.conf.Configuration conf)
protected boolean isPartitioningRequired(org.apache.hadoop.conf.Configuration conf) throws java.io.IOException
java.io.IOException
public boolean preprocess(org.apache.hadoop.mapred.JobConf jobConf) throws java.lang.Exception
Parameters: jobConf - the job configuration
Throws: java.lang.Exception
protected void defineGlobaBounds()
public int run(java.lang.String[] args) throws java.lang.Exception
java.lang.Exception
public static void main(java.lang.String[] args) throws java.lang.Exception
java.lang.Exception
Copyright © 2016 Oracle and/or its affiliates. All Rights Reserved.