public class SpatialJoin extends MultipleInputsJob
Partitioning
is required to perform the spatial join. A spatial join can be executed as follows:
SpatialJoin
instance
preprocess(JobConf)
configure(JobConf)
preprocess(JobConf).
Modifier and Type | Field and Description |
---|---|
protected static java.lang.String |
JOIN_FOLDER |
protected static java.lang.String |
PARTITIONING_FOLDER |
protected org.apache.hadoop.fs.Path |
partitioningResultPath |
protected double |
samplingRatio |
protected SpatialOperationConfig |
spatialOperationConfig |
inputDataSets, miConf
Constructor and Description |
---|
SpatialJoin() |
Modifier and Type | Method and Description |
---|---|
void |
configure(org.apache.hadoop.mapred.JobConf jobConf)
Validates and adds the current parameters to the job configuration
|
protected void |
defineGlobaBounds() |
java.lang.String |
getCmdOptions()
Gets a description of the arguments expected from command line.
|
java.lang.String |
getOutput()
Gets the job output path
|
org.apache.hadoop.fs.Path |
getPartitioningResultPath()
Gets the location of a previously generated partitioning result file for the input data sets
|
protected java.lang.String |
getRootOutput() |
double |
getSamplingRatio()
Gets the ratio of the sample size to the input data size used to sample when a partitioning result file is not set
|
SpatialOperationConfig |
getSpatialOperationConfig()
Gets the spatial operation configuration used to perform the spatial join
|
protected boolean |
isPartitioningRequired(org.apache.hadoop.conf.Configuration conf) |
static void |
main(java.lang.String[] args) |
boolean |
preprocess(org.apache.hadoop.mapred.JobConf jobConf)
Checks whether partitioning is required and if so, it runs the partitioning process.
|
void |
processArgs(java.lang.String[] args, org.apache.hadoop.conf.Configuration conf)
Extracts and validates arguments from the command line
|
int |
run(java.lang.String[] args) |
void |
setPartitioningResultPath(org.apache.hadoop.fs.Path partitioningResultPath)
Sets the location of a previously generated partitioning result file for the input data sets
|
void |
setSamplingRatio(double samplingRatio)
Sets the ratio of the sample size to the input data size used to sample when a partitioning result file is not set
|
void |
setSpatialOperationConfig(SpatialOperationConfig spatialOperationConfig)
Sets the spatial operation configuration used to perform the spatial join
|
protected void |
setupPartitioningResult(org.apache.hadoop.fs.Path partitioningResultPath, org.apache.hadoop.conf.Configuration conf) |
addInputDataSet, configure, configureInputs, configureInputs, getInputListCmdOptions, getInputs, getMultipleInputDataSetsParams, removeInputDataSet, setInputDataSets
createJob, createJob, createJob, createJob, createJobConf, createJobConf, createJobConf, getInput, getInputFormatClass, getJarClass, getRecordInfoProviderClass, getSpatialConfig, setInput, setInputFormatClass, setJarClass, setOutput, setRecordInfoProviderClass, setSpatialConfig
protected static final java.lang.String PARTITIONING_FOLDER
protected static final java.lang.String JOIN_FOLDER
protected org.apache.hadoop.fs.Path partitioningResultPath
protected SpatialOperationConfig spatialOperationConfig
protected double samplingRatio
public double getSamplingRatio()
public void setSamplingRatio(double samplingRatio)
samplingRatio
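- the ratio of the sample size to the input data size used to sample when a partitioning result file is not set
public org.apache.hadoop.fs.Path getPartitioningResultPath()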
public void setPartitioningResultPath(org.apache.hadoop.fs.Path partitioningResultPath)
partitioningResultPath
- the location of a previously generated partitioning result file for the input data sets
public SpatialOperationConfig getSpatialOperationConfig()
public void setSpatialOperationConfig(SpatialOperationConfig spatialOperationConfig)
spatialOperationConfig
- the spatial operation configuration used to perform the spatial join
public java.lang.String getOutput()
BaseJob
protected java.lang.String getRootOutput()
public void processArgs(java.lang.String[] args, org.apache.hadoop.conf.Configuration conf) throws java.lang.Exception
BaseJob
processArgs
in class MultipleInputsJob
args
- arguments from the command line
conf
- the job configuration
java.lang.Exception
public java.lang.String getCmdOptions()
BaseJob
getCmdOptions
in class MultipleInputsJob
public void configure(org.apache.hadoop.mapred.JobConf jobConf) throws java.lang.Exception
BaseJob
configure
in class MultipleInputsJob
jobConf
- the job configuration
java.lang.Exception
protected void setupPartitioningResult(org.apache.hadoop.fs.Path partitioningResultPath, org.apache.hadoop.conf.Configuration conf)
protected boolean isPartitioningRequired(org.apache.hadoop.conf.Configuration conf) throws java.io.IOException
java.io.IOException
public boolean preprocess(org.apache.hadoop.mapred.JobConf jobConf) throws java.lang.Exception
jobConf
- the job configuration
java.lang.Exception
protected void defineGlobaBounds()
public int run(java.lang.String[] args) throws java.lang.Exception
java.lang.Exception
public static void main(java.lang.String[] args) throws java.lang.Exception
java.lang.Exception