public class ShapeFileInputFormat
extends org.apache.hadoop.mapreduce.lib.input.FileInputFormat<org.apache.hadoop.io.LongWritable,org.apache.hadoop.io.MapWritable>
implements org.apache.hadoop.conf.Configurable
FileInputFormat
for ESRI Shapefiles. The input has to be the main file (.shp). The directories in HDFS where the index files and DBF files are located can be specified. A directory in which to find .cpg files can also be specified to define the encoding. By default, the same directory as the main shapefile is used. DBF and CPG files are optional. Keys are positions in the main file, and values are maps with the attributes of the Shapefile record, including the ShapeFileInputFormat.GEOMETRY_ATTRIBUTE.
Modifier and Type | Field and Description |
---|---|
static java.lang.String |
GEOMETRY_ATTRIBUTE
The name of the geometry attribute.
|
Constructor and Description |
---|
ShapeFileInputFormat() |
Modifier and Type | Method and Description |
---|---|
org.apache.hadoop.mapreduce.RecordReader<org.apache.hadoop.io.LongWritable,org.apache.hadoop.io.MapWritable> |
createRecordReader(org.apache.hadoop.mapreduce.InputSplit split, org.apache.hadoop.mapreduce.TaskAttemptContext context) |
org.apache.hadoop.conf.Configuration |
getConf() |
static java.lang.String |
getCPGFilesPath(org.apache.hadoop.conf.Configuration conf)
Returns the optional CPG files path.
|
static java.lang.String |
getDBFFilesPath(org.apache.hadoop.conf.Configuration conf)
Returns the optional DBF files path.
|
static java.lang.String |
getIndexFilesPath(org.apache.hadoop.conf.Configuration conf)
Returns the index files path.
|
protected boolean |
isSplitable(org.apache.hadoop.mapreduce.JobContext context, org.apache.hadoop.fs.Path fileName) |
void |
setConf(org.apache.hadoop.conf.Configuration conf) |
static void |
setCPGFilesPath(org.apache.hadoop.conf.Configuration conf, java.lang.String cpgFilePath)
Sets the CPG files path.
|
static void |
setDBFFilesPath(org.apache.hadoop.conf.Configuration conf, java.lang.String dbfFilePath)
Sets the DBF files path.
|
static void |
setIndexFilesPath(org.apache.hadoop.conf.Configuration conf, java.lang.String indexFilePath)
Sets the index files path.
|
addInputPath, addInputPaths, computeSplitSize, getBlockIndex, getFormatMinSplitSize, getInputPathFilter, getInputPaths, getMaxSplitSize, getMinSplitSize, getSplits, listStatus, setInputPathFilter, setInputPaths, setInputPaths, setMaxInputSplitSize, setMinInputSplitSize
public static java.lang.String GEOMETRY_ATTRIBUTE
public static java.lang.String getIndexFilesPath(org.apache.hadoop.conf.Configuration conf)
conf
- the job configuration
public static void setIndexFilesPath(org.apache.hadoop.conf.Configuration conf, java.lang.String indexFilePath)
conf
- the job configuration
indexFilePath
- the index files path
public static java.lang.String getDBFFilesPath(org.apache.hadoop.conf.Configuration conf)
conf
- the job configuration
public static void setDBFFilesPath(org.apache.hadoop.conf.Configuration conf, java.lang.String dbfFilePath)
conf
- the job configuration
dbfFilePath
- the DBF files path
public static java.lang.String getCPGFilesPath(org.apache.hadoop.conf.Configuration conf)
conf
- the job configuration
public static void setCPGFilesPath(org.apache.hadoop.conf.Configuration conf, java.lang.String cpgFilePath)
conf
- the job configuration
cpgFilePath
- the CPG files path
protected boolean isSplitable(org.apache.hadoop.mapreduce.JobContext context, org.apache.hadoop.fs.Path fileName)
isSplitable
in class org.apache.hadoop.mapreduce.lib.input.FileInputFormat<org.apache.hadoop.io.LongWritable,org.apache.hadoop.io.MapWritable>
public org.apache.hadoop.mapreduce.RecordReader<org.apache.hadoop.io.LongWritable,org.apache.hadoop.io.MapWritable> createRecordReader(org.apache.hadoop.mapreduce.InputSplit split, org.apache.hadoop.mapreduce.TaskAttemptContext context) throws java.io.IOException, java.lang.InterruptedException
createRecordReader
in class org.apache.hadoop.mapreduce.InputFormat<org.apache.hadoop.io.LongWritable,org.apache.hadoop.io.MapWritable>
java.io.IOException
java.lang.InterruptedException
public org.apache.hadoop.conf.Configuration getConf()
getConf
in interface org.apache.hadoop.conf.Configurable
public void setConf(org.apache.hadoop.conf.Configuration conf)
setConf
in interface org.apache.hadoop.conf.Configurable
Copyright © 2016 Oracle and/or its affiliates. All Rights Reserved.