-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #2 from thaerkh/master
Add CombineManifestTextInputFormat class implementation.
- Loading branch information
Showing
4 changed files
with
123 additions
and
41 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
25 changes: 25 additions & 0 deletions
25
src/main/java/nicknack/CombineManifestTextInputFormat.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
package nicknack; | ||
|
||
import org.apache.hadoop.fs.FileStatus; | ||
import org.apache.hadoop.mapred.JobConf; | ||
import org.apache.hadoop.mapred.lib.CombineTextInputFormat; | ||
import org.apache.hadoop.mapreduce.JobContext; | ||
|
||
import java.io.IOException; | ||
import java.util.Arrays; | ||
import java.util.List; | ||
|
||
/** | ||
* Convert manifest file inputs into FileStatus objects and combines splits based on maxSplitSize. | ||
*/ | ||
public class CombineManifestTextInputFormat extends CombineTextInputFormat { | ||
|
||
/** | ||
* @return List of FileStatus objects from manifest file. | ||
*/ | ||
@Override | ||
protected List<FileStatus> listStatus(JobContext job) throws IOException { | ||
ManifestTextInputFormat manifestInput = new ManifestTextInputFormat(); | ||
return Arrays.asList(manifestInput.listStatus(new JobConf(job.getConfiguration()))); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
51 changes: 51 additions & 0 deletions
51
src/test/java/nicknack/CombineManifestTextInputFormatTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
package nicknack; | ||
|
||
import org.apache.hadoop.conf.Configuration; | ||
import org.apache.hadoop.fs.FileStatus; | ||
import org.apache.hadoop.mapreduce.JobContext; | ||
import org.junit.Before; | ||
import org.junit.Test; | ||
|
||
import java.io.File; | ||
import java.io.IOException; | ||
import java.net.URL; | ||
import java.util.ArrayList; | ||
import java.util.List; | ||
|
||
import static org.junit.Assert.assertArrayEquals; | ||
import static org.mockito.Mockito.mock; | ||
import static org.mockito.Mockito.when; | ||
|
||
|
||
public class CombineManifestTextInputFormatTest { | ||
private static String[] FILES = {"file-a.txt", "file-b.txt"}; | ||
private JobContext jobContext; | ||
|
||
private String stripPath(FileStatus status) { | ||
String pathStr = status.getPath().toString(); | ||
return pathStr.substring(pathStr.lastIndexOf('/') + 1); | ||
} | ||
|
||
@Before | ||
public void setup() { | ||
URL url = this.getClass().getResource("/manifest.txt"); | ||
|
||
File testManifest = new File(url.getFile()); | ||
Configuration conf = new Configuration(); | ||
conf.set("mapreduce.input.fileinputformat.inputdir", testManifest.toString()); | ||
|
||
jobContext = mock(JobContext.class); | ||
when(jobContext.getConfiguration()).thenReturn(conf); | ||
} | ||
|
||
@Test | ||
public void testManifestFile() throws IOException { | ||
CombineManifestTextInputFormat inputFormat = new CombineManifestTextInputFormat(); | ||
List<FileStatus> fileStatuses = inputFormat.listStatus(jobContext); | ||
ArrayList<String> actual = new ArrayList<String>(); | ||
for (FileStatus status : fileStatuses) { | ||
actual.add(stripPath(status)); | ||
} | ||
assertArrayEquals(FILES, actual.toArray(new String[0])); | ||
} | ||
} |