Skip to content
Snippets Groups Projects
Commit fe2549b1 authored by František Dvořák
Browse files

Switch tar archiver to commons-compress + enable big files

* switch archiver ant -> commons-compress
* enable big files (posix mode)
* remove assembly jar with dependencies (everything needed is already provided by Hadoop)
* bump major version
parent 556476a7
No related branches found
No related tags found
No related merge requests found
Pipeline #775 passed
......@@ -5,7 +5,7 @@
<artifactId>hadoop-tar</artifactId>
<groupId>org.apache.hadoop.tar</groupId>
<version>1.0.2-SNAPSHOT</version>
<version>2.0.0-SNAPSHOT</version>
<packaging>jar</packaging>
<name>Hadoop Tar</name>
......@@ -40,30 +40,6 @@
<build>
<plugins>
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<version>3.3.0</version>
<configuration>
<archive>
<manifest>
<mainClass>${exec.mainClass}</mainClass>
<classpathPrefix>lib/</classpathPrefix>
</manifest>
</archive>
<descriptors>
<descriptor>src/main/assembly/bin.xml</descriptor>
</descriptors>
</configuration>
<executions>
<execution>
<id>make-assembly</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
......@@ -108,11 +84,6 @@
</build>
<dependencies>
<dependency>
<groupId>org.apache.ant</groupId>
<artifactId>ant</artifactId>
<version>1.10.11</version>
</dependency>
<dependency>
<groupId>commons-cli</groupId>
<artifactId>commons-cli</artifactId>
......@@ -123,6 +94,11 @@
<artifactId>commons-io</artifactId>
<version>2.6</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-compress</artifactId>
<version>1.4.1</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
......
<?xml version="1.0" encoding="UTF-8"?>
<assembly xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0
http://maven.apache.org/xsd/assembly-1.1.0.xsd">
<id>bin</id>
<formats>
<format>jar</format>
</formats>
<includeBaseDirectory>false</includeBaseDirectory>
<fileSets>
<fileSet>
<directory>target/classes</directory>
<outputDirectory></outputDirectory>
</fileSet>
</fileSets>
<dependencySets>
<dependencySet>
<outputDirectory>lib</outputDirectory>
<useProjectArtifact>false</useProjectArtifact>
<scope>runtime</scope>
<includes>
<include>org.apache.ant:ant</include>
</includes>
</dependencySet>
</dependencySets>
</assembly>
......@@ -50,9 +50,9 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.tools.tar.TarEntry;
import org.apache.tools.tar.TarInputStream;
import org.apache.tools.tar.TarOutputStream;
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;
public class HadoopTar extends Configured implements Tool {
......@@ -176,14 +176,14 @@ public class HadoopTar extends Configured implements Tool {
} while (bytesToBeCopied > 0 ) ;
}
private void archive(TarOutputStream os, Path baseDir, Path p,
private void archive(TarArchiveOutputStream os, Path baseDir, Path p,
boolean keepAbsolutePath, boolean optionVerbose )
throws IOException {
if( optionVerbose ) {
System.err.println(p.toString() );
}
TarEntry entry = new TarEntry(p.getName());
TarArchiveEntry entry = new TarArchiveEntry(p.getName());
Path absolutePath = p.isAbsolute() ? p : new Path(baseDir, p);
FileSystem fs = absolutePath.getFileSystem(getConf());
......@@ -201,7 +201,7 @@ public class HadoopTar extends Configured implements Tool {
if( fileStatus.isDirectory() ) {
entry.setName(name + "/");
entry.setSize(0);
os.putNextEntry(entry);
os.putArchiveEntry(entry);
for( FileStatus child : fs.listStatus(absolutePath) ) {
archive(os, baseDir, new Path(p, child.getPath().getName()),
keepAbsolutePath, optionVerbose );
......@@ -209,7 +209,7 @@ public class HadoopTar extends Configured implements Tool {
} else {
entry.setName(name);
entry.setSize(fileStatus.getLen());
os.putNextEntry(entry);
os.putArchiveEntry(entry);
InputStream in = fs.open(absolutePath);
try {
copyBytes(in, os, getConf().getInt("io.file.buffer.size", 4096),
......@@ -219,7 +219,7 @@ public class HadoopTar extends Configured implements Tool {
in.close();
}
}
os.closeEntry();
os.closeArchiveEntry();
}
}
......@@ -261,11 +261,11 @@ public class HadoopTar extends Configured implements Tool {
boolean keepAbsolutePath, boolean optionVerbose )
throws IOException {
TarOutputStream tos = new TarOutputStream(os);
TarArchiveOutputStream tos = new TarArchiveOutputStream(os);
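// GNU tar extensions are used to store long file names in the archive;
// POSIX (PAX) extended headers are used for numbers too big for the
// classic tar header (e.g. the size of very large files).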
try {
tos.setBigNumberMode(TarOutputStream.BIGNUMBER_POSIX);
tos.setLongFileMode(TarOutputStream.LONGFILE_GNU);
tos.setBigNumberMode(TarArchiveOutputStream.BIGNUMBER_POSIX);
tos.setLongFileMode(TarArchiveOutputStream.LONGFILE_GNU);
for (Path path : getTopSrcPaths(curDirPath, args, keepAbsolutePath ) ) {
archive(tos, curDirPath, path, keepAbsolutePath, optionVerbose );
}
......@@ -285,7 +285,7 @@ public class HadoopTar extends Configured implements Tool {
boolean dryrun )
throws IOException {
boolean warningPrinted = false;
TarInputStream tarin;
TarArchiveInputStream tarin;
if( keeppermission ) {
FsPermission.setUMask(getConf(), new FsPermission((short)0));
}
......@@ -298,13 +298,13 @@ public class HadoopTar extends Configured implements Tool {
}
}
tarin = new TarInputStream(in);
tarin = new TarArchiveInputStream(in);
try {
TarEntry entry;
TarArchiveEntry entry;
String name;
while ((entry = tarin.getNextEntry()) != null) {
while ((entry = tarin.getNextTarEntry()) != null) {
name = entry.getName();
if( optionVerbose ) {
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment