Commit fe2549b1 authored by František Dvořák

Switch tar archiver to commons-compress + enable big files

* switch the tar archiver from Ant to commons-compress
* enable big files (POSIX big-number mode)
* remove the assembly jar with bundled dependencies (everything needed is already provided by Hadoop)
* bump the major version
parent 556476a7
Pipeline #775 passed
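To illustrate the API this commit migrates to, here is a minimal, self-contained sketch of the commons-compress write path (the class and file names are illustrative, not part of this repository). It enables the same POSIX big-number and GNU long-file modes as createArchive() in the diff below, and follows the putArchiveEntry/write/closeArchiveEntry cycle used in archive(). BIGNUMBER_POSIX emits PAX extended headers for sizes beyond the 8 GiB limit of the classic tar header, which is what "enable big files (posix mode)" refers to.

import java.io.FileOutputStream;
import java.nio.charset.StandardCharsets;

import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;

public class TarWriteSketch {
  public static void main(String[] args) throws Exception {
    byte[] payload = "hello".getBytes(StandardCharsets.UTF_8);

    // close() finishes the archive, so try-with-resources is enough here.
    try (TarArchiveOutputStream tos =
             new TarArchiveOutputStream(new FileOutputStream("example.tar"))) {
      // POSIX big-number mode lifts the 8 GiB size limit of the classic header;
      // GNU long-file mode stores names longer than 100 characters.
      tos.setBigNumberMode(TarArchiveOutputStream.BIGNUMBER_POSIX);
      tos.setLongFileMode(TarArchiveOutputStream.LONGFILE_GNU);

      // Each entry follows the put -> write -> close cycle used in archive().
      TarArchiveEntry entry = new TarArchiveEntry("greeting.txt");
      entry.setSize(payload.length);
      tos.putArchiveEntry(entry);
      tos.write(payload);
      tos.closeArchiveEntry();
    }
  }
}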
pom.xml
@@ -5,7 +5,7 @@
   <artifactId>hadoop-tar</artifactId>
   <groupId>org.apache.hadoop.tar</groupId>
-  <version>1.0.2-SNAPSHOT</version>
+  <version>2.0.0-SNAPSHOT</version>
   <packaging>jar</packaging>
   <name>Hadoop Tar</name>
@@ -40,30 +40,6 @@
   <build>
     <plugins>
-      <plugin>
-        <artifactId>maven-assembly-plugin</artifactId>
-        <version>3.3.0</version>
-        <configuration>
-          <archive>
-            <manifest>
-              <mainClass>${exec.mainClass}</mainClass>
-              <classpathPrefix>lib/</classpathPrefix>
-            </manifest>
-          </archive>
-          <descriptors>
-            <descriptor>src/main/assembly/bin.xml</descriptor>
-          </descriptors>
-        </configuration>
-        <executions>
-          <execution>
-            <id>make-assembly</id>
-            <phase>package</phase>
-            <goals>
-              <goal>single</goal>
-            </goals>
-          </execution>
-        </executions>
-      </plugin>
       <plugin>
         <groupId>org.apache.maven.plugins</groupId>
         <artifactId>maven-compiler-plugin</artifactId>
@@ -108,11 +84,6 @@
   </build>
   <dependencies>
-    <dependency>
-      <groupId>org.apache.ant</groupId>
-      <artifactId>ant</artifactId>
-      <version>1.10.11</version>
-    </dependency>
     <dependency>
       <groupId>commons-cli</groupId>
       <artifactId>commons-cli</artifactId>
@@ -123,6 +94,11 @@
       <artifactId>commons-io</artifactId>
       <version>2.6</version>
     </dependency>
+    <dependency>
+      <groupId>org.apache.commons</groupId>
+      <artifactId>commons-compress</artifactId>
+      <version>1.4.1</version>
+    </dependency>
     <dependency>
       <groupId>org.apache.hadoop</groupId>
       <artifactId>hadoop-common</artifactId>
src/main/assembly/bin.xml (deleted)
-<?xml version="1.0" encoding="UTF-8"?>
-<assembly xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0"
-          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-          xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0
-                              http://maven.apache.org/xsd/assembly-1.1.0.xsd">
-  <id>bin</id>
-  <formats>
-    <format>jar</format>
-  </formats>
-  <includeBaseDirectory>false</includeBaseDirectory>
-  <fileSets>
-    <fileSet>
-      <directory>target/classes</directory>
-      <outputDirectory></outputDirectory>
-    </fileSet>
-  </fileSets>
-  <dependencySets>
-    <dependencySet>
-      <outputDirectory>lib</outputDirectory>
-      <useProjectArtifact>false</useProjectArtifact>
-      <scope>runtime</scope>
-      <includes>
-        <include>org.apache.ant:ant</include>
-      </includes>
-    </dependencySet>
-  </dependencySets>
-</assembly>
HadoopTar.java
@@ -50,9 +50,9 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
-import org.apache.tools.tar.TarEntry;
-import org.apache.tools.tar.TarInputStream;
-import org.apache.tools.tar.TarOutputStream;
+import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
+import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
+import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;
public class HadoopTar extends Configured implements Tool {
@@ -176,14 +176,14 @@ public class HadoopTar extends Configured implements Tool {
    } while (bytesToBeCopied > 0 ) ;
  }
-  private void archive(TarOutputStream os, Path baseDir, Path p,
+  private void archive(TarArchiveOutputStream os, Path baseDir, Path p,
      boolean keepAbsolutePath, boolean optionVerbose )
      throws IOException {
    if( optionVerbose ) {
      System.err.println(p.toString() );
    }
-    TarEntry entry = new TarEntry(p.getName());
+    TarArchiveEntry entry = new TarArchiveEntry(p.getName());
    Path absolutePath = p.isAbsolute() ? p : new Path(baseDir, p);
    FileSystem fs = absolutePath.getFileSystem(getConf());
@@ -201,7 +201,7 @@ public class HadoopTar extends Configured implements Tool {
      if( fileStatus.isDirectory() ) {
        entry.setName(name + "/");
        entry.setSize(0);
-        os.putNextEntry(entry);
+        os.putArchiveEntry(entry);
        for( FileStatus child : fs.listStatus(absolutePath) ) {
          archive(os, baseDir, new Path(p, child.getPath().getName()),
              keepAbsolutePath, optionVerbose );
@@ -209,7 +209,7 @@ public class HadoopTar extends Configured implements Tool {
      } else {
        entry.setName(name);
        entry.setSize(fileStatus.getLen());
-        os.putNextEntry(entry);
+        os.putArchiveEntry(entry);
        InputStream in = fs.open(absolutePath);
        try {
          copyBytes(in, os, getConf().getInt("io.file.buffer.size", 4096),
@@ -219,7 +219,7 @@ public class HadoopTar extends Configured implements Tool {
          in.close();
        }
      }
-      os.closeEntry();
+      os.closeArchiveEntry();
    }
  }
@@ -261,11 +261,11 @@ public class HadoopTar extends Configured implements Tool {
      boolean keepAbsolutePath, boolean optionVerbose )
      throws IOException {
-    TarOutputStream tos = new TarOutputStream(os);
+    TarArchiveOutputStream tos = new TarArchiveOutputStream(os);
    // GNU tar extensions are used to store long file names in the archive.
    try {
-      tos.setBigNumberMode(TarOutputStream.BIGNUMBER_POSIX);
-      tos.setLongFileMode(TarOutputStream.LONGFILE_GNU);
+      tos.setBigNumberMode(TarArchiveOutputStream.BIGNUMBER_POSIX);
+      tos.setLongFileMode(TarArchiveOutputStream.LONGFILE_GNU);
      for (Path path : getTopSrcPaths(curDirPath, args, keepAbsolutePath ) ) {
        archive(tos, curDirPath, path, keepAbsolutePath, optionVerbose );
      }
@@ -285,7 +285,7 @@ public class HadoopTar extends Configured implements Tool {
      boolean dryrun )
      throws IOException {
    boolean warningPrinted = false;
-    TarInputStream tarin;
+    TarArchiveInputStream tarin;
    if( keeppermission ) {
      FsPermission.setUMask(getConf(), new FsPermission((short)0));
    }
@@ -298,13 +298,13 @@ public class HadoopTar extends Configured implements Tool {
      }
    }
-    tarin = new TarInputStream(in);
+    tarin = new TarArchiveInputStream(in);
    try {
-      TarEntry entry;
+      TarArchiveEntry entry;
      String name;
-      while ((entry = tarin.getNextEntry()) != null) {
+      while ((entry = tarin.getNextTarEntry()) != null) {
        name = entry.getName();
        if( optionVerbose ) {
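On the read side, the extraction loop above maps onto the following minimal sketch (the class and file names are illustrative, not part of this repository): getNextTarEntry() advances the stream to each entry in turn, which is how the migrated code walks the archive; the sketch simply lists entry names and sizes.

import java.io.FileInputStream;

import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;

public class TarListSketch {
  public static void main(String[] args) throws Exception {
    try (TarArchiveInputStream tarin =
             new TarArchiveInputStream(new FileInputStream("example.tar"))) {
      // After getNextTarEntry(), the stream is positioned at that entry's data.
      TarArchiveEntry entry;
      while ((entry = tarin.getNextTarEntry()) != null) {
        System.err.println(entry.getName() + "\t" + entry.getSize());
      }
    }
  }
}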