From fe2549b11aaf53538768e36ba77a51b455e8b12a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franti=C5=A1ek=20Dvo=C5=99=C3=A1k?= <valtri@civ.zcu.cz> Date: Fri, 15 Oct 2021 15:19:29 +0200 Subject: [PATCH] Switch tar archiver to commons-compress + enable big files * switch archiver ant -> commons-compress * enable big files (posix mode) * remove assembly jar with dependencies (everything in Hadoop) * bump major version --- pom.xml | 36 ++++--------------- src/main/assembly/bin.xml | 34 ------------------ .../java/org/apache/hadoop/tar/HadoopTar.java | 30 ++++++++-------- 3 files changed, 21 insertions(+), 79 deletions(-) delete mode 100644 src/main/assembly/bin.xml diff --git a/pom.xml b/pom.xml index af3726f..8c78620 100644 --- a/pom.xml +++ b/pom.xml @@ -5,7 +5,7 @@ <artifactId>hadoop-tar</artifactId> <groupId>org.apache.hadoop.tar</groupId> - <version>1.0.2-SNAPSHOT</version> + <version>2.0.0-SNAPSHOT</version> <packaging>jar</packaging> <name>Hadoop Tar</name> @@ -40,30 +40,6 @@ <build> <plugins> - <plugin> - <artifactId>maven-assembly-plugin</artifactId> - <version>3.3.0</version> - <configuration> - <archive> - <manifest> - <mainClass>${exec.mainClass}</mainClass> - <classpathPrefix>lib/</classpathPrefix> - </manifest> - </archive> - <descriptors> - <descriptor>src/main/assembly/bin.xml</descriptor> - </descriptors> - </configuration> - <executions> - <execution> - <id>make-assembly</id> - <phase>package</phase> - <goals> - <goal>single</goal> - </goals> - </execution> - </executions> - </plugin> <plugin> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-compiler-plugin</artifactId> @@ -108,11 +84,6 @@ </build> <dependencies> - <dependency> - <groupId>org.apache.ant</groupId> - <artifactId>ant</artifactId> - <version>1.10.11</version> - </dependency> <dependency> <groupId>commons-cli</groupId> <artifactId>commons-cli</artifactId> @@ -123,6 +94,11 @@ <artifactId>commons-io</artifactId> <version>2.6</version> </dependency> + <dependency> + <groupId>org.apache.commons</groupId> + <artifactId>commons-compress</artifactId> + <version>1.4.1</version> + </dependency> <dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-common</artifactId> diff --git a/src/main/assembly/bin.xml b/src/main/assembly/bin.xml deleted file mode 100644 index c23aaa6..0000000 --- a/src/main/assembly/bin.xml +++ /dev/null @@ -1,34 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> - -<assembly xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0" - xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" - xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 - http://maven.apache.org/xsd/assembly-1.1.0.xsd"> - - <id>bin</id> - - <formats> - <format>jar</format> - </formats> - - <includeBaseDirectory>false</includeBaseDirectory> - - <fileSets> - <fileSet> - <directory>target/classes</directory> - <outputDirectory></outputDirectory> - </fileSet> - </fileSets> - - <dependencySets> - <dependencySet> - <outputDirectory>lib</outputDirectory> - <useProjectArtifact>false</useProjectArtifact> - <scope>runtime</scope> - <includes> - <include>org.apache.ant:ant</include> - </includes> - </dependencySet> - </dependencySets> -</assembly> - diff --git a/src/main/java/org/apache/hadoop/tar/HadoopTar.java b/src/main/java/org/apache/hadoop/tar/HadoopTar.java index 38294b3..87a9373 100644 --- a/src/main/java/org/apache/hadoop/tar/HadoopTar.java +++ b/src/main/java/org/apache/hadoop/tar/HadoopTar.java @@ -50,9 +50,9 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsPermission; -import org.apache.tools.tar.TarEntry; -import org.apache.tools.tar.TarInputStream; -import org.apache.tools.tar.TarOutputStream; +import org.apache.commons.compress.archivers.tar.TarArchiveEntry; +import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; +import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream; public class HadoopTar extends Configured implements Tool { @@ -176,14 +176,14 @@ public class HadoopTar extends Configured implements Tool { } while (bytesToBeCopied > 0 ) ; } - private void archive(TarOutputStream os, Path baseDir, Path p, + private void archive(TarArchiveOutputStream os, Path baseDir, Path p, boolean keepAbsolutePath, boolean optionVerbose ) throws IOException { if( optionVerbose ) { System.err.println(p.toString() ); } - TarEntry entry = new TarEntry(p.getName()); + TarArchiveEntry entry = new TarArchiveEntry(p.getName()); Path absolutePath = p.isAbsolute() ? p : new Path(baseDir, p); FileSystem fs = absolutePath.getFileSystem(getConf()); @@ -201,7 +201,7 @@ public class HadoopTar extends Configured implements Tool { if( fileStatus.isDirectory() ) { entry.setName(name + "/"); entry.setSize(0); - os.putNextEntry(entry); + os.putArchiveEntry(entry); for( FileStatus child : fs.listStatus(absolutePath) ) { archive(os, baseDir, new Path(p, child.getPath().getName()), keepAbsolutePath, optionVerbose ); @@ -209,7 +209,7 @@ public class HadoopTar extends Configured implements Tool { } else { entry.setName(name); entry.setSize(fileStatus.getLen()); - os.putNextEntry(entry); + os.putArchiveEntry(entry); InputStream in = fs.open(absolutePath); try { copyBytes(in, os, getConf().getInt("io.file.buffer.size", 4096), @@ -219,7 +219,7 @@ public class HadoopTar extends Configured implements Tool { in.close(); } } - os.closeEntry(); + os.closeArchiveEntry(); } } @@ -261,11 +261,11 @@ public class HadoopTar extends Configured implements Tool { boolean keepAbsolutePath, boolean optionVerbose ) throws IOException { - TarOutputStream tos = new TarOutputStream(os); + TarArchiveOutputStream tos = new TarArchiveOutputStream(os); // GNU tar extensions are used to store long file names in the archive. try { - tos.setBigNumberMode(TarOutputStream.BIGNUMBER_POSIX); - tos.setLongFileMode(TarOutputStream.LONGFILE_GNU); + tos.setBigNumberMode(TarArchiveOutputStream.BIGNUMBER_POSIX); + tos.setLongFileMode(TarArchiveOutputStream.LONGFILE_GNU); for (Path path : getTopSrcPaths(curDirPath, args, keepAbsolutePath ) ) { archive(tos, curDirPath, path, keepAbsolutePath, optionVerbose ); } @@ -285,7 +285,7 @@ public class HadoopTar extends Configured implements Tool { boolean dryrun ) throws IOException { boolean warningPrinted = false; - TarInputStream tarin; + TarArchiveInputStream tarin; if( keeppermission ) { FsPermission.setUMask(getConf(), new FsPermission((short)0)); } @@ -298,13 +298,13 @@ public class HadoopTar extends Configured implements Tool { } } - tarin = new TarInputStream(in); + tarin = new TarArchiveInputStream(in); try { - TarEntry entry; + TarArchiveEntry entry; String name; - while ((entry = tarin.getNextEntry()) != null) { + while ((entry = tarin.getNextTarEntry()) != null) { name = entry.getName(); if( optionVerbose ) { -- GitLab