From fe2549b11aaf53538768e36ba77a51b455e8b12a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franti=C5=A1ek=20Dvo=C5=99=C3=A1k?= <valtri@civ.zcu.cz>
Date: Fri, 15 Oct 2021 15:19:29 +0200
Subject: [PATCH] Switch tar archiver to commons-compress + enable big files

* switch the tar archiver from Ant (org.apache.tools.tar) to commons-compress
* enable big files (POSIX big-number mode)
* remove the assembly jar with bundled dependencies (everything needed is already in Hadoop)
* bump the major version
---
 pom.xml                                       | 36 ++++---------------
 src/main/assembly/bin.xml                     | 34 ------------------
 .../java/org/apache/hadoop/tar/HadoopTar.java | 30 ++++++++--------
 3 files changed, 21 insertions(+), 79 deletions(-)
 delete mode 100644 src/main/assembly/bin.xml

diff --git a/pom.xml b/pom.xml
index af3726f..8c78620 100644
--- a/pom.xml
+++ b/pom.xml
@@ -5,7 +5,7 @@
 
 	<artifactId>hadoop-tar</artifactId>
 	<groupId>org.apache.hadoop.tar</groupId>
-	<version>1.0.2-SNAPSHOT</version>
+	<version>2.0.0-SNAPSHOT</version>
 	<packaging>jar</packaging>
 
 	<name>Hadoop Tar</name>
@@ -40,30 +40,6 @@
 
 	<build>
 		<plugins>
-			<plugin>
-				<artifactId>maven-assembly-plugin</artifactId>
-				<version>3.3.0</version>
-				<configuration>
-					<archive>
-						<manifest>
-							<mainClass>${exec.mainClass}</mainClass>
-							<classpathPrefix>lib/</classpathPrefix>
-						</manifest>
-					</archive>
-					<descriptors>
-						<descriptor>src/main/assembly/bin.xml</descriptor>
-					</descriptors>
-				</configuration>
-				<executions>
-					<execution>
-						<id>make-assembly</id>
-						<phase>package</phase>
-						<goals>
-							<goal>single</goal>
-						</goals>
-					</execution>
-				</executions>
-			</plugin>
 			<plugin>
 				<groupId>org.apache.maven.plugins</groupId>
 				<artifactId>maven-compiler-plugin</artifactId>
@@ -108,11 +84,6 @@
 	</build>
 
 	<dependencies>
-		<dependency>
-			<groupId>org.apache.ant</groupId>
-			<artifactId>ant</artifactId>
-			<version>1.10.11</version>
-		</dependency>
 		<dependency>
 			<groupId>commons-cli</groupId>
 			<artifactId>commons-cli</artifactId>
@@ -123,6 +94,11 @@
 			<artifactId>commons-io</artifactId>
 			<version>2.6</version>
 		</dependency>
+		<dependency>
+			<groupId>org.apache.commons</groupId>
+			<artifactId>commons-compress</artifactId>
+			<version>1.4.1</version>
+		</dependency>
 		<dependency>
 			<groupId>org.apache.hadoop</groupId>
 			<artifactId>hadoop-common</artifactId>
diff --git a/src/main/assembly/bin.xml b/src/main/assembly/bin.xml
deleted file mode 100644
index c23aaa6..0000000
--- a/src/main/assembly/bin.xml
+++ /dev/null
@@ -1,34 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-
-<assembly xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0"
-		  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-		  xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0
-							  http://maven.apache.org/xsd/assembly-1.1.0.xsd">
-
-	<id>bin</id>
-
-	<formats>
-		<format>jar</format>
-	</formats>
-
-	<includeBaseDirectory>false</includeBaseDirectory>
-
-	<fileSets>
-		<fileSet>
-			<directory>target/classes</directory>
-			<outputDirectory></outputDirectory>
-		</fileSet>
-	</fileSets>
-
-	<dependencySets>
-		<dependencySet>
-			<outputDirectory>lib</outputDirectory>
-			<useProjectArtifact>false</useProjectArtifact>
-			<scope>runtime</scope>
-			<includes>
-				<include>org.apache.ant:ant</include>
-			</includes>
-		</dependencySet>
-	</dependencySets>
-</assembly>
-
diff --git a/src/main/java/org/apache/hadoop/tar/HadoopTar.java b/src/main/java/org/apache/hadoop/tar/HadoopTar.java
index 38294b3..87a9373 100644
--- a/src/main/java/org/apache/hadoop/tar/HadoopTar.java
+++ b/src/main/java/org/apache/hadoop/tar/HadoopTar.java
@@ -50,9 +50,9 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.permission.FsPermission;
 
-import org.apache.tools.tar.TarEntry;
-import org.apache.tools.tar.TarInputStream;
-import org.apache.tools.tar.TarOutputStream;
+import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
+import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
+import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;
 
 
 public class HadoopTar extends Configured implements Tool {
@@ -176,14 +176,14 @@ public class HadoopTar extends Configured implements Tool {
     } while (bytesToBeCopied > 0 ) ;
   }
 
-  private void archive(TarOutputStream os, Path baseDir, Path p,
+  private void archive(TarArchiveOutputStream os, Path baseDir, Path p,
                        boolean keepAbsolutePath, boolean optionVerbose )
                        throws IOException {
     if( optionVerbose ) {
       System.err.println(p.toString() );
     }
 
-    TarEntry entry = new TarEntry(p.getName());
+    TarArchiveEntry entry = new TarArchiveEntry(p.getName());
 
     Path absolutePath = p.isAbsolute() ? p : new Path(baseDir, p);
     FileSystem fs = absolutePath.getFileSystem(getConf());
@@ -201,7 +201,7 @@ public class HadoopTar extends Configured implements Tool {
     if( fileStatus.isDirectory() ) {
       entry.setName(name + "/");
       entry.setSize(0);
-      os.putNextEntry(entry);
+      os.putArchiveEntry(entry);
       for( FileStatus child : fs.listStatus(absolutePath) ) {
         archive(os, baseDir, new Path(p, child.getPath().getName()),
                 keepAbsolutePath, optionVerbose );
@@ -209,7 +209,7 @@ public class HadoopTar extends Configured implements Tool {
     } else {
       entry.setName(name);
       entry.setSize(fileStatus.getLen());
-      os.putNextEntry(entry);
+      os.putArchiveEntry(entry);
       InputStream in = fs.open(absolutePath);
       try {
         copyBytes(in, os, getConf().getInt("io.file.buffer.size", 4096),
@@ -219,7 +219,7 @@ public class HadoopTar extends Configured implements Tool {
           in.close();
         }
       }
-      os.closeEntry();
+      os.closeArchiveEntry();
     }
   }
 
@@ -261,11 +261,11 @@ public class HadoopTar extends Configured implements Tool {
                       boolean keepAbsolutePath, boolean optionVerbose )
                       throws IOException {
 
-    TarOutputStream tos = new TarOutputStream(os);
+    TarArchiveOutputStream tos = new TarArchiveOutputStream(os);
     // GNU tar extensions are used to store long file names in the archive.
     try {
-      tos.setBigNumberMode(TarOutputStream.BIGNUMBER_POSIX);
-      tos.setLongFileMode(TarOutputStream.LONGFILE_GNU);
+      tos.setBigNumberMode(TarArchiveOutputStream.BIGNUMBER_POSIX);
+      tos.setLongFileMode(TarArchiveOutputStream.LONGFILE_GNU);
       for (Path path : getTopSrcPaths(curDirPath, args, keepAbsolutePath ) ) {
         archive(tos, curDirPath, path, keepAbsolutePath, optionVerbose );
       }
@@ -285,7 +285,7 @@ public class HadoopTar extends Configured implements Tool {
                        boolean dryrun )
                        throws IOException {
     boolean warningPrinted = false;
-    TarInputStream tarin;
+    TarArchiveInputStream tarin;
     if( keeppermission ) {
       FsPermission.setUMask(getConf(), new FsPermission((short)0));
     }
@@ -298,13 +298,13 @@ public class HadoopTar extends Configured implements Tool {
       }
     }
 
-    tarin = new TarInputStream(in);
+    tarin = new TarArchiveInputStream(in);
 
 
     try {
-      TarEntry entry;
+      TarArchiveEntry entry;
       String name;
-      while ((entry = tarin.getNextEntry()) != null) {
+      while ((entry = tarin.getNextTarEntry()) != null) {
         name = entry.getName();
 
         if( optionVerbose ) {
-- 
GitLab