From d8951df897280187f60aac9cf80b2a3230e27aab Mon Sep 17 00:00:00 2001 From: fuchaohong Date: Mon, 14 Apr 2025 18:08:49 +0800 Subject: [PATCH 1/6] HADOOP-19541. Make HadoopArchives support human-friendly units about blocksize and partsize. --- .../apache/hadoop/tools/HadoopArchives.java | 4 +- .../hadoop/tools/TestHadoopArchives.java | 63 ++++++++++++++++++- 2 files changed, 64 insertions(+), 3 deletions(-) diff --git a/hadoop-tools/hadoop-archives/src/main/java/org/apache/hadoop/tools/HadoopArchives.java b/hadoop-tools/hadoop-archives/src/main/java/org/apache/hadoop/tools/HadoopArchives.java index 0773c79bdf992..fb400c8385fca 100644 --- a/hadoop-tools/hadoop-archives/src/main/java/org/apache/hadoop/tools/HadoopArchives.java +++ b/hadoop-tools/hadoop-archives/src/main/java/org/apache/hadoop/tools/HadoopArchives.java @@ -469,8 +469,8 @@ void archive(Path parentPath, List srcPaths, int numFiles = 0; long totalSize = 0; FileSystem fs = parentPath.getFileSystem(conf); - this.blockSize = conf.getLong(HAR_BLOCKSIZE_LABEL, blockSize); - this.partSize = conf.getLong(HAR_PARTSIZE_LABEL, partSize); + this.blockSize = conf.getLongBytes(HAR_BLOCKSIZE_LABEL, blockSize); + this.partSize = conf.getLongBytes(HAR_PARTSIZE_LABEL, partSize); conf.setLong(HAR_BLOCKSIZE_LABEL, blockSize); conf.setLong(HAR_PARTSIZE_LABEL, partSize); conf.set(DST_HAR_LABEL, archiveName); diff --git a/hadoop-tools/hadoop-archives/src/test/java/org/apache/hadoop/tools/TestHadoopArchives.java b/hadoop-tools/hadoop-archives/src/test/java/org/apache/hadoop/tools/TestHadoopArchives.java index 3267a683c275c..80a58d9ff47c6 100644 --- a/hadoop-tools/hadoop-archives/src/test/java/org/apache/hadoop/tools/TestHadoopArchives.java +++ b/hadoop-tools/hadoop-archives/src/test/java/org/apache/hadoop/tools/TestHadoopArchives.java @@ -804,5 +804,66 @@ public void testCopyToLocal() throws Exception { localFs.delete(tmpPath, true); } } - + + @Test + public void testBlockSize() throws Exception { + conf.set(HadoopArchives.HAR_BLOCKSIZE_LABEL, "1m"); + + final String inputPathStr = inputPath.toUri().getPath(); + System.out.println("inputPathStr = " + inputPathStr); + + final String harName = "foo.har"; + final String[] args = + { "-archiveName", harName, "-p", inputPathStr, "*", + archivePath.toString() }; + System.setProperty(HadoopArchives.TEST_HADOOP_ARCHIVES_JAR_PATH, + HADOOP_ARCHIVES_JAR); + final HadoopArchives har = new HadoopArchives(conf); + assertEquals(0, ToolRunner.run(har, args)); + + RemoteIterator listFiles = + fs.listFiles(new Path(archivePath.toString() + "/" + harName), false); + while (listFiles.hasNext()) { + LocatedFileStatus next = listFiles.next(); + if (next.getPath().toString().startsWith("part-")) { + // compare blockSize + Assert.assertEquals(1 * 1024 * 1024, next.getBlockSize()); + } + } + } + + @Test + public void testPartfileSize() throws Exception { + conf.set(HadoopArchives.HAR_PARTSIZE_LABEL, "2m"); + + final Path sub1 = new Path(inputPath, "dir1"); + fs.mkdirs(sub1); + for (int i = 0; i < 10; i++) { + createFile(sub1, fs, new byte[1 * 1024 * 1024], sub1.getName(), "file" + i); + } + final String inputPathStr = sub1.toUri().getPath(); + System.out.println("inputPathStr = " + inputPathStr); + + final String harName = "foo.har"; + final String[] args = + { "-archiveName", harName, "-p", inputPathStr, "*", + archivePath.toString() }; + System.setProperty(HadoopArchives.TEST_HADOOP_ARCHIVES_JAR_PATH, + HADOOP_ARCHIVES_JAR); + final HadoopArchives har = new HadoopArchives(conf); + assertEquals(0, ToolRunner.run(har, args)); + + RemoteIterator listFiles = + fs.listFiles(new Path(archivePath.toString() + "/" + harName), false); + int i = 0; + while (listFiles.hasNext()) { + LocatedFileStatus next = listFiles.next(); + if (next.getPath().toString().startsWith("part-")) { + // compare partfileSize + assertEquals(2 * 1024 * 1024, next.getLen()); + i++; + } + } + assertEquals(5, i); + } } From 6558b9859437c77c56a2e89849cdcea28171f63f Mon Sep 17 00:00:00 2001 From: fuchaohong Date: Tue, 15 Apr 2025 10:13:34 +0800 Subject: [PATCH 2/6] fix ut. --- .../test/java/org/apache/hadoop/tools/TestHadoopArchives.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hadoop-tools/hadoop-archives/src/test/java/org/apache/hadoop/tools/TestHadoopArchives.java b/hadoop-tools/hadoop-archives/src/test/java/org/apache/hadoop/tools/TestHadoopArchives.java index 80a58d9ff47c6..917c6ea3a6cc3 100644 --- a/hadoop-tools/hadoop-archives/src/test/java/org/apache/hadoop/tools/TestHadoopArchives.java +++ b/hadoop-tools/hadoop-archives/src/test/java/org/apache/hadoop/tools/TestHadoopArchives.java @@ -825,7 +825,7 @@ public void testBlockSize() throws Exception { fs.listFiles(new Path(archivePath.toString() + "/" + harName), false); while (listFiles.hasNext()) { LocatedFileStatus next = listFiles.next(); - if (next.getPath().toString().startsWith("part-")) { + if (next.getPath().toString().matches(".*/part-\\d+$")) { // compare blockSize Assert.assertEquals(1 * 1024 * 1024, next.getBlockSize()); } @@ -858,7 +858,7 @@ public void testPartfileSize() throws Exception { int i = 0; while (listFiles.hasNext()) { LocatedFileStatus next = listFiles.next(); - if (next.getPath().toString().startsWith("part-")) { + if (next.getPath().toString().matches(".*/part-\\d+$")) { // compare partfileSize assertEquals(2 * 1024 * 1024, next.getLen()); i++; From 185fe52b70cb43f1dcd7cc02991c226b8901c0ff Mon Sep 17 00:00:00 2001 From: fuchaohong Date: Tue, 15 Apr 2025 13:44:11 +0800 Subject: [PATCH 3/6] fix UT --- .../test/java/org/apache/hadoop/tools/TestHadoopArchives.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hadoop-tools/hadoop-archives/src/test/java/org/apache/hadoop/tools/TestHadoopArchives.java b/hadoop-tools/hadoop-archives/src/test/java/org/apache/hadoop/tools/TestHadoopArchives.java index 917c6ea3a6cc3..20072b7fed742 100644 --- a/hadoop-tools/hadoop-archives/src/test/java/org/apache/hadoop/tools/TestHadoopArchives.java +++ b/hadoop-tools/hadoop-archives/src/test/java/org/apache/hadoop/tools/TestHadoopArchives.java @@ -841,12 +841,12 @@ public void testPartfileSize() throws Exception { for (int i = 0; i < 10; i++) { createFile(sub1, fs, new byte[1 * 1024 * 1024], sub1.getName(), "file" + i); } - final String inputPathStr = sub1.toUri().getPath(); + final String inputPathStr = inputPath.toUri().getPath(); System.out.println("inputPathStr = " + inputPathStr); final String harName = "foo.har"; final String[] args = - { "-archiveName", harName, "-p", inputPathStr, "*", + { "-archiveName", harName, "-p", inputPathStr, "dir1", archivePath.toString() }; System.setProperty(HadoopArchives.TEST_HADOOP_ARCHIVES_JAR_PATH, HADOOP_ARCHIVES_JAR); From 42dfa612694f600f8e12b1cbefc68a40515fad75 Mon Sep 17 00:00:00 2001 From: fuchaohong Date: Fri, 18 Apr 2025 14:30:07 +0800 Subject: [PATCH 4/6] fix ut --- .../hadoop/tools/TestHadoopArchives.java | 35 ------------------- 1 file changed, 35 deletions(-) diff --git a/hadoop-tools/hadoop-archives/src/test/java/org/apache/hadoop/tools/TestHadoopArchives.java b/hadoop-tools/hadoop-archives/src/test/java/org/apache/hadoop/tools/TestHadoopArchives.java index 20072b7fed742..d714468da163b 100644 --- a/hadoop-tools/hadoop-archives/src/test/java/org/apache/hadoop/tools/TestHadoopArchives.java +++ b/hadoop-tools/hadoop-archives/src/test/java/org/apache/hadoop/tools/TestHadoopArchives.java @@ -831,39 +831,4 @@ public void testBlockSize() throws Exception { } } } - - @Test - public void testPartfileSize() throws Exception { - conf.set(HadoopArchives.HAR_PARTSIZE_LABEL, "2m"); - - final Path sub1 = new Path(inputPath, "dir1"); - fs.mkdirs(sub1); - for (int i = 0; i < 10; i++) { - createFile(sub1, fs, new byte[1 * 1024 * 1024], sub1.getName(), "file" + i); - } - final String inputPathStr = inputPath.toUri().getPath(); - System.out.println("inputPathStr = " + inputPathStr); - - final String harName = "foo.har"; - final String[] args = - { "-archiveName", harName, "-p", inputPathStr, "dir1", - archivePath.toString() }; - System.setProperty(HadoopArchives.TEST_HADOOP_ARCHIVES_JAR_PATH, - HADOOP_ARCHIVES_JAR); - final HadoopArchives har = new HadoopArchives(conf); - assertEquals(0, ToolRunner.run(har, args)); - - RemoteIterator listFiles = - fs.listFiles(new Path(archivePath.toString() + "/" + harName), false); - int i = 0; - while (listFiles.hasNext()) { - LocatedFileStatus next = listFiles.next(); - if (next.getPath().toString().matches(".*/part-\\d+$")) { - // compare partfileSize - assertEquals(2 * 1024 * 1024, next.getLen()); - i++; - } - } - assertEquals(5, i); - } } From 601d0ea18e67f3dbc7fe5449f748a1ebe8b07283 Mon Sep 17 00:00:00 2001 From: fuchaohong Date: Thu, 24 Apr 2025 13:48:21 +0800 Subject: [PATCH 5/6] remove sout. --- .../test/java/org/apache/hadoop/tools/TestHadoopArchives.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/hadoop-tools/hadoop-archives/src/test/java/org/apache/hadoop/tools/TestHadoopArchives.java b/hadoop-tools/hadoop-archives/src/test/java/org/apache/hadoop/tools/TestHadoopArchives.java index d714468da163b..37fad080305c8 100644 --- a/hadoop-tools/hadoop-archives/src/test/java/org/apache/hadoop/tools/TestHadoopArchives.java +++ b/hadoop-tools/hadoop-archives/src/test/java/org/apache/hadoop/tools/TestHadoopArchives.java @@ -810,8 +810,6 @@ public void testBlockSize() throws Exception { conf.set(HadoopArchives.HAR_BLOCKSIZE_LABEL, "1m"); final String inputPathStr = inputPath.toUri().getPath(); - System.out.println("inputPathStr = " + inputPathStr); - final String harName = "foo.har"; final String[] args = { "-archiveName", harName, "-p", inputPathStr, "*", From dcaa2bccf221be1d12e4f5802713df92edce6ee5 Mon Sep 17 00:00:00 2001 From: fuchaohong Date: Thu, 24 Apr 2025 16:54:11 +0800 Subject: [PATCH 6/6] fix. --- .../test/java/org/apache/hadoop/tools/TestHadoopArchives.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-tools/hadoop-archives/src/test/java/org/apache/hadoop/tools/TestHadoopArchives.java b/hadoop-tools/hadoop-archives/src/test/java/org/apache/hadoop/tools/TestHadoopArchives.java index 37fad080305c8..a4ddf35679733 100644 --- a/hadoop-tools/hadoop-archives/src/test/java/org/apache/hadoop/tools/TestHadoopArchives.java +++ b/hadoop-tools/hadoop-archives/src/test/java/org/apache/hadoop/tools/TestHadoopArchives.java @@ -825,7 +825,7 @@ public void testBlockSize() throws Exception { LocatedFileStatus next = listFiles.next(); if (next.getPath().toString().matches(".*/part-\\d+$")) { // compare blockSize - Assert.assertEquals(1 * 1024 * 1024, next.getBlockSize()); + assertEquals(1 * 1024 * 1024, next.getBlockSize()); } } }