From 267de619ba6d2f07a0d28790059f746e0bf83f1e Mon Sep 17 00:00:00 2001
From: Yorhel <git@yorhel.nl>
Date: Thu, 6 Sep 2012 12:41:50 +0200
Subject: [PATCH] dir_import.c: Only call input_handle() once every 32 read
 items

Timings for importing a gzip-compressed file (zcat .. | ncdu -f -)
containing a bit under 6 million items, for a few choices of how often
to call input_handle():

Called on every item:

  real    0m13.745s
  user    0m12.576s
  sys     0m4.566s

Called on every 8 items:

  real    0m7.932s
  user    0m9.636s
  sys     0m1.623s

Called on every 16 items:

  real    0m7.559s
  user    0m9.553s
  sys     0m1.323s

Called on every 32 items:

  real    0m7.279s
  user    0m9.353s
  sys     0m1.277s

Called on every 64 items:

  real    0m7.166s
  user    0m9.389s
  sys     0m1.117s

Called on every 256 items:

  real    0m7.073s
  user    0m9.439s
  sys     0m1.027s

32 seemed like a good compromise.
---
 src/dir_import.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/dir_import.c b/src/dir_import.c
index 872ff6d..5d365f5 100644
--- a/src/dir_import.c
+++ b/src/dir_import.c
@@ -502,7 +502,11 @@ static int iteminfo(struct dir **item, uint64_t dev, int isdir) {
   E(!*d->name, "No name field present in item information object");
   *item = d;
   ctx->items++;
-  return input_handle(1);
+  /* Only call input_handle() once for every 32 items. Importing items is so
+   * fast that the time spent in input_handle() dominates when called every
+   * time. Don't set this value too high, either, as feedback should still be
+   * somewhat responsive when our import data comes from a slow-ish source. */
+  return !(ctx->items & 31) ? input_handle(1) : 0;
 }
 
 
-- 
GitLab