From 6de0a8ec00cfab1a7664bc5a934a9d4e7207e77d Mon Sep 17 00:00:00 2001
From: Yorhel <git@yorhel.nl>
Date: Mon, 11 May 2009 19:48:32 +0200
Subject: [PATCH] Use correct hard link information after partial recalculation
 or deletion

Hard link detection is now done in a separate pass on the in-memory tree,
and duplicates can be 'removed' and 're-added' on the fly. When making any
changes in the tree, all hard links are re-added before the operation and
removed again afterwards.

While this guarantees that all hard link information is correct, it does
have a few drawbacks. I can currently think of two:

 1. It's not the most efficient way to do it, and may be quite slow on
    large trees. Will have to do some benchmarks later to see whether
    it is anything to be concerned about.

 2. The first encountered item is considered as 'counted' and all items
    encountered after that are considered as 'duplicate'. Because the
    order in which we traverse the tree doesn't always have to be the
    same, the items that will be considered as 'duplicate' can vary with
    each deletion or re-calculation. This might cause confusion for
    people who aren't aware of how hard links work.
---
 ChangeLog    |  3 ++
 src/calc.c   | 48 ++++++++-----------------------
 src/delete.c |  9 +++++-
 src/global.h |  8 ++++--
 src/util.c   | 81 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/util.h   |  6 ++++
 6 files changed, 116 insertions(+), 39 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 18b2d27..e78d551 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,6 @@
+git - ?
+	- Implemented hard link detection
+
 1.5 - 2009-05-02
 	- Fixed incorrect apparent size on directory refresh
 	- Browsing keys now work while file info window is displayed
diff --git a/src/calc.c b/src/calc.c
index 0c05a05..a09ab6c 100644
--- a/src/calc.c
+++ b/src/calc.c
@@ -53,11 +53,7 @@ struct dir *root;        /* root directory struct we're calculating */
 struct dir *orig;        /* original directory, when recalculating */
 dev_t curdev;            /* current device we're calculating on */
 int anpos;               /* position of the animation string */
-struct link_inode {      /* list of all non-dirs with nlink > 1 */
-  dev_t dev;
-  ino_t ino;
-} *links = NULL;
-int curpathl = 0, lasterrl = 0, linksl = 0, linkst = 0;
+int curpathl = 0, lasterrl = 0;
 
 
 
@@ -136,33 +132,14 @@ int calc_item(struct dir *par, char *name) {
     for(t=d->parent; t!=NULL; t=t->parent)
       t->items++;
 
-  /* check for hard links.
-     An item is only considered a hard link if it's not a directory,
-     has st_nlink > 1, and is already present in the links array */
-  if(!S_ISDIR(fs.st_mode) && fs.st_nlink > 1) {
-    for(i=0; i<linkst; i++)
-      if(links[i].dev == fs.st_dev && links[i].ino == fs.st_ino)
-        break;
-    /* found in the list, set link flag (so the size won't get counted) */
-    if(i != linkst)
-      d->flags |= FF_HLNK;
-    /* not found, add to the list */
-    else {
-      if(++linkst > linksl) {
-        linksl *= 2;
-        if(!linksl) {
-          linksl = 64;
-          links = malloc(linksl*sizeof(struct link_inode));
-        } else
-          links = realloc(links, linksl*sizeof(struct link_inode));
-      }
-      links[i].dev = fs.st_dev;
-      links[i].ino = fs.st_ino;
-    }
-  }
+  /* Provide the necessary information for hard link checking */
+  d->ino = fs.st_ino;
+  d->dev = fs.st_dev;
+  if(!S_ISDIR(fs.st_mode) && fs.st_nlink > 1)
+    d->flags |= FF_HLNKC;
 
   /* count the size */
-  if(!(d->flags & FF_EXL || d->flags & FF_OTHFS || d->flags & FF_HLNK)) {
+  if(!(d->flags & FF_EXL || d->flags & FF_OTHFS)) {
     d->size = fs.st_blocks * S_BLKSIZE;
     d->asize = fs.st_size;
     for(t=d->parent; t!=NULL; t=t->parent) {
@@ -404,10 +381,6 @@ void calc_process() {
   if(!path[1] && strcmp(name, "."))
     free(name);
   free(path);
-  if(linksl) {
-    linksl = linkst = 0;
-    free(links);
-  }
 
   /* success */
   if(!n && !failed) {
@@ -417,7 +390,6 @@ void calc_process() {
       strcpy(errmsg, "Directory empty.");
       goto calc_fail;
     }
-    browse_init(root->sub);
 
     /* update references and free original item */
     if(orig) {
@@ -440,6 +412,9 @@ void calc_process() {
       }
       freedir(orig);
     }
+
+    link_del(root);
+    browse_init(root->sub);
     return;
   }
 
@@ -455,7 +430,8 @@ calc_fail:
 
 void calc_init(char *dir, struct dir *org) {
   failed = anpos = 0;
-  orig = org;
+  if((orig = org) != NULL)
+    link_add(orig);
   if(curpathl == 0) {
     curpathl = strlen(dir)+1;
     curpath = malloc(curpathl);
diff --git a/src/delete.c b/src/delete.c
index a5b5b2d..8f5e30c 100644
--- a/src/delete.c
+++ b/src/delete.c
@@ -216,13 +216,19 @@ void delete_process() {
     if(input_handle(0))
       return browse_init(root);
 
+  /* temporarily re-add hard links, so we won't lose sizes when deleting a file
+     whose hard link outside this directory had been marked as a duplicate */
+  link_add(root);
+
   /* chdir */
   if(path_chdir(getpath(root->parent)) < 0) {
     state = DS_FAILED;
     lasterrno = errno;
     while(state == DS_FAILED)
-      if(input_handle(0))
+      if(input_handle(0)) {
+        link_del(root);
         return;
+      }
   }
 
   /* delete */
@@ -235,6 +241,7 @@ void delete_process() {
     if(nextsel)
       nextsel->flags |= FF_BSEL;
   }
+  link_del(root);
 }
 
 
diff --git a/src/global.h b/src/global.h
index dfa7ff6..43bbb3f 100644
--- a/src/global.h
+++ b/src/global.h
@@ -29,6 +29,7 @@
 #include "config.h"
 #include <stdio.h>
 #include <sys/types.h>
+#include <sys/stat.h>
 
 /* File Flags (struct dir -> flags) */
 #define FF_DIR    0x01
@@ -38,7 +39,8 @@
 #define FF_EXL    0x10 /* excluded using exlude patterns */
 #define FF_SERR   0x20 /* error in subdirectory */
 #define FF_HLNK   0x40 /* hard link (same file already encountered before) */
-#define FF_BSEL   0x80 /* selected */
+#define FF_HLNKC  0x80 /* hard link candidate (file with st_nlink > 1) */
+#define FF_BSEL  0x100 /* selected */
 
 /* Program states */
 #define ST_CALC   0
@@ -54,7 +56,9 @@ struct dir {
   char *name;
   off_t size, asize;
   unsigned long items;
-  unsigned char flags;
+  unsigned short flags;
+  dev_t dev;
+  ino_t ino;
 }; 
 
 /* program state */
diff --git a/src/util.c b/src/util.c
index a19fea0..6c95ba0 100644
--- a/src/util.c
+++ b/src/util.c
@@ -38,6 +38,9 @@ char fullsizedat[20]; /* max: 999.999.999.999.999 */
 char *getpathdat;
 int getpathdatl = 0;
 
+struct dir **links;
+int linksl = 0, linkst = 0;
+
 
 char *cropstr(const char *from, int s) {
   int i, j, o = strlen(from);
@@ -246,3 +249,81 @@ char *getpath(struct dir *cur) {
   return getpathdat;
 }
 
+
+/* Walks d and its subtree. act = 0: list the FF_HLNKC items not flagged
+   FF_HLNK; act = 1: list first occurrences, flag and zero-size duplicates;
+   act = -1: unflag duplicates and restore their sizes from the list. */
+void link_list_rec(struct dir *d, int act) {
+  struct dir *t;
+  int i;
+
+  /* recursion, check sub directories */
+  for(t=d->sub; t!=NULL; t=t->next)
+    link_list_rec(t, act);
+
+  /* not a link candidate? ignore */
+  if(!(d->flags & FF_HLNKC))
+    return;
+
+  /* look up this dev/ino pair; i == linkst afterwards means "not found" */
+  for(i=0; i<linkst; i++)
+    if(links[i]->dev == d->dev && links[i]->ino == d->ino)
+      break;
+
+  /* found in the list, set link flag and set size to zero */
+  if(act == 1 && i != linkst) {
+    d->flags |= FF_HLNK;
+    for(t=d->parent; t!=NULL; t=t->parent) {
+      t->size -= d->size;
+      t->asize -= d->asize;
+    }
+    d->size = d->asize = 0;
+    return;
+  }
+
+  /* found in the list, reset flag and re-add size */
+  if(act == -1 && i != linkst && (d->flags & FF_HLNK)) {
+    d->flags &= ~FF_HLNK;
+    d->size = links[i]->size;
+    d->asize = links[i]->asize;
+    for(t=d->parent; t!=NULL; t=t->parent) {
+      t->size += d->size;
+      t->asize += d->asize;
+    }
+  }
+
+  /* not found: append. Requiring i == linkst avoids overwriting a matched entry */
+  if(i == linkst && (act == 1 || (act == 0 && !(d->flags & FF_HLNK)))) {
+    if(++linkst > linksl) {
+      linksl *= 2;
+      if(!linksl) {
+        linksl = 64;
+        links = malloc(linksl*sizeof(struct dir *));
+      } else  /* NOTE(review): malloc/realloc results are not checked */
+        links = realloc(links, linksl*sizeof(struct dir *));
+    }
+    links[i] = d;
+  }
+}
+
+/* Runs link_list_rec() with act = 1 over the whole tree that contains par. */
+void link_del(struct dir *par) {
+  while(par->parent != NULL)  /* climb to the topmost ancestor */
+    par = par->parent;
+  link_list_rec(par, 1);      /* fill the list and remove duplicates */
+  linkst = 0;                 /* empty the list; only the count is reset here */
+}
+
+/* Re-adds the duplicate hard links in the whole tree that contains par. */
+void link_add(struct dir *par) {
+  while(par->parent != NULL)  /* climb to the topmost ancestor */
+    par = par->parent;
+  /* In order to correctly re-add the duplicates, we'll have to pass the entire
+     tree twice, one time to get a list of all links, second time to re-add them */
+  link_list_rec(par, 0);      /* pass 1: fill the list */
+  link_list_rec(par, -1);     /* pass 2: restore duplicates from the list */
+  linkst = 0;                 /* empty the list; only the count is reset here */
+}
+
+
+
diff --git a/src/util.h b/src/util.h
index e83c25e..95cf7e1 100644
--- a/src/util.h
+++ b/src/util.h
@@ -78,5 +78,11 @@ void freedir(struct dir *);
    returned pointer will be overwritten with a subsequent call */
 char *getpath(struct dir *);
 
+/* marks duplicate hard links and removes their sizes from the entire tree */
+void link_del(struct dir *);
+
+/* re-adds all hard links in a tree */
+void link_add(struct dir *);
+
 #endif
 
-- 
GitLab