From 7698bfd9807600ee164a896a44aad2417382cfac Mon Sep 17 00:00:00 2001
From: Yorhel <git@yorhel.nl>
Date: Thu, 23 Apr 2009 19:44:37 +0200
Subject: [PATCH] Split path handling into path.c and replaced rpath() with a
 better implementation

This is the first step towards replacing all code that relies on PATH_MAX;
more changes will follow.
---
 src/Makefile.am |   4 +-
 src/calc.c      | 124 +-----------------------
 src/path.c      | 243 ++++++++++++++++++++++++++++++++++++++++++++++++
 src/path.h      |  47 ++++++++++
 4 files changed, 296 insertions(+), 122 deletions(-)
 create mode 100644 src/path.c
 create mode 100644 src/path.h

diff --git a/src/Makefile.am b/src/Makefile.am
index e9d0b52..0a3764a 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -1,5 +1,5 @@
 bin_PROGRAMS = ncdu
 
-ncdu_SOURCES = browser.c calc.c delete.c exclude.c help.c main.c util.c
+ncdu_SOURCES = browser.c calc.c delete.c exclude.c help.c main.c path.c util.c
 
-noinst_HEADERS = browser.h calc.h delete.h exclude.h help.h ncdu.h util.h
+noinst_HEADERS = browser.h calc.h delete.h exclude.h help.h ncdu.h path.h util.h
diff --git a/src/calc.c b/src/calc.c
index 8db2d91..ee97de7 100644
--- a/src/calc.c
+++ b/src/calc.c
@@ -28,6 +28,7 @@
 #include "exclude.h"
 #include "util.h"
 #include "browser.h"
+#include "path.h"
 
 #include <string.h>
 #include <stdlib.h>
@@ -45,21 +46,6 @@
 # define S_BLKSIZE 512
 #endif
 
-#ifndef LINK_MAX
-# ifdef _POSIX_LINK_MAX
-#  define LINK_MAX _POSIX_LINK_MAX
-# else
-#  define LINK_MAX 32
-# endif
-#endif
-
-#ifndef S_ISLNK
-# ifndef S_IFLNK
-#  define S_IFLNK 0120000
-# endif
-# define S_ISLNK(x) (x & S_IFLNK)
-#endif
-
 
 /* external vars */
 int  calc_delay = 100;
@@ -77,107 +63,6 @@ long lastupdate;         /* time of the last screen update */
 int anpos;               /* position of the animation string */
 
 
-/* My own implementation of realpath()
-    - assumes that *every* possible path fits in PATH_MAX bytes
-    - does not set errno on error
-    - has not yet been fully tested
-*/
-char *rpath(const char *from, char *to) {
-  char tmp[PATH_MAX], cwd[PATH_MAX], cur[PATH_MAX], app[PATH_MAX];
-  int i, j, l, k, last, ll = 0;
-  struct stat st;
-
-  getcwd(cwd, PATH_MAX);
-  strcpy(cur, from);
-  app[0] = 0;
-
-  loop:
-  /* not an absolute path, add current directory */
-  if(cur[0] != '/') {
-    if(!(cwd[0] == '/' && cwd[1] == 0))
-      strcpy(tmp, cwd);
-    else
-      tmp[0] = 0;
-    if(strlen(cur) + 2 > PATH_MAX - strlen(tmp))
-      return(NULL);
-    strcat(tmp, "/");
-    strcat(tmp, cur);
-  } else
-    strcpy(tmp, cur);
-
-  /* now fix things like '.' and '..' */
-  i = j = last = 0;
-  l = strlen(tmp);
-  while(1) {
-    if(tmp[i] == 0)
-      break;
-    /* . */
-    if(l >= i+2 && tmp[i] == '/' && tmp[i+1] == '.' && (tmp[i+2] == 0 || tmp[i+2] == '/')) {
-      i+= 2;
-      continue;
-    }
-    /* .. */
-    if(l >= i+3 && tmp[i] == '/' && tmp[i+1] == '.' && tmp[i+2] == '.' && (tmp[i+3] == 0 || tmp[i+3] == '/')) {
-      for(k=j; --k>0;)
-        if(to[k] == '/' && k != j-1)
-          break;
-      j -= j-k;
-      if(j < 1) j = 1;
-      i += 3;
-      continue;
-    }
-    /* remove double slashes */
-    if(tmp[i] == '/' && i>0 && tmp[i-1] == '/') {
-      i++;
-      continue;
-    }
-    to[j++] = tmp[i++];
-  }
-  /* remove leading slashes */
-  while(--j > 0) {
-    if(to[j] != '/')
-      break;
-  }
-  to[j+1] = 0;
-  /* make sure we do have something left in case our path is / */
-  if(to[0] == 0) {
-    to[0] = '/';
-    to[1] = 0;
-  }
-  /* append 'app' */
-  if(app[0] != 0)
-    strcat(to, app);
-
-  j = strlen(to);
-  /* check for symlinks */
-  for(i=1; i<=j; i++) {
-    if(to[i] == '/' || to[i] == 0) {
-      strncpy(tmp, to, i);
-      tmp[i] = 0;
-      if(lstat(tmp, &st) < 0)
-        return(NULL);
-      if(S_ISLNK(st.st_mode)) {
-        if(++ll > LINK_MAX || (k = readlink(tmp, cur, PATH_MAX)) < 0)
-          return(NULL);
-        cur[k] = 0;
-        if(to[i] != 0)
-          strcpy(app, &to[i]);
-        strcpy(cwd, tmp);
-        for(k=strlen(cwd); --k>0;)
-          if(cwd[k] == '/')
-            break;
-        cwd[k] = 0;
-        goto loop;
-      }
-      if(!S_ISDIR(st.st_mode))
-        return(NULL);
-    }
-  }
-
-  return(to);
-}
-
-
 int calc_item(struct dir *par, char *path, char *name) {
   char tmp[PATH_MAX];
   struct dir *t, *d;
@@ -400,12 +285,12 @@ int calc_key(int ch) {
 
 
 void calc_process() {
-  char tmp[PATH_MAX];
+  char *tmp;
   struct stat fs;
   struct dir *t;
 
   /* check root directory */
-  if(rpath(curpath, tmp) == NULL || lstat(tmp, &fs) != 0 || !S_ISDIR(fs.st_mode)) {
+  if((tmp = path_real(curpath)) == NULL || lstat(tmp, &fs) != 0 || !S_ISDIR(fs.st_mode)) {
     failed = 1;
     strcpy(errmsg, "Directory not found");
     goto fail;
@@ -416,8 +301,7 @@ void calc_process() {
   t->size = fs.st_blocks * S_BLKSIZE;
   t->asize = fs.st_size;
   t->flags |= FF_DIR;
-  t->name = (char *) malloc(strlen(tmp)+1);
-  strcpy(t->name, orig ? orig->name : tmp);
+  t->name = tmp;
   root = t;
   curdev = fs.st_dev;
 
diff --git a/src/path.c b/src/path.c
new file mode 100644
index 0000000..6d07d3d
--- /dev/null
+++ b/src/path.c
@@ -0,0 +1,243 @@
+/* ncdu - NCurses Disk Usage
+
+  Copyright (c) 2007-2009 Yoran Heling
+
+  Permission is hereby granted, free of charge, to any person obtaining
+  a copy of this software and associated documentation files (the
+  "Software"), to deal in the Software without restriction, including
+  without limitation the rights to use, copy, modify, merge, publish,
+  distribute, sublicense, and/or sell copies of the Software, and to
+  permit persons to whom the Software is furnished to do so, subject to
+  the following conditions:
+
+  The above copyright notice and this permission notice shall be included
+  in all copies or substantial portions of the Software.
+
+  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+  CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+  TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+*/
+
+#include "path.h"
+
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <unistd.h>
+#include <limits.h>
+#include <dirent.h>
+
+#ifndef LINK_MAX
+# ifdef _POSIX_LINK_MAX
+#  define LINK_MAX _POSIX_LINK_MAX
+# else
+#  define LINK_MAX 32
+# endif
+#endif
+
+
+#define RPATH_CNKSZ 256
+
+
+/* splits a path into components and does a bit of canonicalization: "." and
+  ".." components are resolved purely textually. a pointer to a reversed array
+  of components (pointing into cur) is stored in res and the number of
+  components is returned. cur is modified, and res has to be free()d after use */
+int path_split(char *cur, char ***res) {
+  char **old;
+  int i, j, n;
+
+  cur += strspn(cur, "/"); /* skip leading slashes */
+  n = strlen(cur);
+
+  /* replace slashes with zeros, j counts the separators */
+  for(i=j=0; i<n; i++)
+    if(cur[i] == '/') {
+      cur[i] = 0;
+      if(cur[i-1] != 0) /* only the first slash of a run is counted */
+        j++;
+    }
+
+  /* create array of the components: j separators, so at most j+1 components */
+  old = malloc((j+1)*sizeof(char *));
+  *res = malloc((j+1)*sizeof(char *));
+  for(i=j=0; i<n; i++)
+    if(i == 0 || (cur[i-1] == 0 && cur[i] != 0)) /* first byte of a component */
+      old[j++] = cur+i;
+
+  /* re-order and remove parts: walk from the last component to the first,
+     n is the number of ".." seen that still have to cancel a component */
+  for(i=n=0; --j>=0; ) {
+    if(!strcmp(old[j], "..")) {
+      n++;
+      continue;
+    }
+    if(!strcmp(old[j], "."))
+      continue;
+    if(n) { /* this component is cancelled by a ".." that followed it */
+      n--;
+      continue;
+    }
+    (*res)[i++] = old[j];
+  }
+  free(old);
+  return i;
+}
+
+
+/* copies path and prepends cwd if needed, to ensure an absolute path. returns
+   NULL if getcwd() fails, otherwise the result has to be free()'d manually */
+char *path_absolute(const char *path) {
+  int i, n;
+  char *ret;
+
+  /* not an absolute path? prepend cwd */
+  if(path[0] != '/') {
+    n = RPATH_CNKSZ;
+    ret = malloc(n);
+    errno = 0;
+    while(!getcwd(ret, n) && errno == ERANGE) { /* buffer too small: grow and retry */
+      n += RPATH_CNKSZ;
+      ret = realloc(ret, n);
+      errno = 0;
+    }
+    if(errno) { /* getcwd() failed for a reason other than the buffer size */
+      free(ret);
+      return NULL;
+    }
+
+    i = strlen(path) + strlen(ret) + 2; /* +2: the '/' separator and the terminating zero */
+    if(i > n)
+      ret = realloc(ret, i);
+    strcat(ret, "/");
+    strcat(ret, path);
+  /* otherwise, just make a copy */
+  } else {
+    ret = malloc(strlen(path)+1);
+    strcpy(ret, path);
+  }
+  return ret;
+}
+
+
+/* resolves the symlinks in the absolute path cur; returns a malloc()ed path or NULL.
+   NOTE: cwd and the memory cur points to are unreliable after calling this function */
+char *path_real_rec(char *cur, int *links) {
+  int i, n, curl, tmpl, lnkl = 0;
+  char **arr, *tmp, *lnk = NULL, *ret = NULL;
+
+  /* the rebuilt path needs at most a leading '/', strlen(cur) bytes and a zero */
+  curl = strlen(cur);
+  tmpl = curl + 2;
+  tmp = malloc(tmpl);
+  /* split path, the entries of arr point into cur from here on */
+  i = path_split(cur, &arr);
+
+  /* re-create full path */
+  strcpy(tmp, "/");
+  if(i > 0) {
+    lnkl = RPATH_CNKSZ;
+    lnk = malloc(lnkl);
+    if(chdir("/") < 0)
+      goto path_real_done;
+  }
+
+  while(--i>=0) {
+    if(arr[i][0] == 0)
+      continue;
+    /* check for symlink */
+    while((n = readlink(arr[i], lnk, lnkl)) == lnkl || (n < 0 && errno == ERANGE)) {
+      lnkl += RPATH_CNKSZ;
+      lnk = realloc(lnk, lnkl);
+    }
+    if(n < 0 && errno != EINVAL)
+      goto path_real_done;
+    if(n > 0) {
+      if(++*links > LINK_MAX) {
+        errno = ELOOP;
+        goto path_real_done;
+      }
+      lnk[n] = 0;
+      if(lnk[0] == '/') /* absolute target: forget what was resolved so far */
+        tmp[0] = 0;
+      /* new path = tmp + target + remaining components (at most curl bytes) */
+      n += strlen(tmp) + curl + 2;
+      if(tmpl < n) {
+        tmpl = n;
+        tmp = realloc(tmp, tmpl);
+      }
+      strcat(tmp, lnk);
+      while(--i >= 0)
+        strcat(strcat(tmp, "/"), arr[i]);
+      ret = path_real_rec(tmp, links);
+      goto path_real_done;
+    }
+    /* not a symlink, append component and go to the next part */
+    strcat(tmp, arr[i]);
+    if(i) {
+      if(chdir(arr[i]) < 0)
+        goto path_real_done;
+      strcat(tmp, "/");
+    }
+  }
+  ret = tmp;
+
+path_real_done:
+  if(ret != tmp)
+    free(tmp);
+  free(lnk);
+  free(arr);
+  return ret;
+}
+
+
+/* realpath() replacement for paths of any length; returns a malloc()ed string or NULL */
+char *path_real(const char *orig) {
+  int links = 0;
+  char *tmp, *ret;
+  DIR *d;
+
+  if(orig == NULL)
+    return NULL;
+  if((d = opendir(".")) == NULL) /* handle used to get back here afterwards */
+    return NULL;
+  tmp = path_absolute(orig); /* NULL when getcwd() fails, must not be resolved */
+  ret = tmp ? path_real_rec(tmp, &links) : NULL;
+  free(tmp);
+  fchdir(dirfd(d));
+  closedir(d);
+  return ret;
+}
+
+
+/* chdir() replacement that descends one component at a time; returns 0 or -1 */
+int path_chdir(const char *path) {
+  char **arr = NULL, *cur;
+  int i, r = -1;
+  DIR *d;
+
+  if((d = opendir(".")) == NULL)
+    return -1;
+  if((cur = path_absolute(path)) == NULL)
+    goto path_chdir_done;
+  i = path_split(cur, &arr);
+  if(chdir("/") < 0)
+    goto path_chdir_done;
+  while(--i >= 0)
+    if(chdir(arr[i]) < 0)
+      goto path_chdir_done;
+  r = 0;
+
+path_chdir_done:
+  if(r < 0)
+    fchdir(dirfd(d)); /* failed half-way: go back to where we started */
+  closedir(d); /* always release the handle on the original directory */
+  free(cur);
+  free(arr);
+  return r;
+}
+
diff --git a/src/path.h b/src/path.h
new file mode 100644
index 0000000..c3312eb
--- /dev/null
+++ b/src/path.h
@@ -0,0 +1,47 @@
+/* ncdu - NCurses Disk Usage
+
+  Copyright (c) 2007-2009 Yoran Heling
+
+  Permission is hereby granted, free of charge, to any person obtaining
+  a copy of this software and associated documentation files (the
+  "Software"), to deal in the Software without restriction, including
+  without limitation the rights to use, copy, modify, merge, publish,
+  distribute, sublicense, and/or sell copies of the Software, and to
+  permit persons to whom the Software is furnished to do so, subject to
+  the following conditions:
+
+  The above copyright notice and this permission notice shall be included
+  in all copies or substantial portions of the Software.
+
+  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+  CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+  TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+*/
+/*
+ path.c reimplements realpath() and chdir(). Both functions accept
+ arbitrarily long path names, not limited by PATH_MAX.
+
+ Caveats/bugs:
+  - path_real uses chdir(), so it's not thread safe
+  - Process requires +x access for all directory components
+  - Potentially slow
+  - Doesn't check return value of malloc() and realloc()
+  - path_real doesn't check for the existence of the last component
+*/
+
+#ifndef _path_h
+#define _path_h
+
+/* path_real reimplements realpath(). Returns NULL on failure, otherwise a
+   string allocated by malloc() that should be free()d by the caller. */
+extern char *path_real(const char *);
+
+/* chdir() replacement: returns 0 on success and -1 on failure */
+extern int   path_chdir(const char *);
+
+#endif
-- 
GitLab