diff --git a/Makefile.am b/Makefile.am index 870f73f2c6dafd089e4b53e26ed60fbfd0b100ee..44633951a4f9b4af8a1cc973484e09a92e82d0ae 100644 --- a/Makefile.am +++ b/Makefile.am @@ -6,6 +6,7 @@ ncdu_SOURCES=\ src/dirlist.c\ src/dir_common.c\ src/dir_export.c\ + src/dir_import.c\ src/dir_mem.c\ src/dir_scan.c\ src/exclude.c\ diff --git a/src/dir.h b/src/dir.h index a5070a3cda494417a06165f718bf9bdf955e8021..0ef7875bf5e856a87f4037dda42923ea04447dfc 100644 --- a/src/dir.h +++ b/src/dir.h @@ -101,10 +101,15 @@ void dir_mem_init(struct dir *); int dir_export_init(const char *fn); +/* Function set by input code. Returns dir_output.final(). */ +int (*dir_process)(); + /* Scanning a live directory */ extern int dir_scan_smfs; void dir_scan_init(const char *path); -int dir_scan_process(); + +/* Importing a file */ +int dir_import_init(const char *fn); /* The currently configured output functions. */ diff --git a/src/dir_import.c b/src/dir_import.c new file mode 100644 index 0000000000000000000000000000000000000000..17c8fcc3f324e6892b364854b136e78585a76867 --- /dev/null +++ b/src/dir_import.c @@ -0,0 +1,576 @@ +/* ncdu - NCurses Disk Usage + + Copyright (c) 2007-2012 Yoran Heling + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+  CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+  TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+*/
+
+/* This JSON parser has the following limitations:
+ * - No support for character encodings incompatible with ASCII (e.g.
+ *   UTF-16/32)
+ * - Doesn't validate UTF-8 correctness (in fact, besides the ASCII part this
+ *   parser doesn't know anything about encoding).
+ * - Doesn't validate that there are no duplicate keys in JSON objects.
+ * - Isn't very strict with validating non-integer numbers.
+ * - Does not check nesting level, easily allows stack overflow. (TODO: FIX!)
+ */
+
+#include "global.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <limits.h>
+
+
+/* Max. length of any JSON string we're interested in. A string may of course
+ * be larger, we're not going to read more than MAX_VAL in memory. If a string
+ * we're interested in (e.g. a file name) is longer than this, reading the
+ * import will result in an error. */
+#define MAX_VAL (32*1024)
+
+/* Minimum number of bytes we request from fread() */
+#define MIN_READ_SIZE 1024
+
+/* Read buffer size. Must be at least 2*MIN_READ_SIZE, everything larger
+ * improves performance. */
+#define READ_BUF_SIZE (32*1024)
+
+
+/* Use a struct for easy batch-allocation and deallocation of state data.
+ * A single instance is allocated by dir_import_init() and released by
+ * process(); all parser functions in this file access it through the global
+ * 'ctx' pointer. */
+struct ctx {
+  FILE *stream; /* input being parsed (a regular file or stdin) */
+
+  int line;  /* current line number, starts at 1; used in error messages */
+  int byte;  /* number of bytes consumed on the current line; reset to 0 on each newline */
+  int eof;   /* set to 1 by fill() once the end of the stream has been reached */
+  int items; /* number of item information objects parsed so far */
+  char *buf; /* points into readbuf, always zero-terminated. */
+  char *lastfill; /* points into readbuf, location of the zero terminator. */
+
+  char val[MAX_VAL];           /* scratch space for object keys and string values */
+  char readbuf[READ_BUF_SIZE]; /* raw data read from the stream */
+} *ctx;
+
+
+/* Fills readbuf with data from the stream. *buf will have at least n (<
+ * READ_BUF_SIZE) bytes available, unless the stream reached EOF or an error
+ * occurred.
If the file data contains a null byte, this is considered an error.
+ * Returns 0 on success, non-zero on error.
+ *
+ * If the request leaves less than MIN_READ_SIZE bytes to read, the unread
+ * data is first moved to the start of readbuf so that (almost) the whole
+ * buffer can be refilled with a single fread(). */
+static int fill(int n) {
+  int r;
+
+  /* Nothing more to read. */
+  if(ctx->eof)
+    return 0;
+
+  r = READ_BUF_SIZE-(ctx->lastfill - ctx->readbuf); /* number of bytes left in the buffer */
+  if(n < r)
+    n = r-1; /* read as much as fits, keeping one byte for the terminator */
+  if(n < MIN_READ_SIZE) {
+    r = ctx->lastfill - ctx->buf; /* number of unread bytes left in the buffer */
+    /* Source and destination both lie inside readbuf and may overlap, so
+     * memmove() is required here; memcpy() would be undefined behaviour. */
+    memmove(ctx->readbuf, ctx->buf, r);
+    ctx->lastfill = ctx->readbuf + r;
+    ctx->buf = ctx->readbuf;
+    n = READ_BUF_SIZE-r-1;
+  }
+
+  do {
+    r = fread(ctx->lastfill, 1, n, ctx->stream);
+    if(r != n) {
+      if(feof(ctx->stream))
+        ctx->eof = 1;
+      else if(ferror(ctx->stream)) {
+        dir_seterr("Read error: %s", strerror(errno));
+        return 1;
+      }
+    }
+
+    /* Terminate the new data. If strlen() stops before the terminator we
+     * just wrote, the input itself contained a zero byte. */
+    ctx->lastfill[r] = 0;
+    if(strlen(ctx->lastfill) != (size_t)r) {
+      dir_seterr("Zero-byte found in JSON stream");
+      return 1;
+    }
+    ctx->lastfill += r;
+    n -= r;
+  } while(!ctx->eof && n > MIN_READ_SIZE);
+
+  return 0;
+}
+
+
+/* Two macros that break function calling behaviour, but are damn convenient */
+
+/* E(cond, msg): if cond is true and no fatal error has been set yet, report
+ * msg together with the current line/byte position and return 1 from the
+ * *calling* function. Note: when dir_fatalerr is already set, nothing is
+ * reported and execution simply continues. */
+#define E(_x, _m) do {\
+    if((_x) && !dir_fatalerr) {\
+      dir_seterr("Line %d byte %d: %s", ctx->line, ctx->byte, _m);\
+      return 1;\
+    }\
+  } while(0)
+
+/* C(expr): return 1 from the *calling* function if expr is non-zero. Used to
+ * propagate errors from nested parser calls. */
+#define C(_x) do {\
+    if(_x)\
+      return 1;\
+  } while(0)
+
+
+/* Require at least n bytes in the buffer, throw an error on early EOF.
+ * (Macro to quickly handle the common case)
+ * Both evaluate to non-zero on error. rfill1 relies on the buffer being
+ * zero-terminated: a zero at *ctx->buf means all buffered data was consumed. */
+#define rfill1 (!*ctx->buf && _rfill(1))
+#define rfill(_n) ((ctx->lastfill - ctx->buf < (_n)) && _rfill(_n))
+
+/* Slow path of rfill1/rfill(): refill the buffer and verify that at least n
+ * unread bytes are available afterwards. Returns non-zero on error. */
+static int _rfill(int n) {
+  C(fill(n));
+  E(ctx->lastfill - ctx->buf < n, "Unexpected EOF");
+  return 0;
+}
+
+
+/* Consumes n bytes from the buffer. The caller must make sure that these
+ * bytes are available and that none of them is a newline (see cons()). */
+static inline void con(int n) {
+  ctx->buf += n;
+  ctx->byte += n;
+}
+
+
+/* Consumes any whitespace. If *ctx->buf == 0 after this function, we've reached EOF.
*/
+/* Returns 0 on success, non-zero if refilling the buffer failed. */
+static int cons() {
+  while(1) {
+    C(!*ctx->buf && fill(1));
+
+    switch(*ctx->buf) {
+    case 0x0A:
+      /* Special-case the newline-character with respect to consuming stuff
+       * from the buffer. This is the only function which *can* consume the
+       * newline character, so it's more efficient to handle it in here rather
+       * than in the more general con(). */
+      ctx->buf++;
+      ctx->line++;
+      ctx->byte = 0;
+      break;
+    case 0x20:
+    case 0x09:
+    case 0x0D:
+      con(1);
+      break;
+    default:
+      return 0;
+    }
+  }
+}
+
+
+/* Parses the part of a JSON string escape sequence that follows the
+ * backslash (which the caller has already consumed) and appends the decoded
+ * byte(s) to *dest. *dest and *destlen are updated; bytes that do not fit are
+ * consumed but dropped, and one byte is always kept free for the terminator.
+ * \uXXXX escapes are written as UTF-8; surrogate pairs are not combined.
+ * Returns 0 on success, non-zero on error. */
+static int rstring_esc(char **dest, int *destlen) {
+  unsigned int n;
+
+  /* The backslash may have been the last byte in the read buffer, so make
+   * sure the escape character itself is available before looking at it.
+   * Without this, *ctx->buf would be the buffer terminator and a perfectly
+   * valid escape would be rejected as "Invalid escape sequence". */
+  C(rfill1);
+
+#define ap(c) do { if(*destlen > 1) { *((*dest)++) = (c); (*destlen)--; } } while(0)
+  switch(*ctx->buf) {
+  case '"':  ap('"');  con(1); break;
+  case '\\': ap('\\'); con(1); break;
+  case '/':  ap('/');  con(1); break;
+  case 'b':  ap(0x08); con(1); break;
+  case 'f':  ap(0x0C); con(1); break;
+  case 'n':  ap(0x0A); con(1); break;
+  case 'r':  ap(0x0D); con(1); break;
+  case 't':  ap(0x09); con(1); break;
+  case 'u':
+    /* 'u' plus four hexadecimal digits */
+    C(rfill(5));
+    /* Value of a hex digit, or 1<<16 (larger than any valid result) if the
+     * character is not a hex digit. */
+#define hn(c) ((c) >= '0' && (c) <= '9' ? (c)-'0' : (c) >= 'A' && (c) <= 'F' ? (c)-'A'+10 : (c) >= 'a' && (c) <= 'f' ? (c)-'a'+10 : 1<<16)
+    n = (hn(ctx->buf[1])<<12) + (hn(ctx->buf[2])<<8) + (hn(ctx->buf[3])<<4) + hn(ctx->buf[4]);
+#undef hn
+    if(n <= 0x007F) {
+      ap(n);
+    } else if(n <= 0x07FF) {
+      ap(0xC0 | (n>>6));
+      ap(0x80 | (n & 0x3F));
+    } else if(n <= 0xFFFF) {
+      ap(0xE0 | (n>>12));
+      ap(0x80 | ((n>>6) & 0x3F));
+      ap(0x80 | (n & 0x3F));
+    } else /* this happens if there was an invalid character (n >= (1<<16)) */
+      E(1, "Invalid character in \\u escape");
+    con(5);
+    break;
+  default:
+    E(1, "Invalid escape sequence");
+  }
+#undef ap
+  return 0;
+}
+
+
+/* Parse a JSON string and write it to *dest (max. destlen). Consumes but
+ * otherwise ignores any characters if the string is longer than destlen. *dest
+ * will be null-terminated, dest[destlen-1] = 0 if the string was just long
+ * enough or was cut off. That byte will be left untouched if the string is
+ * small enough.
*/
+/* dest may be NULL when destlen is 0, in which case the string is validated
+ * and consumed but not stored. Returns 0 on success, non-zero on error. */
+static int rstring(char *dest, int destlen) {
+  C(rfill1);
+  E(*ctx->buf != '"', "Expected string");
+  con(1);
+
+  while(1) {
+    C(rfill1);
+    if(*ctx->buf == '"')
+      break;
+    if(*ctx->buf == '\\') {
+      con(1);
+      C(rstring_esc(&dest, &destlen));
+      continue;
+    }
+    /* Unescaped control characters are not allowed inside a string. */
+    E((unsigned char)*ctx->buf <= 0x1F || (unsigned char)*ctx->buf == 0x7F, "Invalid character");
+    if(destlen > 1) {
+      *(dest++) = *ctx->buf;
+      destlen--;
+    }
+    con(1);
+  }
+  con(1); /* closing quote */
+  if(destlen > 0)
+    *dest = 0;
+  return 0;
+}
+
+
+/* Parse and consume a JSON integer. Throws an error if the value does not fit
+ * in an uint64_t, is not an integer or is larger than 'max'.
+ * As in JSON, a leading '0' is a complete number by itself. Returns 0 on
+ * success (with the result in *val), non-zero on error. */
+static int rint64(uint64_t *val, uint64_t max) {
+  uint64_t digit;
+  int haschar = 0;
+  *val = 0;
+  while(1) {
+    C(!*ctx->buf && fill(1));
+    if(*ctx->buf == '0' && !haschar) {
+      con(1);
+      break;
+    }
+    if(*ctx->buf >= '0' && *ctx->buf <= '9') {
+      haschar = 1;
+      digit = *ctx->buf-'0';
+      /* Check for overflow *before* doing the arithmetic. Comparing the
+       * wrapped result against the old value is not sufficient, since
+       * (*val)*10 can wrap around to something larger than *val. */
+      E(*val > (UINT64_MAX-digit)/10, "Invalid (positive) integer");
+      *val = (*val)*10 + digit;
+      con(1);
+      continue;
+    }
+    E(!haschar, "Invalid (positive) integer");
+    break;
+  }
+  E(*val > max, "Integer out of range");
+  return 0;
+}
+
+
+/* Parse and consume a JSON number. The result is discarded.
+ * Accepts any non-empty sequence of digits, '-', '+', '.', 'e' and 'E', which
+ * covers all valid JSON numbers (including fractions) but also some invalid
+ * ones.
+ * TODO: Improve validation. */
+static int rnum() {
+  int haschar = 0;
+  C(rfill1);
+  while(1) {
+    C(!*ctx->buf && fill(1));
+    if(*ctx->buf == 'e' || *ctx->buf == 'E' || *ctx->buf == '-' || *ctx->buf == '+' || *ctx->buf == '.' || (*ctx->buf >= '0' && *ctx->buf <= '9')) {
+      haschar = 1;
+      con(1);
+    } else {
+      E(!haschar, "Invalid JSON value");
+      break;
+    }
+  }
+  return 0;
+}
+
+
+/* Consume the literal v (of len bytes, e.g. "true"). Throws an error if the
+ * input does not start with exactly these bytes. */
+static int rlit(const char *v, int len) {
+  C(rfill(len));
+  E(strncmp(ctx->buf, v, len) != 0, "Invalid JSON value");
+  con(len);
+  return 0;
+}
+
+
+/* Parse the "<space> <string> <space> : <space>" part of an object key.
*/
+/* The key is stored in dest as done by rstring(). On success the buffer is
+ * positioned at the first byte of the value. Returns non-zero on error. */
+static int rkey(char *dest, int destlen) {
+  C(cons() || rstring(dest, destlen) || cons());
+  E(*ctx->buf != ':', "Expected ':'");
+  con(1);
+  return cons();
+}
+
+
+/* Maximum nesting level of arrays/objects inside a discarded JSON value. The
+ * import file is untrusted input; without a limit a long run of '[' would
+ * overflow the stack. */
+#define MAX_RVAL_LEVEL 100
+
+/* Implementation of rval(); 'level' is the current nesting depth. */
+static int rval_level(int level) {
+  if(level > MAX_RVAL_LEVEL) {
+    E(1, "Recursion depth exceeded");
+    return 1;
+  }
+
+  C(rfill1);
+  switch(*ctx->buf) {
+  case 't': /* true */
+    C(rlit("true", 4));
+    break;
+  case 'f': /* false */
+    C(rlit("false", 5));
+    break;
+  case 'n': /* null */
+    C(rlit("null", 4));
+    break;
+  case '"': /* string */
+    C(rstring(NULL, 0));
+    break;
+  case '{': /* object */
+    con(1);
+    C(cons());
+    if(*ctx->buf == '}') { /* empty object */
+      con(1);
+      break;
+    }
+    while(1) {
+      C(rkey(NULL, 0) || rval_level(level+1) || cons());
+      if(*ctx->buf == '}')
+        break;
+      E(*ctx->buf != ',', "Expected ',' or '}'");
+      con(1);
+    }
+    con(1);
+    break;
+  case '[': /* array */
+    con(1);
+    C(cons());
+    if(*ctx->buf == ']') { /* empty array */
+      con(1);
+      break;
+    }
+    while(1) {
+      C(cons() || rval_level(level+1) || cons());
+      if(*ctx->buf == ']')
+        break;
+      E(*ctx->buf != ',', "Expected ',' or ']'");
+      con(1);
+    }
+    con(1);
+    break;
+  default: /* assume number */
+    C(rnum());
+    break;
+  }
+  return 0;
+}
+
+
+/* (Recursively) parse and consume any JSON value. The result is discarded.
+ * Empty objects and arrays are accepted; values nested deeper than
+ * MAX_RVAL_LEVEL are rejected. Returns 0 on success, non-zero on error. */
+static int rval() {
+  return rval_level(0);
+}
+
+
+/* Consumes everything up to the root item, and checks that this item is a dir.
*/
+/* Expected layout: [ <major>, <minor>, <metadata>, <root item> ... where the
+ * major version must be 1 and the root item must start with '['. On success
+ * the buffer is positioned at that '['. Returns non-zero on error. */
+static int header() {
+  uint64_t v;
+
+  C(cons());
+  E(*ctx->buf != '[', "Expected JSON array");
+  con(1);
+  C(cons() || rint64(&v, 10000) || cons());
+  E(v != 1, "Incompatible major format version");
+  E(*ctx->buf != ',', "Expected ','");
+  con(1);
+  C(cons() || rint64(&v, 10000) || cons()); /* Ignore the minor version for now */
+  E(*ctx->buf != ',', "Expected ','");
+  con(1);
+  /* Metadata block is currently ignored */
+  C(cons() || rval() || cons());
+  E(*ctx->buf != ',', "Expected ','");
+  con(1);
+
+  C(cons());
+  E(*ctx->buf != '[', "Top-level item must be a directory");
+
+  return 0;
+}
+
+
+static int item(uint64_t);
+
+/* Read and add dir contents. Called after the directory's own information
+ * object has been read; parses ", <item>" repeatedly until the closing ']'.
+ * 'dev' is the device number of the directory, used as default for its
+ * children. Returns non-zero on error. */
+static int itemdir(uint64_t dev) {
+  while(1) {
+    C(cons());
+    if(*ctx->buf == ']')
+      break;
+    E(*ctx->buf != ',', "Expected ',' or ']'");
+    con(1);
+    C(cons());
+    /* Errors in a child item must abort the whole import. Ignoring the
+     * return value here would keep this loop running on unparsable input and
+     * eventually move ctx->buf past the end of the buffered data. */
+    C(item(dev));
+  }
+  con(1);
+  C(cons());
+  return 0;
+}
+
+
+/* Parses an item information object ("{ ... }") into a struct dir and stores
+ * a pointer to it in *item. 'dev' is the default device number (a "dev" key
+ * overrides it), 'isdir' tells whether the item is a directory. Unknown keys
+ * are skipped. As long as no "name" key has been seen, the fields are
+ * collected in a static struct; the name then triggers allocation of the real
+ * struct through dir_createstruct().
+ * Returns non-zero on error; otherwise the result of input_handle(1)
+ * (presumably non-zero when the import should be aborted -- see main.c). */
+static int iteminfo(struct dir **item, uint64_t dev, int isdir) {
+  static struct dir dir;
+  struct dir *tmp, *d = &dir;
+  uint64_t iv;
+
+  memset(d, 0, sizeof(struct dir));
+  d->flags |= isdir ? FF_DIR : FF_FILE;
+  d->dev = dev;
+
+  E(*ctx->buf != '{', "Expected JSON object");
+  con(1);
+
+  while(1) {
+    C(rkey(ctx->val, MAX_VAL));
+    /* TODO: strcmp() in this fashion isn't very fast. */
+    if(strcmp(ctx->val, "name") == 0) {              /* name */
+      /* Sentinel: rstring() overwrites the last byte with 0 if the string
+       * fills the buffer or had to be cut off. */
+      ctx->val[MAX_VAL-1] = 1;
+      C(rstring(ctx->val, MAX_VAL));
+      E(ctx->val[MAX_VAL-1] != 1, "Too large string value");
+      tmp = dir_createstruct(ctx->val);
+      /* Carry over the fields that were read before the name. */
+      memcpy(tmp, d, SDIRSIZE-1);
+      d = tmp;
+    } else if(strcmp(ctx->val, "asize") == 0) {      /* asize */
+      C(rint64(&iv, INT64_MAX));
+      d->asize = iv;
+    } else if(strcmp(ctx->val, "dsize") == 0) {      /* dsize */
+      C(rint64(&iv, INT64_MAX));
+      d->size = iv;
+    } else if(strcmp(ctx->val, "dev") == 0) {        /* dev */
+      C(rint64(&iv, UINT64_MAX));
+      d->dev = iv;
+    } else if(strcmp(ctx->val, "ino") == 0) {        /* ino */
+      C(rint64(&iv, UINT64_MAX));
+      d->ino = iv;
+    } else if(strcmp(ctx->val, "hlnkc") == 0) {      /* hlnkc */
+      if(*ctx->buf == 't') {
+        C(rlit("true", 4));
+        d->flags |= FF_HLNKC;
+      } else
+        C(rlit("false", 5));
+    } else if(strcmp(ctx->val, "read_error") == 0) { /* read_error */
+      if(*ctx->buf == 't') {
+        C(rlit("true", 4));
+        d->flags |= FF_ERR;
+      } else
+        C(rlit("false", 5));
+    } else if(strcmp(ctx->val, "excluded") == 0) {   /* excluded */
+      /* 8 bytes is just enough to hold "otherfs" */
+      C(rstring(ctx->val, 8));
+      if(strcmp(ctx->val, "otherfs") == 0)
+        d->flags |= FF_OTHFS;
+      else
+        d->flags |= FF_EXL;
+    } else if(strcmp(ctx->val, "notreg") == 0) {     /* notreg */
+      if(*ctx->buf == 't') {
+        C(rlit("true", 4));
+        d->flags &= ~FF_FILE;
+      } else
+        C(rlit("false", 5));
+    } else
+      C(rval());
+
+    C(cons());
+    if(*ctx->buf == '}')
+      break;
+    E(*ctx->buf != ',', "Expected ',' or '}'");
+    con(1);
+  }
+  con(1);
+
+  E(!*d->name, "No name field present in item information object");
+  *item = d;
+  ctx->items++;
+  return input_handle(1);
+}
+
+
+/* Recursively reads a file or directory item. A directory is written as
+ * "[ <info object>, <item>, ... ]", any other item as just its info object.
+ * 'dev' is the device number of the parent directory. The item is passed on
+ * to dir_output.item(); for directories this is followed by the contents and
+ * a closing dir_output.item(NULL). Returns non-zero on error. */
+static int item(uint64_t dev) {
+  int isdir = 0;
+  int isroot = ctx->items == 0; /* the very first item read is the root */
+  struct dir *d = NULL;
+
+  if(*ctx->buf == '[') {
+    isdir = 1;
+    con(1);
+    C(cons());
+  }
+
+  C(iteminfo(&d, dev, isdir));
+  dev = d->dev;
+
+  if(isroot)
+    dir_curpath_set(d->name);
+  else
+    dir_curpath_enter(d->name);
+
+  if(isdir) {
+    if(dir_output.item(d)) {
+      dir_seterr("Output error: %s", strerror(errno));
+      return 1;
+    }
+    C(itemdir(dev));
+    if(dir_output.item(NULL)) {
+      dir_seterr("Output error: %s", strerror(errno));
+      return 1;
+    }
+  } else if(dir_output.item(d)) {
+    dir_seterr("Output error: %s", strerror(errno));
+    return 1;
+  }
+
+  if(!isroot)
+    dir_curpath_leave();
+  else /* The root item must not be empty. */
+    E(ctx->items <= 1, "Empty directory");
+
+  return 0;
+}
+
+
+/* Consumes the ']' that closes the top-level array and verifies that nothing
+ * but whitespace follows it. Returns non-zero on error. */
+static int footer() {
+  C(cons());
+  E(*ctx->buf != ']', "Expected ']'");
+  con(1);
+  C(cons());
+  E(*ctx->buf, "Trailing garbage");
+  return 0;
+}
+
+
+/* Runs the complete import: header, root item (recursively) and footer, then
+ * closes the stream and frees the parser state. If a fatal error was set,
+ * input_handle(0) is polled until it returns non-zero. Installed as
+ * dir_process by dir_import_init(). Returns dir_output.final(). */
+static int process() {
+  int fail = 0;
+
+  header();
+
+  if(!dir_fatalerr)
+    fail = item(0);
+
+  if(!dir_fatalerr)
+    footer();
+
+  if(fclose(ctx->stream) && !dir_fatalerr)
+    dir_seterr("Error closing file: %s", strerror(errno));
+  free(ctx);
+
+  while(dir_fatalerr && !input_handle(0))
+    ;
+  return dir_output.final(dir_fatalerr || fail);
+}
+
+
+/* Prepares importing from the file 'fn' ("-" for standard input) and
+ * installs process() as dir_process. Returns 0 on success. On failure
+ * (cannot open the file, or out of memory) 1 is returned with errno set. */
+int dir_import_init(const char *fn) {
+  FILE *stream;
+  if(strcmp(fn, "-") == 0)
+    stream = stdin;
+  else if((stream = fopen(fn, "r")) == NULL)
+    return 1;
+
+  ctx = malloc(sizeof(struct ctx));
+  if(ctx == NULL) {
+    /* Don't leak the stream, and make sure the caller's strerror(errno)
+     * reports the actual problem. */
+    if(stream != stdin)
+      fclose(stream);
+    errno = ENOMEM;
+    return 1;
+  }
+  ctx->stream = stream;
+  ctx->line = 1;
+  ctx->byte = ctx->eof = ctx->items = 0;
+  ctx->buf = ctx->lastfill = ctx->readbuf;
+  ctx->readbuf[0] = 0;
+
+  dir_curpath_set(fn);
+  dir_process = process;
+  return 0;
+}
+
diff --git a/src/dir_scan.c b/src/dir_scan.c
index ee9b41dfc68bd5ee7380e7abd0b7e07ca0ff09f4..f3688c64d90b9c25ad371506361a9ecf26840113 100644
--- a/src/dir_scan.c
+++ b/src/dir_scan.c
@@ -234,8 +234,7 @@ static int dir_walk(char *dir) {
 }
 
 
-/* Returns 0 to continue running ncdu, 1 to quit.
*/ -int dir_scan_process() { +static int process() { char *path; char *dir; int fail = 0; @@ -296,5 +295,6 @@ void dir_scan_init(const char *path) { dir_curpath_set(path); dir_setlasterr(NULL); dir_seterr(NULL); + dir_process = process; pstate = ST_CALC; } diff --git a/src/main.c b/src/main.c index 28ea62ed0e1e115b4609615fc2cc2256d180594f..24885f77bcf2dfaf7b2e1781a7d554ef07a1bf58 100644 --- a/src/main.c +++ b/src/main.c @@ -102,9 +102,10 @@ int input_handle(int wait) { /* parse command line */ -static char *argv_parse(int argc, char **argv) { +static void argv_parse(int argc, char **argv) { int i, j, len; char *export = NULL; + char *import = NULL; char *dir = NULL; dir_ui = -1; @@ -112,7 +113,7 @@ static char *argv_parse(int argc, char **argv) { for(i=1; i<argc; i++) { if(argv[i][0] == '-') { /* flags requiring arguments */ - if(!strcmp(argv[i], "-X") || !strcmp(argv[i], "-u") || !strcmp(argv[i], "-o") + if(!strcmp(argv[i], "-X") || !strcmp(argv[i], "-u") || !strcmp(argv[i], "-o") || !strcmp(argv[i], "-f") || !strcmp(argv[i], "--exclude-from") || !strcmp(argv[i], "--exclude")) { if(i+1 >= argc) { printf("Option %s requires an argument\n", argv[i]); @@ -126,6 +127,8 @@ static char *argv_parse(int argc, char **argv) { dir_ui = argv[i][0]-'0'; } else if(strcmp(argv[i], "-o") == 0) export = argv[++i]; + else if(strcmp(argv[i], "-f") == 0) + import = argv[++i]; else if(strcmp(argv[i], "--exclude") == 0) exclude_add(argv[++i]); else if(exclude_addfile(argv[++i])) { @@ -150,6 +153,7 @@ static char *argv_parse(int argc, char **argv) { printf(" -x Same filesystem\n"); printf(" -r Read only\n"); printf(" -o FILE Export scanned directory to FILE\n"); + printf(" -f FILE Import scanned directory from FILE\n"); printf(" -u <0-2> UI to use when scanning (0=minimal,2=verbose)\n"); printf(" --exclude PATTERN Exclude files that match PATTERN\n"); printf(" -X, --exclude-from FILE Exclude files that match any pattern in FILE\n"); @@ -175,12 +179,20 @@ static char *argv_parse(int 
argc, char **argv) { } else dir_mem_init(NULL); + if(import) { + if(dir_import_init(import)) { + printf("Can't open %s: %s\n", import, strerror(errno)); + exit(1); + } + if(strcmp(import, "-") == 0) + ncurses_tty = 1; + } else + dir_scan_init(dir ? dir : "."); + /* Use the single-line scan feedback by default when exporting to file, no * feedback when exporting to stdout. */ if(dir_ui == -1) dir_ui = export && strcmp(export, "-") == 0 ? 0 : export ? 1 : 2; - - return dir; } @@ -223,14 +235,8 @@ static void init_nc() { /* main program */ int main(int argc, char **argv) { - char *dir; - setlocale(LC_ALL, ""); - - if((dir = argv_parse(argc, argv)) == NULL) - dir = "."; - - dir_scan_init(dir); + argv_parse(argc, argv); if(dir_ui == 2) init_nc(); @@ -245,7 +251,7 @@ int main(int argc, char **argv) { } if(pstate == ST_CALC) { - if(dir_scan_process()) { + if(dir_process()) { if(dir_ui == 1) fputc('\n', stderr); break;