Skip to content
Snippets Groups Projects
Commit a28a0788 authored by Yorhel's avatar Yorhel
Browse files

Implement --exclude-kernfs and --exclude-pattern

Easier to implement now that we're linking against libc.

But exclude pattern matching is extremely slow, so that should really be
rewritten with a custom fnmatch implementation. It's exactly as slow as
in ncdu 1.x as well, I'm surprised nobody's complained about it yet.
And while I'm at it, supporting .gitignore-style patterns would be
pretty neat, too.
parent 826c2fc0
No related branches found
No related tags found
No related merge requests found
...@@ -12,10 +12,10 @@ pub const allocator = std.heap.c_allocator; ...@@ -12,10 +12,10 @@ pub const allocator = std.heap.c_allocator;
pub const Config = struct { pub const Config = struct {
same_fs: bool = true, same_fs: bool = true,
extended: bool = false, extended: bool = false,
exclude_caches: bool = false,
follow_symlinks: bool = false, follow_symlinks: bool = false,
exclude_caches: bool = false,
exclude_kernfs: bool = false, exclude_kernfs: bool = false,
// TODO: exclude patterns exclude_patterns: std.ArrayList([:0]const u8) = std.ArrayList([:0]const u8).init(allocator),
update_delay: u32 = 100, update_delay: u32 = 100,
si: bool = false, si: bool = false,
...@@ -31,14 +31,14 @@ pub const Config = struct { ...@@ -31,14 +31,14 @@ pub const Config = struct {
pub var config = Config{}; pub var config = Config{};
// Simple generic argument parser, supports getopt_long() style arguments. // Simple generic argument parser, supports getopt_long() style arguments.
// T can be any type that has a 'fn next(T) ?[]const u8' method, e.g.: // T can be any type that has a 'fn next(T) ?[:0]const u8' method, e.g.:
// var args = Args(std.process.ArgIteratorPosix).init(std.process.ArgIteratorPosix.init()); // var args = Args(std.process.ArgIteratorPosix).init(std.process.ArgIteratorPosix.init());
fn Args(T: anytype) type { fn Args(T: anytype) type {
return struct { return struct {
it: T, it: T,
short: ?[]const u8 = null, // Remainder after a short option, e.g. -x<stuff> (which may be either more short options or an argument) short: ?[:0]const u8 = null, // Remainder after a short option, e.g. -x<stuff> (which may be either more short options or an argument)
last: ?[]const u8 = null, last: ?[]const u8 = null,
last_arg: ?[]const u8 = null, // In the case of --option=<arg> last_arg: ?[:0]const u8 = null, // In the case of --option=<arg>
shortbuf: [2]u8 = undefined, shortbuf: [2]u8 = undefined,
argsep: bool = false, argsep: bool = false,
...@@ -56,10 +56,10 @@ fn Args(T: anytype) type { ...@@ -56,10 +56,10 @@ fn Args(T: anytype) type {
return Self{ .it = it }; return Self{ .it = it };
} }
fn shortopt(self: *Self, s: []const u8) Option { fn shortopt(self: *Self, s: [:0]const u8) Option {
self.shortbuf[0] = '-'; self.shortbuf[0] = '-';
self.shortbuf[1] = s[0]; self.shortbuf[1] = s[0];
self.short = if (s.len > 1) s[1..] else null; self.short = if (s.len > 1) s[1.. :0] else null;
self.last = &self.shortbuf; self.last = &self.shortbuf;
return .{ .opt = true, .val = &self.shortbuf }; return .{ .opt = true, .val = &self.shortbuf };
} }
...@@ -87,11 +87,11 @@ fn Args(T: anytype) type { ...@@ -87,11 +87,11 @@ fn Args(T: anytype) type {
self.last = val; self.last = val;
return Option{ .opt = true, .val = val }; return Option{ .opt = true, .val = val };
} }
return self.shortopt(val[1..]); return self.shortopt(val[1..:0]);
} }
/// Returns the argument given to the last returned option. Dies with an error if no argument is provided. /// Returns the argument given to the last returned option. Dies with an error if no argument is provided.
pub fn arg(self: *Self) []const u8 { pub fn arg(self: *Self) [:0]const u8 {
if (self.short) |a| { if (self.short) |a| {
defer self.short = null; defer self.short = null;
return a; return a;
...@@ -175,6 +175,19 @@ fn help() noreturn { ...@@ -175,6 +175,19 @@ fn help() noreturn {
std.process.exit(0); std.process.exit(0);
} }
/// Read exclude patterns from the file at `path`, one pattern per line, and
/// append every non-empty line to `config.exclude_patterns` as a 0-terminated
/// string whose memory is handed over to that list.
///
/// 4096 is passed as the maximum line size to the line reader. A final line
/// without a trailing newline is still picked up. Errors from opening the
/// file, reading it, or allocating are returned to the caller.
fn readExcludeFile(path: []const u8) !void {
    const f = try std.fs.cwd().openFile(path, .{});
    defer f.close();

    // Keep the buffered reader in a named variable: the Reader obtained from
    // it refers back to it, so it has to stay alive for every read below.
    var buffered = std.io.bufferedReader(f.reader());
    const rd = buffered.reader();

    var buf = std.ArrayList(u8).init(allocator);
    // Lines passed to toOwnedSliceSentinel() no longer belong to `buf`; this
    // only releases capacity left over from the final (empty) read or from an
    // early error return.
    defer buf.deinit();

    while (true) {
        rd.readUntilDelimiterArrayList(&buf, '\n', 4096) catch |e| {
            if (e != error.EndOfStream) return e;
            // End of file with nothing pending: we're done. Otherwise fall
            // through so the unterminated last line is still recorded.
            if (buf.items.len == 0) break;
        };
        // Blank lines are not patterns; skip them.
        if (buf.items.len > 0)
            try config.exclude_patterns.append(try buf.toOwnedSliceSentinel(0));
    }
}
pub fn main() anyerror!void { pub fn main() anyerror!void {
// Grab thousands_sep from the current C locale. // Grab thousands_sep from the current C locale.
// (We can safely remove this when not linking against libc, it's a somewhat obscure feature) // (We can safely remove this when not linking against libc, it's a somewhat obscure feature)
...@@ -206,7 +219,11 @@ pub fn main() anyerror!void { ...@@ -206,7 +219,11 @@ pub fn main() anyerror!void {
else if(opt.is("-r")) config.read_only = true else if(opt.is("-r")) config.read_only = true
else if(opt.is("--si")) config.si = true else if(opt.is("--si")) config.si = true
else if(opt.is("-L") or opt.is("--follow-symlinks")) config.follow_symlinks = true else if(opt.is("-L") or opt.is("--follow-symlinks")) config.follow_symlinks = true
else if(opt.is("--exclude-caches")) config.exclude_caches = true else if(opt.is("--exclude")) try config.exclude_patterns.append(args.arg())
else if(opt.is("-X") or opt.is("--exclude-from")) {
const arg = args.arg();
readExcludeFile(arg) catch |e| ui.die("Error reading excludes from {s}: {}.\n", .{ arg, e });
} else if(opt.is("--exclude-caches")) config.exclude_caches = true
else if(opt.is("--exclude-kernfs")) config.exclude_kernfs = true else if(opt.is("--exclude-kernfs")) config.exclude_kernfs = true
else if(opt.is("--confirm-quit")) config.confirm_quit = true else if(opt.is("--confirm-quit")) config.confirm_quit = true
else if(opt.is("--color")) { else if(opt.is("--color")) {
...@@ -215,9 +232,12 @@ pub fn main() anyerror!void { ...@@ -215,9 +232,12 @@ pub fn main() anyerror!void {
else if (std.mem.eql(u8, val, "dark")) config.ui_color = .dark else if (std.mem.eql(u8, val, "dark")) config.ui_color = .dark
else ui.die("Unknown --color option: {s}.\n", .{val}); else ui.die("Unknown --color option: {s}.\n", .{val});
} else ui.die("Unrecognized option '{s}'.\n", .{opt.val}); } else ui.die("Unrecognized option '{s}'.\n", .{opt.val});
// TODO: -o, -f, -0, -1, -2, --exclude, -X, --exclude-from // TODO: -o, -f, -0, -1, -2
} }
if (std.builtin.os.tag != .linux and config.exclude_kernfs)
ui.die("The --exclude-kernfs tag is currently only supported on Linux.\n", .{});
try scan.scanRoot(scan_dir orelse "."); try scan.scanRoot(scan_dir orelse ".");
ui.init(); ui.init();
......
const std = @import("std"); const std = @import("std");
const main = @import("main.zig"); const main = @import("main.zig");
const model = @import("model.zig"); const model = @import("model.zig");
const c_statfs = @cImport(@cInclude("sys/vfs.h"));
const c_fnmatch = @cImport(@cInclude("fnmatch.h"));
// Concise stat struct for fields we're interested in, with the types used by the model. // Concise stat struct for fields we're interested in, with the types used by the model.
...@@ -14,108 +16,188 @@ const Stat = struct { ...@@ -14,108 +16,188 @@ const Stat = struct {
reg: bool, reg: bool,
symlink: bool, symlink: bool,
ext: model.Ext, ext: model.Ext,
};
// Cast any integer type to the target type, clamping the value to the supported maximum if necessary. // Cast any integer type to the target type, clamping the value to the supported maximum if necessary.
fn castClamp(comptime T: type, x: anytype) T { fn castClamp(comptime T: type, x: anytype) T {
// (adapted from std.math.cast) // (adapted from std.math.cast)
if (std.math.maxInt(@TypeOf(x)) > std.math.maxInt(T) and x > std.math.maxInt(T)) { if (std.math.maxInt(@TypeOf(x)) > std.math.maxInt(T) and x > std.math.maxInt(T)) {
return std.math.maxInt(T); return std.math.maxInt(T);
} else if (std.math.minInt(@TypeOf(x)) < std.math.minInt(T) and x < std.math.minInt(T)) { } else if (std.math.minInt(@TypeOf(x)) < std.math.minInt(T) and x < std.math.minInt(T)) {
return std.math.minInt(T); return std.math.minInt(T);
} else { } else {
return @intCast(T, x); return @intCast(T, x);
}
} }
}
// Cast any integer type to the target type, truncating if necessary. // Cast any integer type to the target type, truncating if necessary.
fn castTruncate(comptime T: type, x: anytype) T { fn castTruncate(comptime T: type, x: anytype) T {
const Ti = @typeInfo(T).Int; const Ti = @typeInfo(T).Int;
const Xi = @typeInfo(@TypeOf(x)).Int; const Xi = @typeInfo(@TypeOf(x)).Int;
const nx = if (Xi.signedness != Ti.signedness) @bitCast(std.meta.Int(Ti.signedness, Xi.bits), x) else x; const nx = if (Xi.signedness != Ti.signedness) @bitCast(std.meta.Int(Ti.signedness, Xi.bits), x) else x;
return if (Xi.bits > Ti.bits) @truncate(T, nx) else nx; return if (Xi.bits > Ti.bits) @truncate(T, nx) else nx;
} }
fn clamp(comptime T: type, comptime field: anytype, x: anytype) std.meta.fieldInfo(T, field).field_type { fn clamp(comptime T: type, comptime field: anytype, x: anytype) std.meta.fieldInfo(T, field).field_type {
return castClamp(std.meta.fieldInfo(T, field).field_type, x); return castClamp(std.meta.fieldInfo(T, field).field_type, x);
} }
fn truncate(comptime T: type, comptime field: anytype, x: anytype) std.meta.fieldInfo(T, field).field_type { fn truncate(comptime T: type, comptime field: anytype, x: anytype) std.meta.fieldInfo(T, field).field_type {
return castTruncate(std.meta.fieldInfo(T, field).field_type, x); return castTruncate(std.meta.fieldInfo(T, field).field_type, x);
} }
fn readStat(parent: std.fs.Dir, name: [:0]const u8, follow: bool) !Stat { fn read(parent: std.fs.Dir, name: [:0]const u8, follow: bool) !Stat {
const stat = try std.os.fstatatZ(parent.fd, name, if (follow) 0 else std.os.AT_SYMLINK_NOFOLLOW); const stat = try std.os.fstatatZ(parent.fd, name, if (follow) 0 else std.os.AT_SYMLINK_NOFOLLOW);
return Stat{ return Stat{
.blocks = clamp(Stat, .blocks, stat.blocks), .blocks = clamp(Stat, .blocks, stat.blocks),
.size = clamp(Stat, .size, stat.size), .size = clamp(Stat, .size, stat.size),
.dev = truncate(Stat, .dev, stat.dev), .dev = truncate(Stat, .dev, stat.dev),
.ino = truncate(Stat, .ino, stat.ino), .ino = truncate(Stat, .ino, stat.ino),
.nlink = clamp(Stat, .nlink, stat.nlink), .nlink = clamp(Stat, .nlink, stat.nlink),
.dir = std.os.system.S_ISDIR(stat.mode), .dir = std.os.system.S_ISDIR(stat.mode),
.reg = std.os.system.S_ISREG(stat.mode), .reg = std.os.system.S_ISREG(stat.mode),
.symlink = std.os.system.S_ISLNK(stat.mode), .symlink = std.os.system.S_ISLNK(stat.mode),
.ext = .{ .ext = .{
.mtime = clamp(model.Ext, .mtime, stat.mtime().tv_sec), .mtime = clamp(model.Ext, .mtime, stat.mtime().tv_sec),
.uid = truncate(model.Ext, .uid, stat.uid), .uid = truncate(model.Ext, .uid, stat.uid),
.gid = truncate(model.Ext, .gid, stat.gid), .gid = truncate(model.Ext, .gid, stat.gid),
.mode = truncate(model.Ext, .mode, stat.mode), .mode = truncate(model.Ext, .mode, stat.mode),
}, },
};
}
};
var kernfs_cache: std.AutoHashMap(u64,bool) = std.AutoHashMap(u64,bool).init(main.allocator);
// This function only works on Linux
//
// Reports whether the filesystem that the open directory 'dir' lives on is one
// of the kernel pseudo-filesystems listed below, by comparing the 'f_type'
// reported by fstatfs() against a fixed set of magic numbers.
// The answer is remembered in 'kernfs_cache' under the key 'dev', so
// fstatfs() is called at most once per device id for which it succeeds.
fn isKernfs(dir: std.fs.Dir, dev: u64) bool {
// Fast path: this device id has been classified before.
if (kernfs_cache.get(dev)) |e| return e;
var buf: c_statfs.struct_statfs = undefined;
// On failure the directory is reported as "not kernfs" and nothing is cached,
// so the next call for the same device id will try fstatfs() again.
if (c_statfs.fstatfs(dir.fd, &buf) != 0) return false; // silently ignoring errors isn't too nice.
// NOTE(review): the integer type of 'f_type' comes from the C header; verify
// that the constants above 0x7fffffff are representable on every target.
const iskern = switch (buf.f_type) {
// These numbers are documented in the Linux 'statfs(2)' man page, so I assume they're stable.
0x42494e4d, // BINFMTFS_MAGIC
0xcafe4a11, // BPF_FS_MAGIC
0x27e0eb, // CGROUP_SUPER_MAGIC
0x63677270, // CGROUP2_SUPER_MAGIC
0x64626720, // DEBUGFS_MAGIC
0x1cd1, // DEVPTS_SUPER_MAGIC
0x9fa0, // PROC_SUPER_MAGIC
0x6165676c, // PSTOREFS_MAGIC
0x73636673, // SECURITYFS_MAGIC
0xf97cff8c, // SELINUX_MAGIC
0x62656572, // SYSFS_MAGIC
0x74726163 // TRACEFS_MAGIC
=> true,
else => false,
}; };
// A failed cache insert is deliberately ignored: the result is still returned,
// it just won't be remembered for the next lookup.
kernfs_cache.put(dev, iskern) catch {};
return iskern;
} }
// Read and index entries of the given dir. The entry for the directory is already assumed to be in 'parents'. const Context = struct {
parents: model.Parents = .{},
path: std.ArrayList(u8) = std.ArrayList(u8).init(main.allocator),
path_indices: std.ArrayList(usize) = std.ArrayList(usize).init(main.allocator),
// 0-terminated name of the top entry, points into 'path', invalid after popPath().
// This is a workaround to Zig's directory iterator not returning a [:0]const u8.
name: [:0]const u8 = undefined,
const Self = @This();
/// Append `name` as a new component at the end of `path` and remember the
/// previous length in `path_indices` so popPath() can undo it.
/// A '/' separator is inserted first unless the current path is at most one
/// byte long. Afterwards `name` (the field) is a 0-terminated view of the
/// component just added, pointing into `path`.
fn pushPath(self: *Self, name: []const u8) !void {
    const prev_len = self.path.items.len;
    try self.path_indices.append(prev_len);
    if (prev_len > 1) try self.path.append('/');

    const name_begin = self.path.items.len;
    try self.path.appendSlice(name);
    // Temporarily write a terminating 0 so a sentinel slice can be taken...
    try self.path.append(0);
    const name_end = self.path.items.len - 1;
    self.name = self.path.items[name_begin..name_end :0];
    // ...then drop it from the logical length again; the byte itself stays in
    // the buffer behind the last item.
    self.path.items.len = name_end;
}
/// Undo the most recent pushPath(): shrink `path` back to the length recorded
/// for it and drop that record from `path_indices`.
fn popPath(self: *Self) void {
    const top = self.path_indices.items.len - 1;
    self.path.items.len = self.path_indices.items[top];
    self.path_indices.items.len = top;
}
};
// Read and index entries of the given dir. The entry for the directory is already assumed to be in 'ctx.parents'.
// (TODO: shouldn't error on OOM but instead call a function that waits or something) // (TODO: shouldn't error on OOM but instead call a function that waits or something)
fn scanDir(parents: *model.Parents, dir: std.fs.Dir) std.mem.Allocator.Error!void { fn scanDir(ctx: *Context, dir: std.fs.Dir) std.mem.Allocator.Error!void {
var it = dir.iterate(); var it = dir.iterate();
while(true) { while(true) {
const entry = it.next() catch { const entry = it.next() catch {
parents.top().entry.set_err(parents); ctx.parents.top().entry.set_err(&ctx.parents);
return; return;
} orelse break; } orelse break;
// TODO: Check for exclude patterns try ctx.pushPath(entry.name);
defer ctx.popPath();
// XXX: Surely the name already has a trailing \0 in the buffer received by the OS? // XXX: This algorithm is extremely slow, can be optimized with some clever pattern parsing.
// XXX#2: Does this allocate PATH_MAX bytes on the stack for each level of recursion!? const excluded = blk: {
const name_z = std.os.toPosixPath(entry.name) catch undefined; for (main.config.exclude_patterns.items) |pat| {
var stat = readStat(dir, &name_z, false) catch { ctx.path.append(0) catch unreachable;
var path = ctx.path.items[0..ctx.path.items.len-1:0];
ctx.path.items.len -= 1;
while (path.len > 0) {
if (c_fnmatch.fnmatch(pat, path, 0) == 0) break :blk true;
if (std.mem.indexOfScalar(u8, path, '/')) |idx| path = path[idx+1..:0]
else break;
}
}
break :blk false;
};
if (excluded) {
var e = try model.Entry.create(.file, false, entry.name); var e = try model.Entry.create(.file, false, entry.name);
e.insert(parents) catch unreachable; e.file().?.excluded = true;
e.set_err(parents); e.insert(&ctx.parents) catch unreachable;
continue;
}
var stat = Stat.read(dir, ctx.name, false) catch {
var e = try model.Entry.create(.file, false, entry.name);
e.insert(&ctx.parents) catch unreachable;
e.set_err(&ctx.parents);
continue; continue;
}; };
if (main.config.same_fs and stat.dev != model.getDev(parents.top().dev)) { if (main.config.same_fs and stat.dev != model.getDev(ctx.parents.top().dev)) {
var e = try model.Entry.create(.file, false, entry.name); var e = try model.Entry.create(.file, false, entry.name);
e.file().?.other_fs = true; e.file().?.other_fs = true;
e.insert(parents) catch unreachable; e.insert(&ctx.parents) catch unreachable;
continue; continue;
} }
if (main.config.follow_symlinks and stat.symlink) { if (main.config.follow_symlinks and stat.symlink) {
if (readStat(dir, &name_z, true)) |nstat| { if (Stat.read(dir, ctx.name, true)) |nstat| {
if (!nstat.dir) { if (!nstat.dir) {
stat = nstat; stat = nstat;
// Symlink targets may reside on different filesystems, // Symlink targets may reside on different filesystems,
// this will break hardlink detection and counting so let's disable it. // this will break hardlink detection and counting so let's disable it.
if (stat.nlink > 1 and stat.dev != model.getDev(parents.top().dev)) if (stat.nlink > 1 and stat.dev != model.getDev(ctx.parents.top().dev))
stat.nlink = 1; stat.nlink = 1;
} }
} else |_| {} } else |_| {}
} }
// TODO: Check for kernfs; Zig has no wrappers for fstatfs() yet and calling the syscall directly doesn't seem too trivial. :(
var edir = var edir =
if (stat.dir) dir.openDirZ(&name_z, .{ .access_sub_paths = true, .iterate = true, .no_follow = true }) catch { if (stat.dir) dir.openDirZ(ctx.name, .{ .access_sub_paths = true, .iterate = true, .no_follow = true }) catch {
var e = try model.Entry.create(.file, false, entry.name); var e = try model.Entry.create(.file, false, entry.name);
e.insert(parents) catch unreachable; e.insert(&ctx.parents) catch unreachable;
e.set_err(parents); e.set_err(&ctx.parents);
continue; continue;
} else null; } else null;
defer if (edir != null) edir.?.close(); defer if (edir != null) edir.?.close();
if (std.builtin.os.tag == .linux and main.config.exclude_kernfs and stat.dir and isKernfs(edir.?, stat.dev)) {
var e = try model.Entry.create(.file, false, entry.name);
e.file().?.kernfs = true;
e.insert(&ctx.parents) catch unreachable;
continue;
}
if (main.config.exclude_caches and stat.dir) { if (main.config.exclude_caches and stat.dir) {
if (edir.?.openFileZ("CACHEDIR.TAG", .{})) |f| { if (edir.?.openFileZ("CACHEDIR.TAG", .{})) |f| {
const sig = "Signature: 8a477f597d28d172789f06886806bc55"; const sig = "Signature: 8a477f597d28d172789f06886806bc55";
...@@ -124,7 +206,7 @@ fn scanDir(parents: *model.Parents, dir: std.fs.Dir) std.mem.Allocator.Error!voi ...@@ -124,7 +206,7 @@ fn scanDir(parents: *model.Parents, dir: std.fs.Dir) std.mem.Allocator.Error!voi
if (len == sig.len and std.mem.eql(u8, &buf, sig)) { if (len == sig.len and std.mem.eql(u8, &buf, sig)) {
var e = try model.Entry.create(.file, false, entry.name); var e = try model.Entry.create(.file, false, entry.name);
e.file().?.excluded = true; e.file().?.excluded = true;
e.insert(parents) catch unreachable; e.insert(&ctx.parents) catch unreachable;
continue; continue;
} }
} else |_| {} } else |_| {}
...@@ -148,12 +230,12 @@ fn scanDir(parents: *model.Parents, dir: std.fs.Dir) std.mem.Allocator.Error!voi ...@@ -148,12 +230,12 @@ fn scanDir(parents: *model.Parents, dir: std.fs.Dir) std.mem.Allocator.Error!voi
l.nlink = stat.nlink; l.nlink = stat.nlink;
} }
if (e.ext()) |ext| ext.* = stat.ext; if (e.ext()) |ext| ext.* = stat.ext;
try e.insert(parents); try e.insert(&ctx.parents);
if (e.dir()) |d| { if (e.dir()) |d| {
try parents.push(d); try ctx.parents.push(d);
try scanDir(parents, edir.?); try scanDir(ctx, edir.?);
parents.pop(); ctx.parents.pop();
} }
} }
} }
...@@ -162,14 +244,15 @@ pub fn scanRoot(path: []const u8) !void { ...@@ -162,14 +244,15 @@ pub fn scanRoot(path: []const u8) !void {
const full_path = std.fs.realpathAlloc(main.allocator, path) catch path; const full_path = std.fs.realpathAlloc(main.allocator, path) catch path;
model.root = (try model.Entry.create(.dir, false, full_path)).dir().?; model.root = (try model.Entry.create(.dir, false, full_path)).dir().?;
const stat = try readStat(std.fs.cwd(), model.root.entry.name(), true); const stat = try Stat.read(std.fs.cwd(), model.root.entry.name(), true);
if (!stat.dir) return error.NotADirectory; if (!stat.dir) return error.NotADirectory;
model.root.entry.blocks = stat.blocks; model.root.entry.blocks = stat.blocks;
model.root.entry.size = stat.size; model.root.entry.size = stat.size;
model.root.dev = try model.getDevId(stat.dev); model.root.dev = try model.getDevId(stat.dev);
if (model.root.entry.ext()) |ext| ext.* = stat.ext; if (model.root.entry.ext()) |ext| ext.* = stat.ext;
var parents = model.Parents{}; var ctx = Context{};
try ctx.pushPath(full_path);
const dir = try std.fs.cwd().openDirZ(model.root.entry.name(), .{ .access_sub_paths = true, .iterate = true }); const dir = try std.fs.cwd().openDirZ(model.root.entry.name(), .{ .access_sub_paths = true, .iterate = true });
try scanDir(&parents, dir); try scanDir(&ctx, dir);
} }
...@@ -6,7 +6,6 @@ const main = @import("main.zig"); ...@@ -6,7 +6,6 @@ const main = @import("main.zig");
pub const c = @cImport({ pub const c = @cImport({
@cInclude("stdio.h"); @cInclude("stdio.h");
@cInclude("string.h"); @cInclude("string.h");
@cInclude("unistd.h");
@cInclude("curses.h"); @cInclude("curses.h");
}); });
...@@ -121,7 +120,7 @@ pub fn init() void { ...@@ -121,7 +120,7 @@ pub fn init() void {
if (term == null) die("Error initializing ncurses.\n", .{}); if (term == null) die("Error initializing ncurses.\n", .{});
_ = c.set_term(term); _ = c.set_term(term);
} else { } else {
if (c.isatty(0) != 1) die("Standard input is not a TTY. Did you mean to import a file using '-f -'?\n", .{}); if (!std.io.getStdIn().isTty()) die("Standard input is not a TTY. Did you mean to import a file using '-f -'?\n", .{});
if (c.initscr() == null) die("Error initializing ncurses.\n", .{}); if (c.initscr() == null) die("Error initializing ncurses.\n", .{});
} }
updateSize(); updateSize();
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment