diff --git a/src/main.zig b/src/main.zig index 452d78e9747bd3352986f6aa5b4af06e55113a88..24d1461f48a3a1b2c7870faf4d99e0e706bb78c4 100644 --- a/src/main.zig +++ b/src/main.zig @@ -12,10 +12,10 @@ pub const allocator = std.heap.c_allocator; pub const Config = struct { same_fs: bool = true, extended: bool = false, - exclude_caches: bool = false, follow_symlinks: bool = false, + exclude_caches: bool = false, exclude_kernfs: bool = false, - // TODO: exclude patterns + exclude_patterns: std.ArrayList([:0]const u8) = std.ArrayList([:0]const u8).init(allocator), update_delay: u32 = 100, si: bool = false, @@ -31,14 +31,14 @@ pub const Config = struct { pub var config = Config{}; // Simple generic argument parser, supports getopt_long() style arguments. -// T can be any type that has a 'fn next(T) ?[]const u8' method, e.g.: +// T can be any type that has a 'fn next(T) ?[:0]const u8' method, e.g.: // var args = Args(std.process.ArgIteratorPosix).init(std.process.ArgIteratorPosix.init()); fn Args(T: anytype) type { return struct { it: T, - short: ?[]const u8 = null, // Remainder after a short option, e.g. -x<stuff> (which may be either more short options or an argument) + short: ?[:0]const u8 = null, // Remainder after a short option, e.g. -x<stuff> (which may be either more short options or an argument) last: ?[]const u8 = null, - last_arg: ?[]const u8 = null, // In the case of --option=<arg> + last_arg: ?[:0]const u8 = null, // In the case of --option=<arg> shortbuf: [2]u8 = undefined, argsep: bool = false, @@ -56,10 +56,10 @@ fn Args(T: anytype) type { return Self{ .it = it }; } - fn shortopt(self: *Self, s: []const u8) Option { + fn shortopt(self: *Self, s: [:0]const u8) Option { self.shortbuf[0] = '-'; self.shortbuf[1] = s[0]; - self.short = if (s.len > 1) s[1..] else null; + self.short = if (s.len > 1) s[1.. :0] else null; self.last = &self.shortbuf; return .{ .opt = true, .val = &self.shortbuf }; } @@ -87,11 +87,11 @@ fn Args(T: anytype) type { self.last = val; return Option{ .opt = true, .val = val }; } - return self.shortopt(val[1..]); + return self.shortopt(val[1..:0]); } /// Returns the argument given to the last returned option. Dies with an error if no argument is provided. - pub fn arg(self: *Self) []const u8 { + pub fn arg(self: *Self) [:0]const u8 { if (self.short) |a| { defer self.short = null; return a; @@ -175,6 +175,19 @@ fn help() noreturn { std.process.exit(0); } +fn readExcludeFile(path: []const u8) !void { + const f = try std.fs.cwd().openFile(path, .{}); + defer f.close(); + var rd = std.io.bufferedReader(f.reader()).reader(); + var buf = std.ArrayList(u8).init(allocator); + while (true) { + rd.readUntilDelimiterArrayList(&buf, '\n', 4096) + catch |e| if (e != error.EndOfStream) return e else if (buf.items.len == 0) break; + if (buf.items.len > 0) + try config.exclude_patterns.append(try buf.toOwnedSliceSentinel(0)); + } +} + pub fn main() anyerror!void { // Grab thousands_sep from the current C locale. // (We can safely remove this when not linking against libc, it's a somewhat obscure feature) @@ -206,7 +219,11 @@ pub fn main() anyerror!void { else if(opt.is("-r")) config.read_only = true else if(opt.is("--si")) config.si = true else if(opt.is("-L") or opt.is("--follow-symlinks")) config.follow_symlinks = true - else if(opt.is("--exclude-caches")) config.exclude_caches = true + else if(opt.is("--exclude")) try config.exclude_patterns.append(args.arg()) + else if(opt.is("-X") or opt.is("--exclude-from")) { + const arg = args.arg(); + readExcludeFile(arg) catch |e| ui.die("Error reading excludes from {s}: {}.\n", .{ arg, e }); + } else if(opt.is("--exclude-caches")) config.exclude_caches = true else if(opt.is("--exclude-kernfs")) config.exclude_kernfs = true else if(opt.is("--confirm-quit")) config.confirm_quit = true else if(opt.is("--color")) { @@ -215,9 +232,12 @@ pub fn main() anyerror!void { else if (std.mem.eql(u8, val, "dark")) config.ui_color = .dark else ui.die("Unknown --color option: {s}.\n", .{val}); } else ui.die("Unrecognized option '{s}'.\n", .{opt.val}); - // TODO: -o, -f, -0, -1, -2, --exclude, -X, --exclude-from + // TODO: -o, -f, -0, -1, -2 } + if (std.builtin.os.tag != .linux and config.exclude_kernfs) + ui.die("The --exclude-kernfs tag is currently only supported on Linux.\n", .{}); + try scan.scanRoot(scan_dir orelse "."); ui.init(); diff --git a/src/scan.zig b/src/scan.zig index 88b3c104019a30ac725a06df7e9222d29806c622..ee673d8a05450cab737b670e12428ff2ee504509 100644 --- a/src/scan.zig +++ b/src/scan.zig @@ -1,6 +1,8 @@ const std = @import("std"); const main = @import("main.zig"); const model = @import("model.zig"); +const c_statfs = @cImport(@cInclude("sys/vfs.h")); +const c_fnmatch = @cImport(@cInclude("fnmatch.h")); // Concise stat struct for fields we're interested in, with the types used by the model. @@ -14,108 +16,188 @@ const Stat = struct { reg: bool, symlink: bool, ext: model.Ext, -}; -// Cast any integer type to the target type, clamping the value to the supported maximum if necessary. -fn castClamp(comptime T: type, x: anytype) T { - // (adapted from std.math.cast) - if (std.math.maxInt(@TypeOf(x)) > std.math.maxInt(T) and x > std.math.maxInt(T)) { - return std.math.maxInt(T); - } else if (std.math.minInt(@TypeOf(x)) < std.math.minInt(T) and x < std.math.minInt(T)) { - return std.math.minInt(T); - } else { - return @intCast(T, x); + // Cast any integer type to the target type, clamping the value to the supported maximum if necessary. + fn castClamp(comptime T: type, x: anytype) T { + // (adapted from std.math.cast) + if (std.math.maxInt(@TypeOf(x)) > std.math.maxInt(T) and x > std.math.maxInt(T)) { + return std.math.maxInt(T); + } else if (std.math.minInt(@TypeOf(x)) < std.math.minInt(T) and x < std.math.minInt(T)) { + return std.math.minInt(T); + } else { + return @intCast(T, x); + } } -} -// Cast any integer type to the target type, truncating if necessary. -fn castTruncate(comptime T: type, x: anytype) T { - const Ti = @typeInfo(T).Int; - const Xi = @typeInfo(@TypeOf(x)).Int; - const nx = if (Xi.signedness != Ti.signedness) @bitCast(std.meta.Int(Ti.signedness, Xi.bits), x) else x; - return if (Xi.bits > Ti.bits) @truncate(T, nx) else nx; -} + // Cast any integer type to the target type, truncating if necessary. + fn castTruncate(comptime T: type, x: anytype) T { + const Ti = @typeInfo(T).Int; + const Xi = @typeInfo(@TypeOf(x)).Int; + const nx = if (Xi.signedness != Ti.signedness) @bitCast(std.meta.Int(Ti.signedness, Xi.bits), x) else x; + return if (Xi.bits > Ti.bits) @truncate(T, nx) else nx; + } -fn clamp(comptime T: type, comptime field: anytype, x: anytype) std.meta.fieldInfo(T, field).field_type { - return castClamp(std.meta.fieldInfo(T, field).field_type, x); -} + fn clamp(comptime T: type, comptime field: anytype, x: anytype) std.meta.fieldInfo(T, field).field_type { + return castClamp(std.meta.fieldInfo(T, field).field_type, x); + } -fn truncate(comptime T: type, comptime field: anytype, x: anytype) std.meta.fieldInfo(T, field).field_type { - return castTruncate(std.meta.fieldInfo(T, field).field_type, x); -} + fn truncate(comptime T: type, comptime field: anytype, x: anytype) std.meta.fieldInfo(T, field).field_type { + return castTruncate(std.meta.fieldInfo(T, field).field_type, x); + } -fn readStat(parent: std.fs.Dir, name: [:0]const u8, follow: bool) !Stat { - const stat = try std.os.fstatatZ(parent.fd, name, if (follow) 0 else std.os.AT_SYMLINK_NOFOLLOW); - return Stat{ - .blocks = clamp(Stat, .blocks, stat.blocks), - .size = clamp(Stat, .size, stat.size), - .dev = truncate(Stat, .dev, stat.dev), - .ino = truncate(Stat, .ino, stat.ino), - .nlink = clamp(Stat, .nlink, stat.nlink), - .dir = std.os.system.S_ISDIR(stat.mode), - .reg = std.os.system.S_ISREG(stat.mode), - .symlink = std.os.system.S_ISLNK(stat.mode), - .ext = .{ - .mtime = clamp(model.Ext, .mtime, stat.mtime().tv_sec), - .uid = truncate(model.Ext, .uid, stat.uid), - .gid = truncate(model.Ext, .gid, stat.gid), - .mode = truncate(model.Ext, .mode, stat.mode), - }, + fn read(parent: std.fs.Dir, name: [:0]const u8, follow: bool) !Stat { + const stat = try std.os.fstatatZ(parent.fd, name, if (follow) 0 else std.os.AT_SYMLINK_NOFOLLOW); + return Stat{ + .blocks = clamp(Stat, .blocks, stat.blocks), + .size = clamp(Stat, .size, stat.size), + .dev = truncate(Stat, .dev, stat.dev), + .ino = truncate(Stat, .ino, stat.ino), + .nlink = clamp(Stat, .nlink, stat.nlink), + .dir = std.os.system.S_ISDIR(stat.mode), + .reg = std.os.system.S_ISREG(stat.mode), + .symlink = std.os.system.S_ISLNK(stat.mode), + .ext = .{ + .mtime = clamp(model.Ext, .mtime, stat.mtime().tv_sec), + .uid = truncate(model.Ext, .uid, stat.uid), + .gid = truncate(model.Ext, .gid, stat.gid), + .mode = truncate(model.Ext, .mode, stat.mode), + }, + }; + } +}; + +var kernfs_cache: std.AutoHashMap(u64,bool) = std.AutoHashMap(u64,bool).init(main.allocator); + +// This function only works on Linux +fn isKernfs(dir: std.fs.Dir, dev: u64) bool { + if (kernfs_cache.get(dev)) |e| return e; + var buf: c_statfs.struct_statfs = undefined; + if (c_statfs.fstatfs(dir.fd, &buf) != 0) return false; // silently ignoring errors isn't too nice. + const iskern = switch (buf.f_type) { + // These numbers are documented in the Linux 'statfs(2)' man page, so I assume they're stable. + 0x42494e4d, // BINFMTFS_MAGIC + 0xcafe4a11, // BPF_FS_MAGIC + 0x27e0eb, // CGROUP_SUPER_MAGIC + 0x63677270, // CGROUP2_SUPER_MAGIC + 0x64626720, // DEBUGFS_MAGIC + 0x1cd1, // DEVPTS_SUPER_MAGIC + 0x9fa0, // PROC_SUPER_MAGIC + 0x6165676c, // PSTOREFS_MAGIC + 0x73636673, // SECURITYFS_MAGIC + 0xf97cff8c, // SELINUX_MAGIC + 0x62656572, // SYSFS_MAGIC + 0x74726163 // TRACEFS_MAGIC + => true, + else => false, }; + kernfs_cache.put(dev, iskern) catch {}; + return iskern; } -// Read and index entries of the given dir. The entry for the directory is already assumed to be in 'parents'. +const Context = struct { + parents: model.Parents = .{}, + path: std.ArrayList(u8) = std.ArrayList(u8).init(main.allocator), + path_indices: std.ArrayList(usize) = std.ArrayList(usize).init(main.allocator), + + // 0-terminated name of the top entry, points into 'path', invalid after popPath(). + // This is a workaround to Zig's directory iterator not returning a [:0]const u8. + name: [:0]const u8 = undefined, + + const Self = @This(); + + fn pushPath(self: *Self, name: []const u8) !void { + try self.path_indices.append(self.path.items.len); + if (self.path.items.len > 1) try self.path.append('/'); + const start = self.path.items.len; + try self.path.appendSlice(name); + + try self.path.append(0); + self.name = self.path.items[start..self.path.items.len-1:0]; + self.path.items.len -= 1; + } + + fn popPath(self: *Self) void { + self.path.items.len = self.path_indices.items[self.path_indices.items.len-1]; + self.path_indices.items.len -= 1; + } +}; + +// Read and index entries of the given dir. The entry for the directory is already assumed to be in 'ctx.parents'. // (TODO: shouldn't error on OOM but instead call a function that waits or something) -fn scanDir(parents: *model.Parents, dir: std.fs.Dir) std.mem.Allocator.Error!void { +fn scanDir(ctx: *Context, dir: std.fs.Dir) std.mem.Allocator.Error!void { var it = dir.iterate(); while(true) { const entry = it.next() catch { - parents.top().entry.set_err(parents); + ctx.parents.top().entry.set_err(&ctx.parents); return; } orelse break; - // TODO: Check for exclude patterns + try ctx.pushPath(entry.name); + defer ctx.popPath(); - // XXX: Surely the name already has a trailing \0 in the buffer received by the OS? - // XXX#2: Does this allocate PATH_MAX bytes on the stack for each level of recursion!? - const name_z = std.os.toPosixPath(entry.name) catch undefined; - var stat = readStat(dir, &name_z, false) catch { + // XXX: This algorithm is extremely slow, can be optimized with some clever pattern parsing. + const excluded = blk: { + for (main.config.exclude_patterns.items) |pat| { + ctx.path.append(0) catch unreachable; + var path = ctx.path.items[0..ctx.path.items.len-1:0]; + ctx.path.items.len -= 1; + while (path.len > 0) { + if (c_fnmatch.fnmatch(pat, path, 0) == 0) break :blk true; + if (std.mem.indexOfScalar(u8, path, '/')) |idx| path = path[idx+1..:0] + else break; + } + } + break :blk false; + }; + if (excluded) { var e = try model.Entry.create(.file, false, entry.name); - e.insert(parents) catch unreachable; - e.set_err(parents); + e.file().?.excluded = true; + e.insert(&ctx.parents) catch unreachable; + continue; + } + + var stat = Stat.read(dir, ctx.name, false) catch { + var e = try model.Entry.create(.file, false, entry.name); + e.insert(&ctx.parents) catch unreachable; + e.set_err(&ctx.parents); continue; }; - if (main.config.same_fs and stat.dev != model.getDev(parents.top().dev)) { + if (main.config.same_fs and stat.dev != model.getDev(ctx.parents.top().dev)) { var e = try model.Entry.create(.file, false, entry.name); e.file().?.other_fs = true; - e.insert(parents) catch unreachable; + e.insert(&ctx.parents) catch unreachable; continue; } if (main.config.follow_symlinks and stat.symlink) { - if (readStat(dir, &name_z, true)) |nstat| { + if (Stat.read(dir, ctx.name, true)) |nstat| { if (!nstat.dir) { stat = nstat; // Symlink targets may reside on different filesystems, // this will break hardlink detection and counting so let's disable it. - if (stat.nlink > 1 and stat.dev != model.getDev(parents.top().dev)) + if (stat.nlink > 1 and stat.dev != model.getDev(ctx.parents.top().dev)) stat.nlink = 1; } } else |_| {} } - // TODO: Check for kernfs; Zig has no wrappers for fstatfs() yet and calling the syscall directly doesn't seem too trivial. :( - var edir = - if (stat.dir) dir.openDirZ(&name_z, .{ .access_sub_paths = true, .iterate = true, .no_follow = true }) catch { + if (stat.dir) dir.openDirZ(ctx.name, .{ .access_sub_paths = true, .iterate = true, .no_follow = true }) catch { var e = try model.Entry.create(.file, false, entry.name); - e.insert(parents) catch unreachable; - e.set_err(parents); + e.insert(&ctx.parents) catch unreachable; + e.set_err(&ctx.parents); continue; } else null; defer if (edir != null) edir.?.close(); + if (std.builtin.os.tag == .linux and main.config.exclude_kernfs and stat.dir and isKernfs(edir.?, stat.dev)) { + var e = try model.Entry.create(.file, false, entry.name); + e.file().?.kernfs = true; + e.insert(&ctx.parents) catch unreachable; + continue; + } + if (main.config.exclude_caches and stat.dir) { if (edir.?.openFileZ("CACHEDIR.TAG", .{})) |f| { const sig = "Signature: 8a477f597d28d172789f06886806bc55"; @@ -124,7 +206,7 @@ fn scanDir(parents: *model.Parents, dir: std.fs.Dir) std.mem.Allocator.Error!voi if (len == sig.len and std.mem.eql(u8, &buf, sig)) { var e = try model.Entry.create(.file, false, entry.name); e.file().?.excluded = true; - e.insert(parents) catch unreachable; + e.insert(&ctx.parents) catch unreachable; continue; } } else |_| {} @@ -148,12 +230,12 @@ fn scanDir(parents: *model.Parents, dir: std.fs.Dir) std.mem.Allocator.Error!voi l.nlink = stat.nlink; } if (e.ext()) |ext| ext.* = stat.ext; - try e.insert(parents); + try e.insert(&ctx.parents); if (e.dir()) |d| { - try parents.push(d); - try scanDir(parents, edir.?); - parents.pop(); + try ctx.parents.push(d); + try scanDir(ctx, edir.?); + ctx.parents.pop(); } } } @@ -162,14 +244,15 @@ pub fn scanRoot(path: []const u8) !void { const full_path = std.fs.realpathAlloc(main.allocator, path) catch path; model.root = (try model.Entry.create(.dir, false, full_path)).dir().?; - const stat = try readStat(std.fs.cwd(), model.root.entry.name(), true); + const stat = try Stat.read(std.fs.cwd(), model.root.entry.name(), true); if (!stat.dir) return error.NotADirectory; model.root.entry.blocks = stat.blocks; model.root.entry.size = stat.size; model.root.dev = try model.getDevId(stat.dev); if (model.root.entry.ext()) |ext| ext.* = stat.ext; - var parents = model.Parents{}; + var ctx = Context{}; + try ctx.pushPath(full_path); const dir = try std.fs.cwd().openDirZ(model.root.entry.name(), .{ .access_sub_paths = true, .iterate = true }); - try scanDir(&parents, dir); + try scanDir(&ctx, dir); } diff --git a/src/ui.zig b/src/ui.zig index 88410e095bdae7d1fe58f4cfcd2c4bfa7dc348c7..dff364294bf272a21ac2c2d2541d4a063dab9d1b 100644 --- a/src/ui.zig +++ b/src/ui.zig @@ -6,7 +6,6 @@ const main = @import("main.zig"); pub const c = @cImport({ @cInclude("stdio.h"); @cInclude("string.h"); - @cInclude("unistd.h"); @cInclude("curses.h"); }); @@ -121,7 +120,7 @@ pub fn init() void { if (term == null) die("Error initializing ncurses.\n", .{}); _ = c.set_term(term); } else { - if (c.isatty(0) != 1) die("Standard input is not a TTY. Did you mean to import a file using '-f -'?\n", .{}); + if (!std.io.getStdIn().isTty()) die("Standard input is not a TTY. Did you mean to import a file using '-f -'?\n", .{}); if (c.initscr() == null) die("Error initializing ncurses.\n", .{}); } updateSize();