diff --git a/lib/std/fs/Dir.zig b/lib/std/fs/Dir.zig
index d097573122..2d9057793c 100644
--- a/lib/std/fs/Dir.zig
+++ b/lib/std/fs/Dir.zig
@@ -663,11 +663,132 @@ fn iterateImpl(self: Dir, first_iter_start_value: bool) Iterator {
     }
 }
 
-pub const Walker = struct {
-    stack: std.ArrayListUnmanaged(StackItem),
+pub const SelectiveWalker = struct {
+    stack: std.ArrayListUnmanaged(Walker.StackItem),
     name_buffer: std.ArrayListUnmanaged(u8),
     allocator: Allocator,
 
+    /// After each call to this function, and on deinit(), the memory returned
+    /// from this function becomes invalid. A copy must be made in order to keep
+    /// a reference to the path.
+    pub fn next(self: *SelectiveWalker) !?Walker.Entry {
+        while (self.stack.items.len > 0) {
+            const top = &self.stack.items[self.stack.items.len - 1];
+            var dirname_len = top.dirname_len;
+            if (top.iter.next() catch |err| {
+                // If we get an error, then we want the user to be able to continue
+                // walking if they want, which means that we need to pop the directory
+                // that errored from the stack. Otherwise, all future `next` calls would
+                // likely just fail with the same error.
+                var item = self.stack.pop().?;
+                if (self.stack.items.len != 0) {
+                    item.iter.dir.close();
+                }
+                return err;
+            }) |entry| {
+                self.name_buffer.shrinkRetainingCapacity(dirname_len);
+                if (self.name_buffer.items.len != 0) {
+                    try self.name_buffer.append(self.allocator, fs.path.sep);
+                    dirname_len += 1;
+                }
+                try self.name_buffer.ensureUnusedCapacity(self.allocator, entry.name.len + 1);
+                self.name_buffer.appendSliceAssumeCapacity(entry.name);
+                self.name_buffer.appendAssumeCapacity(0);
+                const walker_entry: Walker.Entry = .{
+                    .dir = top.iter.dir,
+                    .basename = self.name_buffer.items[dirname_len .. self.name_buffer.items.len - 1 :0],
+                    .path = self.name_buffer.items[0 .. self.name_buffer.items.len - 1 :0],
+                    .kind = entry.kind,
+                };
+                return walker_entry;
+            } else {
+                var item = self.stack.pop().?;
+                if (self.stack.items.len != 0) {
+                    item.iter.dir.close();
+                }
+            }
+        }
+        return null;
+    }
+
+    /// Traverses into the directory, continuing walking one level down.
+    /// `entry` must be the entry most recently returned by `next`, because the
+    /// path prefix for the new level is taken from the current name buffer.
+    /// Entries that are not directories are ignored.
+    pub fn enter(self: *SelectiveWalker, entry: Walker.Entry) !void {
+        if (entry.kind != .directory) {
+            @branchHint(.cold);
+            return;
+        }
+
+        var new_dir = entry.dir.openDir(entry.basename, .{ .iterate = true }) catch |err| {
+            switch (err) {
+                error.NameTooLong => unreachable, // no path sep in entry.basename
+                else => |e| return e,
+            }
+        };
+        errdefer new_dir.close();
+
+        try self.stack.append(self.allocator, .{
+            .iter = new_dir.iterateAssumeFirstIteration(),
+            .dirname_len = self.name_buffer.items.len - 1,
+        });
+    }
+
+    /// Releases the walker's memory and closes every directory handle that
+    /// `enter` opened and that is still on the stack. The directory the walk
+    /// started from is not closed.
+    pub fn deinit(self: *SelectiveWalker) void {
+        // Index 0 always holds the initial directory, which the caller owns.
+        if (self.stack.items.len > 1) {
+            for (self.stack.items[1..]) |*item| {
+                item.iter.dir.close();
+            }
+        }
+        self.name_buffer.deinit(self.allocator);
+        self.stack.deinit(self.allocator);
+    }
+
+    /// Leaves the current directory, continuing walking one level up.
+    pub fn leave(self: *SelectiveWalker) void {
+        var item = self.stack.pop().?;
+        if (self.stack.items.len != 0) {
+            @branchHint(.likely);
+            item.iter.dir.close();
+        }
+    }
+};
+
+/// Recursively iterates over a directory, but requires the user to
+/// opt-in to recursing into each directory entry.
+///
+/// `self` must have been opened with `OpenOptions{.iterate = true}`.
+///
+/// `SelectiveWalker.deinit` releases allocated memory and directory handles.
+///
+/// The order of returned file system entries is undefined.
+///
+/// `self` will not be closed after walking it.
+///
+/// See also `walk`.
+pub fn walkSelectively(self: Dir, allocator: Allocator) Allocator.Error!SelectiveWalker {
+    var stack: std.ArrayListUnmanaged(Walker.StackItem) = .empty;
+
+    try stack.append(allocator, .{
+        .iter = self.iterate(),
+        .dirname_len = 0,
+    });
+
+    return .{
+        .stack = stack,
+        .name_buffer = .{},
+        .allocator = allocator,
+    };
+}
+
+pub const Walker = struct {
+    inner: SelectiveWalker,
+
     pub const Entry = struct {
         /// The containing directory. This can be used to operate directly on `basename`
         /// rather than `path`, avoiding `error.NameTooLong` for deeply nested paths.
@@ -687,72 +808,22 @@ pub const Walker = struct {
     /// from this function becomes invalid. A copy must be made in order to keep
     /// a reference to the path.
     pub fn next(self: *Walker) !?Walker.Entry {
-        const gpa = self.allocator;
-        while (self.stack.items.len != 0) {
-            // `top` and `containing` become invalid after appending to `self.stack`
-            var top = &self.stack.items[self.stack.items.len - 1];
-            var containing = top;
-            var dirname_len = top.dirname_len;
-            if (top.iter.next() catch |err| {
-                // If we get an error, then we want the user to be able to continue
-                // walking if they want, which means that we need to pop the directory
-                // that errored from the stack. Otherwise, all future `next` calls would
-                // likely just fail with the same error.
-                var item = self.stack.pop().?;
-                if (self.stack.items.len != 0) {
-                    item.iter.dir.close();
-                }
-                return err;
-            }) |base| {
-                self.name_buffer.shrinkRetainingCapacity(dirname_len);
-                if (self.name_buffer.items.len != 0) {
-                    try self.name_buffer.append(gpa, fs.path.sep);
-                    dirname_len += 1;
-                }
-                try self.name_buffer.ensureUnusedCapacity(gpa, base.name.len + 1);
-                self.name_buffer.appendSliceAssumeCapacity(base.name);
-                self.name_buffer.appendAssumeCapacity(0);
-                if (base.kind == .directory) {
-                    var new_dir = top.iter.dir.openDir(base.name, .{ .iterate = true }) catch |err| switch (err) {
-                        error.NameTooLong => unreachable, // no path sep in base.name
-                        else => |e| return e,
-                    };
-                    {
-                        errdefer new_dir.close();
-                        try self.stack.append(gpa, .{
-                            .iter = new_dir.iterateAssumeFirstIteration(),
-                            .dirname_len = self.name_buffer.items.len - 1,
-                        });
-                        top = &self.stack.items[self.stack.items.len - 1];
-                        containing = &self.stack.items[self.stack.items.len - 2];
-                    }
-                }
-                return .{
-                    .dir = containing.iter.dir,
-                    .basename = self.name_buffer.items[dirname_len .. self.name_buffer.items.len - 1 :0],
-                    .path = self.name_buffer.items[0 .. self.name_buffer.items.len - 1 :0],
-                    .kind = base.kind,
-                };
-            } else {
-                var item = self.stack.pop().?;
-                if (self.stack.items.len != 0) {
-                    item.iter.dir.close();
-                }
-            }
+        const entry = try self.inner.next();
+        if (entry != null and entry.?.kind == .directory) {
+            try self.inner.enter(entry.?);
         }
-        return null;
+        return entry;
     }
 
     pub fn deinit(self: *Walker) void {
-        const gpa = self.allocator;
-        // Close any remaining directories except the initial one (which is always at index 0)
-        if (self.stack.items.len > 1) {
-            for (self.stack.items[1..]) |*item| {
-                item.iter.dir.close();
-            }
-        }
-        self.stack.deinit(gpa);
-        self.name_buffer.deinit(gpa);
+        self.inner.deinit();
+    }
+
+    /// Leaves the current directory, continuing walking one level up.
+    /// If the current entry is a directory entry, then the "current directory"
+    /// is the directory pertaining to the current entry.
+    pub fn leave(self: *Walker) void {
+        self.inner.leave();
     }
 };
 
@@ -765,18 +836,11 @@ pub const Walker = struct {
 /// The order of returned file system entries is undefined.
 ///
 /// `self` will not be closed after walking it.
+///
+/// See also `walkSelectively`.
 pub fn walk(self: Dir, allocator: Allocator) Allocator.Error!Walker {
-    var stack: std.ArrayListUnmanaged(Walker.StackItem) = .empty;
-
-    try stack.append(allocator, .{
-        .iter = self.iterate(),
-        .dirname_len = 0,
-    });
-
     return .{
-        .stack = stack,
-        .name_buffer = .{},
-        .allocator = allocator,
+        .inner = try walkSelectively(self, allocator),
     };
 }
 
diff --git a/lib/std/fs/test.zig b/lib/std/fs/test.zig
index 65e86e4c2e..7c28939bef 100644
--- a/lib/std/fs/test.zig
+++ b/lib/std/fs/test.zig
@@ -1810,6 +1810,67 @@ test "walker" {
     try testing.expectEqual(expected_paths.kvs.len, num_walked);
 }
 
+test "selective walker, skip entries that start with ." {
+    var tmp = tmpDir(.{ .iterate = true });
+    defer tmp.cleanup();
+
+    const paths_to_create: []const []const u8 = &.{
+        "dir1/foo/.git/ignored",
+        ".hidden/bar",
+        "a/b/c",
+        "a/baz",
+    };
+
+    // iteration order of walker is undefined, so need lookup maps to check against
+
+    const expected_paths = std.StaticStringMap(void).initComptime(.{
+        .{"dir1"},
+        .{"dir1" ++ fs.path.sep_str ++ "foo"},
+        .{"a"},
+        .{"a" ++ fs.path.sep_str ++ "b"},
+        .{"a" ++ fs.path.sep_str ++ "b" ++ fs.path.sep_str ++ "c"},
+        .{"a" ++ fs.path.sep_str ++ "baz"},
+    });
+
+    const expected_basenames = std.StaticStringMap(void).initComptime(.{
+        .{"dir1"},
+        .{"foo"},
+        .{"a"},
+        .{"b"},
+        .{"c"},
+        .{"baz"},
+    });
+
+    for (paths_to_create) |path| {
+        try tmp.dir.makePath(path);
+    }
+
+    var walker = try tmp.dir.walkSelectively(testing.allocator);
+    defer walker.deinit();
+
+    var num_walked: usize = 0;
+    while (try walker.next()) |entry| {
+        if (entry.basename[0] == '.') continue;
+        if (entry.kind == .directory) {
+            try walker.enter(entry);
+        }
+
+        testing.expect(expected_basenames.has(entry.basename)) catch |err| {
+            std.debug.print("found unexpected basename: {f}\n", .{std.ascii.hexEscape(entry.basename, .lower)});
+            return err;
+        };
+        testing.expect(expected_paths.has(entry.path)) catch |err| {
+            std.debug.print("found unexpected path: {f}\n", .{std.ascii.hexEscape(entry.path, .lower)});
+            return err;
+        };
+        // make sure that the entry.dir is the containing dir
+        var entry_dir = try entry.dir.openDir(entry.basename, .{});
+        defer entry_dir.close();
+        num_walked += 1;
+    }
+    try testing.expectEqual(expected_paths.kvs.len, num_walked);
+}
+
 test "walker without fully iterating" {
     var tmp = tmpDir(.{ .iterate = true });
     defer tmp.cleanup();