Merge pull request #25320 from hkupty/walker2

std.fs.Dir: Add `walkSelectively` to provide more control over directory walking
This commit is contained in:
Ryan Liptak 2025-10-04 22:41:25 -07:00 committed by GitHub
commit 0a74d73459
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 250 additions and 89 deletions

View File

@ -260,7 +260,7 @@ pub fn build(b: *std.Build) !void {
};
const git_describe = mem.trim(u8, git_describe_untrimmed, " \n\r");
switch (mem.count(u8, git_describe, "-")) {
switch (mem.countScalar(u8, git_describe, '-')) {
0 => {
// Tagged release version (e.g. 0.10.0).
if (!mem.eql(u8, git_describe, version_string)) {

View File

@ -663,35 +663,17 @@ fn iterateImpl(self: Dir, first_iter_start_value: bool) Iterator {
}
}
pub const Walker = struct {
stack: std.ArrayListUnmanaged(StackItem),
pub const SelectiveWalker = struct {
stack: std.ArrayListUnmanaged(Walker.StackItem),
name_buffer: std.ArrayListUnmanaged(u8),
allocator: Allocator,
pub const Entry = struct {
/// The containing directory. This can be used to operate directly on `basename`
/// rather than `path`, avoiding `error.NameTooLong` for deeply nested paths.
/// The directory remains open until `next` or `deinit` is called.
dir: Dir,
basename: [:0]const u8,
path: [:0]const u8,
kind: Dir.Entry.Kind,
};
const StackItem = struct {
iter: Dir.Iterator,
dirname_len: usize,
};
/// After each call to this function, and on deinit(), the memory returned
/// from this function becomes invalid. A copy must be made in order to keep
/// a reference to the path.
pub fn next(self: *Walker) !?Walker.Entry {
const gpa = self.allocator;
while (self.stack.items.len != 0) {
// `top` and `containing` become invalid after appending to `self.stack`
var top = &self.stack.items[self.stack.items.len - 1];
var containing = top;
pub fn next(self: *SelectiveWalker) !?Walker.Entry {
while (self.stack.items.len > 0) {
const top = &self.stack.items[self.stack.items.len - 1];
var dirname_len = top.dirname_len;
if (top.iter.next() catch |err| {
// If we get an error, then we want the user to be able to continue
@ -703,36 +685,22 @@ pub const Walker = struct {
item.iter.dir.close();
}
return err;
}) |base| {
}) |entry| {
self.name_buffer.shrinkRetainingCapacity(dirname_len);
if (self.name_buffer.items.len != 0) {
try self.name_buffer.append(gpa, fs.path.sep);
try self.name_buffer.append(self.allocator, fs.path.sep);
dirname_len += 1;
}
try self.name_buffer.ensureUnusedCapacity(gpa, base.name.len + 1);
self.name_buffer.appendSliceAssumeCapacity(base.name);
try self.name_buffer.ensureUnusedCapacity(self.allocator, entry.name.len + 1);
self.name_buffer.appendSliceAssumeCapacity(entry.name);
self.name_buffer.appendAssumeCapacity(0);
if (base.kind == .directory) {
var new_dir = top.iter.dir.openDir(base.name, .{ .iterate = true }) catch |err| switch (err) {
error.NameTooLong => unreachable, // no path sep in base.name
else => |e| return e,
};
{
errdefer new_dir.close();
try self.stack.append(gpa, .{
.iter = new_dir.iterateAssumeFirstIteration(),
.dirname_len = self.name_buffer.items.len - 1,
});
top = &self.stack.items[self.stack.items.len - 1];
containing = &self.stack.items[self.stack.items.len - 2];
}
}
return .{
.dir = containing.iter.dir,
const walker_entry: Walker.Entry = .{
.dir = top.iter.dir,
.basename = self.name_buffer.items[dirname_len .. self.name_buffer.items.len - 1 :0],
.path = self.name_buffer.items[0 .. self.name_buffer.items.len - 1 :0],
.kind = base.kind,
.kind = entry.kind,
};
return walker_entry;
} else {
var item = self.stack.pop().?;
if (self.stack.items.len != 0) {
@ -743,16 +711,116 @@ pub const Walker = struct {
return null;
}
pub fn deinit(self: *Walker) void {
const gpa = self.allocator;
// Close any remaining directories except the initial one (which is always at index 0)
if (self.stack.items.len > 1) {
for (self.stack.items[1..]) |*item| {
item.iter.dir.close();
}
/// Traverses into the directory, continuing walking one level down.
pub fn enter(self: *SelectiveWalker, entry: Walker.Entry) !void {
if (entry.kind != .directory) {
@branchHint(.cold);
return;
}
self.stack.deinit(gpa);
self.name_buffer.deinit(gpa);
var new_dir = entry.dir.openDir(entry.basename, .{ .iterate = true }) catch |err| {
switch (err) {
error.NameTooLong => unreachable,
else => |e| return e,
}
};
errdefer new_dir.close();
try self.stack.append(self.allocator, .{
.iter = new_dir.iterateAssumeFirstIteration(),
.dirname_len = self.name_buffer.items.len - 1,
});
}
/// Releases every resource still held by the walker.
///
/// Directories that were opened while walking and are still on the stack
/// are closed. The initial directory (always at index 0 of the stack) is
/// owned by the caller and is left open.
pub fn deinit(self: *SelectiveWalker) void {
    // Close any remaining directories except the initial one. Without this,
    // abandoning a walk part-way through would leak one handle per
    // directory level that is still open.
    if (self.stack.items.len > 1) {
        for (self.stack.items[1..]) |*item| {
            item.iter.dir.close();
        }
    }
    self.name_buffer.deinit(self.allocator);
    self.stack.deinit(self.allocator);
}
/// Stops iterating the current directory so that walking resumes in its parent.
/// When the most recently returned entry is a directory and `enter` was called
/// on it before `leave`, that entry's directory is the one being left.
pub fn leave(self: *SelectiveWalker) void {
    var popped = self.stack.pop().?;
    // Once the stack is empty, the popped item was the bottom-most one;
    // its directory is not closed here.
    const popped_was_bottom = self.stack.items.len == 0;
    if (!popped_was_bottom) {
        @branchHint(.likely);
        popped.iter.dir.close();
    }
}
};
/// Recursively iterates over a directory, but requires the user to
/// opt-in to recursing into each directory entry by calling
/// `SelectiveWalker.enter` on it.
///
/// `self` must have been opened with `OpenOptions{.iterate = true}`.
///
/// `SelectiveWalker.deinit` must be called on the returned walker to
/// release its resources.
///
/// The order of returned file system entries is undefined.
///
/// `self` will not be closed after walking it.
///
/// The only possible failure is running out of memory while allocating
/// the initial stack entry.
///
/// See also `walk`.
pub fn walkSelectively(self: Dir, allocator: Allocator) Allocator.Error!SelectiveWalker {
    var stack: std.ArrayListUnmanaged(Walker.StackItem) = .empty;
    // The initial directory is the bottom of the stack, with an empty path prefix.
    try stack.append(allocator, .{
        .iter = self.iterate(),
        .dirname_len = 0,
    });

    return .{
        .stack = stack,
        .name_buffer = .empty,
        .allocator = allocator,
    };
}
pub const Walker = struct {
    /// The selective walker that does the actual work; `Walker` enters every
    /// directory entry it yields.
    inner: SelectiveWalker,

    pub const Entry = struct {
        /// The directory that contains this entry. Operating on `basename`
        /// relative to it, instead of on `path`, avoids `error.NameTooLong`
        /// for deeply nested paths.
        /// The directory remains open until `next` or `deinit` is called.
        dir: Dir,
        basename: [:0]const u8,
        path: [:0]const u8,
        kind: Dir.Entry.Kind,

        /// Returns how many levels below the initial directory this entry is:
        /// 1 for a direct child of the initial directory, 2 for an entry inside
        /// a direct child, and so on. Computed as the number of path separators
        /// in `path` plus one.
        pub fn depth(self: Walker.Entry) usize {
            const separator_count = mem.countScalar(u8, self.path, fs.path.sep);
            return separator_count + 1;
        }
    };

    const StackItem = struct {
        iter: Dir.Iterator,
        dirname_len: usize,
    };

    /// The memory returned from this function becomes invalid after the next
    /// call to it and after `deinit`. Copy the path to keep a reference to it.
    pub fn next(self: *Walker) !?Walker.Entry {
        const found = (try self.inner.next()) orelse return null;
        // Always recurse: every directory entry is entered before it is returned.
        if (found.kind == .directory) try self.inner.enter(found);
        return found;
    }

    pub fn deinit(self: *Walker) void {
        self.inner.deinit();
    }

    /// Leaves the current directory, continuing walking one level up.
    /// If the current entry is a directory entry, then the "current directory"
    /// is the directory pertaining to the current entry.
    pub fn leave(self: *Walker) void {
        self.inner.leave();
    }
};
@ -765,18 +833,11 @@ pub const Walker = struct {
/// The order of returned file system entries is undefined.
///
/// `self` will not be closed after walking it.
///
/// See also `walkSelectively`.
pub fn walk(self: Dir, allocator: Allocator) Allocator.Error!Walker {
var stack: std.ArrayListUnmanaged(Walker.StackItem) = .empty;
try stack.append(allocator, .{
.iter = self.iterate(),
.dirname_len = 0,
});
return .{
.stack = stack,
.name_buffer = .{},
.allocator = allocator,
.inner = try walkSelectively(self, allocator),
};
}

View File

@ -1765,14 +1765,14 @@ test "walker" {
// iteration order of walker is undefined, so need lookup maps to check against
const expected_paths = std.StaticStringMap(void).initComptime(.{
.{"dir1"},
.{"dir2"},
.{"dir3"},
.{"dir4"},
.{"dir3" ++ fs.path.sep_str ++ "sub1"},
.{"dir3" ++ fs.path.sep_str ++ "sub2"},
.{"dir3" ++ fs.path.sep_str ++ "sub2" ++ fs.path.sep_str ++ "subsub1"},
const expected_paths = std.StaticStringMap(usize).initComptime(.{
.{ "dir1", 1 },
.{ "dir2", 1 },
.{ "dir3", 1 },
.{ "dir4", 1 },
.{ "dir3" ++ fs.path.sep_str ++ "sub1", 2 },
.{ "dir3" ++ fs.path.sep_str ++ "sub2", 2 },
.{ "dir3" ++ fs.path.sep_str ++ "sub2" ++ fs.path.sep_str ++ "subsub1", 3 },
});
const expected_basenames = std.StaticStringMap(void).initComptime(.{
@ -1802,6 +1802,76 @@ test "walker" {
std.debug.print("found unexpected path: {f}\n", .{std.ascii.hexEscape(entry.path, .lower)});
return err;
};
testing.expectEqual(expected_paths.get(entry.path).?, entry.depth()) catch |err| {
std.debug.print("path reported unexpected depth: {f}\n", .{std.ascii.hexEscape(entry.path, .lower)});
return err;
};
// make sure that the entry.dir is the containing dir
var entry_dir = try entry.dir.openDir(entry.basename, .{});
defer entry_dir.close();
num_walked += 1;
}
try testing.expectEqual(expected_paths.kvs.len, num_walked);
}
test "selective walker, skip entries that start with ." {
var tmp = tmpDir(.{ .iterate = true });
defer tmp.cleanup();
const paths_to_create: []const []const u8 = &.{
"dir1/foo/.git/ignored",
".hidden/bar",
"a/b/c",
"a/baz",
};
// iteration order of walker is undefined, so need lookup maps to check against
const expected_paths = std.StaticStringMap(usize).initComptime(.{
.{ "dir1", 1 },
.{ "dir1" ++ fs.path.sep_str ++ "foo", 2 },
.{ "a", 1 },
.{ "a" ++ fs.path.sep_str ++ "b", 2 },
.{ "a" ++ fs.path.sep_str ++ "b" ++ fs.path.sep_str ++ "c", 3 },
.{ "a" ++ fs.path.sep_str ++ "baz", 2 },
});
const expected_basenames = std.StaticStringMap(void).initComptime(.{
.{"dir1"},
.{"foo"},
.{"a"},
.{"b"},
.{"c"},
.{"baz"},
});
for (paths_to_create) |path| {
try tmp.dir.makePath(path);
}
var walker = try tmp.dir.walkSelectively(testing.allocator);
defer walker.deinit();
var num_walked: usize = 0;
while (try walker.next()) |entry| {
if (entry.basename[0] == '.') continue;
if (entry.kind == .directory) {
try walker.enter(entry);
}
testing.expect(expected_basenames.has(entry.basename)) catch |err| {
std.debug.print("found unexpected basename: {f}\n", .{std.ascii.hexEscape(entry.basename, .lower)});
return err;
};
testing.expect(expected_paths.has(entry.path)) catch |err| {
std.debug.print("found unexpected path: {f}\n", .{std.ascii.hexEscape(entry.path, .lower)});
return err;
};
testing.expectEqual(expected_paths.get(entry.path).?, entry.depth()) catch |err| {
std.debug.print("path reported unexpected depth: {f}\n", .{std.ascii.hexEscape(entry.path, .lower)});
return err;
};
// make sure that the entry.dir is the containing dir
var entry_dir = try entry.dir.openDir(entry.basename, .{});
defer entry_dir.close();

View File

@ -1704,6 +1704,26 @@ test count {
try testing.expect(count(u8, "owowowu", "owowu") == 1);
}
/// Returns how many elements of `haystack` are equal to `needle`.
/// Returns 0 when `haystack` is empty or contains no match.
pub fn countScalar(comptime T: type, haystack: []const T, needle: T) usize {
    var total: usize = 0;
    var search_from: usize = 0;
    while (true) {
        const hit = findScalarPos(T, haystack, search_from, needle) orelse return total;
        total += 1;
        // Resume just past the match so the same element is not counted twice.
        search_from = hit + 1;
    }
}
test countScalar {
    // Each case: haystack, needle, and the expected number of occurrences.
    const Case = struct {
        haystack: []const u8,
        needle: u8,
        want: usize,
    };
    const cases = [_]Case{
        .{ .haystack = "", .needle = 'h', .want = 0 },
        .{ .haystack = "h", .needle = 'h', .want = 1 },
        .{ .haystack = "hh", .needle = 'h', .want = 2 },
        .{ .haystack = " abcabc abc", .needle = 'b', .want = 3 },
    };
    for (cases) |case| {
        try testing.expectEqual(case.want, countScalar(u8, case.haystack, case.needle));
    }
}
/// Returns true if the haystack contains expected_count or more needles
/// needle.len must be > 0
/// does not count overlapping needles

View File

@ -109,13 +109,29 @@ pub fn main() !void {
{
// Also add all new def and def.in files.
var walker = try src_crt_dir.walk(arena);
var walker = try src_crt_dir.walkSelectively(arena);
defer walker.deinit();
var fail = false;
while (try walker.next()) |entry| {
if (entry.kind != .file) continue;
switch (entry.kind) {
.directory => {
switch (entry.depth()) {
1 => if (def_dirs.has(entry.basename)) {
try walker.enter(entry);
continue;
},
else => {
// The top-level directory was already validated
try walker.enter(entry);
continue;
},
}
},
.file => {},
else => continue,
}
const ok_ext = for (def_exts) |ext| {
if (std.mem.endsWith(u8, entry.path, ext)) break true;
@ -123,12 +139,6 @@ pub fn main() !void {
if (!ok_ext) continue;
const ok_prefix = for (def_dirs) |p| {
if (std.mem.startsWith(u8, entry.path, p)) break true;
} else false;
if (!ok_prefix) continue;
const blacklisted = for (blacklisted_defs) |item| {
if (std.mem.eql(u8, entry.basename, item)) break true;
} else false;
@ -162,14 +172,14 @@ const def_exts = [_][]const u8{
".def.in",
};
const def_dirs = [_][]const u8{
"lib32" ++ std.fs.path.sep_str,
"lib64" ++ std.fs.path.sep_str,
"libarm32" ++ std.fs.path.sep_str,
"libarm64" ++ std.fs.path.sep_str,
"lib-common" ++ std.fs.path.sep_str,
"def-include" ++ std.fs.path.sep_str,
};
const def_dirs = std.StaticStringMap(void).initComptime(.{
.{"lib32"},
.{"lib64"},
.{"libarm32"},
.{"libarm64"},
.{"lib-common"},
.{"def-include"},
});
const blacklisted_defs = [_][]const u8{
"crtdll.def.in",