Merge pull request #22222 from ianprime0509/git-sha256

zig fetch: add support for SHA-256 Git repositories
Andrew Kelley 2024-12-14 16:34:47 -05:00 committed by GitHub
commit 13a28345bb
11 changed files with 283 additions and 169 deletions
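
For context, a minimal usage sketch (not part of this diff): referencing a commit in a SHA-256 Git repository from build.zig.zon. The host, dependency name, and .hash value are hypothetical; the git+https URL form and the #<commit> fragment are the existing zig fetch conventions, which this change extends to 64-hex-digit SHA-256 object IDs (the commit ID below is the SHA-256 test commit used later in this diff).

.dependencies = .{
    .example = .{
        // The fragment may now be a SHA-256 commit ID (64 hex digits) as well
        // as a SHA-1 ID (40 hex digits), a branch name, or a tag name.
        .url = "git+https://example.com/example/repo#7f444a92bd4572ee4a28b2c63059924a9ca1829138553ef3e7c41ee159afae7a",
        // Placeholder; `zig fetch --save <url>` fills in the package hash.
        .hash = "...",
    },
},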

View File

@ -381,32 +381,6 @@ pub fn build(b: *std.Build) !void {
const test_target_filters = b.option([]const []const u8, "test-target-filter", "Skip tests whose target triple do not match any filter") orelse &[0][]const u8{};
const test_slow_targets = b.option(bool, "test-slow-targets", "Enable running module tests for targets that have a slow compiler backend") orelse false;
const test_cases_options = b.addOptions();
test_cases_options.addOption(bool, "enable_tracy", false);
test_cases_options.addOption(bool, "enable_debug_extensions", enable_debug_extensions);
test_cases_options.addOption(bool, "enable_logging", enable_logging);
test_cases_options.addOption(bool, "enable_link_snapshots", enable_link_snapshots);
test_cases_options.addOption(bool, "skip_non_native", skip_non_native);
test_cases_options.addOption(bool, "have_llvm", enable_llvm);
test_cases_options.addOption(bool, "llvm_has_m68k", llvm_has_m68k);
test_cases_options.addOption(bool, "llvm_has_csky", llvm_has_csky);
test_cases_options.addOption(bool, "llvm_has_arc", llvm_has_arc);
test_cases_options.addOption(bool, "llvm_has_xtensa", llvm_has_xtensa);
test_cases_options.addOption(bool, "force_gpa", force_gpa);
test_cases_options.addOption(bool, "enable_qemu", b.enable_qemu);
test_cases_options.addOption(bool, "enable_wine", b.enable_wine);
test_cases_options.addOption(bool, "enable_wasmtime", b.enable_wasmtime);
test_cases_options.addOption(bool, "enable_rosetta", b.enable_rosetta);
test_cases_options.addOption(bool, "enable_darling", b.enable_darling);
test_cases_options.addOption(u32, "mem_leak_frames", mem_leak_frames * 2);
test_cases_options.addOption(bool, "value_tracing", value_tracing);
test_cases_options.addOption(?[]const u8, "glibc_runtimes_dir", b.glibc_runtimes_dir);
test_cases_options.addOption([:0]const u8, "version", version);
test_cases_options.addOption(std.SemanticVersion, "semver", semver);
test_cases_options.addOption([]const []const u8, "test_filters", test_filters);
test_cases_options.addOption(DevEnv, "dev", if (only_c) .bootstrap else .core);
var chosen_opt_modes_buf: [4]builtin.OptimizeMode = undefined;
var chosen_mode_index: usize = 0;
if (!skip_debug) {
@ -533,6 +507,21 @@ pub fn build(b: *std.Build) !void {
.max_rss = 5029889638,
}));
test_modules_step.dependOn(tests.addModuleTests(b, .{
.test_filters = test_filters,
.test_target_filters = test_target_filters,
.test_slow_targets = test_slow_targets,
.root_src = "src/main.zig",
.name = "compiler-internals",
.desc = "Run the compiler internals tests",
.optimize_modes = optimization_modes,
.include_paths = &.{},
.skip_single_threaded = skip_single_threaded,
.skip_non_native = true,
.skip_libc = skip_libc,
.build_options = exe_options,
}));
test_step.dependOn(test_modules_step);
test_step.dependOn(tests.addCompareOutputTests(b, test_filters, optimization_modes));

View File

@ -9822,6 +9822,7 @@ test "basic usage" {
const gpa = std.testing.allocator;
var ip: InternPool = .empty;
try ip.init(gpa, 1);
defer ip.deinit(gpa);
const i32_type = try ip.get(gpa, .main, .{ .int_type = .{

View File

@ -814,7 +814,7 @@ const Resource = union(enum) {
const Git = struct {
session: git.Session,
fetch_stream: git.Session.FetchStream,
want_oid: [git.oid_length]u8,
want_oid: git.Oid,
};
fn deinit(resource: *Resource) void {
@ -976,7 +976,7 @@ fn initResource(f: *Fetch, uri: std.Uri, server_header_buffer: []u8) RunError!Re
const want_oid = want_oid: {
const want_ref =
if (uri.fragment) |fragment| try fragment.toRawMaybeAlloc(arena) else "HEAD";
if (git.parseOid(want_ref)) |oid| break :want_oid oid else |_| {}
if (git.Oid.parseAny(want_ref)) |oid| break :want_oid oid else |_| {}
const want_ref_head = try std.fmt.allocPrint(arena, "refs/heads/{s}", .{want_ref});
const want_ref_tag = try std.fmt.allocPrint(arena, "refs/tags/{s}", .{want_ref});
@ -1018,17 +1018,13 @@ fn initResource(f: *Fetch, uri: std.Uri, server_header_buffer: []u8) RunError!Re
});
const notes_start = try eb.reserveNotes(notes_len);
eb.extra.items[notes_start] = @intFromEnum(try eb.addErrorMessage(.{
.msg = try eb.printString("try .url = \"{;+/}#{}\",", .{
uri, std.fmt.fmtSliceHexLower(&want_oid),
}),
.msg = try eb.printString("try .url = \"{;+/}#{}\",", .{ uri, want_oid }),
}));
return error.FetchFailed;
}
var want_oid_buf: [git.fmt_oid_length]u8 = undefined;
_ = std.fmt.bufPrint(&want_oid_buf, "{}", .{
std.fmt.fmtSliceHexLower(&want_oid),
}) catch unreachable;
var want_oid_buf: [git.Oid.max_formatted_length]u8 = undefined;
_ = std.fmt.bufPrint(&want_oid_buf, "{}", .{want_oid}) catch unreachable;
var fetch_stream = session.fetch(&.{&want_oid_buf}, server_header_buffer) catch |err| {
return f.fail(f.location_tok, try eb.printString(
"unable to create fetch stream: {s}",
@ -1163,7 +1159,7 @@ fn unpackResource(
});
return try unpackTarball(f, tmp_directory.handle, dcp.reader());
},
.git_pack => return unpackGitPack(f, tmp_directory.handle, resource) catch |err| switch (err) {
.git_pack => return unpackGitPack(f, tmp_directory.handle, &resource.git) catch |err| switch (err) {
error.FetchFailed => return error.FetchFailed,
error.OutOfMemory => return error.OutOfMemory,
else => |e| return f.fail(f.location_tok, try eb.printString(
@ -1298,11 +1294,10 @@ fn unzip(f: *Fetch, out_dir: fs.Dir, reader: anytype) RunError!UnpackResult {
return res;
}
fn unpackGitPack(f: *Fetch, out_dir: fs.Dir, resource: *Resource) anyerror!UnpackResult {
fn unpackGitPack(f: *Fetch, out_dir: fs.Dir, resource: *Resource.Git) anyerror!UnpackResult {
const arena = f.arena.allocator();
const gpa = f.arena.child_allocator;
const want_oid = resource.git.want_oid;
const reader = resource.git.fetch_stream.reader();
const object_format: git.Oid.Format = resource.want_oid;
var res: UnpackResult = .{};
// The .git directory is used to store the packfile and associated index, but
@ -1314,7 +1309,7 @@ fn unpackGitPack(f: *Fetch, out_dir: fs.Dir, resource: *Resource) anyerror!Unpac
var pack_file = try pack_dir.createFile("pkg.pack", .{ .read = true });
defer pack_file.close();
var fifo = std.fifo.LinearFifo(u8, .{ .Static = 4096 }).init();
try fifo.pump(reader, pack_file.writer());
try fifo.pump(resource.fetch_stream.reader(), pack_file.writer());
try pack_file.sync();
var index_file = try pack_dir.createFile("pkg.idx", .{ .read = true });
@ -1323,7 +1318,7 @@ fn unpackGitPack(f: *Fetch, out_dir: fs.Dir, resource: *Resource) anyerror!Unpac
const index_prog_node = f.prog_node.start("Index pack", 0);
defer index_prog_node.end();
var index_buffered_writer = std.io.bufferedWriter(index_file.writer());
try git.indexPack(gpa, pack_file, index_buffered_writer.writer());
try git.indexPack(gpa, object_format, pack_file, index_buffered_writer.writer());
try index_buffered_writer.flush();
try index_file.sync();
}
@ -1331,10 +1326,10 @@ fn unpackGitPack(f: *Fetch, out_dir: fs.Dir, resource: *Resource) anyerror!Unpac
{
const checkout_prog_node = f.prog_node.start("Checkout", 0);
defer checkout_prog_node.end();
var repository = try git.Repository.init(gpa, pack_file, index_file);
var repository = try git.Repository.init(gpa, object_format, pack_file, index_file);
defer repository.deinit();
var diagnostics: git.Diagnostics = .{ .allocator = arena };
try repository.checkout(out_dir, want_oid, &diagnostics);
try repository.checkout(out_dir, resource.want_oid, &diagnostics);
if (diagnostics.errors.items.len > 0) {
try res.allocErrors(arena, diagnostics.errors.items.len, "unable to unpack packfile");
@ -1695,7 +1690,7 @@ const HashedFile = struct {
fn stripRoot(fs_path: []const u8, root_dir: []const u8) []const u8 {
if (root_dir.len == 0 or fs_path.len <= root_dir.len) return fs_path;
if (std.mem.eql(u8, fs_path[0..root_dir.len], root_dir) and fs_path[root_dir.len] == fs.path.sep) {
if (std.mem.eql(u8, fs_path[0..root_dir.len], root_dir) and fs.path.isSep(fs_path[root_dir.len])) {
return fs_path[root_dir.len + 1 ..];
}
@ -1810,8 +1805,8 @@ const FileHeader = struct {
}
pub fn isExecutable(self: *FileHeader) bool {
return std.mem.eql(u8, self.header[0..shebang.len], shebang) or
std.mem.eql(u8, self.header[0..elf_magic.len], elf_magic);
return std.mem.eql(u8, self.header[0..@min(self.bytes_read, shebang.len)], shebang) or
std.mem.eql(u8, self.header[0..@min(self.bytes_read, elf_magic.len)], elf_magic);
}
};
@ -2244,7 +2239,6 @@ const TestFetchBuilder = struct {
thread_pool: ThreadPool,
http_client: std.http.Client,
global_cache_directory: Cache.Directory,
progress: std.Progress,
job_queue: Fetch.JobQueue,
fetch: Fetch,
@ -2260,8 +2254,6 @@ const TestFetchBuilder = struct {
self.http_client = .{ .allocator = allocator };
self.global_cache_directory = .{ .handle = cache_dir, .path = null };
self.progress = .{ .dont_print_on_dumb = true };
self.job_queue = .{
.http_client = &self.http_client,
.thread_pool = &self.thread_pool,
@ -2281,10 +2273,11 @@ const TestFetchBuilder = struct {
.lazy_status = .eager,
.parent_package_root = Cache.Path{ .root_dir = Cache.Directory{ .handle = cache_dir, .path = null } },
.parent_manifest_ast = null,
.prog_node = self.progress.start("Fetch", 0),
.prog_node = std.Progress.Node.none,
.job_queue = &self.job_queue,
.omit_missing_hash_error = true,
.allow_missing_paths_field = false,
.use_latest_commit = true,
.package_root = undefined,
.error_bundle = undefined,
@ -2293,6 +2286,8 @@ const TestFetchBuilder = struct {
.actual_hash = undefined,
.has_build_zig = false,
.oom_flag = false,
.latest_commit = null,
.module = null,
};
return &self.fetch;

View File

@ -9,32 +9,133 @@ const mem = std.mem;
const testing = std.testing;
const Allocator = mem.Allocator;
const Sha1 = std.crypto.hash.Sha1;
const Sha256 = std.crypto.hash.sha2.Sha256;
const assert = std.debug.assert;
pub const oid_length = Sha1.digest_length;
pub const fmt_oid_length = 2 * oid_length;
/// The ID of a Git object (an SHA-1 hash).
pub const Oid = [oid_length]u8;
/// The ID of a Git object.
pub const Oid = union(Format) {
sha1: [Sha1.digest_length]u8,
sha256: [Sha256.digest_length]u8,
pub fn parseOid(s: []const u8) !Oid {
if (s.len != fmt_oid_length) return error.InvalidOid;
var oid: Oid = undefined;
for (&oid, 0..) |*b, i| {
b.* = std.fmt.parseUnsigned(u8, s[2 * i ..][0..2], 16) catch return error.InvalidOid;
pub const max_formatted_length = len: {
var max: usize = 0;
for (std.enums.values(Format)) |f| {
max = @max(max, f.formattedLength());
}
break :len max;
};
pub const Format = enum {
sha1,
sha256,
pub fn byteLength(f: Format) usize {
return switch (f) {
.sha1 => Sha1.digest_length,
.sha256 => Sha256.digest_length,
};
}
pub fn formattedLength(f: Format) usize {
return 2 * f.byteLength();
}
};
const Hasher = union(Format) {
sha1: Sha1,
sha256: Sha256,
fn init(oid_format: Format) Hasher {
return switch (oid_format) {
.sha1 => .{ .sha1 = Sha1.init(.{}) },
.sha256 => .{ .sha256 = Sha256.init(.{}) },
};
}
// Must be public for use from HashedReader and HashedWriter.
pub fn update(hasher: *Hasher, b: []const u8) void {
switch (hasher.*) {
inline else => |*inner| inner.update(b),
}
}
fn finalResult(hasher: *Hasher) Oid {
return switch (hasher.*) {
inline else => |*inner, tag| @unionInit(Oid, @tagName(tag), inner.finalResult()),
};
}
};
pub fn fromBytes(oid_format: Format, bytes: []const u8) Oid {
assert(bytes.len == oid_format.byteLength());
return switch (oid_format) {
inline else => |tag| @unionInit(Oid, @tagName(tag), bytes[0..comptime tag.byteLength()].*),
};
}
return oid;
}
test parseOid {
try testing.expectEqualSlices(
u8,
&.{ 0xCE, 0x91, 0x9C, 0xCF, 0x45, 0x95, 0x18, 0x56, 0xA7, 0x62, 0xFF, 0xDB, 0x8E, 0xF8, 0x50, 0x30, 0x1C, 0xD8, 0xC5, 0x88 },
&try parseOid("ce919ccf45951856a762ffdb8ef850301cd8c588"),
);
try testing.expectError(error.InvalidOid, parseOid("ce919ccf"));
try testing.expectError(error.InvalidOid, parseOid("master"));
try testing.expectError(error.InvalidOid, parseOid("HEAD"));
}
pub fn readBytes(oid_format: Format, reader: anytype) @TypeOf(reader).NoEofError!Oid {
return switch (oid_format) {
inline else => |tag| @unionInit(Oid, @tagName(tag), try reader.readBytesNoEof(tag.byteLength())),
};
}
pub fn parse(oid_format: Format, s: []const u8) error{InvalidOid}!Oid {
switch (oid_format) {
inline else => |tag| {
if (s.len != tag.formattedLength()) return error.InvalidOid;
var bytes: [tag.byteLength()]u8 = undefined;
for (&bytes, 0..) |*b, i| {
b.* = std.fmt.parseUnsigned(u8, s[2 * i ..][0..2], 16) catch return error.InvalidOid;
}
return @unionInit(Oid, @tagName(tag), bytes);
},
}
}
test parse {
try testing.expectEqualSlices(
u8,
&.{ 0xCE, 0x91, 0x9C, 0xCF, 0x45, 0x95, 0x18, 0x56, 0xA7, 0x62, 0xFF, 0xDB, 0x8E, 0xF8, 0x50, 0x30, 0x1C, 0xD8, 0xC5, 0x88 },
&(try parse(.sha1, "ce919ccf45951856a762ffdb8ef850301cd8c588")).sha1,
);
try testing.expectError(error.InvalidOid, parse(.sha256, "ce919ccf45951856a762ffdb8ef850301cd8c588"));
try testing.expectError(error.InvalidOid, parse(.sha1, "7f444a92bd4572ee4a28b2c63059924a9ca1829138553ef3e7c41ee159afae7a"));
try testing.expectEqualSlices(
u8,
&.{ 0x7F, 0x44, 0x4A, 0x92, 0xBD, 0x45, 0x72, 0xEE, 0x4A, 0x28, 0xB2, 0xC6, 0x30, 0x59, 0x92, 0x4A, 0x9C, 0xA1, 0x82, 0x91, 0x38, 0x55, 0x3E, 0xF3, 0xE7, 0xC4, 0x1E, 0xE1, 0x59, 0xAF, 0xAE, 0x7A },
&(try parse(.sha256, "7f444a92bd4572ee4a28b2c63059924a9ca1829138553ef3e7c41ee159afae7a")).sha256,
);
try testing.expectError(error.InvalidOid, parse(.sha1, "ce919ccf"));
try testing.expectError(error.InvalidOid, parse(.sha256, "ce919ccf"));
try testing.expectError(error.InvalidOid, parse(.sha1, "master"));
try testing.expectError(error.InvalidOid, parse(.sha256, "master"));
try testing.expectError(error.InvalidOid, parse(.sha1, "HEAD"));
try testing.expectError(error.InvalidOid, parse(.sha256, "HEAD"));
}
pub fn parseAny(s: []const u8) error{InvalidOid}!Oid {
return for (std.enums.values(Format)) |f| {
if (s.len == f.formattedLength()) break parse(f, s);
} else error.InvalidOid;
}
pub fn format(
oid: Oid,
comptime fmt: []const u8,
options: std.fmt.FormatOptions,
writer: anytype,
) @TypeOf(writer).Error!void {
_ = fmt;
_ = options;
try writer.print("{}", .{std.fmt.fmtSliceHexLower(oid.slice())});
}
pub fn slice(oid: *const Oid) []const u8 {
return switch (oid.*) {
inline else => |*bytes| bytes,
};
}
};
pub const Diagnostics = struct {
allocator: Allocator,
@ -72,8 +173,8 @@ pub const Diagnostics = struct {
pub const Repository = struct {
odb: Odb,
pub fn init(allocator: Allocator, pack_file: std.fs.File, index_file: std.fs.File) !Repository {
return .{ .odb = try Odb.init(allocator, pack_file, index_file) };
pub fn init(allocator: Allocator, format: Oid.Format, pack_file: std.fs.File, index_file: std.fs.File) !Repository {
return .{ .odb = try Odb.init(allocator, format, pack_file, index_file) };
}
pub fn deinit(repository: *Repository) void {
@ -92,7 +193,7 @@ pub const Repository = struct {
const tree_oid = tree_oid: {
const commit_object = try repository.odb.readObject();
if (commit_object.type != .commit) return error.NotACommit;
break :tree_oid try getCommitTree(commit_object.data);
break :tree_oid try getCommitTree(repository.odb.format, commit_object.data);
};
try repository.checkoutTree(worktree, tree_oid, "", diagnostics);
}
@ -114,7 +215,11 @@ pub const Repository = struct {
const tree_data = try repository.odb.allocator.dupe(u8, tree_object.data);
defer repository.odb.allocator.free(tree_data);
var tree_iter: TreeIterator = .{ .data = tree_data };
var tree_iter: TreeIterator = .{
.format = repository.odb.format,
.data = tree_data,
.pos = 0,
};
while (try tree_iter.next()) |entry| {
switch (entry.type) {
.directory => {
@ -170,19 +275,20 @@ pub const Repository = struct {
/// Returns the ID of the tree associated with the given commit (provided as
/// raw object data).
fn getCommitTree(commit_data: []const u8) !Oid {
fn getCommitTree(format: Oid.Format, commit_data: []const u8) !Oid {
if (!mem.startsWith(u8, commit_data, "tree ") or
commit_data.len < "tree ".len + fmt_oid_length + "\n".len or
commit_data["tree ".len + fmt_oid_length] != '\n')
commit_data.len < "tree ".len + format.formattedLength() + "\n".len or
commit_data["tree ".len + format.formattedLength()] != '\n')
{
return error.InvalidCommit;
}
return try parseOid(commit_data["tree ".len..][0..fmt_oid_length]);
return try .parse(format, commit_data["tree ".len..][0..format.formattedLength()]);
}
const TreeIterator = struct {
format: Oid.Format,
data: []const u8,
pos: usize = 0,
pos: usize,
const Entry = struct {
type: Type,
@ -220,8 +326,9 @@ pub const Repository = struct {
const name = iterator.data[iterator.pos..name_end :0];
iterator.pos = name_end + 1;
const oid_length = iterator.format.byteLength();
if (iterator.pos + oid_length > iterator.data.len) return error.InvalidTree;
const oid = iterator.data[iterator.pos..][0..oid_length].*;
const oid: Oid = .fromBytes(iterator.format, iterator.data[iterator.pos..][0..oid_length]);
iterator.pos += oid_length;
return .{ .type = @"type", .executable = executable, .name = name, .oid = oid };
@ -235,6 +342,7 @@ pub const Repository = struct {
/// The format of the packfile and its associated index are documented in
/// [pack-format](https://git-scm.com/docs/pack-format).
const Odb = struct {
format: Oid.Format,
pack_file: std.fs.File,
index_header: IndexHeader,
index_file: std.fs.File,
@ -242,11 +350,12 @@ const Odb = struct {
allocator: Allocator,
/// Initializes the database from open pack and index files.
fn init(allocator: Allocator, pack_file: std.fs.File, index_file: std.fs.File) !Odb {
fn init(allocator: Allocator, format: Oid.Format, pack_file: std.fs.File, index_file: std.fs.File) !Odb {
try pack_file.seekTo(0);
try index_file.seekTo(0);
const index_header = try IndexHeader.read(index_file.reader());
return .{
.format = format,
.pack_file = pack_file,
.index_header = index_header,
.index_file = index_file,
@ -268,7 +377,7 @@ const Odb = struct {
const base_object = while (true) {
if (odb.cache.get(base_offset)) |base_object| break base_object;
base_header = try EntryHeader.read(odb.pack_file.reader());
base_header = try EntryHeader.read(odb.format, odb.pack_file.reader());
switch (base_header) {
.ofs_delta => |ofs_delta| {
try delta_offsets.append(odb.allocator, base_offset);
@ -292,6 +401,7 @@ const Odb = struct {
const base_data = try resolveDeltaChain(
odb.allocator,
odb.format,
odb.pack_file,
base_object,
delta_offsets.items,
@ -303,14 +413,15 @@ const Odb = struct {
/// Seeks to the beginning of the object with the given ID.
fn seekOid(odb: *Odb, oid: Oid) !void {
const key = oid[0];
const oid_length = odb.format.byteLength();
const key = oid.slice()[0];
var start_index = if (key > 0) odb.index_header.fan_out_table[key - 1] else 0;
var end_index = odb.index_header.fan_out_table[key];
const found_index = while (start_index < end_index) {
const mid_index = start_index + (end_index - start_index) / 2;
try odb.index_file.seekTo(IndexHeader.size + mid_index * oid_length);
const mid_oid = try odb.index_file.reader().readBytesNoEof(oid_length);
switch (mem.order(u8, &mid_oid, &oid)) {
const mid_oid = try Oid.readBytes(odb.format, odb.index_file.reader());
switch (mem.order(u8, mid_oid.slice(), oid.slice())) {
.lt => start_index = mid_index + 1,
.gt => end_index = mid_index,
.eq => break mid_index,
@ -495,6 +606,7 @@ pub const Session = struct {
location: Location,
supports_agent: bool,
supports_shallow: bool,
object_format: Oid.Format,
allocator: Allocator,
const agent = "zig/" ++ @import("builtin").zig_version_string;
@ -513,6 +625,7 @@ pub const Session = struct {
.location = try .init(allocator, uri),
.supports_agent = false,
.supports_shallow = false,
.object_format = .sha1,
.allocator = allocator,
};
errdefer session.deinit();
@ -528,6 +641,10 @@ pub const Session = struct {
session.supports_shallow = true;
}
}
} else if (mem.eql(u8, capability.key, "object-format")) {
if (std.meta.stringToEnum(Oid.Format, capability.value orelse continue)) |format| {
session.object_format = format;
}
}
}
return session;
@ -708,6 +825,11 @@ pub const Session = struct {
if (session.supports_agent) {
try Packet.write(.{ .data = agent_capability }, body_writer);
}
{
const object_format_packet = try std.fmt.allocPrint(session.allocator, "object-format={s}\n", .{@tagName(session.object_format)});
defer session.allocator.free(object_format_packet);
try Packet.write(.{ .data = object_format_packet }, body_writer);
}
try Packet.write(.delimiter, body_writer);
for (options.ref_prefixes) |ref_prefix| {
const ref_prefix_packet = try std.fmt.allocPrint(session.allocator, "ref-prefix {s}\n", .{ref_prefix});
@ -739,10 +861,14 @@ pub const Session = struct {
try request.wait();
if (request.response.status != .ok) return error.ProtocolError;
return .{ .request = request };
return .{
.format = session.object_format,
.request = request,
};
}
pub const RefIterator = struct {
format: Oid.Format,
request: std.http.Client.Request,
buf: [Packet.max_data_length]u8 = undefined,
@ -764,7 +890,7 @@ pub const Session = struct {
.data => |data| {
const ref_data = Packet.normalizeText(data);
const oid_sep_pos = mem.indexOfScalar(u8, ref_data, ' ') orelse return error.InvalidRefPacket;
const oid = parseOid(data[0..oid_sep_pos]) catch return error.InvalidRefPacket;
const oid = Oid.parse(iterator.format, data[0..oid_sep_pos]) catch return error.InvalidRefPacket;
const name_sep_pos = mem.indexOfScalarPos(u8, ref_data, oid_sep_pos + 1, ' ') orelse ref_data.len;
const name = ref_data[oid_sep_pos + 1 .. name_sep_pos];
@ -778,7 +904,7 @@ pub const Session = struct {
if (mem.startsWith(u8, attribute, "symref-target:")) {
symref_target = attribute["symref-target:".len..];
} else if (mem.startsWith(u8, attribute, "peeled:")) {
peeled = parseOid(attribute["peeled:".len..]) catch return error.InvalidRefPacket;
peeled = Oid.parse(iterator.format, attribute["peeled:".len..]) catch return error.InvalidRefPacket;
}
last_sep_pos = next_sep_pos;
}
@ -814,6 +940,11 @@ pub const Session = struct {
if (session.supports_agent) {
try Packet.write(.{ .data = agent_capability }, body_writer);
}
{
const object_format_packet = try std.fmt.allocPrint(session.allocator, "object-format={s}\n", .{@tagName(session.object_format)});
defer session.allocator.free(object_format_packet);
try Packet.write(.{ .data = object_format_packet }, body_writer);
}
try Packet.write(.delimiter, body_writer);
// Our packfile parser supports the OFS_DELTA object type
try Packet.write(.{ .data = "ofs-delta\n" }, body_writer);
@ -997,7 +1128,7 @@ const EntryHeader = union(Type) {
};
}
fn read(reader: anytype) !EntryHeader {
fn read(format: Oid.Format, reader: anytype) !EntryHeader {
const InitialByte = packed struct { len: u4, type: u3, has_next: bool };
const initial: InitialByte = @bitCast(reader.readByte() catch |e| switch (e) {
error.EndOfStream => return error.InvalidFormat,
@ -1016,7 +1147,7 @@ const EntryHeader = union(Type) {
.uncompressed_length = uncompressed_length,
} },
.ref_delta => .{ .ref_delta = .{
.base_object = reader.readBytesNoEof(oid_length) catch |e| switch (e) {
.base_object = Oid.readBytes(format, reader) catch |e| switch (e) {
error.EndOfStream => return error.InvalidFormat,
else => |other| return other,
},
@ -1081,7 +1212,7 @@ const IndexEntry = struct {
/// Writes out a version 2 index for the given packfile, as documented in
/// [pack-format](https://git-scm.com/docs/pack-format).
pub fn indexPack(allocator: Allocator, pack: std.fs.File, index_writer: anytype) !void {
pub fn indexPack(allocator: Allocator, format: Oid.Format, pack: std.fs.File, index_writer: anytype) !void {
try pack.seekTo(0);
var index_entries: std.AutoHashMapUnmanaged(Oid, IndexEntry) = .empty;
@ -1089,7 +1220,7 @@ pub fn indexPack(allocator: Allocator, pack: std.fs.File, index_writer: anytype)
var pending_deltas: std.ArrayListUnmanaged(IndexEntry) = .empty;
defer pending_deltas.deinit(allocator);
const pack_checksum = try indexPackFirstPass(allocator, pack, &index_entries, &pending_deltas);
const pack_checksum = try indexPackFirstPass(allocator, format, pack, &index_entries, &pending_deltas);
var cache: ObjectCache = .{};
defer cache.deinit(allocator);
@ -1099,7 +1230,7 @@ pub fn indexPack(allocator: Allocator, pack: std.fs.File, index_writer: anytype)
while (i > 0) {
i -= 1;
const delta = pending_deltas.items[i];
if (try indexPackHashDelta(allocator, pack, delta, index_entries, &cache)) |oid| {
if (try indexPackHashDelta(allocator, format, pack, delta, index_entries, &cache)) |oid| {
try index_entries.put(allocator, oid, delta);
_ = pending_deltas.swapRemove(i);
}
@ -1117,7 +1248,7 @@ pub fn indexPack(allocator: Allocator, pack: std.fs.File, index_writer: anytype)
}
mem.sortUnstable(Oid, oids.items, {}, struct {
fn lessThan(_: void, o1: Oid, o2: Oid) bool {
return mem.lessThan(u8, &o1, &o2);
return mem.lessThan(u8, o1.slice(), o2.slice());
}
}.lessThan);
@ -1125,15 +1256,16 @@ pub fn indexPack(allocator: Allocator, pack: std.fs.File, index_writer: anytype)
var count: u32 = 0;
var fan_out_index: u8 = 0;
for (oids.items) |oid| {
if (oid[0] > fan_out_index) {
@memset(fan_out_table[fan_out_index..oid[0]], count);
fan_out_index = oid[0];
const key = oid.slice()[0];
if (key > fan_out_index) {
@memset(fan_out_table[fan_out_index..key], count);
fan_out_index = key;
}
count += 1;
}
@memset(fan_out_table[fan_out_index..], count);
var index_hashed_writer = hashedWriter(index_writer, Sha1.init(.{}));
var index_hashed_writer = std.compress.hashedWriter(index_writer, Oid.Hasher.init(format));
const writer = index_hashed_writer.writer();
try writer.writeAll(IndexHeader.signature);
try writer.writeInt(u32, IndexHeader.supported_version, .big);
@ -1142,7 +1274,7 @@ pub fn indexPack(allocator: Allocator, pack: std.fs.File, index_writer: anytype)
}
for (oids.items) |oid| {
try writer.writeAll(&oid);
try writer.writeAll(oid.slice());
}
for (oids.items) |oid| {
@ -1165,9 +1297,9 @@ pub fn indexPack(allocator: Allocator, pack: std.fs.File, index_writer: anytype)
try writer.writeInt(u64, offset, .big);
}
try writer.writeAll(&pack_checksum);
try writer.writeAll(pack_checksum.slice());
const index_checksum = index_hashed_writer.hasher.finalResult();
try index_writer.writeAll(&index_checksum);
try index_writer.writeAll(index_checksum.slice());
}
/// Performs the first pass over the packfile data for index construction.
@ -1176,13 +1308,14 @@ pub fn indexPack(allocator: Allocator, pack: std.fs.File, index_writer: anytype)
/// format).
fn indexPackFirstPass(
allocator: Allocator,
format: Oid.Format,
pack: std.fs.File,
index_entries: *std.AutoHashMapUnmanaged(Oid, IndexEntry),
pending_deltas: *std.ArrayListUnmanaged(IndexEntry),
) ![Sha1.digest_length]u8 {
) !Oid {
var pack_buffered_reader = std.io.bufferedReader(pack.reader());
var pack_counting_reader = std.io.countingReader(pack_buffered_reader.reader());
var pack_hashed_reader = std.compress.hashedReader(pack_counting_reader.reader(), Sha1.init(.{}));
var pack_hashed_reader = std.compress.hashedReader(pack_counting_reader.reader(), Oid.Hasher.init(format));
const pack_reader = pack_hashed_reader.reader();
const pack_header = try PackHeader.read(pack_reader);
@ -1191,12 +1324,12 @@ fn indexPackFirstPass(
while (current_entry < pack_header.total_objects) : (current_entry += 1) {
const entry_offset = pack_counting_reader.bytes_read;
var entry_crc32_reader = std.compress.hashedReader(pack_reader, std.hash.Crc32.init());
const entry_header = try EntryHeader.read(entry_crc32_reader.reader());
const entry_header = try EntryHeader.read(format, entry_crc32_reader.reader());
switch (entry_header) {
.commit, .tree, .blob, .tag => |object| {
var entry_decompress_stream = std.compress.zlib.decompressor(entry_crc32_reader.reader());
var entry_counting_reader = std.io.countingReader(entry_decompress_stream.reader());
var entry_hashed_writer = hashedWriter(std.io.null_writer, Sha1.init(.{}));
var entry_hashed_writer = std.compress.hashedWriter(std.io.null_writer, Oid.Hasher.init(format));
const entry_writer = entry_hashed_writer.writer();
// The object header is not included in the pack data but is
// part of the object's ID
@ -1229,8 +1362,8 @@ fn indexPackFirstPass(
}
const pack_checksum = pack_hashed_reader.hasher.finalResult();
const recorded_checksum = try pack_buffered_reader.reader().readBytesNoEof(Sha1.digest_length);
if (!mem.eql(u8, &pack_checksum, &recorded_checksum)) {
const recorded_checksum = try Oid.readBytes(format, pack_buffered_reader.reader());
if (!mem.eql(u8, pack_checksum.slice(), recorded_checksum.slice())) {
return error.CorruptedPack;
}
_ = pack_reader.readByte() catch |e| switch (e) {
@ -1245,6 +1378,7 @@ fn indexPackFirstPass(
/// delta and we do not yet know the offset of the base object).
fn indexPackHashDelta(
allocator: Allocator,
format: Oid.Format,
pack: std.fs.File,
delta: IndexEntry,
index_entries: std.AutoHashMapUnmanaged(Oid, IndexEntry),
@ -1259,7 +1393,7 @@ fn indexPackHashDelta(
if (cache.get(base_offset)) |base_object| break base_object;
try pack.seekTo(base_offset);
base_header = try EntryHeader.read(pack.reader());
base_header = try EntryHeader.read(format, pack.reader());
switch (base_header) {
.ofs_delta => |ofs_delta| {
try delta_offsets.append(allocator, base_offset);
@ -1279,10 +1413,10 @@ fn indexPackHashDelta(
}
};
const base_data = try resolveDeltaChain(allocator, pack, base_object, delta_offsets.items, cache);
const base_data = try resolveDeltaChain(allocator, format, pack, base_object, delta_offsets.items, cache);
var entry_hasher = Sha1.init(.{});
var entry_hashed_writer = hashedWriter(std.io.null_writer, &entry_hasher);
var entry_hasher: Oid.Hasher = .init(format);
var entry_hashed_writer = std.compress.hashedWriter(std.io.null_writer, &entry_hasher);
try entry_hashed_writer.writer().print("{s} {}\x00", .{ @tagName(base_object.type), base_data.len });
entry_hasher.update(base_data);
return entry_hasher.finalResult();
@ -1294,6 +1428,7 @@ fn indexPackHashDelta(
/// to obtain the final object.
fn resolveDeltaChain(
allocator: Allocator,
format: Oid.Format,
pack: std.fs.File,
base_object: Object,
delta_offsets: []const u64,
@ -1306,7 +1441,7 @@ fn resolveDeltaChain(
const delta_offset = delta_offsets[i];
try pack.seekTo(delta_offset);
const delta_header = try EntryHeader.read(pack.reader());
const delta_header = try EntryHeader.read(format, pack.reader());
const delta_data = try readObjectRaw(allocator, pack.reader(), delta_header.uncompressedLength());
defer allocator.free(delta_data);
var delta_stream = std.io.fixedBufferStream(delta_data);
@ -1394,46 +1529,22 @@ fn expandDelta(base_object: anytype, delta_reader: anytype, writer: anytype) !vo
}
}
fn HashedWriter(
comptime WriterType: anytype,
comptime HasherType: anytype,
) type {
return struct {
child_writer: WriterType,
hasher: HasherType,
const Error = WriterType.Error;
const Writer = std.io.Writer(*@This(), Error, write);
fn write(hashed_writer: *@This(), buf: []const u8) Error!usize {
const amt = try hashed_writer.child_writer.write(buf);
hashed_writer.hasher.update(buf);
return amt;
}
fn writer(hashed_writer: *@This()) Writer {
return .{ .context = hashed_writer };
}
};
}
fn hashedWriter(
writer: anytype,
hasher: anytype,
) HashedWriter(@TypeOf(writer), @TypeOf(hasher)) {
return .{ .child_writer = writer, .hasher = hasher };
}
test "packfile indexing and checkout" {
// To verify the contents of this packfile without using the code in this
// file:
//
// 1. Create a new empty Git repository (`git init`)
// 2. `git unpack-objects <path/to/testdata.pack`
// 3. `git fsck` -> note the "dangling commit" ID (which matches the commit
// checked out below)
// 4. `git checkout dd582c0720819ab7130b103635bd7271b9fd4feb`
const testrepo_pack = @embedFile("git/testdata/testrepo.pack");
/// Runs the packfile indexing and checkout test.
///
/// The two testrepo repositories under testdata contain identical commit
/// histories and contents.
///
/// To verify the contents of the packfiles using Git alone, run the
/// following commands in an empty directory:
///
/// 1. `git init --object-format=(sha1|sha256)`
/// 2. `git unpack-objects <path/to/testrepo.pack`
/// 3. `git fsck` - will print one "dangling commit":
/// - SHA-1: `dd582c0720819ab7130b103635bd7271b9fd4feb`
/// - SHA-256: `7f444a92bd4572ee4a28b2c63059924a9ca1829138553ef3e7c41ee159afae7a`
/// 4. `git checkout $commit`
fn runRepositoryTest(comptime format: Oid.Format, head_commit: []const u8) !void {
const testrepo_pack = @embedFile("git/testdata/testrepo-" ++ @tagName(format) ++ ".pack");
var git_dir = testing.tmpDir(.{});
defer git_dir.cleanup();
@ -1443,27 +1554,27 @@ test "packfile indexing and checkout" {
var index_file = try git_dir.dir.createFile("testrepo.idx", .{ .read = true });
defer index_file.close();
try indexPack(testing.allocator, pack_file, index_file.writer());
try indexPack(testing.allocator, format, pack_file, index_file.writer());
// Arbitrary size limit on files read while checking the repository contents
// (all files in the test repo are known to be much smaller than this)
const max_file_size = 4096;
// (all files in the test repo are known to be smaller than this)
const max_file_size = 8192;
const index_file_data = try git_dir.dir.readFileAlloc(testing.allocator, "testrepo.idx", max_file_size);
defer testing.allocator.free(index_file_data);
// testrepo.idx is generated by Git. The index created by this file should
// match it exactly. Running `git verify-pack -v testrepo.pack` can verify
// this.
const testrepo_idx = @embedFile("git/testdata/testrepo.idx");
const testrepo_idx = @embedFile("git/testdata/testrepo-" ++ @tagName(format) ++ ".idx");
try testing.expectEqualSlices(u8, testrepo_idx, index_file_data);
var repository = try Repository.init(testing.allocator, pack_file, index_file);
var repository = try Repository.init(testing.allocator, format, pack_file, index_file);
defer repository.deinit();
var worktree = testing.tmpDir(.{ .iterate = true });
defer worktree.cleanup();
const commit_id = try parseOid("dd582c0720819ab7130b103635bd7271b9fd4feb");
const commit_id = try Oid.parse(format, head_commit);
var diagnostics: Diagnostics = .{ .allocator = testing.allocator };
defer diagnostics.deinit();
@ -1527,6 +1638,14 @@ test "packfile indexing and checkout" {
try testing.expectEqualStrings(expected_file_contents, actual_file_contents);
}
test "SHA-1 packfile indexing and checkout" {
try runRepositoryTest(.sha1, "dd582c0720819ab7130b103635bd7271b9fd4feb");
}
test "SHA-256 packfile indexing and checkout" {
try runRepositoryTest(.sha256, "7f444a92bd4572ee4a28b2c63059924a9ca1829138553ef3e7c41ee159afae7a");
}
/// Checks out a commit of a packfile. Intended for experimenting with and
/// benchmarking possible optimizations to the indexing and checkout behavior.
pub fn main() !void {
@ -1534,14 +1653,16 @@ pub fn main() !void {
const args = try std.process.argsAlloc(allocator);
defer std.process.argsFree(allocator, args);
if (args.len != 4) {
return error.InvalidArguments; // Arguments: packfile commit worktree
if (args.len != 5) {
return error.InvalidArguments; // Arguments: format packfile commit worktree
}
var pack_file = try std.fs.cwd().openFile(args[1], .{});
const format = std.meta.stringToEnum(Oid.Format, args[1]) orelse return error.InvalidFormat;
var pack_file = try std.fs.cwd().openFile(args[2], .{});
defer pack_file.close();
const commit = try parseOid(args[2]);
var worktree = try std.fs.cwd().makeOpenPath(args[3], .{});
const commit = try Oid.parse(format, args[3]);
var worktree = try std.fs.cwd().makeOpenPath(args[4], .{});
defer worktree.close();
var git_dir = try worktree.makeOpenPath(".git", .{});
@ -1551,12 +1672,12 @@ pub fn main() !void {
var index_file = try git_dir.createFile("idx", .{ .read = true });
defer index_file.close();
var index_buffered_writer = std.io.bufferedWriter(index_file.writer());
try indexPack(allocator, pack_file, index_buffered_writer.writer());
try indexPack(allocator, format, pack_file, index_buffered_writer.writer());
try index_buffered_writer.flush();
try index_file.sync();
std.debug.print("Starting checkout...\n", .{});
var repository = try Repository.init(allocator, pack_file, index_file);
var repository = try Repository.init(allocator, format, pack_file, index_file);
defer repository.deinit();
var diagnostics: Diagnostics = .{ .allocator = allocator };
defer diagnostics.deinit();
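
As a quick illustration of the Oid API introduced above, a sketch (not part of the diff); the relative import path assumes the compiler source layout, and the two object IDs are the test commits exercised by this change.

const std = @import("std");
const git = @import("Package/Fetch/git.zig");

fn oidDemo() !void {
    // Parse with an explicit object format:
    const sha1_id = try git.Oid.parse(.sha1, "dd582c0720819ab7130b103635bd7271b9fd4feb");
    // Or let parseAny infer the format from the string length
    // (40 hex digits -> SHA-1, 64 hex digits -> SHA-256):
    const sha256_id = try git.Oid.parseAny("7f444a92bd4572ee4a28b2c63059924a9ca1829138553ef3e7c41ee159afae7a");
    // Oid defines format(), so "{}" prints the lowercase hex representation.
    std.debug.print("{} {}\n", .{ sha1_id, sha256_id });
}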

Binary file not shown.

Binary file not shown.

View File

@ -281,7 +281,7 @@ test "rebase - no entries" {
defer rebase.deinit(gpa);
try rebase.finalize(gpa);
try testing.expectEqual(@as(u64, 0), rebase.size());
try testing.expectEqual(0, rebase.buffer.items.len);
}
test "rebase - single entry" {

View File

@ -34,6 +34,10 @@ const Zcu = @import("Zcu.zig");
const mingw = @import("mingw.zig");
const dev = @import("dev.zig");
test {
_ = Package;
}
pub const std_options: std.Options = .{
.wasiCwd = wasi_cwd,
.logFn = log,
@ -7033,8 +7037,8 @@ fn cmdFetch(
var saved_path_or_url = path_or_url;
if (fetch.latest_commit) |*latest_commit| resolved: {
const latest_commit_hex = try std.fmt.allocPrint(arena, "{}", .{std.fmt.fmtSliceHexLower(latest_commit)});
if (fetch.latest_commit) |latest_commit| resolved: {
const latest_commit_hex = try std.fmt.allocPrint(arena, "{}", .{latest_commit});
var uri = try std.Uri.parse(path_or_url);

View File

@ -1303,6 +1303,7 @@ const ModuleTestOptions = struct {
skip_libc: bool,
max_rss: usize = 0,
no_builtin: bool = false,
build_options: ?*std.Build.Step.Options = null,
};
pub fn addModuleTests(b: *std.Build, options: ModuleTestOptions) *Step {
@ -1392,6 +1393,9 @@ pub fn addModuleTests(b: *std.Build, options: ModuleTestOptions) *Step {
.strip = test_target.strip,
});
if (options.no_builtin) these_tests.no_builtin = true;
if (options.build_options) |build_options| {
these_tests.root_module.addOptions("build_options", build_options);
}
const single_threaded_suffix = if (test_target.single_threaded == true) "-single" else "";
const backend_suffix = if (test_target.use_llvm == true)
"-llvm"