zig/src/git.zig
2023-10-02 18:14:57 -07:00

1469 lines
59 KiB
Zig

//! Git support for package fetching.
//!
//! This is not intended to support all features of Git: it is limited to the
//! basic functionality needed to clone a repository for the purpose of fetching
//! a package.
const std = @import("std");
const mem = std.mem;
const testing = std.testing;
const Allocator = mem.Allocator;
const Sha1 = std.crypto.hash.Sha1;
const assert = std.debug.assert;
const ProgressReader = @import("Package.zig").ProgressReader;
/// The length in bytes of a binary object ID (the SHA-1 digest length).
pub const oid_length = Sha1.digest_length;
/// The length of an object ID formatted as text: twice `oid_length`, since
/// `parseOid` reads two hexadecimal digits per byte.
pub const fmt_oid_length = 2 * oid_length;
/// The ID of a Git object (an SHA-1 hash).
pub const Oid = [oid_length]u8;
/// Parses the hexadecimal text form of an object ID into its binary form.
///
/// `s` must be exactly `fmt_oid_length` characters long and consist only of
/// hexadecimal digits; otherwise `error.InvalidOid` is returned.
pub fn parseOid(s: []const u8) !Oid {
    if (s.len != fmt_oid_length) return error.InvalidOid;

    var oid: Oid = undefined;
    var byte_index: usize = 0;
    while (byte_index < oid.len) : (byte_index += 1) {
        // Each output byte is encoded as two consecutive hex digits.
        const hex_pair = s[2 * byte_index ..][0..2];
        oid[byte_index] = std.fmt.parseUnsigned(u8, hex_pair, 16) catch return error.InvalidOid;
    }
    return oid;
}
test parseOid {
    // A well-formed 40-digit hexadecimal ID parses to its 20 raw bytes.
    const expected = [_]u8{ 0xCE, 0x91, 0x9C, 0xCF, 0x45, 0x95, 0x18, 0x56, 0xA7, 0x62, 0xFF, 0xDB, 0x8E, 0xF8, 0x50, 0x30, 0x1C, 0xD8, 0xC5, 0x88 };
    const parsed = try parseOid("ce919ccf45951856a762ffdb8ef850301cd8c588");
    try testing.expectEqualSlices(u8, &expected, &parsed);

    // Abbreviated IDs and ref names are not object IDs.
    for ([_][]const u8{ "ce919ccf", "master", "HEAD" }) |not_an_oid| {
        try testing.expectError(error.InvalidOid, parseOid(not_an_oid));
    }
}
/// Accumulates non-fatal problems encountered during a checkout so that the
/// caller can report them afterwards.
///
/// Every string stored in an error is owned by the diagnostics and is freed
/// with `allocator` in `deinit`.
pub const Diagnostics = struct {
    /// The allocator owning `errors` and all strings referenced by its items.
    allocator: Allocator,
    errors: std.ArrayListUnmanaged(Error) = .{},

    pub const Error = union(enum) {
        /// A symbolic link could not be created in the worktree.
        unable_to_create_sym_link: struct {
            /// The error returned by the failed link creation.
            code: anyerror,
            /// The path of the link within the checkout.
            file_name: []const u8,
            /// The target the link was supposed to point to.
            link_name: []const u8,
        },
    };

    /// Frees all recorded errors (including their strings) and invalidates `d`.
    pub fn deinit(d: *Diagnostics) void {
        const gpa = d.allocator;
        for (d.errors.items) |recorded| switch (recorded) {
            .unable_to_create_sym_link => |details| {
                gpa.free(details.file_name);
                gpa.free(details.link_name);
            },
        };
        d.errors.deinit(gpa);
        d.* = undefined;
    }
};
/// A Git repository whose objects are read from a single packfile and its
/// index, supporting only what is needed to check out a commit.
pub const Repository = struct {
    /// The object database from which all commits, trees and blobs are read.
    odb: Odb,

    /// Creates a repository over an already-open packfile and packfile index.
    pub fn init(allocator: Allocator, pack_file: std.fs.File, index_file: std.fs.File) !Repository {
        const odb = try Odb.init(allocator, pack_file, index_file);
        return .{ .odb = odb };
    }

    /// Releases the object database and invalidates `repository`.
    pub fn deinit(repository: *Repository) void {
        repository.odb.deinit();
        repository.* = undefined;
    }

    /// Checks out the repository at `commit_oid` to `worktree`.
    ///
    /// Returns `error.NotACommit` if `commit_oid` names an object of another
    /// type. Symlinks that cannot be created are recorded in `diagnostics`
    /// rather than failing the checkout.
    pub fn checkout(
        repository: *Repository,
        worktree: std.fs.Dir,
        commit_oid: Oid,
        diagnostics: *Diagnostics,
    ) !void {
        try repository.odb.seekOid(commit_oid);
        const commit = try repository.odb.readObject();
        if (commit.type != .commit) return error.NotACommit;
        const root_tree_oid = try getCommitTree(commit.data);
        try repository.checkoutTree(worktree, root_tree_oid, "", diagnostics);
    }

    /// Checks out the tree at `tree_oid` to `dir`, recursing into subtrees.
    ///
    /// `current_path` is the path of `dir` relative to the checkout root; it
    /// is only used to build the file names recorded in `diagnostics`.
    fn checkoutTree(
        repository: *Repository,
        dir: std.fs.Dir,
        tree_oid: Oid,
        current_path: []const u8,
        diagnostics: *Diagnostics,
    ) !void {
        const odb = &repository.odb;
        const gpa = odb.allocator;

        try odb.seekOid(tree_oid);
        const tree = try odb.readObject();
        if (tree.type != .tree) return error.NotATree;

        // Reading further objects below may evict this tree from the object
        // cache, so iterate over a private copy of its data.
        const tree_data = try gpa.dupe(u8, tree.data);
        defer gpa.free(tree_data);

        var entries: TreeIterator = .{ .data = tree_data };
        while (try entries.next()) |entry| {
            switch (entry.type) {
                .directory => {
                    try dir.makeDir(entry.name);
                    var child_dir = try dir.openDir(entry.name, .{});
                    defer child_dir.close();
                    const child_path = try std.fs.path.join(gpa, &.{ current_path, entry.name });
                    defer gpa.free(child_path);
                    try repository.checkoutTree(child_dir, entry.oid, child_path, diagnostics);
                },
                .file => {
                    var out_file = try dir.createFile(entry.name, .{});
                    defer out_file.close();
                    try odb.seekOid(entry.oid);
                    const blob = try odb.readObject();
                    if (blob.type != .blob) return error.InvalidFile;
                    try out_file.writeAll(blob.data);
                    try out_file.sync();
                },
                .symlink => {
                    try odb.seekOid(entry.oid);
                    const blob = try odb.readObject();
                    if (blob.type != .blob) return error.InvalidFile;
                    // The blob's content is the link target.
                    const target = blob.data;
                    dir.symLink(target, entry.name, .{}) catch |link_err| {
                        // Failing to create a symlink is not fatal: record it
                        // (with owned copies of the strings) and carry on.
                        const diag_gpa = diagnostics.allocator;
                        const file_name = try std.fs.path.join(diag_gpa, &.{ current_path, entry.name });
                        errdefer diag_gpa.free(file_name);
                        const target_copy = try diag_gpa.dupe(u8, target);
                        errdefer diag_gpa.free(target_copy);
                        try diagnostics.errors.append(diag_gpa, .{ .unable_to_create_sym_link = .{
                            .code = link_err,
                            .file_name = file_name,
                            .link_name = target_copy,
                        } });
                    };
                },
                .gitlink => {
                    // Consistent with git archive behavior, create the
                    // directory but do nothing else
                    try dir.makeDir(entry.name);
                },
            }
        }
    }

    /// Returns the ID of the tree associated with the given commit (provided
    /// as raw object data).
    ///
    /// The data must begin with `tree <hex oid>\n`; anything else yields
    /// `error.InvalidCommit`.
    fn getCommitTree(commit_data: []const u8) !Oid {
        const prefix = "tree ";
        const oid_end = prefix.len + fmt_oid_length;
        const well_formed = mem.startsWith(u8, commit_data, prefix) and
            commit_data.len > oid_end and
            commit_data[oid_end] == '\n';
        if (!well_formed) return error.InvalidCommit;
        return try parseOid(commit_data[prefix.len..oid_end]);
    }

    /// Iterates over the entries of a raw tree object. Each entry is encoded
    /// as `<octal mode> <name>\0<binary oid>`.
    const TreeIterator = struct {
        /// The raw tree object data being iterated.
        data: []const u8,
        /// The offset in `data` of the next unread byte.
        pos: usize = 0,

        const Entry = struct {
            type: Type,
            executable: bool,
            /// Points into the iterator's `data`.
            name: [:0]const u8,
            oid: Oid,

            /// The object type stored in the upper four bits of the mode.
            const Type = enum(u4) {
                directory = 0o4,
                file = 0o10,
                symlink = 0o12,
                gitlink = 0o16,
            };
        };

        /// Returns the next entry, or `null` once all data is consumed.
        /// Malformed data yields `error.InvalidTree`.
        fn next(iterator: *TreeIterator) !?Entry {
            const data = iterator.data;
            if (iterator.pos == data.len) return null;

            // Layout of the 16-bit mode, least significant bits first.
            const Mode = packed struct { permission: u9, unused: u3, type: u4 };
            const mode_end = mem.indexOfScalarPos(u8, data, iterator.pos, ' ') orelse return error.InvalidTree;
            const raw_mode = std.fmt.parseUnsigned(u16, data[iterator.pos..mode_end], 8) catch return error.InvalidTree;
            const mode: Mode = @bitCast(raw_mode);
            const entry_type = std.meta.intToEnum(Entry.Type, mode.type) catch return error.InvalidTree;

            // Only regular files carry permission bits (0644 or 0755); every
            // other entry type must have none.
            const is_file = entry_type == .file;
            const executable = switch (mode.permission) {
                0 => if (is_file) return error.InvalidTree else false,
                0o644, 0o755 => if (is_file) mode.permission == 0o755 else return error.InvalidTree,
                else => return error.InvalidTree,
            };
            iterator.pos = mode_end + 1;

            const name_end = mem.indexOfScalarPos(u8, data, iterator.pos, 0) orelse return error.InvalidTree;
            const name = data[iterator.pos..name_end :0];
            iterator.pos = name_end + 1;

            if (iterator.pos + oid_length > data.len) return error.InvalidTree;
            const oid = data[iterator.pos..][0..oid_length].*;
            iterator.pos += oid_length;

            return .{ .type = entry_type, .executable = executable, .name = name, .oid = oid };
        }
    };
};
/// A Git object database backed by a packfile. A packfile index is also used
/// for efficient access to objects in the packfile.
///
/// The format of the packfile and its associated index are documented in
/// [pack-format](https://git-scm.com/docs/pack-format).
const Odb = struct {
    pack_file: std.fs.File,
    /// The fan-out table read from the start of `index_file`.
    index_header: IndexHeader,
    index_file: std.fs.File,
    /// Cache of object data keyed by packfile offset.
    cache: ObjectCache = .{},
    allocator: Allocator,

    /// Initializes the database from open pack and index files.
    ///
    /// Both files are rewound to their start and the index header is read and
    /// validated.
    fn init(allocator: Allocator, pack_file: std.fs.File, index_file: std.fs.File) !Odb {
        try pack_file.seekTo(0);
        try index_file.seekTo(0);
        const index_header = try IndexHeader.read(index_file.reader());
        return .{
            .pack_file = pack_file,
            .index_header = index_header,
            .index_file = index_file,
            .allocator = allocator,
        };
    }

    /// Frees the object cache and invalidates `odb`.
    fn deinit(odb: *Odb) void {
        odb.cache.deinit(odb.allocator);
        odb.* = undefined;
    }

    /// Reads the object at the current position in the database.
    ///
    /// Delta entries are followed back to a cached or undeltified base, and
    /// the chain is then resolved forwards again. The base object is stored
    /// in the cache, so its data must not be freed by the caller.
    fn readObject(odb: *Odb) !Object {
        var base_offset = try odb.pack_file.getPos();
        var base_header: EntryHeader = undefined;
        // Offsets of the delta entries seen on the way to the base object.
        var delta_offsets = std.ArrayListUnmanaged(u64){};
        defer delta_offsets.deinit(odb.allocator);
        const base_object = while (true) {
            if (odb.cache.get(base_offset)) |base_object| break base_object;

            base_header = try EntryHeader.read(odb.pack_file.reader());
            switch (base_header) {
                .ofs_delta => |ofs_delta| {
                    // The base lives `offset` bytes before this entry.
                    try delta_offsets.append(odb.allocator, base_offset);
                    base_offset = std.math.sub(u64, base_offset, ofs_delta.offset) catch return error.InvalidFormat;
                    try odb.pack_file.seekTo(base_offset);
                },
                .ref_delta => |ref_delta| {
                    // The base is named by object ID; locate it via the index.
                    try delta_offsets.append(odb.allocator, base_offset);
                    try odb.seekOid(ref_delta.base_object);
                    base_offset = try odb.pack_file.getPos();
                },
                else => {
                    const base_data = try readObjectRaw(odb.allocator, odb.pack_file.reader(), base_header.uncompressedLength());
                    errdefer odb.allocator.free(base_data);
                    const base_object: Object = .{ .type = base_header.objectType(), .data = base_data };
                    try odb.cache.put(odb.allocator, base_offset, base_object);
                    break base_object;
                },
            }
        };

        const base_data = try resolveDeltaChain(
            odb.allocator,
            odb.pack_file,
            base_object,
            delta_offsets.items,
            &odb.cache,
        );

        return .{ .type = base_object.type, .data = base_data };
    }

    /// Seeks to the beginning of the object with the given ID.
    ///
    /// The object is located by binary search in the sorted ID table of the
    /// index, narrowed by the fan-out table. Returns `error.ObjectNotFound`
    /// if the index does not contain `oid`.
    fn seekOid(odb: *Odb, oid: Oid) !void {
        // All index arithmetic is done in u64: the table entries are u32
        // counts, and multiplying them by entry sizes could overflow u32 for
        // very large indexes.
        const key = oid[0];
        var start_index: u64 = if (key > 0) odb.index_header.fan_out_table[key - 1] else 0;
        var end_index: u64 = odb.index_header.fan_out_table[key];
        const found_index = while (start_index < end_index) {
            const mid_index = start_index + (end_index - start_index) / 2;
            try odb.index_file.seekTo(IndexHeader.size + mid_index * oid_length);
            const mid_oid = try odb.index_file.reader().readBytesNoEof(oid_length);
            switch (mem.order(u8, &mid_oid, &oid)) {
                .lt => start_index = mid_index + 1,
                .gt => end_index = mid_index,
                .eq => break mid_index,
            }
        } else return error.ObjectNotFound;

        // The ID table is followed by a table of 4-byte CRC32 values and then
        // the table of 4-byte offset values.
        const n_objects: u64 = odb.index_header.fan_out_table[255];
        const offset_values_start = IndexHeader.size + n_objects * (oid_length + 4);
        try odb.index_file.seekTo(offset_values_start + found_index * 4);
        const l1_offset: packed struct { value: u31, big: bool } = @bitCast(try odb.index_file.reader().readIntBig(u32));
        const pack_offset: u64 = if (l1_offset.big) big_offset: {
            // With the most significant bit set, the remaining 31 bits are an
            // index into the following table of 8-byte offsets.
            const l2_offset_values_start = offset_values_start + n_objects * 4;
            try odb.index_file.seekTo(l2_offset_values_start + @as(u64, l1_offset.value) * 8);
            break :big_offset try odb.index_file.reader().readIntBig(u64);
        } else l1_offset.value;

        try odb.pack_file.seekTo(pack_offset);
    }
};
/// A Git object as read from the object database: its type together with its
/// raw data.
const Object = struct {
type: Type,
/// The raw object data. NOTE(review): objects handed out by `Odb.readObject`
/// appear to have their data owned by the object cache — confirm against
/// `resolveDeltaChain` before freeing.
data: []const u8,
/// The object types that can be stored in the database.
const Type = enum {
commit,
tree,
blob,
tag,
};
};
/// A size-bounded, least-recently-used cache of object data, keyed by the
/// offset of the object in the packfile.
///
/// Caching the results of resolving delta objects speeds up later delta
/// resolution. Once the total size of the cached data exceeds
/// `max_byte_size`, the least recently used objects are evicted first: when
/// resolving delta chains, recently used objects are likely to be further
/// along in the chain and thus save more reconstruction steps.
///
/// The cache owns the data of every object inserted into it. Callers must not
/// free such data after insertion; whatever is still cached is freed by
/// `deinit`.
const ObjectCache = struct {
    objects: std.AutoHashMapUnmanaged(u64, CacheEntry) = .{},
    lru_nodes: LruList = .{},
    /// The combined length of the data of all cached objects.
    byte_size: usize = 0,

    const max_byte_size = 128 * 1024 * 1024; // 128MiB

    /// The offsets held by the cache, ordered from least to most recently
    /// used.
    const LruList = std.DoublyLinkedList(u64);
    const CacheEntry = struct { object: Object, lru_node: *LruList.Node };

    /// Frees every cached object along with the cache's own bookkeeping.
    fn deinit(cache: *ObjectCache, allocator: Allocator) void {
        var entries = cache.objects.valueIterator();
        while (entries.next()) |entry| {
            allocator.free(entry.object.data);
            allocator.destroy(entry.lru_node);
        }
        cache.objects.deinit(allocator);
        cache.* = undefined;
    }

    /// Looks up the object stored for `offset`. A hit also marks the object
    /// as the most recently used one.
    fn get(cache: *ObjectCache, offset: u64) ?Object {
        const entry = cache.objects.get(offset) orelse return null;
        cache.lru_nodes.remove(entry.lru_node);
        cache.lru_nodes.append(entry.lru_node);
        return entry.object;
    }

    /// Stores `object` under `offset`, taking ownership of its data and
    /// replacing (and freeing) any object previously stored there.
    ///
    /// Older entries are evicted while the cache is over its size limit, but
    /// the object being inserted is never evicted by this call, even if it
    /// exceeds the limit on its own: it stays valid until the next call to
    /// `put` or `deinit`.
    fn put(cache: *ObjectCache, allocator: Allocator, offset: u64, object: Object) !void {
        const lru_node = try allocator.create(LruList.Node);
        errdefer allocator.destroy(lru_node);
        lru_node.data = offset;

        const gop = try cache.objects.getOrPut(allocator, offset);
        if (gop.found_existing) {
            const stale = gop.value_ptr.*;
            cache.byte_size -= stale.object.data.len;
            cache.lru_nodes.remove(stale.lru_node);
            allocator.destroy(stale.lru_node);
            allocator.free(stale.object.data);
        }
        gop.value_ptr.* = .{ .object = object, .lru_node = lru_node };
        cache.byte_size += object.data.len;
        cache.lru_nodes.append(lru_node);

        // Always keep at least one node, so that the object just inserted
        // survives even when it alone is larger than the limit.
        while (cache.byte_size > max_byte_size and cache.lru_nodes.len > 1) {
            const oldest = cache.lru_nodes.popFirst().?;
            const oldest_offset = oldest.data;
            allocator.destroy(oldest);
            const evicted = cache.objects.fetchRemove(oldest_offset).?;
            cache.byte_size -= evicted.value.object.data.len;
            allocator.free(evicted.value.object.data);
        }
    }
};
/// A single pkt-line in the Git protocol.
///
/// The pkt-line format is documented in
/// [protocol-common](https://git-scm.com/docs/protocol-common). The special
/// meanings of the delimiter and response-end packets are documented in
/// [protocol-v2](https://git-scm.com/docs/protocol-v2).
const Packet = union(enum) {
    flush,
    delimiter,
    response_end,
    data: []const u8,

    /// The largest payload accepted in a data packet.
    const max_data_length = 65516;

    /// Reads a packet in pkt-line format.
    ///
    /// The payload of a data packet is read into `buf`, and the returned
    /// `data` slice points into it. Returns `error.InvalidPacket` if the
    /// length prefix is not valid hexadecimal, is 3, or describes a payload
    /// longer than `max_data_length`.
    fn read(reader: anytype, buf: *[max_data_length]u8) !Packet {
        const length_digits = try reader.readBytesNoEof(4);
        const length = std.fmt.parseUnsigned(u16, &length_digits, 16) catch return error.InvalidPacket;
        // Lengths below 4 cannot describe data (the length includes its own
        // four digits): 0, 1 and 2 are special packets and 3 is invalid.
        switch (length) {
            0 => return .flush,
            1 => return .delimiter,
            2 => return .response_end,
            3 => return error.InvalidPacket,
            else => {},
        }
        const data_length = length - 4;
        if (data_length > max_data_length) return error.InvalidPacket;
        const data = buf[0..data_length];
        try reader.readNoEof(data);
        return .{ .data = data };
    }

    /// Writes a packet in pkt-line format.
    ///
    /// The payload of a data packet must not exceed `max_data_length`
    /// (asserted).
    fn write(packet: Packet, writer: anytype) !void {
        const special_marker: []const u8 = switch (packet) {
            .flush => "0000",
            .delimiter => "0001",
            .response_end => "0002",
            .data => |data| {
                assert(data.len <= max_data_length);
                // The length prefix counts its own four digits.
                try writer.print("{x:0>4}", .{data.len + 4});
                return writer.writeAll(data);
            },
        };
        try writer.writeAll(special_marker);
    }
};
/// A client session for the Git protocol, currently limited to an HTTP(S)
/// transport. Only protocol version 2 is supported, as documented in
/// [protocol-v2](https://git-scm.com/docs/protocol-v2).
pub const Session = struct {
/// The HTTP client used to issue all requests of this session.
transport: *std.http.Client,
/// The URI of the remote repository; request paths are resolved against its
/// path.
uri: std.Uri,
/// Whether the server advertised the `agent` capability. Set by
/// `discoverCapabilities`.
supports_agent: bool = false,
/// Whether the server advertised the `shallow` feature of the `fetch`
/// command. Set by `discoverCapabilities`.
supports_shallow: bool = false,
/// The agent string reported to servers that support the `agent` capability.
const agent = "zig/" ++ @import("builtin").zig_version_string;
/// The payload of the packet used to send `agent` to the server.
const agent_capability = std.fmt.comptimePrint("agent={s}\n", .{agent});
/// Discovers server capabilities. This should be called before using any
/// other client functionality, or the client will be forced to default to
/// the bare minimum server requirements, which may be considerably less
/// efficient (e.g. no shallow fetches).
///
/// See the note on `getCapabilities` regarding `redirect_uri`.
pub fn discoverCapabilities(
session: *Session,
allocator: Allocator,
redirect_uri: *[]u8,
) !void {
var capability_iterator = try session.getCapabilities(allocator, redirect_uri);
defer capability_iterator.deinit();
while (try capability_iterator.next()) |capability| {
if (mem.eql(u8, capability.key, "agent")) {
session.supports_agent = true;
} else if (mem.eql(u8, capability.key, "fetch")) {
// The value of the fetch capability is a space-separated list of
// supported features; a fetch capability without a value
// advertises none.
var feature_iterator = mem.splitScalar(u8, capability.value orelse continue, ' ');
while (feature_iterator.next()) |feature| {
if (mem.eql(u8, feature, "shallow")) {
session.supports_shallow = true;
}
}
}
}
}
/// Returns an iterator over capabilities supported by the server.
///
/// If the server redirects the request, `error.Redirected` is returned and
/// `redirect_uri` is populated with the URI resulting from the redirects.
/// When this occurs, the value of `redirect_uri` must be freed with
/// `allocator` when the caller is done with it.
fn getCapabilities(
session: Session,
allocator: Allocator,
redirect_uri: *[]u8,
) !CapabilityIterator {
var info_refs_uri = session.uri;
info_refs_uri.path = try std.fs.path.resolvePosix(allocator, &.{ "/", session.uri.path, "info/refs" });
defer allocator.free(info_refs_uri.path);
info_refs_uri.query = "service=git-upload-pack";
info_refs_uri.fragment = null;
var headers = std.http.Headers.init(allocator);
defer headers.deinit();
try headers.append("Git-Protocol", "version=2");
var request = try session.transport.request(.GET, info_refs_uri, headers, .{
.max_redirects = 3,
});
// On success, ownership of the request passes to the returned iterator.
errdefer request.deinit();
try request.start(.{});
try request.finish();
try request.wait();
if (request.response.status != .ok) return error.ProtocolError;
// Fewer redirects left than were allowed means at least one redirect was
// followed: report the new repository URI instead of continuing.
if (request.redirects_left < 3) {
if (!mem.endsWith(u8, request.uri.path, "/info/refs")) return error.UnparseableRedirect;
var new_uri = request.uri;
new_uri.path = new_uri.path[0 .. new_uri.path.len - "/info/refs".len];
new_uri.query = null;
redirect_uri.* = try std.fmt.allocPrint(allocator, "{+/}", .{new_uri});
return error.Redirected;
}
const reader = request.reader();
var buf: [Packet.max_data_length]u8 = undefined;
var state: enum { response_start, response_content } = .response_start;
while (true) {
// Some Git servers (at least GitHub) include an additional
// '# service=git-upload-pack' informative response before sending
// the expected 'version 2' packet and capability information.
// This is not universal: SourceHut, for example, does not do this.
// Thus, we need to skip any such useless additional responses
// before we get the one we're actually looking for. The responses
// will be delimited by flush packets.
const packet = Packet.read(reader, &buf) catch |e| switch (e) {
error.EndOfStream => return error.UnsupportedProtocol, // 'version 2' packet not found
else => |other| return other,
};
switch (packet) {
.flush => state = .response_start,
.data => |data| switch (state) {
// Only the first packet of a response can identify it as the
// version 2 capability advertisement.
.response_start => if (mem.eql(u8, data, "version 2\n")) {
return .{ .request = request };
} else {
state = .response_content;
},
else => {},
},
else => return error.UnexpectedPacket,
}
}
}
/// An iterator over the capabilities in a capability advertisement. It owns
/// the underlying request, which is released by `deinit`.
const CapabilityIterator = struct {
request: std.http.Client.Request,
/// Backing storage for the most recently read packet.
buf: [Packet.max_data_length]u8 = undefined,
/// A capability in the form `key` or `key=value`. Both strings point into
/// the iterator's buffer, which is overwritten by the next call to `next`.
const Capability = struct {
key: []const u8,
value: ?[]const u8 = null,
};
fn deinit(iterator: *CapabilityIterator) void {
iterator.request.deinit();
iterator.* = undefined;
}
/// Returns the next capability, or `null` once the flush packet ending the
/// advertisement is reached.
fn next(iterator: *CapabilityIterator) !?Capability {
switch (try Packet.read(iterator.request.reader(), &iterator.buf)) {
.flush => return null,
// Every capability line must be terminated by a newline, which is not
// part of the key or value.
.data => |data| if (data.len > 0 and data[data.len - 1] == '\n') {
if (mem.indexOfScalar(u8, data, '=')) |separator_pos| {
return .{ .key = data[0..separator_pos], .value = data[separator_pos + 1 .. data.len - 1] };
} else {
return .{ .key = data[0 .. data.len - 1] };
}
} else return error.UnexpectedPacket,
else => return error.UnexpectedPacket,
}
}
};
const ListRefsOptions = struct {
/// The ref prefixes (if any) to use to filter the refs available on the
/// server. Note that the client must still check the returned refs
/// against its desired filters itself: the server is not required to
/// respect these prefix filters and may return other refs as well.
ref_prefixes: []const []const u8 = &.{},
/// Whether to include symref targets for returned symbolic refs.
include_symrefs: bool = false,
/// Whether to include the peeled object ID for returned tag refs.
include_peeled: bool = false,
};
/// Returns an iterator over refs known to the server.
///
/// The returned iterator owns the HTTP request and must be released with
/// `RefIterator.deinit`. A response status other than OK yields
/// `error.ProtocolError`.
pub fn listRefs(session: Session, allocator: Allocator, options: ListRefsOptions) !RefIterator {
var upload_pack_uri = session.uri;
upload_pack_uri.path = try std.fs.path.resolvePosix(allocator, &.{ "/", session.uri.path, "git-upload-pack" });
defer allocator.free(upload_pack_uri.path);
upload_pack_uri.query = null;
upload_pack_uri.fragment = null;
var headers = std.http.Headers.init(allocator);
defer headers.deinit();
try headers.append("Content-Type", "application/x-git-upload-pack-request");
try headers.append("Git-Protocol", "version=2");
// The request body is assembled in memory first so that its length can be
// sent as the content length.
var body = std.ArrayListUnmanaged(u8){};
defer body.deinit(allocator);
const body_writer = body.writer(allocator);
try Packet.write(.{ .data = "command=ls-refs\n" }, body_writer);
if (session.supports_agent) {
try Packet.write(.{ .data = agent_capability }, body_writer);
}
// The delimiter packet separates the command and capabilities from the
// command arguments.
try Packet.write(.delimiter, body_writer);
for (options.ref_prefixes) |ref_prefix| {
const ref_prefix_packet = try std.fmt.allocPrint(allocator, "ref-prefix {s}\n", .{ref_prefix});
defer allocator.free(ref_prefix_packet);
try Packet.write(.{ .data = ref_prefix_packet }, body_writer);
}
if (options.include_symrefs) {
try Packet.write(.{ .data = "symrefs\n" }, body_writer);
}
if (options.include_peeled) {
try Packet.write(.{ .data = "peel\n" }, body_writer);
}
try Packet.write(.flush, body_writer);
var request = try session.transport.request(.POST, upload_pack_uri, headers, .{
.handle_redirects = false,
});
// On success, ownership of the request passes to the returned iterator.
errdefer request.deinit();
request.transfer_encoding = .{ .content_length = body.items.len };
try request.start(.{});
try request.writeAll(body.items);
try request.finish();
try request.wait();
if (request.response.status != .ok) return error.ProtocolError;
return .{ .request = request };
}
/// An iterator over the refs in an `ls-refs` response. It owns the
/// underlying request, which is released by `deinit`.
pub const RefIterator = struct {
request: std.http.Client.Request,
/// Backing storage for the most recently read packet.
buf: [Packet.max_data_length]u8 = undefined,
/// A ref as reported by the server. `name` and `symref_target` point into
/// the iterator's buffer, which is overwritten by the next call to `next`.
pub const Ref = struct {
oid: Oid,
name: []const u8,
symref_target: ?[]const u8,
peeled: ?Oid,
};
pub fn deinit(iterator: *RefIterator) void {
iterator.request.deinit();
iterator.* = undefined;
}
/// Returns the next ref, or `null` once the flush packet ending the
/// response is reached. Malformed ref lines yield
/// `error.InvalidRefPacket`.
pub fn next(iterator: *RefIterator) !?Ref {
switch (try Packet.read(iterator.request.reader(), &iterator.buf)) {
.flush => return null,
.data => |data| {
// A ref line has the form `<oid> <name>[ <attribute>]...\n`.
const oid_sep_pos = mem.indexOfScalar(u8, data, ' ') orelse return error.InvalidRefPacket;
const oid = parseOid(data[0..oid_sep_pos]) catch return error.InvalidRefPacket;
const name_sep_pos = mem.indexOfAnyPos(u8, data, oid_sep_pos + 1, " \n") orelse return error.InvalidRefPacket;
const name = data[oid_sep_pos + 1 .. name_sep_pos];
var symref_target: ?[]const u8 = null;
var peeled: ?Oid = null;
var last_sep_pos = name_sep_pos;
// Each further space introduces another attribute; the newline ends
// the line. Unrecognized attributes are ignored.
while (data[last_sep_pos] == ' ') {
const next_sep_pos = mem.indexOfAnyPos(u8, data, last_sep_pos + 1, " \n") orelse return error.InvalidRefPacket;
const attribute = data[last_sep_pos + 1 .. next_sep_pos];
if (mem.startsWith(u8, attribute, "symref-target:")) {
symref_target = attribute["symref-target:".len..];
} else if (mem.startsWith(u8, attribute, "peeled:")) {
peeled = parseOid(attribute["peeled:".len..]) catch return error.InvalidRefPacket;
}
last_sep_pos = next_sep_pos;
}
return .{ .oid = oid, .name = name, .symref_target = symref_target, .peeled = peeled };
},
else => return error.UnexpectedPacket,
}
}
};
/// Fetches the given refs from the server. A shallow fetch (depth 1) is
/// performed if the server supports it.
///
/// Each element of `wants` is sent verbatim in a `want` line and must be
/// short enough for that line to fit in a single packet. The returned stream
/// owns the HTTP request and must be released with `FetchStream.deinit`.
pub fn fetch(session: Session, allocator: Allocator, wants: []const []const u8) !FetchStream {
var upload_pack_uri = session.uri;
upload_pack_uri.path = try std.fs.path.resolvePosix(allocator, &.{ "/", session.uri.path, "git-upload-pack" });
defer allocator.free(upload_pack_uri.path);
upload_pack_uri.query = null;
upload_pack_uri.fragment = null;
var headers = std.http.Headers.init(allocator);
defer headers.deinit();
try headers.append("Content-Type", "application/x-git-upload-pack-request");
try headers.append("Git-Protocol", "version=2");
// The request body is assembled in memory first so that its length can be
// sent as the content length.
var body = std.ArrayListUnmanaged(u8){};
defer body.deinit(allocator);
const body_writer = body.writer(allocator);
try Packet.write(.{ .data = "command=fetch\n" }, body_writer);
if (session.supports_agent) {
try Packet.write(.{ .data = agent_capability }, body_writer);
}
try Packet.write(.delimiter, body_writer);
// Our packfile parser supports the OFS_DELTA object type
try Packet.write(.{ .data = "ofs-delta\n" }, body_writer);
// We do not currently convey server progress information to the user
try Packet.write(.{ .data = "no-progress\n" }, body_writer);
if (session.supports_shallow) {
try Packet.write(.{ .data = "deepen 1\n" }, body_writer);
}
for (wants) |want| {
var buf: [Packet.max_data_length]u8 = undefined;
// NOTE(review): `catch unreachable` relies on the formatted want line
// fitting in a single packet buffer — confirm that callers only pass
// short values such as object IDs.
const arg = std.fmt.bufPrint(&buf, "want {s}\n", .{want}) catch unreachable;
try Packet.write(.{ .data = arg }, body_writer);
}
try Packet.write(.{ .data = "done\n" }, body_writer);
try Packet.write(.flush, body_writer);
var request = try session.transport.request(.POST, upload_pack_uri, headers, .{
.handle_redirects = false,
});
// On success, ownership of the request passes to the returned stream.
errdefer request.deinit();
request.transfer_encoding = .{ .content_length = body.items.len };
try request.start(.{});
try request.writeAll(body.items);
try request.finish();
try request.wait();
if (request.response.status != .ok) return error.ProtocolError;
const reader = request.reader();
// We are not interested in any of the sections of the returned fetch
// data other than the packfile section, since we aren't doing anything
// complex like ref negotiation (this is a fresh clone).
var state: enum { section_start, section_content } = .section_start;
while (true) {
var buf: [Packet.max_data_length]u8 = undefined;
const packet = try Packet.read(reader, &buf);
switch (state) {
// The first packet of a section names it; the stream is returned
// positioned right after the packfile section's name.
.section_start => switch (packet) {
.data => |data| if (mem.eql(u8, data, "packfile\n")) {
return .{ .request = request };
} else {
state = .section_content;
},
else => return error.UnexpectedPacket,
},
// The content of any other section is skipped up to the delimiter
// packet that ends it.
.section_content => switch (packet) {
.delimiter => state = .section_start,
.data => {},
else => return error.UnexpectedPacket,
},
}
}
}
/// A stream over the pack data in the packfile section of a fetch response.
/// It owns the underlying request, which is released by `deinit`.
pub const FetchStream = struct {
request: std.http.Client.Request,
/// Holds the most recently read packet, including its stream code byte.
buf: [Packet.max_data_length]u8 = undefined,
/// The position in `buf` of the next pack data byte to hand out.
pos: usize = 0,
/// The length of the packet currently held in `buf`.
len: usize = 0,
pub fn deinit(stream: *FetchStream) void {
stream.request.deinit();
}
pub const ReadError = std.http.Client.Request.ReadError || error{
InvalidPacket,
ProtocolError,
UnexpectedPacket,
};
pub const Reader = std.io.Reader(*FetchStream, ReadError, read);
/// The first byte of each data packet, identifying what the rest of the
/// packet contains.
const StreamCode = enum(u8) {
pack_data = 1,
progress = 2,
fatal_error = 3,
_,
};
pub fn reader(stream: *FetchStream) Reader {
return .{ .context = stream };
}
/// Copies up to `buf.len` bytes of pack data into `buf` and returns the
/// number of bytes copied. Returns 0 when the flush packet ending the
/// response is reached, and `error.ProtocolError` if the server sends a
/// fatal error packet.
pub fn read(stream: *FetchStream, buf: []u8) !usize {
// Only read from the network once the current packet is used up.
if (stream.pos == stream.len) {
while (true) {
switch (try Packet.read(stream.request.reader(), &stream.buf)) {
.flush => return 0,
// Packets carrying nothing beyond the stream code are skipped, as are
// progress packets and packets with an unknown stream code.
.data => |data| if (data.len > 1) switch (@as(StreamCode, @enumFromInt(data[0]))) {
.pack_data => {
// Start after the stream code byte.
stream.pos = 1;
stream.len = data.len;
break;
},
.fatal_error => return error.ProtocolError,
else => {},
},
else => return error.UnexpectedPacket,
}
}
}
const size = @min(buf.len, stream.len - stream.pos);
@memcpy(buf[0..size], stream.buf[stream.pos .. stream.pos + size]);
stream.pos += size;
return size;
}
};
};
/// The header at the start of a packfile: the `PACK` signature, a format
/// version and the number of objects contained in the pack.
const PackHeader = struct {
    total_objects: u32,

    const signature = "PACK";
    const supported_version = 2;

    /// Reads and validates a packfile header.
    ///
    /// Returns `error.InvalidHeader` if the signature does not match or the
    /// stream ends before the header is complete, and
    /// `error.UnsupportedVersion` for any version other than
    /// `supported_version`. The signature and version are each checked before
    /// any further bytes are read.
    fn read(reader: anytype) !PackHeader {
        const magic = reader.readBytesNoEof(signature.len) catch |err| switch (err) {
            error.EndOfStream => return error.InvalidHeader,
            else => |unexpected| return unexpected,
        };
        if (!mem.eql(u8, &magic, signature)) return error.InvalidHeader;

        const version = reader.readIntBig(u32) catch |err| switch (err) {
            error.EndOfStream => return error.InvalidHeader,
            else => |unexpected| return unexpected,
        };
        if (version != supported_version) return error.UnsupportedVersion;

        const object_count = reader.readIntBig(u32) catch |err| switch (err) {
            error.EndOfStream => return error.InvalidHeader,
            else => |unexpected| return unexpected,
        };
        return .{ .total_objects = object_count };
    }
};
/// The header preceding each entry in a packfile. It gives the type of the
/// entry, the uncompressed length of its data and, for delta entries, where
/// to find the base object.
const EntryHeader = union(Type) {
    commit: Undeltified,
    tree: Undeltified,
    blob: Undeltified,
    tag: Undeltified,
    ofs_delta: OfsDelta,
    ref_delta: RefDelta,

    /// The entry type as stored in the three type bits of the first byte.
    const Type = enum(u3) {
        commit = 1,
        tree = 2,
        blob = 3,
        tag = 4,
        ofs_delta = 6,
        ref_delta = 7,
    };

    const Undeltified = struct {
        uncompressed_length: u64,
    };

    const OfsDelta = struct {
        /// How far before this entry the base object is located in the pack.
        offset: u64,
        uncompressed_length: u64,
    };

    const RefDelta = struct {
        /// The ID of the base object.
        base_object: Oid,
        uncompressed_length: u64,
    };

    /// Returns the object type of an undeltified entry. Must not be called on
    /// a delta entry.
    fn objectType(header: EntryHeader) Object.Type {
        return switch (header) {
            .commit => .commit,
            .tree => .tree,
            .blob => .blob,
            .tag => .tag,
            .ofs_delta, .ref_delta => unreachable,
        };
    }

    /// Returns the uncompressed length recorded in the header, whatever the
    /// entry type.
    fn uncompressedLength(header: EntryHeader) u64 {
        return switch (header) {
            .commit, .tree, .blob, .tag => |undeltified| undeltified.uncompressed_length,
            .ofs_delta => |delta| delta.uncompressed_length,
            .ref_delta => |delta| delta.uncompressed_length,
        };
    }

    /// Reads an entry header, returning `error.InvalidFormat` if the type is
    /// unknown, the length does not fit in 64 bits, or the stream ends within
    /// the first byte or the base object ID.
    fn read(reader: anytype) !EntryHeader {
        // The first byte holds the low four bits of the length, the type and
        // a continuation flag (listed here from least significant bit).
        const InitialByte = packed struct { len: u4, type: u3, has_next: bool };
        const first_byte = reader.readByte() catch |err| switch (err) {
            error.EndOfStream => return error.InvalidFormat,
            else => |unexpected| return unexpected,
        };
        const initial: InitialByte = @bitCast(first_byte);

        // Any remaining length bits follow as a variable-length integer.
        const rest_len = if (initial.has_next) try readSizeVarInt(reader) else 0;
        const high_bits = std.math.shlExact(u64, rest_len, 4) catch return error.InvalidFormat;
        const uncompressed_length = @as(u64, initial.len) | high_bits;

        const entry_type = std.meta.intToEnum(Type, initial.type) catch return error.InvalidFormat;
        switch (entry_type) {
            .commit => return .{ .commit = .{ .uncompressed_length = uncompressed_length } },
            .tree => return .{ .tree = .{ .uncompressed_length = uncompressed_length } },
            .blob => return .{ .blob = .{ .uncompressed_length = uncompressed_length } },
            .tag => return .{ .tag = .{ .uncompressed_length = uncompressed_length } },
            .ofs_delta => {
                const offset = try readOffsetVarInt(reader);
                return .{ .ofs_delta = .{
                    .offset = offset,
                    .uncompressed_length = uncompressed_length,
                } };
            },
            .ref_delta => {
                const base_object = reader.readBytesNoEof(oid_length) catch |err| switch (err) {
                    error.EndOfStream => return error.InvalidFormat,
                    else => |unexpected| return unexpected,
                };
                return .{ .ref_delta = .{
                    .base_object = base_object,
                    .uncompressed_length = uncompressed_length,
                } };
            },
        }
    }
};
/// Reads a variable-length size from `r`.
///
/// The size is stored in groups of seven bits, least significant group
/// first. The high bit of each byte indicates whether another byte follows.
///
/// Returns `error.InvalidFormat` if the encoded value does not fit in 64
/// bits. Errors from reading `r` (including the end of the stream) are
/// returned unchanged.
fn readSizeVarInt(r: anytype) !u64 {
    const Byte = packed struct { value: u7, has_next: bool };
    var b: Byte = @bitCast(try r.readByte());
    var value: u64 = b.value;
    var shift: u6 = 0;
    while (b.has_next) {
        b = @bitCast(try r.readByte());
        shift = std.math.add(u6, shift, 7) catch return error.InvalidFormat;
        // A plain `<<` would silently drop the bits of the final group that
        // do not fit (at shift 63 only a single bit does), so shift with an
        // overflow check instead.
        value |= std.math.shlExact(u64, b.value, shift) catch return error.InvalidFormat;
    }
    return value;
}
/// Reads a variable-length offset from `r`.
///
/// The offset is stored in groups of seven bits, most significant group
/// first. The high bit of each byte indicates whether another byte follows.
/// Before each further group is appended, one is added to the value read so
/// far.
///
/// Returns `error.InvalidFormat` if the encoded value does not fit in 64
/// bits. Errors from reading `r` (including the end of the stream) are
/// returned unchanged.
fn readOffsetVarInt(r: anytype) !u64 {
    const Byte = packed struct { value: u7, has_next: bool };
    var b: Byte = @bitCast(try r.readByte());
    var value: u64 = b.value;
    while (b.has_next) {
        b = @bitCast(try r.readByte());
        // Crafted input can drive `value` all the way up to the maximum u64,
        // so the increment has to be checked just like the shift.
        const incremented = std.math.add(u64, value, 1) catch return error.InvalidFormat;
        value = std.math.shlExact(u64, incremented, 7) catch return error.InvalidFormat;
        value |= b.value;
    }
    return value;
}
/// The header of a version 2 packfile index: a signature and version,
/// followed by the fan-out table.
const IndexHeader = struct {
    /// Entry `i` bounds the range of the sorted object ID table holding IDs
    /// whose first byte is `i`; the last entry is the total number of
    /// objects (this is how `Odb.seekOid` uses the table).
    fan_out_table: [256]u32,

    const signature = "\xFFtOc";
    const supported_version = 2;
    /// The size of the header in bytes: signature, version and fan-out table.
    const size = 4 + 4 + @sizeOf([256]u32);

    /// Reads and validates an index header.
    ///
    /// The whole header is read before anything is checked. Returns
    /// `error.InvalidHeader` if the signature does not match and
    /// `error.UnsupportedVersion` for any version other than
    /// `supported_version`.
    fn read(reader: anytype) !IndexHeader {
        const header_bytes = try reader.readBytesNoEof(size);
        if (!mem.eql(u8, header_bytes[0..4], signature)) return error.InvalidHeader;
        if (mem.readIntBig(u32, header_bytes[4..8]) != supported_version) return error.UnsupportedVersion;

        // The rest of the header is the fan-out table, stored as 256
        // consecutive big-endian 32-bit integers.
        var fan_out_table: [256]u32 = undefined;
        for (&fan_out_table, 0..) |*entry, i| {
            entry.* = mem.readIntBig(u32, header_bytes[8 + 4 * i ..][0..4]);
        }
        return .{ .fan_out_table = fan_out_table };
    }
};
/// The information recorded for each object while indexing a packfile; both
/// values are written to the index by `indexPack`.
const IndexEntry = struct {
/// The offset of the object's entry in the packfile.
offset: u64,
/// NOTE(review): presumably the CRC32 checksum of the object's packed entry
/// data — confirm against the code that computes it.
crc32: u32,
};
/// Builds a version 2 index for the packfile `pack` and writes it to
/// `index_writer`, following the layout documented in
/// [pack-format](https://git-scm.com/docs/pack-format).
///
/// `pack` is rewound to its start before reading. Returns
/// `error.IncompletePack` when a complete sweep over the outstanding deltas
/// fails to resolve any of them.
pub fn indexPack(allocator: Allocator, pack: std.fs.File, index_writer: anytype) !void {
    try pack.seekTo(0);

    var entries_by_oid = std.AutoHashMapUnmanaged(Oid, IndexEntry){};
    defer entries_by_oid.deinit(allocator);
    var unresolved = std.ArrayListUnmanaged(IndexEntry){};
    defer unresolved.deinit(allocator);
    const pack_checksum = try indexPackFirstPass(allocator, pack, &entries_by_oid, &unresolved);

    var cache: ObjectCache = .{};
    defer cache.deinit(allocator);

    // Sweep the outstanding deltas repeatedly. Each sweep walks the list from
    // the back so that `swapRemove` only ever moves an element that has
    // already been visited in the current sweep.
    while (unresolved.items.len > 0) {
        const count_before = unresolved.items.len;
        var idx = count_before;
        while (idx > 0) {
            idx -= 1;
            const candidate = unresolved.items[idx];
            const maybe_oid = try indexPackHashDelta(allocator, pack, candidate, entries_by_oid, &cache);
            if (maybe_oid) |oid| {
                try entries_by_oid.put(allocator, oid, candidate);
                _ = unresolved.swapRemove(idx);
            }
        }
        // No progress means the remaining deltas can never be resolved.
        if (unresolved.items.len == count_before) return error.IncompletePack;
    }

    // Collect every object ID and order them bytewise.
    var sorted_oids = std.ArrayListUnmanaged(Oid){};
    defer sorted_oids.deinit(allocator);
    try sorted_oids.ensureTotalCapacityPrecise(allocator, entries_by_oid.count());
    var key_iter = entries_by_oid.keyIterator();
    while (key_iter.next()) |key| {
        sorted_oids.appendAssumeCapacity(key.*);
    }
    const OidOrder = struct {
        fn lessThan(_: void, lhs: Oid, rhs: Oid) bool {
            return mem.lessThan(u8, &lhs, &rhs);
        }
    };
    mem.sortUnstable(Oid, sorted_oids.items, {}, OidOrder.lessThan);

    // Entry `b` of the fan-out table is the number of object IDs whose first
    // byte is less than or equal to `b`.
    var fan_out_table: [256]u32 = undefined;
    var objects_so_far: u32 = 0;
    var next_bucket: u8 = 0;
    for (sorted_oids.items) |oid| {
        const first_byte = oid[0];
        if (first_byte > next_bucket) {
            @memset(fan_out_table[next_bucket..first_byte], objects_so_far);
            next_bucket = first_byte;
        }
        objects_so_far += 1;
    }
    @memset(fan_out_table[next_bucket..], objects_so_far);

    // Everything written through `out` is also fed to the SHA-1 hasher that
    // produces the trailing index checksum.
    var hashed_index = hashedWriter(index_writer, Sha1.init(.{}));
    const out = hashed_index.writer();
    try out.writeAll(IndexHeader.signature);
    try out.writeIntBig(u32, IndexHeader.supported_version);
    for (fan_out_table) |cumulative_count| {
        try out.writeIntBig(u32, cumulative_count);
    }
    for (sorted_oids.items) |oid| {
        try out.writeAll(&oid);
    }
    for (sorted_oids.items) |oid| {
        const entry = entries_by_oid.get(oid).?;
        try out.writeIntBig(u32, entry.crc32);
    }

    // Offsets that fit in 31 bits are stored inline; larger ones are stored in
    // a trailing table and referenced by position with the top bit set.
    var large_offsets = std.ArrayListUnmanaged(u64){};
    defer large_offsets.deinit(allocator);
    for (sorted_oids.items) |oid| {
        const offset = entries_by_oid.get(oid).?.offset;
        if (offset > std.math.maxInt(u31)) {
            const slot = large_offsets.items.len;
            try large_offsets.append(allocator, offset);
            try out.writeIntBig(u32, @as(u32, @intCast(slot)) | (1 << 31));
        } else {
            try out.writeIntBig(u32, @intCast(offset));
        }
    }
    for (large_offsets.items) |large_offset| {
        try out.writeIntBig(u64, large_offset);
    }

    try out.writeAll(&pack_checksum);
    // The index checksum itself is written directly so that it is not hashed.
    const index_checksum = hashed_index.hasher.finalResult();
    try index_writer.writeAll(&index_checksum);
}
/// Makes the initial sequential pass over the packfile for index construction.
///
/// Non-delta objects are hashed and recorded in `index_entries`; delta objects
/// are decompressed only to find where they end and are appended to
/// `pending_deltas` for later resolution. Returns the SHA-1 checksum of the
/// pack contents, which is part of the index format.
///
/// Returns `error.InvalidObject` when an entry's decompressed length differs
/// from the length in its header, `error.CorruptedPack` when the computed
/// checksum differs from the recorded one, and `error.InvalidFormat` when data
/// follows the recorded checksum.
fn indexPackFirstPass(
    allocator: Allocator,
    pack: std.fs.File,
    index_entries: *std.AutoHashMapUnmanaged(Oid, IndexEntry),
    pending_deltas: *std.ArrayListUnmanaged(IndexEntry),
) ![Sha1.digest_length]u8 {
    // Reader stack: buffered file -> byte counter (entry offsets) -> SHA-1
    // (pack checksum).
    var buffered = std.io.bufferedReader(pack.reader());
    var counting = std.io.countingReader(buffered.reader());
    var hashed = std.compress.hashedReader(counting.reader(), Sha1.init(.{}));
    const pack_reader = hashed.reader();

    const pack_header = try PackHeader.read(pack_reader);

    var objects_read: u32 = 0;
    while (objects_read < pack_header.total_objects) : (objects_read += 1) {
        const entry_offset = counting.bytes_read;
        // The CRC-32 covers everything read for this entry, header included.
        var crc_reader = std.compress.hashedReader(pack_reader, std.hash.Crc32.init());
        const entry_header = try EntryHeader.read(crc_reader.reader());
        switch (entry_header) {
            inline .commit, .tree, .blob, .tag => |object, tag| {
                var inflater = try std.compress.zlib.decompressStream(allocator, crc_reader.reader());
                defer inflater.deinit();
                var inflated_counter = std.io.countingReader(inflater.reader());

                var oid_writer = hashedWriter(std.io.null_writer, Sha1.init(.{}));
                const hash_sink = oid_writer.writer();
                // The object header is absent from the pack data but is
                // included in the data hashed to form the object's ID.
                try hash_sink.print("{s} {}\x00", .{ @tagName(tag), object.uncompressed_length });

                var pump_buffer = std.fifo.LinearFifo(u8, .{ .Static = 4096 }).init();
                try pump_buffer.pump(inflated_counter.reader(), hash_sink);
                if (inflated_counter.bytes_read != object.uncompressed_length) return error.InvalidObject;

                const oid = oid_writer.hasher.finalResult();
                const crc32 = crc_reader.hasher.final();
                try index_entries.put(allocator, oid, .{ .offset = entry_offset, .crc32 = crc32 });
            },
            inline .ofs_delta, .ref_delta => |delta| {
                var inflater = try std.compress.zlib.decompressStream(allocator, crc_reader.reader());
                defer inflater.deinit();
                var inflated_counter = std.io.countingReader(inflater.reader());

                // The delta contents are discarded here; the entry is only
                // consumed so that its length and CRC-32 are known.
                var pump_buffer = std.fifo.LinearFifo(u8, .{ .Static = 4096 }).init();
                try pump_buffer.pump(inflated_counter.reader(), std.io.null_writer);
                if (inflated_counter.bytes_read != delta.uncompressed_length) return error.InvalidObject;

                const crc32 = crc_reader.hasher.final();
                try pending_deltas.append(allocator, .{ .offset = entry_offset, .crc32 = crc32 });
            },
        }
    }

    const computed_checksum = hashed.hasher.finalResult();
    // Read the recorded checksum below the hashing layer so that it is not
    // itself hashed.
    const recorded_checksum = try buffered.reader().readBytesNoEof(Sha1.digest_length);
    if (!mem.eql(u8, &computed_checksum, &recorded_checksum)) return error.CorruptedPack;

    // The recorded checksum must be the last thing in the file.
    if (buffered.reader().readByte()) |_| {
        return error.InvalidFormat;
    } else |err| switch (err) {
        error.EndOfStream => return computed_checksum,
        else => |other| return other,
    }
}
/// Attempts to determine the final object ID of the deltified object recorded
/// in `delta`.
///
/// Walks the chain of deltas back from `delta.offset` until reaching either an
/// object already present in `cache` or a non-delta object (which is read and
/// added to `cache`), then applies the deltas to reconstruct the object and
/// hashes it together with its object header.
///
/// Returns null if this is not yet possible, namely when the chain contains a
/// ref-based delta whose base object is not in `index_entries`.
///
/// Returns `error.InvalidObject` when an offset-based delta refers to itself
/// (offset 0) or to a position before the start of the packfile.
fn indexPackHashDelta(
    allocator: Allocator,
    pack: std.fs.File,
    delta: IndexEntry,
    index_entries: std.AutoHashMapUnmanaged(Oid, IndexEntry),
    cache: *ObjectCache,
) !?Oid {
    // Figure out the chain of deltas to resolve
    var base_offset = delta.offset;
    var delta_offsets = std.ArrayListUnmanaged(u64){};
    defer delta_offsets.deinit(allocator);
    const base_object = while (true) {
        if (cache.get(base_offset)) |cached_object| break cached_object;

        try pack.seekTo(base_offset);
        const base_header = try EntryHeader.read(pack.reader());
        switch (base_header) {
            .ofs_delta => |ofs_delta| {
                // An offset of zero would make the entry its own base: the
                // walk would then revisit the same header indefinitely,
                // growing `delta_offsets` until allocation fails.
                if (ofs_delta.offset == 0) return error.InvalidObject;
                try delta_offsets.append(allocator, base_offset);
                base_offset = std.math.sub(u64, base_offset, ofs_delta.offset) catch return error.InvalidObject;
            },
            .ref_delta => |ref_delta| {
                try delta_offsets.append(allocator, base_offset);
                base_offset = (index_entries.get(ref_delta.base_object) orelse return null).offset;
            },
            else => {
                const base_data = try readObjectRaw(allocator, pack.reader(), base_header.uncompressedLength());
                // The data is only freed here if handing it to the cache fails.
                errdefer allocator.free(base_data);
                const new_object: Object = .{ .type = base_header.objectType(), .data = base_data };
                try cache.put(allocator, base_offset, new_object);
                break new_object;
            },
        }
    };

    const resolved_data = try resolveDeltaChain(allocator, pack, base_object, delta_offsets.items, cache);

    // The object ID is the SHA-1 of the object header ("<type> <length>\0")
    // followed by the object data.
    var entry_hasher = Sha1.init(.{});
    var entry_hashed_writer = hashedWriter(std.io.null_writer, &entry_hasher);
    try entry_hashed_writer.writer().print("{s} {}\x00", .{ @tagName(base_object.type), resolved_data.len });
    entry_hasher.update(resolved_data);
    return entry_hasher.finalResult();
}
/// Resolves a chain of deltas, returning the data of the final object.
///
/// Starting from `base_object`, the deltas located at `delta_offsets` are
/// applied in reverse order (last element first). For each delta, `pack` is
/// repositioned to the delta's offset, the delta is read and expanded against
/// the current base, and the result is stored in `cache` under the delta's
/// offset before becoming the base for the next delta.
///
/// The returned slice is either `base_object.data` (when `delta_offsets` is
/// empty) or data that has been handed to `cache`; the caller does not free it.
///
/// Returns `error.InvalidObject` when the base size recorded in a delta does
/// not match the actual base, or when the expanded data does not have the size
/// recorded in the delta. Returns `error.ObjectTooLarge` when the recorded
/// result size does not fit in `usize`.
fn resolveDeltaChain(
    allocator: Allocator,
    pack: std.fs.File,
    base_object: Object,
    delta_offsets: []const u64,
    cache: *ObjectCache,
) ![]const u8 {
    var base_data = base_object.data;
    var i: usize = delta_offsets.len;
    while (i > 0) {
        i -= 1;

        const delta_offset = delta_offsets[i];
        try pack.seekTo(delta_offset);
        const delta_header = try EntryHeader.read(pack.reader());
        const delta_data = try readObjectRaw(allocator, pack.reader(), delta_header.uncompressedLength());
        defer allocator.free(delta_data);
        var delta_stream = std.io.fixedBufferStream(delta_data);
        const delta_reader = delta_stream.reader();

        // The delta begins with the size of the base it applies to, followed
        // by the size of the result. A delta applied to a base of a different
        // size than it was created for cannot yield the intended object.
        const base_size = try readSizeVarInt(delta_reader);
        if (base_size != base_data.len) return error.InvalidObject;
        const expanded_size = try readSizeVarInt(delta_reader);

        const expanded_alloc_size = std.math.cast(usize, expanded_size) orelse return error.ObjectTooLarge;
        const expanded_data = try allocator.alloc(u8, expanded_alloc_size);
        // Only reached if something fails before the cache accepts the data.
        errdefer allocator.free(expanded_data);
        var expanded_delta_stream = std.io.fixedBufferStream(expanded_data);
        var base_stream = std.io.fixedBufferStream(base_data);
        try expandDelta(&base_stream, delta_reader, expanded_delta_stream.writer());
        if (expanded_delta_stream.pos != expanded_size) return error.InvalidObject;

        try cache.put(allocator, delta_offset, .{ .type = base_object.type, .data = expanded_data });
        base_data = expanded_data;
    }
    return base_data;
}
/// Decompresses exactly `size` bytes of zlib-compressed object data from
/// `reader` and returns them; the caller owns the returned slice.
///
/// `reader` is wrapped in a buffered reader, so more bytes than required may
/// be consumed from it and its position after returning is not reliable.
///
/// Returns `error.ObjectTooLarge` if `size` does not fit in `usize`, and
/// `error.InvalidFormat` if the decompressed stream holds more than `size`
/// bytes.
fn readObjectRaw(allocator: Allocator, reader: anytype, size: u64) ![]u8 {
    const byte_count = std.math.cast(usize, size) orelse return error.ObjectTooLarge;

    var buffered = std.io.bufferedReader(reader);
    var inflater = try std.compress.zlib.decompressStream(allocator, buffered.reader());
    defer inflater.deinit();

    const contents = try allocator.alloc(u8, byte_count);
    errdefer allocator.free(contents);
    try inflater.reader().readNoEof(contents);

    // The stream must end exactly where the object does.
    if (inflater.reader().readByte()) |_| {
        return error.InvalidFormat;
    } else |err| switch (err) {
        error.EndOfStream => return contents,
        else => |other| return other,
    }
}
/// Expands delta data from `delta_reader` to `writer`. `base_object` must
/// support `reader` and `seekTo` (such as a `std.io.FixedBufferStream`).
///
/// Instructions are read until `delta_reader` is exhausted. An instruction
/// byte with its high bit set copies a range of `base_object` to `writer`; the
/// low seven bits select which offset and size bytes follow, and a size of
/// zero stands for 0x10000. Any other non-zero instruction byte is a count of
/// literal bytes that follow in the delta and are written out verbatim.
///
/// Returns `error.InvalidDeltaInstruction` for a zero instruction byte or for
/// a copy that extends past the end of `base_object`, and `error.EndOfStream`
/// if the delta ends in the middle of an instruction or its literal data.
///
/// The format of the delta data is documented in
/// [pack-format](https://git-scm.com/docs/pack-format).
fn expandDelta(base_object: anytype, delta_reader: anytype, writer: anytype) !void {
    while (true) {
        const inst: packed struct { value: u7, copy: bool } = @bitCast(delta_reader.readByte() catch |e| switch (e) {
            error.EndOfStream => return,
            else => |other| return other,
        });
        if (inst.copy) {
            const available: packed struct {
                offset1: bool,
                offset2: bool,
                offset3: bool,
                offset4: bool,
                size1: bool,
                size2: bool,
                size3: bool,
            } = @bitCast(inst.value);
            // Offset and size bytes that are not present are taken to be zero.
            const offset_parts: packed struct { offset1: u8, offset2: u8, offset3: u8, offset4: u8 } = .{
                .offset1 = if (available.offset1) try delta_reader.readByte() else 0,
                .offset2 = if (available.offset2) try delta_reader.readByte() else 0,
                .offset3 = if (available.offset3) try delta_reader.readByte() else 0,
                .offset4 = if (available.offset4) try delta_reader.readByte() else 0,
            };
            const offset: u32 = @bitCast(offset_parts);
            const size_parts: packed struct { size1: u8, size2: u8, size3: u8 } = .{
                .size1 = if (available.size1) try delta_reader.readByte() else 0,
                .size2 = if (available.size2) try delta_reader.readByte() else 0,
                .size3 = if (available.size3) try delta_reader.readByte() else 0,
            };
            var size: u24 = @bitCast(size_parts);
            if (size == 0) size = 0x10000;
            try base_object.seekTo(offset);
            var copy_reader = std.io.limitedReader(base_object.reader(), size);
            var fifo = std.fifo.LinearFifo(u8, .{ .Static = 4096 }).init();
            try fifo.pump(copy_reader.reader(), writer);
            // The pump stops quietly when the base runs out; a copy that could
            // not be satisfied in full must not pass as a successful one.
            if (copy_reader.bytes_left != 0) return error.InvalidDeltaInstruction;
        } else if (inst.value != 0) {
            var data_reader = std.io.limitedReader(delta_reader, inst.value);
            var fifo = std.fifo.LinearFifo(u8, .{ .Static = 4096 }).init();
            try fifo.pump(data_reader.reader(), writer);
            // Likewise, the literal data must be present in full.
            if (data_reader.bytes_left != 0) return error.EndOfStream;
        } else {
            return error.InvalidDeltaInstruction;
        }
    }
}
/// A writer adapter that forwards data to `child_writer` and feeds the bytes
/// the child accepted to `hasher`.
///
/// `HasherType` must provide `update([]const u8)`; it may be a hasher value or
/// a pointer to one.
fn HashedWriter(
    comptime WriterType: type,
    comptime HasherType: type,
) type {
    return struct {
        child_writer: WriterType,
        hasher: HasherType,

        const Error = WriterType.Error;
        const Writer = std.io.Writer(*@This(), Error, write);

        /// Writes `buf` to the child writer and returns how many bytes it
        /// accepted.
        fn write(hashed_writer: *@This(), buf: []const u8) Error!usize {
            const amt = try hashed_writer.child_writer.write(buf);
            // Hash only what was actually written: after a short write the
            // caller presents the remaining bytes again, and hashing all of
            // `buf` here would then count those bytes twice.
            hashed_writer.hasher.update(buf[0..amt]);
            return amt;
        }

        fn writer(hashed_writer: *@This()) Writer {
            return .{ .context = hashed_writer };
        }
    };
}
/// Wraps `writer` in a `HashedWriter` that uses `hasher`, inferring both type
/// parameters from the arguments.
fn hashedWriter(
    writer: anytype,
    hasher: anytype,
) HashedWriter(@TypeOf(writer), @TypeOf(hasher)) {
    const Result = HashedWriter(@TypeOf(writer), @TypeOf(hasher));
    return Result{
        .child_writer = writer,
        .hasher = hasher,
    };
}
test "packfile indexing and checkout" {
    // The packfile contents can be checked independently of this file:
    //
    // 1. Create a new empty Git repository (`git init`)
    // 2. `git unpack-objects <path/to/testdata.pack`
    // 3. `git fsck` -> note the "dangling commit" ID (which matches the commit
    //    checked out below)
    // 4. `git checkout dd582c0720819ab7130b103635bd7271b9fd4feb`
    const pack_bytes = @embedFile("git/testdata/testrepo.pack");

    var tmp_git_dir = testing.tmpDir(.{});
    defer tmp_git_dir.cleanup();

    var pack_file = try tmp_git_dir.dir.createFile("testrepo.pack", .{ .read = true });
    defer pack_file.close();
    try pack_file.writeAll(pack_bytes);

    var index_file = try tmp_git_dir.dir.createFile("testrepo.idx", .{ .read = true });
    defer index_file.close();
    try indexPack(testing.allocator, pack_file, index_file.writer());

    // Arbitrary cap on the size of files read back while checking results
    // (all files in the test repo are known to be much smaller than this)
    const max_file_size = 4096;

    // testrepo.idx is generated by Git, and the index produced by this file
    // should be byte-for-byte identical to it. Running
    // `git verify-pack -v testrepo.pack` can verify this.
    const generated_index = try tmp_git_dir.dir.readFileAlloc(testing.allocator, "testrepo.idx", max_file_size);
    defer testing.allocator.free(generated_index);
    const reference_index = @embedFile("git/testdata/testrepo.idx");
    try testing.expectEqualSlices(u8, reference_index, generated_index);

    var repository = try Repository.init(testing.allocator, pack_file, index_file);
    defer repository.deinit();

    var worktree = testing.tmpIterableDir(.{});
    defer worktree.cleanup();

    const commit_id = try parseOid("dd582c0720819ab7130b103635bd7271b9fd4feb");
    try repository.checkout(worktree.iterable_dir.dir, commit_id);

    const expected_files: []const []const u8 = &.{
        "dir/file",
        "dir/subdir/file",
        "dir/subdir/file2",
        "dir2/file",
        "dir3/file",
        "dir3/file2",
        "file",
        "file2",
        "file3",
        "file4",
        "file5",
        "file6",
        "file7",
        "file8",
        "file9",
    };

    // Gather every regular file in the worktree, using '/' as the separator
    // regardless of platform, and compare the sorted list with the expectation.
    var found_files: std.ArrayListUnmanaged([]u8) = .{};
    defer found_files.deinit(testing.allocator);
    defer for (found_files.items) |found| testing.allocator.free(found);

    var walker = try worktree.iterable_dir.walk(testing.allocator);
    defer walker.deinit();
    while (try walker.next()) |entry| {
        if (entry.kind != .file) continue;
        const normalized = try testing.allocator.dupe(u8, entry.path);
        errdefer testing.allocator.free(normalized);
        mem.replaceScalar(u8, normalized, std.fs.path.sep, '/');
        try found_files.append(testing.allocator, normalized);
    }

    const PathOrder = struct {
        fn lessThan(_: void, lhs: []u8, rhs: []u8) bool {
            return mem.lessThan(u8, lhs, rhs);
        }
    };
    mem.sortUnstable([]u8, found_files.items, {}, PathOrder.lessThan);
    try testing.expectEqualDeep(expected_files, found_files.items);

    // Spot-check the contents of one file.
    const expected_file_contents =
        \\revision 1
        \\revision 2
        \\revision 4
        \\revision 5
        \\revision 7
        \\revision 8
        \\revision 9
        \\revision 10
        \\revision 12
        \\revision 13
        \\revision 14
        \\revision 18
        \\revision 19
        \\
    ;
    const checked_out_contents = try worktree.iterable_dir.dir.readFileAlloc(testing.allocator, "file", max_file_size);
    defer testing.allocator.free(checked_out_contents);
    try testing.expectEqualStrings(expected_file_contents, checked_out_contents);
}
/// Indexes a packfile and checks out one of its commits. Intended for
/// experimenting with and benchmarking possible optimizations to the indexing
/// and checkout behavior.
///
/// Expects exactly three arguments: the packfile path, the commit ID and the
/// worktree directory. The index is written to `.git/idx` inside the worktree.
pub fn main() !void {
    const allocator = std.heap.c_allocator;

    const args = try std.process.argsAlloc(allocator);
    defer std.process.argsFree(allocator, args);
    // Arguments: packfile commit worktree
    if (args.len != 4) return error.InvalidArguments;
    const pack_path = args[1];
    const commit_arg = args[2];
    const worktree_path = args[3];

    var pack_file = try std.fs.cwd().openFile(pack_path, .{});
    defer pack_file.close();
    const commit = try parseOid(commit_arg);
    var worktree = try std.fs.cwd().makeOpenPath(worktree_path, .{});
    defer worktree.close();
    var git_dir = try worktree.makeOpenPath(".git", .{});
    defer git_dir.close();

    std.debug.print("Starting index...\n", .{});
    var index_file = try git_dir.createFile("idx", .{ .read = true });
    defer index_file.close();
    {
        // Buffer the index output and push it all the way to disk before the
        // file is read back during checkout.
        var buffered_index = std.io.bufferedWriter(index_file.writer());
        try indexPack(allocator, pack_file, buffered_index.writer());
        try buffered_index.flush();
    }
    try index_file.sync();

    std.debug.print("Starting checkout...\n", .{});
    var repository = try Repository.init(allocator, pack_file, index_file);
    defer repository.deinit();
    try repository.checkout(worktree, commit);
}