diff --git a/build.zig b/build.zig index 8777060a89..c4e9d19373 100644 --- a/build.zig +++ b/build.zig @@ -336,6 +336,7 @@ pub fn build(b: *std.Build) !void { artifact.linkSystemLibrary("version"); artifact.linkSystemLibrary("uuid"); artifact.linkSystemLibrary("ole32"); + artifact.linkSystemLibrary("shlwapi"); } } } @@ -712,6 +713,7 @@ fn addStaticLlvmOptionsToExe(exe: *std.Build.Step.Compile) !void { exe.linkSystemLibrary("version"); exe.linkSystemLibrary("uuid"); exe.linkSystemLibrary("ole32"); + exe.linkSystemLibrary("shlwapi"); } } diff --git a/lib/std/Uri.zig b/lib/std/Uri.zig index b27a3d7012..e2c23e5a1d 100644 --- a/lib/std/Uri.zig +++ b/lib/std/Uri.zig @@ -134,6 +134,7 @@ pub const ParseError = error{ UnexpectedCharacter, InvalidFormat, InvalidPort }; /// original `text`. Each component that is provided, will be non-`null`. pub fn parseWithoutScheme(text: []const u8) ParseError!Uri { var reader = SliceReader{ .slice = text }; + var uri = Uri{ .scheme = "", .user = null, @@ -145,13 +146,14 @@ pub fn parseWithoutScheme(text: []const u8) ParseError!Uri { .fragment = null, }; - if (reader.peekPrefix("//")) { // authority part + if (reader.peekPrefix("//")) a: { // authority part std.debug.assert(reader.get().? == '/'); std.debug.assert(reader.get().? 
== '/'); - const authority = reader.readUntil(isAuthoritySeparator); - if (authority.len == 0) - return error.InvalidFormat; + var authority = reader.readUntil(isAuthoritySeparator); + if (authority.len == 0) { + if (reader.peekPrefix("/")) break :a else return error.InvalidFormat; + } var start_of_host: usize = 0; if (std.mem.indexOf(u8, authority, "@")) |index| { @@ -224,7 +226,6 @@ pub fn format( try writer.writeAll(":"); if (uri.host) |host| { try writer.writeAll("//"); - if (uri.user) |user| { try writer.writeAll(user); if (uri.password) |password| { @@ -486,6 +487,23 @@ test "should fail gracefully" { try std.testing.expectEqual(@as(ParseError!Uri, error.InvalidFormat), parse("foobar://")); } +test "file" { + const parsed = try parse("file:///"); + try std.testing.expectEqualSlices(u8, "file", parsed.scheme); + try std.testing.expectEqual(@as(?[]const u8, null), parsed.host); + try std.testing.expectEqualSlices(u8, "/", parsed.path); + + const parsed2 = try parse("file:///an/absolute/path/to/something"); + try std.testing.expectEqualSlices(u8, "file", parsed2.scheme); + try std.testing.expectEqual(@as(?[]const u8, null), parsed2.host); + try std.testing.expectEqualSlices(u8, "/an/absolute/path/to/something", parsed2.path); + + const parsed3 = try parse("file://localhost/an/absolute/path/to/another/thing/"); + try std.testing.expectEqualSlices(u8, "file", parsed3.scheme); + try std.testing.expectEqualSlices(u8, "localhost", parsed3.host.?); + try std.testing.expectEqualSlices(u8, "/an/absolute/path/to/another/thing/", parsed3.path); +} + test "scheme" { try std.testing.expectEqualSlices(u8, "http", (try parse("http:_")).scheme); try std.testing.expectEqualSlices(u8, "scheme-mee", (try parse("scheme-mee:_")).scheme); @@ -695,3 +713,20 @@ test "URI query escaping" { defer std.testing.allocator.free(formatted_uri); try std.testing.expectEqualStrings("/?response-content-type=application%2Foctet-stream", formatted_uri); } + +test "format" { + const uri = Uri{ + 
.scheme = "file", + .user = null, + .password = null, + .host = null, + .port = null, + .path = "/foo/bar/baz", + .query = null, + .fragment = null, + }; + var buf = std.ArrayList(u8).init(std.testing.allocator); + defer buf.deinit(); + try uri.format("+/", .{}, buf.writer()); + try std.testing.expectEqualSlices(u8, "file:/foo/bar/baz", buf.items); +} diff --git a/lib/std/os/windows.zig b/lib/std/os/windows.zig index d40fee8db2..3522f238ec 100644 --- a/lib/std/os/windows.zig +++ b/lib/std/os/windows.zig @@ -30,6 +30,7 @@ pub const gdi32 = @import("windows/gdi32.zig"); pub const winmm = @import("windows/winmm.zig"); pub const crypt32 = @import("windows/crypt32.zig"); pub const nls = @import("windows/nls.zig"); +pub const shlwapi = @import("windows/shlwapi.zig"); pub const self_process_handle = @as(HANDLE, @ptrFromInt(maxInt(usize))); diff --git a/lib/std/os/windows/shlwapi.zig b/lib/std/os/windows/shlwapi.zig new file mode 100644 index 0000000000..0f0ceed576 --- /dev/null +++ b/lib/std/os/windows/shlwapi.zig @@ -0,0 +1,13 @@ +const std = @import("../../std.zig"); +const windows = std.os.windows; + +const DWORD = windows.DWORD; +const WINAPI = windows.WINAPI; +const HRESULT = windows.HRESULT; +const LPCSTR = windows.LPCSTR; +const LPSTR = windows.LPSTR; +const LPWSTR = windows.LPWSTR; +const LPCWSTR = windows.LPCWSTR; + +pub extern "shlwapi" fn PathCreateFromUrlW(pszUrl: LPCWSTR, pszPath: LPWSTR, pcchPath: *DWORD, dwFlags: DWORD) callconv(WINAPI) HRESULT; +pub extern "shlwapi" fn PathCreateFromUrlA(pszUrl: LPCSTR, pszPath: LPSTR, pcchPath: *DWORD, dwFlags: DWORD) callconv(WINAPI) HRESULT; diff --git a/src/Manifest.zig b/src/Manifest.zig index 199663556d..2ff54e6132 100644 --- a/src/Manifest.zig +++ b/src/Manifest.zig @@ -2,8 +2,11 @@ pub const basename = "build.zig.zon"; pub const Hash = std.crypto.hash.sha2.Sha256; pub const Dependency = struct { - url: []const u8, - url_tok: Ast.TokenIndex, + location: union(enum) { + url: []const u8, + path: []const u8, + }, + 
location_tok: Ast.TokenIndex, hash: ?[]const u8, hash_tok: Ast.TokenIndex, }; @@ -218,12 +221,12 @@ const Parse = struct { }; var dep: Dependency = .{ - .url = undefined, - .url_tok = undefined, + .location = undefined, + .location_tok = undefined, .hash = null, .hash_tok = undefined, }; - var have_url = false; + var has_location = false; for (struct_init.ast.fields) |field_init| { const name_token = ast.firstToken(field_init) - 2; @@ -232,12 +235,29 @@ const Parse = struct { // things manually provides an opportunity to do any additional verification // that is desirable on a per-field basis. if (mem.eql(u8, field_name, "url")) { - dep.url = parseString(p, field_init) catch |err| switch (err) { - error.ParseFailure => continue, - else => |e| return e, + if (has_location) { + return fail(p, main_tokens[field_init], "dependency should specify only one of 'url' and 'path' fields.", .{}); + } + dep.location = .{ + .url = parseString(p, field_init) catch |err| switch (err) { + error.ParseFailure => continue, + else => |e| return e, + }, }; - dep.url_tok = main_tokens[field_init]; - have_url = true; + has_location = true; + dep.location_tok = main_tokens[field_init]; + } else if (mem.eql(u8, field_name, "path")) { + if (has_location) { + return fail(p, main_tokens[field_init], "dependency should specify only one of 'url' and 'path' fields.", .{}); + } + dep.location = .{ + .path = parseString(p, field_init) catch |err| switch (err) { + error.ParseFailure => continue, + else => |e| return e, + }, + }; + has_location = true; + dep.location_tok = main_tokens[field_init]; } else if (mem.eql(u8, field_name, "hash")) { dep.hash = parseHash(p, field_init) catch |err| switch (err) { error.ParseFailure => continue, @@ -250,8 +270,8 @@ const Parse = struct { } } - if (!have_url) { - try appendError(p, main_tokens[node], "dependency is missing 'url' field", .{}); + if (!has_location) { + try appendError(p, main_tokens[node], "dependency requires location field, one of 'url' or 
'path'.", .{}); } return dep; diff --git a/src/Package.zig b/src/Package.zig index d170baeae5..2429afd073 100644 --- a/src/Package.zig +++ b/src/Package.zig @@ -316,56 +316,51 @@ pub fn fetchAndAddDependencies( for (manifest.dependencies.keys(), 0..) |name, i| { const dep = deps_list[i]; - const sub = try fetchAndUnpack( - thread_pool, - http_client, + const sub_pkg = try getCachedPackage( + http_client.allocator, global_cache_directory, dep, report, all_modules, root_prog_node, - name, - ); + ) orelse m: { + const mod = try fetchAndUnpack( + thread_pool, + http_client, + directory, + global_cache_directory, + dep, + report, + all_modules, + root_prog_node, + name, + ); - if (sub.mod) |mod| { - if (!sub.found_existing) { - try mod.fetchAndAddDependencies( - deps_pkg, - arena, - thread_pool, - http_client, - mod.root_src_directory, - global_cache_directory, - local_cache_directory, - dependencies_source, - error_bundle, - all_modules, - root_prog_node, - dep.hash.?, - ); - } + try mod.fetchAndAddDependencies( + deps_pkg, + arena, + thread_pool, + http_client, + mod.root_src_directory, + global_cache_directory, + local_cache_directory, + dependencies_source, + error_bundle, + all_modules, + root_prog_node, + dep.hash.?, + ); - try pkg.add(gpa, name, mod); - if (deps_pkg.table.get(dep.hash.?)) |other_sub| { - // This should be the same package (and hence module) since it's the same hash - // TODO: dedup multiple versions of the same package - assert(other_sub == mod); - } else { - try deps_pkg.add(gpa, dep.hash.?, mod); - } - } else if (!sub.found_existing) { - const pkg_dir_sub_path = "p" ++ fs.path.sep_str ++ (dep.hash.?)[0..hex_multihash_len]; - const build_root = try global_cache_directory.join(arena, &.{pkg_dir_sub_path}); - try dependencies_source.writer().print( - \\ pub const {} = struct {{ - \\ pub const build_root = "{}"; - \\ pub const deps: []const struct {{ []const u8, []const u8 }} = &.{{}}; - \\ }}; - \\ - , .{ - std.zig.fmtId(dep.hash.?), - 
std.zig.fmtEscapes(build_root),
-            });
+            break :m mod;
+        };
+
+        try pkg.add(gpa, name, sub_pkg);
+        if (deps_pkg.table.get(dep.hash.?)) |other_sub| {
+            // This should be the same package (and hence module) since it's the same hash
+            // TODO: dedup multiple versions of the same package
+            assert(other_sub == sub_pkg);
+        } else {
+            try deps_pkg.add(gpa, dep.hash.?, sub_pkg);
         }
     }
 
@@ -490,6 +485,352 @@ const Report = struct {
     }
 };
 
+/// A dependency source that is ready to be fetched, classified by URI scheme.
+const FetchLocation = union(SourceType) {
+    /// The absolute path to a file or directory.
+    /// This may be a file that requires unpacking (such as a .tar.gz),
+    /// or the path to the root directory of a package.
+    file: []const u8,
+    /// The URI that `fetch` issues a GET request against.
+    http_request: std.Uri,
+
+    /// Builds the location for `uri` according to its scheme.
+    /// For the `file` scheme the URI path is decoded (with `PathCreateFromUrlA` on
+    /// Windows, `std.Uri.unescapeString` elsewhere) and resolved against
+    /// `directory.path`; the result is allocated with `gpa` and freed by `deinit`.
+    /// Any scheme other than `file`, `http` or `https` is reported through `report`.
+    /// NOTE(review): `directory.path.?` assumes the directory carries a path - confirm
+    /// against callers.
+    pub fn init(gpa: Allocator, uri: std.Uri, directory: Compilation.Directory, dep: Manifest.Dependency, report: Report) !FetchLocation {
+        const source_type = getPackageSourceType(uri) catch
+            return report.fail(dep.location_tok, "Unknown scheme: {s}", .{uri.scheme});
+
+        return switch (source_type) {
+            .file => f: {
+                const path = if (builtin.os.tag == .windows) p: {
+                    var uri_str = std.ArrayList(u8).init(gpa);
+                    defer uri_str.deinit();
+                    try uri.format("+/", .{}, uri_str.writer());
+                    const uri_str_z = try gpa.dupeZ(u8, uri_str.items);
+                    defer gpa.free(uri_str_z);
+
+                    var buf: [std.os.windows.MAX_PATH:0]u8 = undefined;
+                    var buf_len: std.os.windows.DWORD = std.os.windows.MAX_PATH;
+                    const result = std.os.windows.shlwapi.PathCreateFromUrlA(uri_str_z, &buf, &buf_len, 0);
+
+                    if (result != std.os.windows.S_OK) return report.fail(dep.location_tok, "Invalid URI", .{});
+
+                    break :p try gpa.dupe(u8, buf[0..buf_len]);
+                } else try std.Uri.unescapeString(gpa, uri.path);
+                defer gpa.free(path);
+
+                const new_path = try fs.path.resolve(gpa, &.{ directory.path.?, path });
+
+                break :f .{ .file = new_path };
+            },
+            .http_request => .{ .http_request = uri },
+        };
+    }
+
+    /// Frees the path owned by a `.file` location; `.http_request` owns nothing.
+    pub fn deinit(f: *FetchLocation, gpa: Allocator) void {
+        switch (f.*) {
+            .file => |path| gpa.free(path),
+            .http_request => {},
+        }
+        f.* = undefined;
+    }
+
+    const SourceType = enum {
+        file,
+        http_request,
+    };
+
+    /// Maps a URI scheme to the kind of source it designates.
+    fn getPackageSourceType(uri: std.Uri) error{UnknownScheme}!SourceType {
+        const package_source_map = std.ComptimeStringMap(
+            SourceType,
+            .{
+                .{ "file", .file },
+                .{ "http", .http_request },
+                .{ "https", .http_request },
+            },
+        );
+        return package_source_map.get(uri.scheme) orelse error.UnknownScheme;
+    }
+
+    /// Decides whether `path` names a directory: a trailing path separator means
+    /// yes, any file extension means no, otherwise the path is opened through
+    /// `root_dir.handle` (`error.NotDir` yields false, other errors are returned).
+    pub fn isDirectory(path: []const u8, root_dir: Compilation.Directory) !bool {
+        return if (mem.endsWith(u8, path, std.fs.path.sep_str))
+            true
+        else if (std.fs.path.extension(path).len > 0)
+            false
+        else d: {
+            // It's common to write directories without a trailing '/'.
+            // This is some special casing logic to detect directories if
+            // the file type cannot be determined from the extension.
+            var dir = root_dir.handle.openDir(path, .{}) catch |err| switch (err) {
+                error.NotDir => break :d false,
+                else => break :d err,
+            };
+            defer dir.close();
+            break :d true;
+        };
+    }
+
+    /// Opens the location for reading.
+    /// The returned resource owns a copy of the path (allocated with `gpa`) and an
+    /// open directory, file, or in-flight HTTP request; release it with
+    /// `ReadableResource.deinit`. Nothing stays allocated or open if an error is
+    /// returned.
+    pub fn fetch(
+        f: FetchLocation,
+        gpa: Allocator,
+        root_dir: Compilation.Directory,
+        http_client: *std.http.Client,
+        dep: Manifest.Dependency,
+        report: Report,
+    ) !ReadableResource {
+        switch (f) {
+            .file => |file| {
+                const is_dir = isDirectory(file, root_dir) catch
+                    return report.fail(dep.location_tok, "File not found: {s}", .{file});
+
+                const owned_path = try gpa.dupe(u8, file);
+                // Opening below can still fail; do not leak the copy when it does.
+                errdefer gpa.free(owned_path);
+
+                return if (is_dir)
+                    .{
+                        .path = owned_path,
+                        .resource = .{ .directory = try fs.openIterableDirAbsolute(file, .{}) },
+                    }
+                else
+                    .{
+                        .path = owned_path,
+                        .resource = .{ .file = try fs.openFileAbsolute(file, .{}) },
+                    };
+            },
+            .http_request => |uri| {
+                var h = std.http.Headers{ .allocator = gpa };
+                defer h.deinit();
+
+                var req = try http_client.request(.GET, uri, h, .{});
+                // Ownership moves into the returned resource only on success.
+                errdefer req.deinit();
+
+                try req.start(.{});
+                try req.wait();
+
+                if (req.response.status != .ok) {
+                    return report.fail(dep.location_tok, "Expected response status '200 OK' got '{} {s}'", .{
+                        @intFromEnum(req.response.status),
+                        req.response.status.phrase() orelse "",
+                    });
+                }
+
+                return .{
+                    .path = try gpa.dupe(u8, uri.path),
+                    .resource = .{ .http_request = req },
+                };
+            },
+        }
+    }
+};
+
+/// An opened dependency source together with the path it was opened from.
+const ReadableResource = struct {
+    path: []const u8,
+    resource: union(enum) {
+        file: fs.File,
+        directory: fs.IterableDir,
+        http_request: std.http.Client.Request,
+    },
+
+    /// Unpack the package into the global cache directory.
+    /// If `rr` does not require unpacking (for example, if it is a directory), then no caching is performed.
+    /// In either case, the hash is computed and returned along with the path to the package.
+    /// The returned `dir_path` is allocated with `allocator`.
+    pub fn unpack(
+        rr: *ReadableResource,
+        allocator: Allocator,
+        thread_pool: *ThreadPool,
+        global_cache_directory: Compilation.Directory,
+        dep: Manifest.Dependency,
+        report: Report,
+        pkg_prog_node: *std.Progress.Node,
+    ) !PackageLocation {
+        switch (rr.resource) {
+            .directory => |dir| {
+                const actual_hash = try computePackageHash(thread_pool, dir);
+                return .{
+                    .hash = actual_hash,
+                    .dir_path = try allocator.dupe(u8, rr.path),
+                };
+            },
+            inline .file, .http_request => |*r| {
+                const s = fs.path.sep_str;
+                const rand_int = std.crypto.random.int(u64);
+                const tmp_dir_sub_path = "tmp" ++ s ++ Manifest.hex64(rand_int);
+
+                const actual_hash = h: {
+                    var tmp_directory: Compilation.Directory = d: {
+                        const path = try global_cache_directory.join(allocator, &.{tmp_dir_sub_path});
+                        errdefer allocator.free(path);
+
+                        const iterable_dir = try global_cache_directory.handle.makeOpenPathIterable(tmp_dir_sub_path, .{});
+                        errdefer iterable_dir.close();
+
+                        break :d .{
+                            .path = path,
+                            .handle = iterable_dir.dir,
+                        };
+                    };
+                    defer tmp_directory.closeAndFree(allocator);
+
+                    const opt_content_length = try rr.getSize();
+
+                    var prog_reader: ProgressReader(@TypeOf(r.reader())) = .{
+                        .child_reader = r.reader(),
+                        .prog_node = pkg_prog_node,
+                        .unit = if (opt_content_length) |content_length| unit: {
+                            const kib = content_length / 1024;
+                            const mib = kib / 1024;
+                            if (mib > 0) {
+                                pkg_prog_node.setEstimatedTotalItems(@intCast(mib));
+                                pkg_prog_node.setUnit("MiB");
+                                break :unit .mib;
+                            } else {
+                                pkg_prog_node.setEstimatedTotalItems(@intCast(@max(1, kib)));
+                                pkg_prog_node.setUnit("KiB");
+                                break :unit .kib;
+                            }
+                        } else .any,
+                    };
+                    pkg_prog_node.context.refresh();
+
+                    switch (try rr.getFileType(dep, report)) {
+                        .@"tar.gz" => try unpackTarball(allocator, prog_reader, tmp_directory.handle, std.compress.gzip),
+                        // I have not checked what buffer sizes the xz decompression implementation uses
+                        // by default, so it goes through the same buffered reader in `unpackTarball` as gzip.
+                        .@"tar.xz" => try unpackTarball(allocator, prog_reader, tmp_directory.handle, std.compress.xz),
+                    }
+
+                    // Unpack completed - stop showing amount as progress
+                    pkg_prog_node.setEstimatedTotalItems(0);
+                    pkg_prog_node.setCompletedItems(0);
+                    pkg_prog_node.context.refresh();
+
+                    // TODO: delete files not included in the package prior to computing the package hash.
+                    // for example, if the ini file has directives to include/not include certain files,
+                    // apply those rules directly to the filesystem right here. This ensures that files
+                    // not protected by the hash are not present on the file system.
+
+                    break :h try computePackageHash(thread_pool, .{ .dir = tmp_directory.handle });
+                };
+
+                const pkg_dir_sub_path = "p" ++ s ++ Manifest.hexDigest(actual_hash);
+                const unpacked_path = try global_cache_directory.join(allocator, &.{pkg_dir_sub_path});
+                errdefer allocator.free(unpacked_path);
+
+                const relative_unpacked_path = try fs.path.relative(allocator, global_cache_directory.path.?, unpacked_path);
+                defer allocator.free(relative_unpacked_path);
+                try renameTmpIntoCache(global_cache_directory.handle, tmp_dir_sub_path, relative_unpacked_path);
+
+                return .{
+                    .hash = actual_hash,
+                    .dir_path = unpacked_path,
+                };
+            },
+        }
+    }
+
+    /// Archive formats that `unpack` knows how to extract.
+    const FileType = enum {
+        @"tar.gz",
+        @"tar.xz",
+    };
+
+    /// Returns the size used for progress reporting: the response's
+    /// `content_length` for HTTP (may be null) or the file size from metadata.
+    /// Must not be called on a directory resource.
+    pub fn getSize(rr: ReadableResource) !?u64 {
+        switch (rr.resource) {
+            // TODO: Handle case of chunked content-length
+            .http_request => |req| return req.response.content_length,
+            .file => |f| return (try f.metadata()).size(),
+            .directory => unreachable,
+        }
+    }
+
+    /// Determines the archive format: from the path suffix for files, from the
+    /// `Content-Type` (and, for `application/octet-stream`, `Content-Disposition`)
+    /// headers for HTTP responses. Unsupported values are reported through `report`.
+    pub fn getFileType(rr: ReadableResource, dep: Manifest.Dependency, report: Report) !FileType {
+        switch (rr.resource) {
+            .file => {
+                return if (mem.endsWith(u8, rr.path, ".tar.gz"))
+                    .@"tar.gz"
+                else if (mem.endsWith(u8, rr.path, ".tar.xz"))
+                    .@"tar.xz"
+                else
+                    return report.fail(dep.location_tok, "Unknown file type", .{});
+            },
+            .directory => return error.IsDir,
+            .http_request => |req| {
+                const content_type = req.response.headers.getFirstValue("Content-Type") orelse
+                    return report.fail(dep.location_tok, "Missing 'Content-Type' header", .{});
+
+                // If the response has a different content type than the URI indicates, override
+                // the previously assumed file type.
+                return if (ascii.eqlIgnoreCase(content_type, "application/gzip") or
+                    ascii.eqlIgnoreCase(content_type, "application/x-gzip") or
+                    ascii.eqlIgnoreCase(content_type, "application/tar+gzip"))
+                    .@"tar.gz"
+                else if (ascii.eqlIgnoreCase(content_type, "application/x-xz"))
+                    .@"tar.xz"
+                else if (ascii.eqlIgnoreCase(content_type, "application/octet-stream")) ty: {
+                    // support gitlab tarball urls such as https://gitlab.com/<namespace>/<project>/-/archive/<sha>/<project>-<sha>.tar.gz
+                    // whose content-disposition header is: 'attachment; filename="<project>-<sha>.tar.gz"'
+                    const content_disposition = req.response.headers.getFirstValue("Content-Disposition") orelse
+                        return report.fail(dep.location_tok, "Missing 'Content-Disposition' header for Content-Type=application/octet-stream", .{});
+                    if (mem.startsWith(u8, content_disposition, "attachment;") and
+                        mem.endsWith(u8, content_disposition, ".tar.gz\""))
+                    {
+                        break :ty .@"tar.gz";
+                    } else return report.fail(dep.location_tok, "Unsupported 'Content-Disposition' header value: '{s}' for Content-Type=application/octet-stream", .{content_disposition});
+                } else return report.fail(dep.location_tok, "Unrecognized value for 'Content-Type' header: {s}", .{content_type});
+            },
+        }
+    }
+
+    /// Frees `path` and closes the underlying file, directory, or request.
+    pub fn deinit(rr: *ReadableResource, gpa: Allocator) void {
+        gpa.free(rr.path);
+        switch (rr.resource) {
+            .file => |file| file.close(),
+            .directory => |*dir| dir.close(),
+            .http_request => |*req| req.deinit(),
+        }
+        rr.* = undefined;
+    }
+};
+
+/// The result of `ReadableResource.unpack`: the package hash and the directory
+/// holding the package contents.
+pub const PackageLocation = struct {
+    hash: [Manifest.Hash.digest_length]u8,
+    /// Owned; freed by `deinit`.
+    dir_path: []const u8,
+
+    pub fn deinit(pl: *PackageLocation, allocator: Allocator) void {
+        allocator.free(pl.dir_path);
+        pl.* = undefined;
+    }
+};
+
 const hex_multihash_len = 2 * Manifest.multihash_len;
 const MultiHashHexDigest = [hex_multihash_len]u8;
 /// This is to avoid creating multiple modules for the same build.zig file.
@@ -542,29 +847,24 @@ fn ProgressReader(comptime ReaderType: type) type { }; } -fn fetchAndUnpack( - thread_pool: *ThreadPool, - http_client: *std.http.Client, +fn getCachedPackage( + gpa: Allocator, global_cache_directory: Compilation.Directory, dep: Manifest.Dependency, report: Report, all_modules: *AllModules, root_prog_node: *std.Progress.Node, - /// This does not have to be any form of canonical or fully-qualified name: it - /// is only intended to be human-readable for progress reporting. - name_for_prog: []const u8, -) !struct { mod: ?*Package, found_existing: bool } { - const gpa = http_client.allocator; +) !?*Package { + _ = report; const s = fs.path.sep_str; - // Check if the expected_hash is already present in the global package // cache, and thereby avoid both fetching and unpacking. - if (dep.hash) |h| cached: { + if (dep.hash) |h| { const hex_digest = h[0..hex_multihash_len]; const pkg_dir_sub_path = "p" ++ s ++ hex_digest; var pkg_dir = global_cache_directory.handle.openDir(pkg_dir_sub_path, .{}) catch |err| switch (err) { - error.FileNotFound => break :cached, + error.FileNotFound => return null, else => |e| return e, }; errdefer pkg_dir.close(); @@ -574,16 +874,7 @@ fn fetchAndUnpack( const gop = try all_modules.getOrPut(gpa, hex_digest.*); if (gop.found_existing) { if (gop.value_ptr.*) |mod| { - return switch (mod) { - .zig_pkg => |pkg| .{ - .mod = pkg, - .found_existing = true, - }, - .non_zig_pkg => .{ - .mod = null, - .found_existing = true, - }, - }; + return mod; } } @@ -615,121 +906,60 @@ fn fetchAndUnpack( .root_src_path = owned_src_path, }; - gop.value_ptr.* = .{ .zig_pkg = ptr }; - return .{ - .mod = ptr, - .found_existing = false, - }; + gop.value_ptr.* = ptr; + return ptr; } + return null; +} + +fn fetchAndUnpack( + thread_pool: *ThreadPool, + http_client: *std.http.Client, + directory: Compilation.Directory, + global_cache_directory: Compilation.Directory, + dep: Manifest.Dependency, + report: Report, + all_modules: *AllModules, + 
root_prog_node: *std.Progress.Node, + /// This does not have to be any form of canonical or fully-qualified name: it + /// is only intended to be human-readable for progress reporting. + name_for_prog: []const u8, +) !*Package { + const gpa = http_client.allocator; + var pkg_prog_node = root_prog_node.start(name_for_prog, 0); defer pkg_prog_node.end(); pkg_prog_node.activate(); pkg_prog_node.context.refresh(); - const uri = try std.Uri.parse(dep.url); - - const rand_int = std.crypto.random.int(u64); - const tmp_dir_sub_path = "tmp" ++ s ++ Manifest.hex64(rand_int); - - const actual_hash = a: { - var tmp_directory: Compilation.Directory = d: { - const path = try global_cache_directory.join(gpa, &.{tmp_dir_sub_path}); - errdefer gpa.free(path); - - const iterable_dir = try global_cache_directory.handle.makeOpenPathIterable(tmp_dir_sub_path, .{}); - errdefer iterable_dir.close(); - - break :d .{ - .path = path, - .handle = iterable_dir.dir, - }; - }; - defer tmp_directory.closeAndFree(gpa); - - var h = std.http.Headers{ .allocator = gpa }; - defer h.deinit(); - - var req = try http_client.request(.GET, uri, h, .{}); - defer req.deinit(); - - try req.start(.{}); - try req.wait(); - - if (req.response.status != .ok) { - return report.fail(dep.url_tok, "Expected response status '200 OK' got '{} {s}'", .{ - @intFromEnum(req.response.status), - req.response.status.phrase() orelse "", - }); - } - - const content_type = req.response.headers.getFirstValue("Content-Type") orelse - return report.fail(dep.url_tok, "Missing 'Content-Type' header", .{}); - - var prog_reader: ProgressReader(std.http.Client.Request.Reader) = .{ - .child_reader = req.reader(), - .prog_node = &pkg_prog_node, - .unit = if (req.response.content_length) |content_length| unit: { - const kib = content_length / 1024; - const mib = kib / 1024; - if (mib > 0) { - pkg_prog_node.setEstimatedTotalItems(@intCast(mib)); - pkg_prog_node.setUnit("MiB"); - break :unit .mib; - } else { - 
pkg_prog_node.setEstimatedTotalItems(@intCast(@max(1, kib))); - pkg_prog_node.setUnit("KiB"); - break :unit .kib; - } - } else .any, - }; - pkg_prog_node.context.refresh(); - - if (ascii.eqlIgnoreCase(content_type, "application/gzip") or - ascii.eqlIgnoreCase(content_type, "application/x-gzip") or - ascii.eqlIgnoreCase(content_type, "application/tar+gzip")) - { - // I observed the gzip stream to read 1 byte at a time, so I am using a - // buffered reader on the front of it. - try unpackTarball(gpa, prog_reader.reader(), tmp_directory.handle, std.compress.gzip); - } else if (ascii.eqlIgnoreCase(content_type, "application/x-xz")) { - // I have not checked what buffer sizes the xz decompression implementation uses - // by default, so the same logic applies for buffering the reader as for gzip. - try unpackTarball(gpa, prog_reader.reader(), tmp_directory.handle, std.compress.xz); - } else if (ascii.eqlIgnoreCase(content_type, "application/octet-stream")) { - // support gitlab tarball urls such as https://gitlab.com///-/archive//-.tar.gz - // whose content-disposition header is: 'attachment; filename="-.tar.gz"' - const content_disposition = req.response.headers.getFirstValue("Content-Disposition") orelse - return report.fail(dep.url_tok, "Missing 'Content-Disposition' header for Content-Type=application/octet-stream", .{}); - if (isTarAttachment(content_disposition)) { - try unpackTarball(gpa, prog_reader.reader(), tmp_directory.handle, std.compress.gzip); - } else return report.fail(dep.url_tok, "Unsupported 'Content-Disposition' header value: '{s}' for Content-Type=application/octet-stream", .{content_disposition}); - } else { - return report.fail(dep.url_tok, "Unsupported 'Content-Type' header value: '{s}'", .{content_type}); - } - - // Download completed - stop showing downloaded amount as progress - pkg_prog_node.setEstimatedTotalItems(0); - pkg_prog_node.setCompletedItems(0); - pkg_prog_node.context.refresh(); - - // TODO: delete files not included in the package 
prior to computing the package hash. - // for example, if the ini file has directives to include/not include certain files, - // apply those rules directly to the filesystem right here. This ensures that files - // not protected by the hash are not present on the file system. - - // TODO: raise an error for files that have illegal paths on some operating systems. - // For example, on Linux a path with a backslash should raise an error here. - // Of course, if the ignore rules above omit the file from the package, then everything - // is fine and no error should be raised. - - break :a try computePackageHash(thread_pool, .{ .dir = tmp_directory.handle }); + const uri = switch (dep.location) { + .url => |url| std.Uri.parse(url) catch |err| switch (err) { + error.UnexpectedCharacter => return report.fail(dep.location_tok, "failed to parse dependency location as URI.", .{}), + else => return err, + }, + .path => |path| std.Uri{ + .scheme = "file", + .user = null, + .password = null, + .host = null, + .port = null, + .path = path, + .query = null, + .fragment = null, + }, }; - const pkg_dir_sub_path = "p" ++ s ++ Manifest.hexDigest(actual_hash); - try renameTmpIntoCache(global_cache_directory.handle, tmp_dir_sub_path, pkg_dir_sub_path); + var fetch_location = try FetchLocation.init(gpa, uri, directory, dep, report); + defer fetch_location.deinit(gpa); - const actual_hex = Manifest.hexDigest(actual_hash); + var readable_resource = try fetch_location.fetch(gpa, directory, http_client, dep, report); + defer readable_resource.deinit(gpa); + + var package_location = try readable_resource.unpack(gpa, thread_pool, global_cache_directory, dep, report, &pkg_prog_node); + defer package_location.deinit(gpa); + + const actual_hex = Manifest.hexDigest(package_location.hash); if (dep.hash) |h| { if (!mem.eql(u8, h, &actual_hex)) { return report.fail(dep.hash_tok, "hash mismatch: expected: {s}, found: {s}", .{ @@ -743,9 +973,9 @@ fn fetchAndUnpack( const eb = report.error_bundle; 
const notes_len = 1; try Report.addErrorMessage(report.ast.*, file_path, eb, notes_len, .{ - .tok = dep.url_tok, + .tok = dep.location_tok, .off = 0, - .msg = "url field is missing corresponding hash field", + .msg = "dependency is missing hash field", }); const notes_start = try eb.reserveNotes(notes_len); eb.extra.items[notes_start] = @intFromEnum(try eb.addErrorMessage(.{ @@ -754,35 +984,24 @@ fn fetchAndUnpack( return error.PackageFetchFailed; } - const build_zig_path = try std.fs.path.join(gpa, &.{ pkg_dir_sub_path, build_zig_basename }); - defer gpa.free(build_zig_path); + const gop = try all_modules.getOrPut(gpa, actual_hex); - global_cache_directory.handle.access(build_zig_path, .{}) catch |err| switch (err) { - error.FileNotFound => { - try all_modules.put(gpa, actual_hex, .non_zig_pkg); - return .{ - .mod = null, - .found_existing = false, - }; - }, - else => return err, - }; - - const mod = try createWithDir(gpa, global_cache_directory, pkg_dir_sub_path, build_zig_basename); - try all_modules.put(gpa, actual_hex, .{ .zig_pkg = mod }); - return .{ - .mod = mod, - .found_existing = false, - }; + if (gop.found_existing and gop.value_ptr.* != null) { + return gop.value_ptr.*.?; + } else { + const module = try create(gpa, package_location.dir_path, build_zig_basename); + gop.value_ptr.* = module; + return module; + } } fn unpackTarball( gpa: Allocator, - req_reader: anytype, + reader: anytype, out_dir: fs.Dir, comptime compression: type, ) !void { - var br = std.io.bufferedReaderSize(std.crypto.tls.max_ciphertext_record_len, req_reader); + var br = std.io.bufferedReaderSize(std.crypto.tls.max_ciphertext_record_len, reader); var decompress = try compression.decompress(gpa, br.reader()); defer decompress.deinit();