From 67f5a28257b50e72750f51a03d6ce9ee27ad1439 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Mon, 17 May 2021 17:39:52 -0700 Subject: [PATCH] Sema: use a hash map for ZIR->AIR mapping Previously, ZIR was per-function so we could simply allocate a slice for all ZIR instructions. However now ZIR is whole-file, so we need a sparse mapping of ZIR to AIR instructions in order to not waste memory. --- BRANCH_TODO | 5 ----- src/Module.zig | 14 +++++--------- src/Sema.zig | 22 +++++++++++++++------- 3 files changed, 20 insertions(+), 21 deletions(-) diff --git a/BRANCH_TODO b/BRANCH_TODO index b85767350a..3ebaadae75 100644 --- a/BRANCH_TODO +++ b/BRANCH_TODO @@ -1,8 +1,3 @@ - * use a hash map for instructions because the array is too big - - no, actually modify the Zir.Inst.Ref strategy so that each decl gets - their indexes starting at 0 so that we can use an array to store Sema - results rather than a map. - * in SwitchProng resolve, make sure AST tree gets loaded. It will be unloaded if using cached ZIR. diff --git a/src/Module.zig b/src/Module.zig index 54a3c86aed..e659f4ed47 100644 --- a/src/Module.zig +++ b/src/Module.zig @@ -2904,14 +2904,13 @@ pub fn semaFile(mod: *Module, file: *Scope.File) InnerError!void { .gpa = gpa, .arena = &sema_arena.allocator, .code = file.zir, - // TODO use a map because this array is too big - .inst_map = try sema_arena.allocator.alloc(*ir.Inst, file.zir.instructions.len), .owner_decl = new_decl, .namespace = &struct_obj.namespace, .func = null, .owner_func = null, .param_inst_list = &.{}, }; + defer sema.deinit(); var block_scope: Scope.Block = .{ .parent = null, .sema = &sema, @@ -2960,13 +2959,13 @@ fn semaDecl(mod: *Module, decl: *Decl) !bool { .gpa = gpa, .arena = &analysis_arena.allocator, .code = zir, - .inst_map = try analysis_arena.allocator.alloc(*ir.Inst, zir.instructions.len), .owner_decl = decl, .namespace = decl.namespace, .func = null, .owner_func = null, .param_inst_list = &.{}, }; + defer sema.deinit(); if (decl.isRoot()) { log.debug("semaDecl root {*} ({s})", .{ decl, decl.name }); @@ -3565,14 +3564,13 @@ pub fn analyzeFnBody(mod: *Module, decl: *Decl, func: *Fn) !void { .gpa = mod.gpa, .arena = &arena.allocator, .code = zir, - .inst_map = try mod.gpa.alloc(*ir.Inst, zir.instructions.len), .owner_decl = decl, .namespace = decl.namespace, .func = func, .owner_func = func, .param_inst_list = param_inst_list, }; - defer mod.gpa.free(sema.inst_map); + defer sema.deinit(); var inner_block: Scope.Block = .{ .parent = null, @@ -4555,14 +4553,13 @@ pub fn analyzeStructFields(mod: *Module, struct_obj: *Struct) InnerError!void { .gpa = gpa, .arena = &decl_arena.allocator, .code = zir, - .inst_map = try gpa.alloc(*ir.Inst, zir.instructions.len), .owner_decl = struct_obj.owner_decl, .namespace = &struct_obj.namespace, .owner_func = null, .func = null, .param_inst_list = &.{}, }; - defer gpa.free(sema.inst_map); + defer sema.deinit(); var block: Scope.Block = .{ .parent = null, @@ -4712,14 +4709,13 @@ pub fn analyzeUnionFields(mod: *Module, union_obj: *Union) InnerError!void { .gpa = gpa, .arena = &decl_arena.allocator, .code = zir, - .inst_map = try gpa.alloc(*ir.Inst, zir.instructions.len), .owner_decl = union_obj.owner_decl, .namespace = &union_obj.namespace, .owner_func = null, .func = null, .param_inst_list = &.{}, }; - defer gpa.free(sema.inst_map); + defer sema.deinit(); var block: Scope.Block = .{ .parent = null, diff --git a/src/Sema.zig b/src/Sema.zig index 4070eedc75..a707785b17 100644 --- a/src/Sema.zig +++ b/src/Sema.zig @@ -12,7 +12,7 @@ gpa: *Allocator, arena: *Allocator, code: Zir, /// Maps ZIR to AIR. -inst_map: []*Inst, +inst_map: InstMap = .{}, /// When analyzing an inline function call, owner_decl is the Decl of the caller /// and `src_decl` of `Scope.Block` is the `Decl` of the callee. /// This `Decl` owns the arena memory of this `Sema`. @@ -65,6 +65,13 @@ const LazySrcLoc = Module.LazySrcLoc; const RangeSet = @import("RangeSet.zig"); const target_util = @import("target.zig"); +pub const InstMap = std.AutoHashMapUnmanaged(Zir.Inst.Index, *ir.Inst); + +pub fn deinit(sema: *Sema) void { + sema.inst_map.deinit(sema.gpa); + sema.* = undefined; +} + pub fn analyzeFnBody( sema: *Sema, block: *Scope.Block, @@ -129,7 +136,7 @@ pub fn analyzeBody( ) InnerError!Zir.Inst.Index { // No tracy calls here, to avoid interfering with the tail call mechanism. - const map = block.sema.inst_map; + const map = &block.sema.inst_map; const tags = block.sema.code.instructions.items(.tag); const datas = block.sema.code.instructions.items(.data); @@ -142,7 +149,7 @@ pub fn analyzeBody( var i: usize = 0; while (true) : (i += 1) { const inst = body[i]; - map[inst] = switch (tags[inst]) { + const air_inst = switch (tags[inst]) { // zig fmt: off .arg => try sema.zirArg(block, inst), .alloc => try sema.zirAlloc(block, inst), @@ -500,8 +507,9 @@ pub fn analyzeBody( } }, }; - if (map[inst].ty.isNoReturn()) + if (air_inst.ty.isNoReturn()) return always_noreturn; + try map.putNoClobber(sema.gpa, inst, air_inst); } } @@ -556,7 +564,7 @@ pub fn resolveInst(sema: *Sema, zir_ref: Zir.Inst.Ref) error{OutOfMemory}!*ir.In i -= Zir.Inst.Ref.typed_value_map.len; // Finally, the last section of indexes refers to the map of ZIR=>AIR. - return sema.inst_map[i]; + return sema.inst_map.get(@intCast(u32, i)).?; } fn resolveConstString( @@ -2244,9 +2252,9 @@ fn analyzeCall( defer sema.code = parent_zir; const parent_inst_map = sema.inst_map; - sema.inst_map = try sema.gpa.alloc(*ir.Inst, sema.code.instructions.len); + sema.inst_map = .{}; defer { - sema.gpa.free(sema.inst_map); + sema.inst_map.deinit(sema.gpa); sema.inst_map = parent_inst_map; }