diff --git a/src/Air.zig b/src/Air.zig index 0968d95180..2dae8454cf 100644 --- a/src/Air.zig +++ b/src/Air.zig @@ -815,6 +815,8 @@ pub const VectorCmp = struct { /// 1. `Inst.Ref` for every inputs_len /// 2. for every outputs_len /// - constraint: memory at this position is reinterpreted as a null +/// terminated string. +/// - name: memory at this position is reinterpreted as a null /// terminated string. pad to the next u32 after the null byte. /// 3. for every inputs_len /// - constraint: memory at this position is reinterpreted as a null diff --git a/src/Sema.zig b/src/Sema.zig index 87cb7cc9d2..9750111662 100644 --- a/src/Sema.zig +++ b/src/Sema.zig @@ -10535,7 +10535,11 @@ fn zirAsm( var output_type_bits = extra.data.output_type_bits; var needed_capacity: usize = @typeInfo(Air.Asm).Struct.fields.len + outputs_len + inputs_len; - const Output = struct { constraint: []const u8, ty: Type }; + const Output = struct { + constraint: []const u8, + name: []const u8, + ty: Type, + }; const output: ?Output = if (outputs_len == 0) null else blk: { const output = sema.code.extraData(Zir.Inst.Asm.Output, extra_i); extra_i = output.end; @@ -10548,10 +10552,12 @@ fn zirAsm( } const constraint = sema.code.nullTerminatedString(output.data.constraint); - needed_capacity += constraint.len / 4 + 1; + const name = sema.code.nullTerminatedString(output.data.name); + needed_capacity += (constraint.len + name.len + (2 + 3)) / 4; break :blk Output{ .constraint = constraint, + .name = name, .ty = try sema.resolveType(block, ret_ty_src, output.data.operand), }; }; @@ -10573,7 +10579,7 @@ fn zirAsm( const constraint = sema.code.nullTerminatedString(input.data.constraint); const name = sema.code.nullTerminatedString(input.data.name); - needed_capacity += (constraint.len + name.len + 1) / 4 + 1; + needed_capacity += (constraint.len + name.len + (2 + 3)) / 4; inputs[arg_i] = .{ .c = constraint, .n = name }; } @@ -10611,7 +10617,9 @@ fn zirAsm( const buffer = 
mem.sliceAsBytes(sema.air_extra.unusedCapacitySlice()); mem.copy(u8, buffer, o.constraint); buffer[o.constraint.len] = 0; - sema.air_extra.items.len += o.constraint.len / 4 + 1; + mem.copy(u8, buffer[o.constraint.len + 1 ..], o.name); + buffer[o.constraint.len + 1 + o.name.len] = 0; + sema.air_extra.items.len += (o.constraint.len + o.name.len + (2 + 3)) / 4; } for (inputs) |input| { const buffer = mem.sliceAsBytes(sema.air_extra.unusedCapacitySlice()); @@ -10619,7 +10627,7 @@ fn zirAsm( buffer[input.c.len] = 0; mem.copy(u8, buffer[input.c.len + 1 ..], input.n); buffer[input.c.len + 1 + input.n.len] = 0; - sema.air_extra.items.len += (input.c.len + input.n.len + 1) / 4 + 1; + sema.air_extra.items.len += (input.c.len + input.n.len + (2 + 3)) / 4; } for (clobbers) |clobber| { const buffer = mem.sliceAsBytes(sema.air_extra.unusedCapacitySlice()); diff --git a/src/arch/aarch64/CodeGen.zig b/src/arch/aarch64/CodeGen.zig index 5ed7b63db3..3b27633f69 100644 --- a/src/arch/aarch64/CodeGen.zig +++ b/src/arch/aarch64/CodeGen.zig @@ -3272,10 +3272,12 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { if (output != .none) { return self.fail("TODO implement codegen for non-expr asm", .{}); } + const extra_bytes = std.mem.sliceAsBytes(self.air.extra[extra_i..]); const constraint = std.mem.sliceTo(std.mem.sliceAsBytes(self.air.extra[extra_i..]), 0); + const name = std.mem.sliceTo(extra_bytes[constraint.len + 1 ..], 0); // This equation accounts for the fact that even if we have exactly 4 bytes // for the string, we still use the next u32 for the null terminator. 
- extra_i += constraint.len / 4 + 1; + extra_i += (constraint.len + name.len + (2 + 3)) / 4; break constraint; } else null; @@ -3283,10 +3285,10 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { for (inputs) |input| { const input_bytes = std.mem.sliceAsBytes(self.air.extra[extra_i..]); const constraint = std.mem.sliceTo(input_bytes, 0); - const input_name = std.mem.sliceTo(input_bytes[constraint.len + 1 ..], 0); + const name = std.mem.sliceTo(input_bytes[constraint.len + 1 ..], 0); // This equation accounts for the fact that even if we have exactly 4 bytes // for the string, we still use the next u32 for the null terminator. - extra_i += (constraint.len + input_name.len + 1) / 4 + 1; + extra_i += (constraint.len + name.len + (2 + 3)) / 4; if (constraint.len < 3 or constraint[0] != '{' or constraint[constraint.len - 1] != '}') { return self.fail("unrecognized asm input constraint: '{s}'", .{constraint}); diff --git a/src/arch/arm/CodeGen.zig b/src/arch/arm/CodeGen.zig index 73f51f6481..87d51b0276 100644 --- a/src/arch/arm/CodeGen.zig +++ b/src/arch/arm/CodeGen.zig @@ -4078,10 +4078,12 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { if (output != .none) { return self.fail("TODO implement codegen for non-expr asm", .{}); } + const extra_bytes = std.mem.sliceAsBytes(self.air.extra[extra_i..]); const constraint = std.mem.sliceTo(std.mem.sliceAsBytes(self.air.extra[extra_i..]), 0); + const name = std.mem.sliceTo(extra_bytes[constraint.len + 1 ..], 0); // This equation accounts for the fact that even if we have exactly 4 bytes // for the string, we still use the next u32 for the null terminator. 
- extra_i += constraint.len / 4 + 1; + extra_i += (constraint.len + name.len + (2 + 3)) / 4; break constraint; } else null; @@ -4089,10 +4091,10 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { for (inputs) |input| { const input_bytes = std.mem.sliceAsBytes(self.air.extra[extra_i..]); const constraint = std.mem.sliceTo(input_bytes, 0); - const input_name = std.mem.sliceTo(input_bytes[constraint.len + 1 ..], 0); + const name = std.mem.sliceTo(input_bytes[constraint.len + 1 ..], 0); // This equation accounts for the fact that even if we have exactly 4 bytes // for the string, we still use the next u32 for the null terminator. - extra_i += (constraint.len + input_name.len + 1) / 4 + 1; + extra_i += (constraint.len + name.len + (2 + 3)) / 4; if (constraint.len < 3 or constraint[0] != '{' or constraint[constraint.len - 1] != '}') { return self.fail("unrecognized asm input constraint: '{s}'", .{constraint}); diff --git a/src/arch/riscv64/CodeGen.zig b/src/arch/riscv64/CodeGen.zig index 61fddee207..96d30c31ce 100644 --- a/src/arch/riscv64/CodeGen.zig +++ b/src/arch/riscv64/CodeGen.zig @@ -2098,10 +2098,12 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { if (output != .none) { return self.fail("TODO implement codegen for non-expr asm", .{}); } + const extra_bytes = std.mem.sliceAsBytes(self.air.extra[extra_i..]); const constraint = std.mem.sliceTo(std.mem.sliceAsBytes(self.air.extra[extra_i..]), 0); + const name = std.mem.sliceTo(extra_bytes[constraint.len + 1 ..], 0); // This equation accounts for the fact that even if we have exactly 4 bytes // for the string, we still use the next u32 for the null terminator. 
- extra_i += constraint.len / 4 + 1; + extra_i += (constraint.len + name.len + (2 + 3)) / 4; break constraint; } else null; @@ -2109,10 +2111,10 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { for (inputs) |input| { const input_bytes = std.mem.sliceAsBytes(self.air.extra[extra_i..]); const constraint = std.mem.sliceTo(input_bytes, 0); - const input_name = std.mem.sliceTo(input_bytes[constraint.len + 1 ..], 0); + const name = std.mem.sliceTo(input_bytes[constraint.len + 1 ..], 0); // This equation accounts for the fact that even if we have exactly 4 bytes // for the string, we still use the next u32 for the null terminator. - extra_i += (constraint.len + input_name.len + 1) / 4 + 1; + extra_i += (constraint.len + name.len + (2 + 3)) / 4; if (constraint.len < 3 or constraint[0] != '{' or constraint[constraint.len - 1] != '}') { return self.fail("unrecognized asm input constraint: '{s}'", .{constraint}); diff --git a/src/arch/sparcv9/CodeGen.zig b/src/arch/sparcv9/CodeGen.zig index bcd8cf8eeb..7d93916fc1 100644 --- a/src/arch/sparcv9/CodeGen.zig +++ b/src/arch/sparcv9/CodeGen.zig @@ -642,10 +642,12 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { if (output != .none) { return self.fail("TODO implement codegen for non-expr asm", .{}); } + const extra_bytes = std.mem.sliceAsBytes(self.air.extra[extra_i..]); const constraint = std.mem.sliceTo(std.mem.sliceAsBytes(self.air.extra[extra_i..]), 0); + const name = std.mem.sliceTo(extra_bytes[constraint.len + 1 ..], 0); // This equation accounts for the fact that even if we have exactly 4 bytes // for the string, we still use the next u32 for the null terminator. 
- extra_i += constraint.len / 4 + 1; + extra_i += (constraint.len + name.len + (2 + 3)) / 4; break constraint; } else null; @@ -653,10 +655,10 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { for (inputs) |input| { const input_bytes = std.mem.sliceAsBytes(self.air.extra[extra_i..]); const constraint = std.mem.sliceTo(input_bytes, 0); - const input_name = std.mem.sliceTo(input_bytes[constraint.len + 1 ..], 0); + const name = std.mem.sliceTo(input_bytes[constraint.len + 1 ..], 0); // This equation accounts for the fact that even if we have exactly 4 bytes // for the string, we still use the next u32 for the null terminator. - extra_i += (constraint.len + input_name.len + 1) / 4 + 1; + extra_i += (constraint.len + name.len + (2 + 3)) / 4; if (constraint.len < 3 or constraint[0] != '{' or constraint[constraint.len - 1] != '}') { return self.fail("unrecognized asm input constraint: '{s}'", .{constraint}); diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 0103f5382f..a4f9c862d9 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -4739,10 +4739,12 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { if (output != .none) { return self.fail("TODO implement codegen for non-expr asm", .{}); } + const extra_bytes = std.mem.sliceAsBytes(self.air.extra[extra_i..]); const constraint = std.mem.sliceTo(std.mem.sliceAsBytes(self.air.extra[extra_i..]), 0); + const name = std.mem.sliceTo(extra_bytes[constraint.len + 1 ..], 0); // This equation accounts for the fact that even if we have exactly 4 bytes // for the string, we still use the next u32 for the null terminator. 
- extra_i += constraint.len / 4 + 1; + extra_i += (constraint.len + name.len + (2 + 3)) / 4; break constraint; } else null; @@ -4750,10 +4752,10 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { for (inputs) |input| { const input_bytes = std.mem.sliceAsBytes(self.air.extra[extra_i..]); const constraint = std.mem.sliceTo(input_bytes, 0); - const input_name = std.mem.sliceTo(input_bytes[constraint.len + 1 ..], 0); + const name = std.mem.sliceTo(input_bytes[constraint.len + 1 ..], 0); // This equation accounts for the fact that even if we have exactly 4 bytes // for the string, we still use the next u32 for the null terminator. - extra_i += (constraint.len + input_name.len + 1) / 4 + 1; + extra_i += (constraint.len + name.len + (2 + 3)) / 4; if (constraint.len < 3 or constraint[0] != '{' or constraint[constraint.len - 1] != '}') { return self.fail("unrecognized asm input constraint: '{s}'", .{constraint}); diff --git a/src/codegen/c.zig b/src/codegen/c.zig index 2cd93d47fc..44b616c493 100644 --- a/src/codegen/c.zig +++ b/src/codegen/c.zig @@ -3018,10 +3018,12 @@ fn airAsm(f: *Function, inst: Air.Inst.Index) !CValue { if (output != .none) { return f.fail("TODO implement codegen for non-expr asm", .{}); } + const extra_bytes = std.mem.sliceAsBytes(f.air.extra[extra_i..]); const constraint = std.mem.sliceTo(std.mem.sliceAsBytes(f.air.extra[extra_i..]), 0); + const name = std.mem.sliceTo(extra_bytes[constraint.len + 1 ..], 0); // This equation accounts for the fact that even if we have exactly 4 bytes // for the string, we still use the next u32 for the null terminator. 
- extra_i += constraint.len / 4 + 1; + extra_i += (constraint.len + name.len + (2 + 3)) / 4; break constraint; } else null; @@ -3031,10 +3033,12 @@ fn airAsm(f: *Function, inst: Air.Inst.Index) !CValue { const inputs_extra_begin = extra_i; for (inputs) |input, i| { - const constraint = std.mem.sliceTo(std.mem.sliceAsBytes(f.air.extra[extra_i..]), 0); + const input_bytes = std.mem.sliceAsBytes(f.air.extra[extra_i..]); + const constraint = std.mem.sliceTo(input_bytes, 0); + const name = std.mem.sliceTo(input_bytes[constraint.len + 1 ..], 0); // This equation accounts for the fact that even if we have exactly 4 bytes // for the string, we still use the next u32 for the null terminator. - extra_i += constraint.len / 4 + 1; + extra_i += (constraint.len + name.len + (2 + 3)) / 4; if (constraint[0] == '{' and constraint[constraint.len - 1] == '}') { const reg = constraint[1 .. constraint.len - 1]; diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig index c9ea5bebac..63e7eece99 100644 --- a/src/codegen/llvm.zig +++ b/src/codegen/llvm.zig @@ -4638,14 +4638,19 @@ pub const FuncGen = struct { var llvm_param_i: usize = 0; var total_i: usize = 0; + var name_map: std.StringArrayHashMapUnmanaged(void) = .{}; + try name_map.ensureUnusedCapacity(arena, outputs.len + inputs.len); + for (outputs) |output| { if (output != .none) { return self.todo("implement inline asm with non-returned output", .{}); } + const extra_bytes = std.mem.sliceAsBytes(self.air.extra[extra_i..]); const constraint = std.mem.sliceTo(std.mem.sliceAsBytes(self.air.extra[extra_i..]), 0); + const name = std.mem.sliceTo(extra_bytes[constraint.len + 1 ..], 0); // This equation accounts for the fact that even if we have exactly 4 bytes // for the string, we still use the next u32 for the null terminator. 
- extra_i += constraint.len / 4 + 1; + extra_i += (constraint.len + name.len + (2 + 3)) / 4; try llvm_constraints.ensureUnusedCapacity(self.gpa, constraint.len + 1); if (total_i != 0) { @@ -4654,17 +4659,17 @@ pub const FuncGen = struct { llvm_constraints.appendAssumeCapacity('='); llvm_constraints.appendSliceAssumeCapacity(constraint[1..]); + name_map.putAssumeCapacityNoClobber(name, {}); total_i += 1; } - const input_start_extra_i = extra_i; for (inputs) |input| { - const input_bytes = std.mem.sliceAsBytes(self.air.extra[extra_i..]); - const constraint = std.mem.sliceTo(input_bytes, 0); - const input_name = std.mem.sliceTo(input_bytes[constraint.len + 1 ..], 0); + const extra_bytes = std.mem.sliceAsBytes(self.air.extra[extra_i..]); + const constraint = std.mem.sliceTo(extra_bytes, 0); + const name = std.mem.sliceTo(extra_bytes[constraint.len + 1 ..], 0); // This equation accounts for the fact that even if we have exactly 4 bytes // for the string, we still use the next u32 for the null terminator. 
- extra_i += (constraint.len + input_name.len + 1) / 4 + 1; + extra_i += (constraint.len + name.len + (2 + 3)) / 4; const arg_llvm_value = try self.resolveInst(input); @@ -4677,6 +4682,7 @@ pub const FuncGen = struct { } llvm_constraints.appendSliceAssumeCapacity(constraint); + name_map.putAssumeCapacityNoClobber(name, {}); llvm_param_i += 1; total_i += 1; } @@ -4739,20 +4745,11 @@ pub const FuncGen = struct { const name = asm_source[name_start..i]; state = .start; - extra_i = input_start_extra_i; - for (inputs) |_, input_i| { - const input_bytes = std.mem.sliceAsBytes(self.air.extra[extra_i..]); - const constraint = std.mem.sliceTo(input_bytes, 0); - const input_name = std.mem.sliceTo(input_bytes[constraint.len + 1 ..], 0); - extra_i += (constraint.len + input_name.len + 1) / 4 + 1; - - if (std.mem.eql(u8, name, input_name)) { - try rendered_template.writer().print("{d}", .{input_i}); - break; - } - } else { - return self.todo("TODO validate asm in Sema", .{}); - } + const index = name_map.getIndex(name) orelse { + // we should validate the assembly in Sema; by now it is too late + return self.todo("unknown input or output name: '{s}'", .{name}); + }; + try rendered_template.writer().print("{d}", .{index}); }, else => {}, }, diff --git a/src/print_air.zig b/src/print_air.zig index 6e336e138b..c01d96ed7f 100644 --- a/src/print_air.zig +++ b/src/print_air.zig @@ -542,15 +542,19 @@ const Writer = struct { extra_i += inputs.len; for (outputs) |output| { - const constraint = w.air.nullTerminatedString(extra_i); + const extra_bytes = std.mem.sliceAsBytes(w.air.extra[extra_i..]); + const constraint = std.mem.sliceTo(extra_bytes, 0); + const name = std.mem.sliceTo(extra_bytes[constraint.len + 1 ..], 0); + // This equation accounts for the fact that even if we have exactly 4 bytes - // for the string, we still use the next u32 for the null terminator. 
- extra_i += constraint.len / 4 + 1; + // of string content, the two null terminators still need room in the next u32: + // the total (both strings plus both terminators) is rounded up to whole u32s. + extra_i += (constraint.len + name.len + (2 + 3)) / 4; if (output == .none) { - try s.print(", -> {s}", .{constraint}); + try s.print(", [{s}] -> {s}", .{ name, constraint }); } else { - try s.print(", out {s} = (", .{constraint}); + try s.print(", [{s}] out {s} = (", .{ name, constraint }); try w.writeOperand(s, inst, op_index, output); op_index += 1; try s.writeByte(')'); @@ -558,12 +562,15 @@ const Writer = struct { } for (inputs) |input| { - const constraint = w.air.nullTerminatedString(extra_i); + const extra_bytes = std.mem.sliceAsBytes(w.air.extra[extra_i..]); + const constraint = std.mem.sliceTo(extra_bytes, 0); + const name = std.mem.sliceTo(extra_bytes[constraint.len + 1 ..], 0); // This equation accounts for the fact that even if we have exactly 4 bytes - // for the string, we still use the next u32 for the null terminator. - extra_i += constraint.len / 4 + 1; + // of string content, the two null terminators still need room in the next u32: + // the total (both strings plus both terminators) is rounded up to whole u32s. + extra_i += (constraint.len + name.len + 1) / 4 + 1; - try s.print(", in {s} = (", .{constraint}); + try s.print(", [{s}] in {s} = (", .{ name, constraint }); try w.writeOperand(s, inst, op_index, input); op_index += 1; try s.writeByte(')'); @@ -572,7 +579,8 @@ const Writer = struct { { var clobber_i: u32 = 0; while (clobber_i < clobbers_len) : (clobber_i += 1) { - const clobber = w.air.nullTerminatedString(extra_i); + const extra_bytes = std.mem.sliceAsBytes(w.air.extra[extra_i..]); + const clobber = std.mem.sliceTo(extra_bytes, 0); // This equation accounts for the fact that even if we have exactly 4 bytes // for the string, we still use the next u32 for the null terminator. extra_i += clobber.len / 4 + 1;