mirror of
https://github.com/ziglang/zig.git
synced 2026-02-21 08:45:52 +00:00
stage2: sparc64: Implement airByteSwap
This commit is contained in:
parent
486ab3852e
commit
83e6223192
@ -22,6 +22,7 @@ const Type = @import("../../type.zig").Type;
|
||||
const CodeGenError = codegen.CodeGenError;
|
||||
const Result = @import("../../codegen.zig").Result;
|
||||
const DebugInfoOutput = @import("../../codegen.zig").DebugInfoOutput;
|
||||
const Endian = std.builtin.Endian;
|
||||
|
||||
const build_options = @import("build_options");
|
||||
|
||||
@ -30,6 +31,7 @@ const abi = @import("abi.zig");
|
||||
const errUnionPayloadOffset = codegen.errUnionPayloadOffset;
|
||||
const errUnionErrorOffset = codegen.errUnionErrorOffset;
|
||||
const Instruction = bits.Instruction;
|
||||
const ASI = Instruction.ASI;
|
||||
const ShiftWidth = Instruction.ShiftWidth;
|
||||
const RegisterManager = abi.RegisterManager;
|
||||
const RegisterLock = RegisterManager.RegisterLock;
|
||||
@ -615,7 +617,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
|
||||
.clz => try self.airClz(inst),
|
||||
.ctz => try self.airCtz(inst),
|
||||
.popcount => try self.airPopcount(inst),
|
||||
.byte_swap => @panic("TODO try self.airByteSwap(inst)"),
|
||||
.byte_swap => try self.airByteSwap(inst),
|
||||
.bit_reverse => try self.airBitReverse(inst),
|
||||
.tag_name => try self.airTagName(inst),
|
||||
.error_name => try self.airErrorName(inst),
|
||||
@ -1200,6 +1202,90 @@ fn airBreakpoint(self: *Self) !void {
|
||||
return self.finishAirBookkeeping();
|
||||
}
|
||||
|
||||
/// Lowers the AIR `byte_swap` instruction.
///
/// The result is `.dead` when liveness reports the instruction as unused. Otherwise the
/// operand is resolved and, for integer operands, handled according to where it lives:
/// - 8-bit integers are returned unchanged;
/// - immediates are swapped at compile time (16 to 64 bits, in steps of 8 bits);
/// - registers are stored through the opposite-endian ASI to a stack slot obtained from
///   `allocMem`, then reloaded with a normal load into the same register;
/// - `.memory` and `.stack_offset` operands are loaded through the opposite-endian ASI
///   into a register obtained from the register manager.
/// Vector operands and integer widths not covered above fail with a TODO error.
fn airByteSwap(self: *Self, inst: Air.Inst.Index) !void {
    const ty_op = self.air.instructions.items(.data)[inst].ty_op;

    // We have hardware byteswapper in SPARCv9, don't let mainstream compilers mislead you.
    // That being said, the strategy to lower this is:
    // - If src is an immediate, comptime-swap it.
    // - If src is in memory then issue an LD*A with #ASI_P_[opposite-endian]
    // - If src is a register then issue an ST*A with #ASI_P_[opposite-endian]
    //   to a stack slot, then follow with a normal load from said stack slot.
    //   This is because on some implementations, ASI-tagged memory operations are non-pipelinable
    //   and loads tend to have longer latency than stores, so the sequence will minimize stall.
    // The result will always be either another immediate or stored in a register.
    // TODO: Fold byteswap+store into a single ST*A and load+byteswap into a single LD*A.
    const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: {
        const operand = try self.resolveInst(ty_op.operand);
        const operand_ty = self.air.typeOf(ty_op.operand);
        switch (operand_ty.zigTypeTag()) {
            .Vector => return self.fail("TODO byteswap for vectors", .{}),
            .Int => {
                const int_info = operand_ty.intInfo(self.target.*);

                // A single byte has nothing to swap.
                if (int_info.bits == 8) break :result operand;

                const abi_size = int_info.bits >> 3;
                const abi_align = operand_ty.abiAlignment(self.target.*);
                const opposite_endian_asi = switch (self.target.cpu.arch.endian()) {
                    Endian.Big => ASI.asi_primary_little,
                    Endian.Little => ASI.asi_primary,
                };

                // The ASI-tagged load/store helpers only handle power-of-two widths
                // up to 64 bits; everything else is still a TODO for the non-immediate paths.
                const hw_swappable = int_info.bits <= 64 and @popCount(int_info.bits) == 1;

                switch (operand) {
                    .immediate => |imm| {
                        const swapped = switch (int_info.bits) {
                            16 => @byteSwap(@intCast(u16, imm)),
                            24 => @byteSwap(@intCast(u24, imm)),
                            32 => @byteSwap(@intCast(u32, imm)),
                            40 => @byteSwap(@intCast(u40, imm)),
                            48 => @byteSwap(@intCast(u48, imm)),
                            56 => @byteSwap(@intCast(u56, imm)),
                            64 => @byteSwap(@intCast(u64, imm)),
                            else => return self.fail("TODO synthesize SPARCv9 byteswap for other integer sizes", .{}),
                        };
                        break :result .{ .immediate = swapped };
                    },
                    .register => |reg| {
                        if (!hw_swappable)
                            return self.fail("TODO synthesize SPARCv9 byteswap for other integer sizes", .{});

                        const off = try self.allocMem(inst, abi_size, abi_align);
                        // NOTE(review): the offset is materialized using the operand's type;
                        // confirm that this is wide enough for a stack offset.
                        const off_reg = try self.copyToTmpRegister(operand_ty, .{ .immediate = realStackOffset(off) });

                        // Store swapped, then reload in native byte order.
                        // NOTE(review): the reload overwrites the operand's own register;
                        // confirm the operand is not needed afterwards.
                        try self.genStoreASI(reg, .sp, off_reg, abi_size, opposite_endian_asi);
                        try self.genLoad(reg, .sp, Register, off_reg, abi_size);
                        // The block yields an MCValue, so the register has to be wrapped.
                        break :result .{ .register = reg };
                    },
                    .memory => {
                        if (!hw_swappable)
                            return self.fail("TODO synthesize SPARCv9 byteswap for other integer sizes", .{});

                        // NOTE(review): presumably this yields the operand's address in
                        // addr_reg; verify what copyToTmpRegister does for `.memory` values.
                        const addr_reg = try self.copyToTmpRegister(operand_ty, operand);
                        const dst_reg = try self.register_manager.allocReg(null, gp);

                        try self.genLoadASI(dst_reg, addr_reg, .g0, abi_size, opposite_endian_asi);
                        break :result .{ .register = dst_reg };
                    },
                    .stack_offset => |off| {
                        if (!hw_swappable)
                            return self.fail("TODO synthesize SPARCv9 byteswap for other integer sizes", .{});

                        const off_reg = try self.copyToTmpRegister(operand_ty, .{ .immediate = realStackOffset(off) });
                        const dst_reg = try self.register_manager.allocReg(null, gp);

                        try self.genLoadASI(dst_reg, .sp, off_reg, abi_size, opposite_endian_asi);
                        break :result .{ .register = dst_reg };
                    },
                    else => unreachable,
                }
            },
            else => unreachable,
        }
    };

    return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
}
|
||||
|
||||
fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier) !void {
|
||||
if (modifier == .always_tail) return self.fail("TODO implement tail calls for {}", .{self.target.cpu.arch});
|
||||
|
||||
@ -3583,6 +3669,34 @@ fn genLoad(self: *Self, value_reg: Register, addr_reg: Register, comptime off_ty
|
||||
}
|
||||
}
|
||||
|
||||
/// Emits one ASI-tagged integer load (`lduba`/`lduha`/`lduwa`/`ldxa`) as a MIR instruction.
///
/// The instruction is picked by `abi_size` (1, 2, 4 or 8 bytes) and carries
/// `value_reg` as `rd`, `addr_reg` as `rs1`, `off_reg` as `rs2` and `asi` in its
/// `mem_asi` payload. Sizes 3, 5, 6 and 7 fail with a TODO error; any other size
/// is treated as unreachable.
fn genLoadASI(self: *Self, value_reg: Register, addr_reg: Register, off_reg: Register, abi_size: u64, asi: ASI) !void {
    // Select the opcode first; unsupported sizes leave the function right here.
    const load_tag: Mir.Inst.Tag = switch (abi_size) {
        1 => .lduba,
        2 => .lduha,
        4 => .lduwa,
        8 => .ldxa,
        3, 5, 6, 7 => return self.fail("TODO: genLoad for more abi_sizes", .{}),
        else => unreachable, // unexpected abi size
    };

    _ = try self.addInst(.{
        .tag = load_tag,
        .data = .{
            .mem_asi = .{
                .rd = value_reg,
                .rs1 = addr_reg,
                .rs2 = off_reg,
                .asi = asi,
            },
        },
    });
}
|
||||
|
||||
fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void {
|
||||
switch (mcv) {
|
||||
.dead => unreachable,
|
||||
@ -3942,6 +4056,34 @@ fn genStore(self: *Self, value_reg: Register, addr_reg: Register, comptime off_t
|
||||
}
|
||||
}
|
||||
|
||||
/// Emits one ASI-tagged integer store (`stba`/`stha`/`stwa`/`stxa`) as a MIR instruction.
///
/// The instruction is picked by `abi_size` (1, 2, 4 or 8 bytes) and carries
/// `value_reg` as `rd`, `addr_reg` as `rs1`, `off_reg` as `rs2` and `asi` in its
/// `mem_asi` payload. Sizes 3, 5, 6 and 7 fail with a TODO error; any other size
/// is treated as unreachable.
fn genStoreASI(self: *Self, value_reg: Register, addr_reg: Register, off_reg: Register, abi_size: u64, asi: ASI) !void {
    // Select the opcode first; unsupported sizes leave the function right here.
    const store_tag: Mir.Inst.Tag = switch (abi_size) {
        1 => .stba,
        2 => .stha,
        4 => .stwa,
        8 => .stxa,
        // The message names the store helper so the failure is not mistaken for a load.
        3, 5, 6, 7 => return self.fail("TODO: genStoreASI for more abi_sizes", .{}),
        else => unreachable, // unexpected abi size
    };

    _ = try self.addInst(.{
        .tag = store_tag,
        .data = .{
            .mem_asi = .{
                .rd = value_reg,
                .rs1 = addr_reg,
                .rs2 = off_reg,
                .asi = asi,
            },
        },
    });
}
|
||||
|
||||
fn genTypedValue(self: *Self, typed_value: TypedValue) InnerError!MCValue {
|
||||
const mcv: MCValue = switch (try codegen.genTypedValue(
|
||||
self.bin_file,
|
||||
@ -4257,12 +4399,12 @@ fn processDeath(self: *Self, inst: Air.Inst.Index) void {
|
||||
/// Turns stack_offset MCV into a real SPARCv9 stack offset usable for asm.
|
||||
fn realStackOffset(off: u32) u32 {
|
||||
return off
|
||||
// SPARCv9 %sp points away from the stack by some amount.
|
||||
+ abi.stack_bias
|
||||
// The first couple bytes of each stack frame is reserved
|
||||
// for ABI and hardware purposes.
|
||||
+ abi.stack_reserved_area;
|
||||
// Only after that we have the usable stack frame portion.
|
||||
// SPARCv9 %sp points away from the stack by some amount.
|
||||
+ abi.stack_bias
|
||||
// The first couple bytes of each stack frame is reserved
|
||||
// for ABI and hardware purposes.
|
||||
+ abi.stack_reserved_area;
|
||||
// Only after that we have the usable stack frame portion.
|
||||
}
|
||||
|
||||
/// Caller must call `CallMCValues.deinit`.
|
||||
|
||||
@ -91,6 +91,11 @@ pub fn emitMir(
|
||||
.lduw => try emit.mirArithmetic3Op(inst),
|
||||
.ldx => try emit.mirArithmetic3Op(inst),
|
||||
|
||||
.lduba => unreachable,
|
||||
.lduha => unreachable,
|
||||
.lduwa => unreachable,
|
||||
.ldxa => unreachable,
|
||||
|
||||
.@"and" => try emit.mirArithmetic3Op(inst),
|
||||
.@"or" => try emit.mirArithmetic3Op(inst),
|
||||
.xor => try emit.mirArithmetic3Op(inst),
|
||||
@ -127,6 +132,11 @@ pub fn emitMir(
|
||||
.stw => try emit.mirArithmetic3Op(inst),
|
||||
.stx => try emit.mirArithmetic3Op(inst),
|
||||
|
||||
.stba => unreachable,
|
||||
.stha => unreachable,
|
||||
.stwa => unreachable,
|
||||
.stxa => unreachable,
|
||||
|
||||
.sub => try emit.mirArithmetic3Op(inst),
|
||||
.subcc => try emit.mirArithmetic3Op(inst),
|
||||
|
||||
|
||||
@ -15,6 +15,7 @@ const bits = @import("bits.zig");
|
||||
const Air = @import("../../Air.zig");
|
||||
|
||||
const Instruction = bits.Instruction;
|
||||
const ASI = bits.Instruction.ASI;
|
||||
const Register = bits.Register;
|
||||
|
||||
instructions: std.MultiArrayList(Inst).Slice,
|
||||
@ -70,6 +71,16 @@ pub const Inst = struct {
|
||||
lduw,
|
||||
ldx,
|
||||
|
||||
/// A.28 Load Integer from Alternate Space
|
||||
/// This uses the mem_asi field.
|
||||
/// Note that the ldda variant of this instruction is deprecated, so do not emit
|
||||
/// it unless specifically requested (e.g. by inline assembly).
|
||||
// TODO add other operations.
|
||||
lduba,
|
||||
lduha,
|
||||
lduwa,
|
||||
ldxa,
|
||||
|
||||
/// A.31 Logical Operations
|
||||
/// This uses the arithmetic_3op field.
|
||||
// TODO add other operations.
|
||||
@ -132,6 +143,16 @@ pub const Inst = struct {
|
||||
stw,
|
||||
stx,
|
||||
|
||||
/// A.55 Store Integer into Alternate Space
|
||||
/// This uses the mem_asi field.
|
||||
/// Note that the stda variant of this instruction is deprecated, so do not emit
|
||||
/// it unless specifically requested (e.g. by inline assembly).
|
||||
// TODO add other operations.
|
||||
stba,
|
||||
stha,
|
||||
stwa,
|
||||
stxa,
|
||||
|
||||
/// A.56 Subtract
|
||||
/// This uses the arithmetic_3op field.
|
||||
// TODO add other operations.
|
||||
@ -241,6 +262,15 @@ pub const Inst = struct {
|
||||
inst: Index,
|
||||
},
|
||||
|
||||
/// ASI-tagged memory operations.
|
||||
/// Used by e.g. ldxa, stxa
|
||||
mem_asi: struct {
|
||||
rd: Register,
|
||||
rs1: Register,
|
||||
rs2: Register = .g0,
|
||||
asi: ASI,
|
||||
},
|
||||
|
||||
/// Membar mask, controls the barrier behavior
|
||||
/// Used by e.g. membar
|
||||
membar_mask: struct {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user