x86_64: implement more operations on vectors with 1-bit elements

This commit is contained in:
Jacob Young 2023-12-04 01:27:13 -05:00
parent 485e20884c
commit 50993a8f08
3 changed files with 111 additions and 66 deletions

View File

@@ -2478,8 +2478,11 @@ fn regClassForType(self: *Self, ty: Type) RegisterManager.RegisterBitSet {
else => abi.RegisterClass.sse,
},
.Vector => switch (ty.childType(mod).toIntern()) {
.bool_type => abi.RegisterClass.gp,
else => abi.RegisterClass.sse,
.bool_type, .u1_type => abi.RegisterClass.gp,
else => if (ty.isAbiInt(mod) and ty.intInfo(mod).bits == 1)
abi.RegisterClass.gp
else
abi.RegisterClass.sse,
},
else => abi.RegisterClass.gp,
};
@@ -5152,7 +5155,7 @@ fn airArrayElemVal(self: *Self, inst: Air.Inst.Index) !void {
defer if (index_lock) |lock| self.register_manager.unlockReg(lock);
try self.spillEflagsIfOccupied();
if (array_ty.isVector(mod) and elem_ty.toIntern() == .bool_type) {
if (array_ty.isVector(mod) and elem_ty.bitSize(mod) == 1) {
const index_reg = switch (index_mcv) {
.register => |reg| reg,
else => try self.copyToTmpRegister(index_ty, index_mcv),
@@ -15475,26 +15478,59 @@ fn airAggregateInit(self: *Self, inst: Air.Inst.Index) !void {
break :result .{ .load_frame = .{ .index = frame_index } };
},
.Array, .Vector => {
const frame_index = try self.allocFrameIndex(FrameAlloc.initSpill(result_ty, mod));
const elem_ty = result_ty.childType(mod);
const elem_size: u32 = @intCast(elem_ty.abiSize(mod));
if (result_ty.isVector(mod) and elem_ty.bitSize(mod) == 1) {
const result_size: u32 = @intCast(result_ty.abiSize(mod));
const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.gp);
try self.asmRegisterRegister(
.{ ._, .xor },
registerAlias(dst_reg, @min(result_size, 4)),
registerAlias(dst_reg, @min(result_size, 4)),
);
for (elements, 0..) |elem, elem_i| {
const elem_mcv = try self.resolveInst(elem);
const mat_elem_mcv = switch (elem_mcv) {
.load_tlv => |sym_index| MCValue{ .lea_tlv = sym_index },
else => elem_mcv,
};
const elem_off: i32 = @intCast(elem_size * elem_i);
try self.genSetMem(.{ .frame = frame_index }, elem_off, elem_ty, mat_elem_mcv);
for (elements, 0..) |elem, elem_i| {
const elem_reg = try self.copyToTmpRegister(elem_ty, .{ .air_ref = elem });
const elem_lock = self.register_manager.lockRegAssumeUnused(elem_reg);
defer self.register_manager.unlockReg(elem_lock);
try self.asmRegisterImmediate(
.{ ._, .@"and" },
registerAlias(elem_reg, @min(result_size, 4)),
Immediate.u(1),
);
if (elem_i > 0) try self.asmRegisterImmediate(
.{ ._l, .sh },
registerAlias(elem_reg, result_size),
Immediate.u(@intCast(elem_i)),
);
try self.asmRegisterRegister(
.{ ._, .@"or" },
registerAlias(dst_reg, result_size),
registerAlias(elem_reg, result_size),
);
}
break :result .{ .register = dst_reg };
} else {
const frame_index = try self.allocFrameIndex(FrameAlloc.initSpill(result_ty, mod));
const elem_size: u32 = @intCast(elem_ty.abiSize(mod));
for (elements, 0..) |elem, elem_i| {
const elem_mcv = try self.resolveInst(elem);
const mat_elem_mcv = switch (elem_mcv) {
.load_tlv => |sym_index| MCValue{ .lea_tlv = sym_index },
else => elem_mcv,
};
const elem_off: i32 = @intCast(elem_size * elem_i);
try self.genSetMem(.{ .frame = frame_index }, elem_off, elem_ty, mat_elem_mcv);
}
if (result_ty.sentinel(mod)) |sentinel| try self.genSetMem(
.{ .frame = frame_index },
@intCast(elem_size * elements.len),
elem_ty,
try self.genTypedValue(.{ .ty = elem_ty, .val = sentinel }),
);
break :result .{ .load_frame = .{ .index = frame_index } };
}
if (result_ty.sentinel(mod)) |sentinel| try self.genSetMem(
.{ .frame = frame_index },
@intCast(elem_size * elements.len),
elem_ty,
try self.genTypedValue(.{ .ty = elem_ty, .val = sentinel }),
);
break :result .{ .load_frame = .{ .index = frame_index } };
},
else => unreachable,
}

View File

@@ -391,53 +391,63 @@ pub fn generateSymbol(
.vector_type => |vector_type| {
const abi_size = math.cast(usize, typed_value.ty.abiSize(mod)) orelse
return error.Overflow;
switch (vector_type.child) {
.bool_type => {
const bytes = try code.addManyAsSlice(abi_size);
@memset(bytes, 0xaa);
var index: usize = 0;
const len = math.cast(usize, vector_type.len) orelse return error.Overflow;
while (index < len) : (index += 1) {
const bit_index = switch (endian) {
.big => len - 1 - index,
.little => index,
};
const byte = &bytes[bit_index / 8];
const mask = @as(u8, 1) << @truncate(bit_index);
if (switch (switch (aggregate.storage) {
.bytes => unreachable,
.elems => |elems| elems[index],
.repeated_elem => |elem| elem,
}) {
.bool_true => true,
.bool_false => false,
else => |elem| {
assert(mod.intern_pool.indexToKey(elem).undef == .bool_type);
continue;
if (Type.fromInterned(vector_type.child).bitSize(mod) == 1) {
const bytes = try code.addManyAsSlice(abi_size);
@memset(bytes, 0xaa);
var index: usize = 0;
const len = math.cast(usize, vector_type.len) orelse return error.Overflow;
while (index < len) : (index += 1) {
const bit_index = switch (endian) {
.big => len - 1 - index,
.little => index,
};
const byte = &bytes[bit_index / 8];
const mask = @as(u8, 1) << @truncate(bit_index);
if (switch (switch (aggregate.storage) {
.bytes => unreachable,
.elems => |elems| elems[index],
.repeated_elem => |elem| elem,
}) {
.bool_true => true,
.bool_false => false,
else => |elem| switch (mod.intern_pool.indexToKey(elem)) {
.undef => continue,
.int => |int| switch (int.storage) {
.u64 => |x| switch (x) {
0 => false,
1 => true,
else => unreachable,
},
.i64 => |x| switch (x) {
-1 => true,
0 => false,
else => unreachable,
},
else => unreachable,
},
}) byte.* |= mask else byte.* &= ~mask;
}
},
else => switch (aggregate.storage) {
.bytes => |bytes| try code.appendSlice(bytes),
.elems, .repeated_elem => {
var index: u64 = 0;
while (index < vector_type.len) : (index += 1) {
switch (try generateSymbol(bin_file, src_loc, .{
.ty = Type.fromInterned(vector_type.child),
.val = Value.fromInterned(switch (aggregate.storage) {
.bytes => unreachable,
.elems => |elems| elems[
math.cast(usize, index) orelse return error.Overflow
],
.repeated_elem => |elem| elem,
}),
}, code, debug_output, reloc_info)) {
.ok => {},
.fail => |em| return .{ .fail = em },
}
else => unreachable,
},
}) byte.* |= mask else byte.* &= ~mask;
}
} else switch (aggregate.storage) {
.bytes => |bytes| try code.appendSlice(bytes),
.elems, .repeated_elem => {
var index: u64 = 0;
while (index < vector_type.len) : (index += 1) {
switch (try generateSymbol(bin_file, src_loc, .{
.ty = Type.fromInterned(vector_type.child),
.val = Value.fromInterned(switch (aggregate.storage) {
.bytes => unreachable,
.elems => |elems| elems[
math.cast(usize, index) orelse return error.Overflow
],
.repeated_elem => |elem| elem,
}),
}, code, debug_output, reloc_info)) {
.ok => {},
.fail => |em| return .{ .fail = em },
}
},
}
},
}

View File

@@ -2420,7 +2420,6 @@ test "@intFromFloat on vector" {
test "@intFromBool on vector" {
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO