diff --git a/doc/langref.html.in b/doc/langref.html.in
index 8a303640e6..61fc06fd02 100644
--- a/doc/langref.html.in
+++ b/doc/langref.html.in
@@ -6542,12 +6542,21 @@ async fn func(y: *i32) void {
       {#header_close#}
 
       {#header_open|@byteSwap#}
-      <pre>{#syntax#}@byteSwap(comptime T: type, integer: T) T{#endsyntax#}</pre>
+      <pre>{#syntax#}@byteSwap(comptime T: type, operand: T) T{#endsyntax#}</pre>
       <p>{#syntax#}T{#endsyntax#} must be an integer type with bit count evenly divisible by 8.</p>
+      <p>{#syntax#}operand{#endsyntax#} may be an {#link|integer|Integers#} or {#link|vector|Vectors#}.</p>
       <p>
       Swaps the byte order of the integer. This converts a big endian integer to a little endian integer,
       and converts a little endian integer to a big endian integer.
       </p>
+      <p>
+      Note that for the purposes of memory layout with respect to endianness, the integer type should be
+      related to the number of bytes reported by {#link|@sizeOf#}. This is demonstrated with
+      {#syntax#}u24{#endsyntax#}. {#syntax#}@sizeOf(u24) == 4{#endsyntax#}, which means that a
+      {#syntax#}u24{#endsyntax#} stored in memory takes 4 bytes, and those 4 bytes are what are swapped on
+      a little vs big endian system. On the other hand, if {#syntax#}T{#endsyntax#} is specified to
+      be {#syntax#}u24{#endsyntax#}, then only 3 bytes are reversed.
+      </p>
       {#header_close#}
 
       {#header_open|@bitReverse#}
diff --git a/src/all_types.hpp b/src/all_types.hpp
index deb56cbb40..7887c06158 100644
--- a/src/all_types.hpp
+++ b/src/all_types.hpp
@@ -1771,6 +1771,7 @@ struct ZigLLVMFnKey {
         } overflow_arithmetic;
         struct {
             uint32_t bit_count;
+            uint32_t vector_len; // 0 means not a vector
         } bswap;
         struct {
             uint32_t bit_count;
diff --git a/src/analyze.cpp b/src/analyze.cpp
index ac70d5646f..66b72b935d 100644
--- a/src/analyze.cpp
+++ b/src/analyze.cpp
@@ -6896,7 +6896,8 @@ uint32_t zig_llvm_fn_key_hash(ZigLLVMFnKey x) {
             return (uint32_t)(x.data.floating.bit_count) * ((uint32_t)x.id + 1025) +
                    (uint32_t)(x.data.floating.vector_len) * (((uint32_t)x.id << 5) + 1025);
         case ZigLLVMFnIdBswap:
-            return (uint32_t)(x.data.bswap.bit_count) * (uint32_t)3661994335;
+            return (uint32_t)(x.data.bswap.bit_count) * ((uint32_t)3661994335) +
+                   (uint32_t)(x.data.bswap.vector_len) * (((uint32_t)x.id << 5) + 1025);
         case ZigLLVMFnIdBitReverse:
             return (uint32_t)(x.data.bit_reverse.bit_count) * (uint32_t)2621398431;
         case ZigLLVMFnIdOverflowArithmetic:
@@ -6919,7 +6920,8 @@ bool zig_llvm_fn_key_eql(ZigLLVMFnKey a, ZigLLVMFnKey b) {
         case ZigLLVMFnIdPopCount:
             return a.data.pop_count.bit_count == b.data.pop_count.bit_count;
         case ZigLLVMFnIdBswap:
-            return a.data.bswap.bit_count == b.data.bswap.bit_count;
+            return a.data.bswap.bit_count == b.data.bswap.bit_count &&
+                   a.data.bswap.vector_len == b.data.bswap.vector_len;
         case ZigLLVMFnIdBitReverse:
             return a.data.bit_reverse.bit_count == b.data.bit_reverse.bit_count;
         case ZigLLVMFnIdFloatOp:
diff --git a/src/codegen.cpp b/src/codegen.cpp
index 7676b3bbd0..54c02b288a 100644
--- a/src/codegen.cpp
+++ b/src/codegen.cpp
@@ -4505,7 +4505,11 @@ static LLVMValueRef ir_render_optional_unwrap_ptr(CodeGen *g, IrExecutable *exec
     }
 }
 
-static LLVMValueRef get_int_builtin_fn(CodeGen *g, ZigType *int_type, BuiltinFnId fn_id) {
+static LLVMValueRef get_int_builtin_fn(CodeGen *g, ZigType *expr_type, BuiltinFnId fn_id) {
+    bool is_vector = expr_type->id == ZigTypeIdVector;
+    ZigType *int_type = is_vector ? expr_type->data.vector.elem_type : expr_type;
+    assert(int_type->id == ZigTypeIdInt);
+    uint32_t vector_len = is_vector ? expr_type->data.vector.len : 0;
     ZigLLVMFnKey key = {};
     const char *fn_name;
     uint32_t n_args;
@@ -4529,6 +4533,7 @@ static LLVMValueRef get_int_builtin_fn(CodeGen *g, ZigType *int_type, BuiltinFnI
         n_args = 1;
         key.id = ZigLLVMFnIdBswap;
         key.data.bswap.bit_count = (uint32_t)int_type->data.integral.bit_count;
+        key.data.bswap.vector_len = vector_len;
     } else if (fn_id == BuiltinFnIdBitReverse) {
         fn_name = "bitreverse";
         n_args = 1;
@@ -4543,12 +4548,15 @@ static LLVMValueRef get_int_builtin_fn(CodeGen *g, ZigType *int_type, BuiltinFnI
         return existing_entry->value;
 
     char llvm_name[64];
-    sprintf(llvm_name, "llvm.%s.i%" PRIu32, fn_name, int_type->data.integral.bit_count);
+    if (is_vector)
+        sprintf(llvm_name, "llvm.%s.v%" PRIu32 "i%" PRIu32, fn_name, vector_len, int_type->data.integral.bit_count);
+    else
+        sprintf(llvm_name, "llvm.%s.i%" PRIu32, fn_name, int_type->data.integral.bit_count);
     LLVMTypeRef param_types[] = {
-        get_llvm_type(g, int_type),
+        get_llvm_type(g, expr_type),
         LLVMInt1Type(),
     };
-    LLVMTypeRef fn_type = LLVMFunctionType(get_llvm_type(g, int_type), param_types, n_args, false);
+    LLVMTypeRef fn_type = LLVMFunctionType(get_llvm_type(g, expr_type), param_types, n_args, false);
     LLVMValueRef fn_val = LLVMAddFunction(g->module, llvm_name, fn_type);
     assert(LLVMGetIntrinsicID(fn_val));
 
@@ -5542,25 +5550,36 @@ static LLVMValueRef ir_render_mul_add(CodeGen *g, IrExecutable *executable, IrIn
 
 static LLVMValueRef ir_render_bswap(CodeGen *g, IrExecutable *executable, IrInstructionBswap *instruction) {
     LLVMValueRef op = ir_llvm_value(g, instruction->op);
-    ZigType *int_type = instruction->base.value.type;
+    ZigType *expr_type = instruction->base.value.type; // result type: an integer or a vector of integers
+    bool is_vector = expr_type->id == ZigTypeIdVector;
+    ZigType *int_type = is_vector ? expr_type->data.vector.elem_type : expr_type; // per-element integer type
     assert(int_type->id == ZigTypeIdInt);
     if (int_type->data.integral.bit_count % 16 == 0) {
-        LLVMValueRef fn_val = get_int_builtin_fn(g, instruction->base.value.type, BuiltinFnIdBswap);
+        LLVMValueRef fn_val = get_int_builtin_fn(g, expr_type, BuiltinFnIdBswap); // pass the full type so a vector gets the vector intrinsic
         return LLVMBuildCall(g->builder, fn_val, &op, 1, "");
     }
     // Not an even number of bytes, so we zext 1 byte, then bswap, shift right 1 byte, truncate
     ZigType *extended_type = get_int_type(g, int_type->data.integral.is_signed,
             int_type->data.integral.bit_count + 8);
+    LLVMValueRef shift_amt = LLVMConstInt(get_llvm_type(g, extended_type), 8, false); // must be built here, while extended_type is still the scalar type
+    if (is_vector) {
+        extended_type = get_vector_type(g, expr_type->data.vector.len, extended_type); // from here on extended_type is the vector type
+        LLVMValueRef *values = allocate_nonzero<LLVMValueRef>(expr_type->data.vector.len);
+        for (uint32_t i = 0; i < expr_type->data.vector.len; i += 1) {
+            values[i] = shift_amt; // every lane shifts by the same 8 bits
+        }
+        shift_amt = LLVMConstVector(values, expr_type->data.vector.len); // replace the scalar shift amount with a constant vector of it
+        free(values); // temporary array is released right after building the constant
+    }
     // aabbcc
     LLVMValueRef extended = LLVMBuildZExt(g->builder, op, get_llvm_type(g, extended_type), "");
     // 00aabbcc
     LLVMValueRef fn_val = get_int_builtin_fn(g, extended_type, BuiltinFnIdBswap);
     LLVMValueRef swapped = LLVMBuildCall(g->builder, fn_val, &extended, 1, "");
     // ccbbaa00
-    LLVMValueRef shifted = ZigLLVMBuildLShrExact(g->builder, swapped,
-            LLVMConstInt(get_llvm_type(g, extended_type), 8, false), "");
+    LLVMValueRef shifted = ZigLLVMBuildLShrExact(g->builder, swapped, shift_amt, ""); // shift_amt is scalar or vector to match swapped
     // 00ccbbaa
-    return LLVMBuildTrunc(g->builder, shifted, get_llvm_type(g, int_type), "");
+    return LLVMBuildTrunc(g->builder, shifted, get_llvm_type(g, expr_type), ""); // truncate back to the operand's own type
 }
 
 static LLVMValueRef ir_render_bit_reverse(CodeGen *g, IrExecutable *executable, IrInstructionBitReverse *instruction) {
@@ -5581,7 +5600,7 @@ static LLVMValueRef ir_render_vector_to_array(CodeGen *g, IrExecutable *executab
     LLVMValueRef vector = ir_llvm_value(g, instruction->vector);
 
     ZigType *elem_type = array_type->data.array.child_type;
-    bool bitcast_ok = (elem_type->size_in_bits * 8) == elem_type->abi_size;
+    bool bitcast_ok = elem_type->size_in_bits == elem_type->abi_size * 8;
     if (bitcast_ok) {
         LLVMValueRef casted_ptr = LLVMBuildBitCast(g->builder, result_loc,
                 LLVMPointerType(get_llvm_type(g, instruction->vector->value.type), 0), "");
@@ -5615,7 +5634,7 @@ static LLVMValueRef ir_render_array_to_vector(CodeGen *g, IrExecutable *executab
     LLVMTypeRef vector_type_ref = get_llvm_type(g, vector_type);
 
     ZigType *elem_type = vector_type->data.vector.elem_type;
-    bool bitcast_ok = (elem_type->size_in_bits * 8) == elem_type->abi_size;
+    bool bitcast_ok = elem_type->size_in_bits == elem_type->abi_size * 8;
     if (bitcast_ok) {
         LLVMValueRef casted_ptr = LLVMBuildBitCast(g->builder, array_ptr,
                 LLVMPointerType(vector_type_ref, 0), "");
@@ -8888,7 +8907,7 @@ void add_cc_args(CodeGen *g, ZigList<const char *> &args, const char *out_dep_pa
         args.append(g->framework_dirs.at(i));
     }
 
-    //note(dimenus): appending libc headers before c_headers breaks intrinsics 
+    //note(dimenus): appending libc headers before c_headers breaks intrinsics
     //and other compiler specific items
     // According to Rich Felker libc headers are supposed to go before C language headers.
     args.append("-isystem");
diff --git a/src/ir.cpp b/src/ir.cpp
index cbc00f0cfe..1eba53ef45 100644
--- a/src/ir.cpp
+++ b/src/ir.cpp
@@ -11068,8 +11068,15 @@ static ZigType *ir_resolve_int_type(IrAnalyze *ira, IrInstruction *type_value) {
         return ira->codegen->builtin_types.entry_invalid;
 
     if (ty->id != ZigTypeIdInt) {
-        ir_add_error(ira, type_value,
+        ErrorMsg *msg = ir_add_error(ira, type_value,
             buf_sprintf("expected integer type, found '%s'", buf_ptr(&ty->name)));
+        if (ty->id == ZigTypeIdVector &&
+            ty->data.vector.elem_type->id == ZigTypeIdInt)
+        {
+            add_error_note(ira->codegen, msg, type_value->source_node,
+                buf_sprintf("represent vectors with their element types, i.e. '%s'",
+                    buf_ptr(&ty->data.vector.elem_type->name)));
+        }
         return ira->codegen->builtin_types.entry_invalid;
     }
 
@@ -25253,21 +25260,35 @@ static IrInstruction *ir_analyze_instruction_float_op(IrAnalyze *ira, IrInstruct
 }
 
 static IrInstruction *ir_analyze_instruction_bswap(IrAnalyze *ira, IrInstructionBswap *instruction) {
+    Error err;
+
     ZigType *int_type = ir_resolve_int_type(ira, instruction->type->child);
     if (type_is_invalid(int_type))
         return ira->codegen->invalid_instruction;
 
-    IrInstruction *op = ir_implicit_cast(ira, instruction->op->child, int_type);
+    IrInstruction *uncasted_op = instruction->op->child;
+    if (type_is_invalid(uncasted_op->value.type))
+        return ira->codegen->invalid_instruction;
+
+    uint32_t vector_len; // UINT32_MAX means not a vector
+    if (uncasted_op->value.type->id == ZigTypeIdArray &&
+        is_valid_vector_elem_type(uncasted_op->value.type->data.array.child_type))
+    {
+        vector_len = uncasted_op->value.type->data.array.len;
+    } else if (uncasted_op->value.type->id == ZigTypeIdVector) {
+        vector_len = uncasted_op->value.type->data.vector.len;
+    } else {
+        vector_len = UINT32_MAX;
+    }
+
+    bool is_vector = (vector_len != UINT32_MAX);
+    ZigType *op_type = is_vector ? get_vector_type(ira->codegen, vector_len, int_type) : int_type;
+
+    IrInstruction *op = ir_implicit_cast(ira, uncasted_op, op_type);
     if (type_is_invalid(op->value.type))
         return ira->codegen->invalid_instruction;
 
-    if (int_type->data.integral.bit_count == 0) {
-        IrInstruction *result = ir_const(ira, &instruction->base, int_type);
-        bigint_init_unsigned(&result->value.data.x_bigint, 0);
-        return result;
-    }
-
-    if (int_type->data.integral.bit_count == 8)
+    if (int_type->data.integral.bit_count == 8 || int_type->data.integral.bit_count == 0)
         return op;
 
     if (int_type->data.integral.bit_count % 8 != 0) {
@@ -25282,20 +25303,44 @@ static IrInstruction *ir_analyze_instruction_bswap(IrAnalyze *ira, IrInstruction
         if (val == nullptr)
             return ira->codegen->invalid_instruction;
         if (val->special == ConstValSpecialUndef)
-            return ir_const_undef(ira, &instruction->base, int_type);
+            return ir_const_undef(ira, &instruction->base, op_type);
 
-        IrInstruction *result = ir_const(ira, &instruction->base, int_type);
+        IrInstruction *result = ir_const(ira, &instruction->base, op_type);
         size_t buf_size = int_type->data.integral.bit_count / 8;
         uint8_t *buf = allocate_nonzero<uint8_t>(buf_size);
-        bigint_write_twos_complement(&val->data.x_bigint, buf, int_type->data.integral.bit_count, true);
-        bigint_read_twos_complement(&result->value.data.x_bigint, buf, int_type->data.integral.bit_count, false,
-                int_type->data.integral.is_signed);
+        if (is_vector) {
+            expand_undef_array(ira->codegen, val);
+            result->value.data.x_array.data.s_none.elements = create_const_vals(op_type->data.vector.len);
+            for (unsigned i = 0; i < op_type->data.vector.len; i += 1) {
+                ConstExprValue *op_elem_val = &val->data.x_array.data.s_none.elements[i];
+                if ((err = ir_resolve_const_val(ira->codegen, ira->new_irb.exec, instruction->base.source_node,
+                    op_elem_val, UndefOk)))
+                {
+                    return ira->codegen->invalid_instruction;
+                }
+                ConstExprValue *result_elem_val = &result->value.data.x_array.data.s_none.elements[i];
+                result_elem_val->type = int_type;
+                result_elem_val->special = op_elem_val->special;
+                if (op_elem_val->special == ConstValSpecialUndef)
+                    continue;
+
+                bigint_write_twos_complement(&op_elem_val->data.x_bigint, buf, int_type->data.integral.bit_count, true);
+                bigint_read_twos_complement(&result->value.data.x_array.data.s_none.elements[i].data.x_bigint,
+                        buf, int_type->data.integral.bit_count, false,
+                        int_type->data.integral.is_signed);
+            }
+        } else {
+            bigint_write_twos_complement(&val->data.x_bigint, buf, int_type->data.integral.bit_count, true);
+            bigint_read_twos_complement(&result->value.data.x_bigint, buf, int_type->data.integral.bit_count, false,
+                    int_type->data.integral.is_signed);
+        }
+        free(buf);
         return result;
     }
 
     IrInstruction *result = ir_build_bswap(&ira->new_irb, instruction->base.scope,
             instruction->base.source_node, nullptr, op);
-    result->value.type = int_type;
+    result->value.type = op_type;
     return result;
 }
 
diff --git a/test/stage1/behavior/byteswap.zig b/test/stage1/behavior/byteswap.zig
index 3e7c34cb85..d8fc554808 100644
--- a/test/stage1/behavior/byteswap.zig
+++ b/test/stage1/behavior/byteswap.zig
@@ -1,32 +1,62 @@
 const std = @import("std");
 const expect = std.testing.expect;
 
-test "@byteSwap" {
-    comptime testByteSwap();
-    testByteSwap();
+test "@byteSwap integers" {
+    const ByteSwapIntTest = struct {
+        fn run() void {
+            t(u0, 0, 0); // first group: inputs given as integer literals
+            t(u8, 0x12, 0x12);
+            t(u16, 0x1234, 0x3412);
+            t(u24, 0x123456, 0x563412);
+            t(u32, 0x12345678, 0x78563412);
+            t(u40, 0x123456789a, 0x9a78563412);
+            t(i48, 0x123456789abc, @bitCast(i48, u48(0xbc9a78563412)));
+            t(u56, 0x123456789abcde, 0xdebc9a78563412);
+            t(u64, 0x123456789abcdef1, 0xf1debc9a78563412);
+            t(u128, 0x123456789abcdef11121314151617181, 0x8171615141312111f1debc9a78563412);
+
+            t(u0, u0(0), 0); // second group: inputs given as explicitly typed values
+            t(i8, i8(-50), -50);
+            t(i16, @bitCast(i16, u16(0x1234)), @bitCast(i16, u16(0x3412)));
+            t(i24, @bitCast(i24, u24(0x123456)), @bitCast(i24, u24(0x563412)));
+            t(i32, @bitCast(i32, u32(0x12345678)), @bitCast(i32, u32(0x78563412)));
+            t(i40, @bitCast(i40, u40(0x123456789a)), @bitCast(i40, u40(0x9a78563412))); // i40, like its signed neighbours
+            t(i48, @bitCast(i48, u48(0x123456789abc)), @bitCast(i48, u48(0xbc9a78563412)));
+            t(i56, @bitCast(i56, u56(0x123456789abcde)), @bitCast(i56, u56(0xdebc9a78563412)));
+            t(i64, @bitCast(i64, u64(0x123456789abcdef1)), @bitCast(i64, u64(0xf1debc9a78563412)));
+            t(
+                i128,
+                @bitCast(i128, u128(0x123456789abcdef11121314151617181)),
+                @bitCast(i128, u128(0x8171615141312111f1debc9a78563412)),
+            );
+        }
+        fn t(comptime I: type, input: I, expected_output: I) void { // checks one (type, input, expected) case
+            std.testing.expectEqual(expected_output, @byteSwap(I, input));
+        }
+    };
+    comptime ByteSwapIntTest.run(); // same cases evaluated at compile time
+    ByteSwapIntTest.run(); // and again as a regular call
 }
 
-fn testByteSwap() void {
-    expect(@byteSwap(u0, 0) == 0);
-    expect(@byteSwap(u8, 0x12) == 0x12);
-    expect(@byteSwap(u16, 0x1234) == 0x3412);
-    expect(@byteSwap(u24, 0x123456) == 0x563412);
-    expect(@byteSwap(u32, 0x12345678) == 0x78563412);
-    expect(@byteSwap(u40, 0x123456789a) == 0x9a78563412);
-    expect(@byteSwap(i48, 0x123456789abc) == @bitCast(i48, u48(0xbc9a78563412)));
-    expect(@byteSwap(u56, 0x123456789abcde) == 0xdebc9a78563412);
-    expect(@byteSwap(u64, 0x123456789abcdef1) == 0xf1debc9a78563412);
-    expect(@byteSwap(u128, 0x123456789abcdef11121314151617181) == 0x8171615141312111f1debc9a78563412);
+test "@byteSwap vectors" {
+    const ByteSwapVectorTest = struct {
+        fn run() void {
+            t(u8, 2, [_]u8{ 0x12, 0x13 }, [_]u8{ 0x12, 0x13 }); // single-byte elements come back unchanged
+            t(u16, 2, [_]u16{ 0x1234, 0x2345 }, [_]u16{ 0x3412, 0x4523 }); // each element is swapped independently
+            t(u24, 2, [_]u24{ 0x123456, 0x234567 }, [_]u24{ 0x563412, 0x674523 }); // odd byte count: 3 bytes reversed per element
+        }
 
-    expect(@byteSwap(u0, u0(0)) == 0);
-    expect(@byteSwap(i8, i8(-50)) == -50);
-    expect(@byteSwap(i16, @bitCast(i16, u16(0x1234))) == @bitCast(i16, u16(0x3412)));
-    expect(@byteSwap(i24, @bitCast(i24, u24(0x123456))) == @bitCast(i24, u24(0x563412)));
-    expect(@byteSwap(i32, @bitCast(i32, u32(0x12345678))) == @bitCast(i32, u32(0x78563412)));
-    expect(@byteSwap(u40, @bitCast(i40, u40(0x123456789a))) == u40(0x9a78563412));
-    expect(@byteSwap(i48, @bitCast(i48, u48(0x123456789abc))) == @bitCast(i48, u48(0xbc9a78563412)));
-    expect(@byteSwap(i56, @bitCast(i56, u56(0x123456789abcde))) == @bitCast(i56, u56(0xdebc9a78563412)));
-    expect(@byteSwap(i64, @bitCast(i64, u64(0x123456789abcdef1))) == @bitCast(i64, u64(0xf1debc9a78563412)));
-    expect(@byteSwap(i128, @bitCast(i128, u128(0x123456789abcdef11121314151617181))) ==
-        @bitCast(i128, u128(0x8171615141312111f1debc9a78563412)));
+        fn t( // checks one case: element type I, length n, input and expected values
+            comptime I: type,
+            comptime n: comptime_int,
+            input: @Vector(n, I),
+            expected_vector: @Vector(n, I),
+        ) void {
+            const actual_output: [n]I = @byteSwap(I, input); // element type is passed, operand is the vector; result stored as an array
+            const expected_output: [n]I = expected_vector; // expected vector likewise stored as an array
+            std.testing.expectEqual(expected_output, actual_output); // comparison is done on the two arrays
+        }
+    };
+    comptime ByteSwapVectorTest.run(); // same cases evaluated at compile time
+    ByteSwapVectorTest.run(); // and again as a regular call
 }