From 9395162a7c41689bcd1c0c48f9eabffc1485fc74 Mon Sep 17 00:00:00 2001
From: Isaac Hier
Date: Mon, 2 Jul 2018 16:56:40 -0400
Subject: [PATCH 01/35] Debug enum issue
---
src/ir.cpp | 1 +
test/behavior.zig | 1 +
test/cases/switch_usize_enum_prongs.zig | 11 +++++++++++
3 files changed, 13 insertions(+)
create mode 100644 test/cases/switch_usize_enum_prongs.zig
diff --git a/src/ir.cpp b/src/ir.cpp
index b40c2dc36d..c16f3c09b8 100644
--- a/src/ir.cpp
+++ b/src/ir.cpp
@@ -19149,6 +19149,7 @@ static TypeTableEntry *ir_analyze_instruction_check_switch_prongs(IrAnalyze *ira
if (!end_val)
return ira->codegen->builtin_types.entry_invalid;
+ printf("%s\n", buf_ptr(&start_val->type->name));
assert(start_val->type->id == TypeTableEntryIdInt || start_val->type->id == TypeTableEntryIdComptimeInt);
assert(end_val->type->id == TypeTableEntryIdInt || end_val->type->id == TypeTableEntryIdComptimeInt);
AstNode *prev_node = rangeset_add_range(&rs, &start_val->data.x_bigint, &end_val->data.x_bigint,
diff --git a/test/behavior.zig b/test/behavior.zig
index d47eb8fd6c..803d4a5a08 100644
--- a/test/behavior.zig
+++ b/test/behavior.zig
@@ -52,6 +52,7 @@ comptime {
_ = @import("cases/switch.zig");
_ = @import("cases/switch_prong_err_enum.zig");
_ = @import("cases/switch_prong_implicit_cast.zig");
+ _ = @import("cases/switch_usize_enum_prongs.zig");
_ = @import("cases/syntax.zig");
_ = @import("cases/this.zig");
_ = @import("cases/try.zig");
diff --git a/test/cases/switch_usize_enum_prongs.zig b/test/cases/switch_usize_enum_prongs.zig
new file mode 100644
index 0000000000..b49615e887
--- /dev/null
+++ b/test/cases/switch_usize_enum_prongs.zig
@@ -0,0 +1,11 @@
+const E = enum(usize) { One, Two };
+
+test "aoeou" {
+ foo(1);
+}
+
+fn foo(x: usize) void {
+ switch (x) {
+ E.One => {},
+ }
+}
From 9cff23dbf9ff3da716a1c4397f9411eba09f6cac Mon Sep 17 00:00:00 2001
From: Isaac Hier
Date: Wed, 4 Jul 2018 13:27:10 -0400
Subject: [PATCH 02/35] Fix assertion crash on enum switch values
---
src/ir.cpp | 7 ++++++-
test/behavior.zig | 1 -
test/cases/switch_usize_enum_prongs.zig | 11 -----------
test/compile_errors.zig | 18 ++++++++++++++++++
4 files changed, 24 insertions(+), 13 deletions(-)
delete mode 100644 test/cases/switch_usize_enum_prongs.zig
diff --git a/src/ir.cpp b/src/ir.cpp
index c16f3c09b8..37d673bbd7 100644
--- a/src/ir.cpp
+++ b/src/ir.cpp
@@ -19149,9 +19149,14 @@ static TypeTableEntry *ir_analyze_instruction_check_switch_prongs(IrAnalyze *ira
if (!end_val)
return ira->codegen->builtin_types.entry_invalid;
- printf("%s\n", buf_ptr(&start_val->type->name));
+ if (start_val->type->id == TypeTableEntryIdEnum)
+ return ira->codegen->builtin_types.entry_invalid;
assert(start_val->type->id == TypeTableEntryIdInt || start_val->type->id == TypeTableEntryIdComptimeInt);
+
+ if (end_val->type->id == TypeTableEntryIdEnum)
+ return ira->codegen->builtin_types.entry_invalid;
assert(end_val->type->id == TypeTableEntryIdInt || end_val->type->id == TypeTableEntryIdComptimeInt);
+
AstNode *prev_node = rangeset_add_range(&rs, &start_val->data.x_bigint, &end_val->data.x_bigint,
start_value->source_node);
if (prev_node != nullptr) {
diff --git a/test/behavior.zig b/test/behavior.zig
index 803d4a5a08..d47eb8fd6c 100644
--- a/test/behavior.zig
+++ b/test/behavior.zig
@@ -52,7 +52,6 @@ comptime {
_ = @import("cases/switch.zig");
_ = @import("cases/switch_prong_err_enum.zig");
_ = @import("cases/switch_prong_implicit_cast.zig");
- _ = @import("cases/switch_usize_enum_prongs.zig");
_ = @import("cases/syntax.zig");
_ = @import("cases/this.zig");
_ = @import("cases/try.zig");
diff --git a/test/cases/switch_usize_enum_prongs.zig b/test/cases/switch_usize_enum_prongs.zig
deleted file mode 100644
index b49615e887..0000000000
--- a/test/cases/switch_usize_enum_prongs.zig
+++ /dev/null
@@ -1,11 +0,0 @@
-const E = enum(usize) { One, Two };
-
-test "aoeou" {
- foo(1);
-}
-
-fn foo(x: usize) void {
- switch (x) {
- E.One => {},
- }
-}
diff --git a/test/compile_errors.zig b/test/compile_errors.zig
index 7291a48a8f..8bd5480395 100644
--- a/test/compile_errors.zig
+++ b/test/compile_errors.zig
@@ -358,6 +358,24 @@ pub fn addCases(cases: *tests.CompileErrorContext) void {
".tmp_source.zig:3:14: note: other value is here",
);
+
+ cases.add(
+ "invalid cast from integral type to enum",
+ \\const E = enum(usize) { One, Two };
+ \\
+ \\export fn entry() void {
+ \\ foo(1);
+ \\}
+ \\
+ \\fn foo(x: usize) void {
+ \\ switch (x) {
+ \\ E.One => {},
+ \\ }
+ \\}
+ ,
+ ".tmp_source.zig:9:10: error: expected type 'usize', found 'E'"
+ );
+
cases.add(
"range operator in switch used on error set",
\\export fn entry() void {
From 1a5bd8888174ef2eb1881c1dd81d418b44625cc7 Mon Sep 17 00:00:00 2001
From: Andrew Kelley
Date: Fri, 6 Jul 2018 12:03:07 -0400
Subject: [PATCH 03/35] alternate implementation of previous commit
This strategy adds another field to the SwitchBr instruction,
which is the result of the CheckSwitchProngs instruction. The
result has type void and is otherwise unused, except that the
SwitchBr instruction will not perform analysis if the
CheckSwitchProngs instruction did not pass analysis. This allows
the CheckSwitchProngs instruction to do implicit casting for its
type checking, while preventing duplicate compile error messages.
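For illustration, a minimal Zig sketch of the case this guards (it mirrors
the test case from the previous two patches); the comments describe the
intended analysis flow, not code in this diff:

    const E = enum(usize) { One, Two };

    fn foo(x: usize) void {
        switch (x) {
            // CheckSwitchProngs implicitly casts each prong value to the switch
            // target type and reports: expected type 'usize', found 'E'.
            // SwitchBr then skips its own analysis, so the error appears once.
            E.One => {},
        }
    }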
---
src/all_types.hpp | 1 +
src/ir.cpp | 44 +++++++++++++++++++++++++++-----------------
2 files changed, 28 insertions(+), 17 deletions(-)
diff --git a/src/all_types.hpp b/src/all_types.hpp
index 5d449491c8..4d97be468c 100644
--- a/src/all_types.hpp
+++ b/src/all_types.hpp
@@ -2193,6 +2193,7 @@ struct IrInstructionSwitchBr {
size_t case_count;
IrInstructionSwitchBrCase *cases;
IrInstruction *is_comptime;
+ IrInstruction *switch_prongs_void;
};
struct IrInstructionSwitchVar {
diff --git a/src/ir.cpp b/src/ir.cpp
index 37d673bbd7..204ebb332a 100644
--- a/src/ir.cpp
+++ b/src/ir.cpp
@@ -1719,7 +1719,8 @@ static IrInstruction *ir_build_ctz_from(IrBuilder *irb, IrInstruction *old_instr
}
static IrInstruction *ir_build_switch_br(IrBuilder *irb, Scope *scope, AstNode *source_node, IrInstruction *target_value,
- IrBasicBlock *else_block, size_t case_count, IrInstructionSwitchBrCase *cases, IrInstruction *is_comptime)
+ IrBasicBlock *else_block, size_t case_count, IrInstructionSwitchBrCase *cases, IrInstruction *is_comptime,
+ IrInstruction *switch_prongs_void)
{
IrInstructionSwitchBr *instruction = ir_build_instruction<IrInstructionSwitchBr>(irb, scope, source_node);
instruction->base.value.type = irb->codegen->builtin_types.entry_unreachable;
@@ -1729,10 +1730,12 @@ static IrInstruction *ir_build_switch_br(IrBuilder *irb, Scope *scope, AstNode *
instruction->case_count = case_count;
instruction->cases = cases;
instruction->is_comptime = is_comptime;
+ instruction->switch_prongs_void = switch_prongs_void;
ir_ref_instruction(target_value, irb->current_basic_block);
if (is_comptime) ir_ref_instruction(is_comptime, irb->current_basic_block);
ir_ref_bb(else_block);
+ if (switch_prongs_void) ir_ref_instruction(switch_prongs_void, irb->current_basic_block);
for (size_t i = 0; i < case_count; i += 1) {
ir_ref_instruction(cases[i].value, irb->current_basic_block);
@@ -1744,10 +1747,10 @@ static IrInstruction *ir_build_switch_br(IrBuilder *irb, Scope *scope, AstNode *
static IrInstruction *ir_build_switch_br_from(IrBuilder *irb, IrInstruction *old_instruction,
IrInstruction *target_value, IrBasicBlock *else_block, size_t case_count,
- IrInstructionSwitchBrCase *cases, IrInstruction *is_comptime)
+ IrInstructionSwitchBrCase *cases, IrInstruction *is_comptime, IrInstruction *switch_prongs_void)
{
IrInstruction *new_instruction = ir_build_switch_br(irb, old_instruction->scope, old_instruction->source_node,
- target_value, else_block, case_count, cases, is_comptime);
+ target_value, else_block, case_count, cases, is_comptime, switch_prongs_void);
ir_link_new_instruction(new_instruction, old_instruction);
return new_instruction;
}
@@ -6035,13 +6038,13 @@ static IrInstruction *ir_gen_switch_expr(IrBuilder *irb, Scope *scope, AstNode *
}
- ir_build_check_switch_prongs(irb, scope, node, target_value, check_ranges.items, check_ranges.length,
+ IrInstruction *switch_prongs_void = ir_build_check_switch_prongs(irb, scope, node, target_value, check_ranges.items, check_ranges.length,
else_prong != nullptr);
if (cases.length == 0) {
ir_build_br(irb, scope, node, else_block, is_comptime);
} else {
- ir_build_switch_br(irb, scope, node, target_value, else_block, cases.length, cases.items, is_comptime);
+ ir_build_switch_br(irb, scope, node, target_value, else_block, cases.length, cases.items, is_comptime, switch_prongs_void);
}
if (!else_prong) {
@@ -6692,7 +6695,7 @@ static IrInstruction *ir_gen_await_expr(IrBuilder *irb, Scope *parent_scope, Ast
cases[1].value = ir_build_const_u8(irb, parent_scope, node, 1);
cases[1].block = cleanup_block;
ir_build_switch_br(irb, parent_scope, node, suspend_code, irb->exec->coro_suspend_block,
- 2, cases, const_bool_false);
+ 2, cases, const_bool_false, nullptr);
ir_set_cursor_at_end_and_append_block(irb, cleanup_block);
ir_gen_defers_for_block(irb, parent_scope, outer_scope, true);
@@ -6773,7 +6776,7 @@ static IrInstruction *ir_gen_suspend(IrBuilder *irb, Scope *parent_scope, AstNod
cases[1].value = ir_mark_gen(ir_build_const_u8(irb, parent_scope, node, 1));
cases[1].block = cleanup_block;
ir_mark_gen(ir_build_switch_br(irb, parent_scope, node, suspend_code, irb->exec->coro_suspend_block,
- 2, cases, const_bool_false));
+ 2, cases, const_bool_false, nullptr));
ir_set_cursor_at_end_and_append_block(irb, cleanup_block);
ir_gen_defers_for_block(irb, parent_scope, outer_scope, true);
@@ -7078,7 +7081,7 @@ bool ir_gen(CodeGen *codegen, AstNode *node, Scope *scope, IrExecutable *ir_exec
cases[0].block = invalid_resume_block;
cases[1].value = ir_build_const_u8(irb, scope, node, 1);
cases[1].block = irb->exec->coro_final_cleanup_block;
- ir_build_switch_br(irb, scope, node, suspend_code, irb->exec->coro_suspend_block, 2, cases, const_bool_false);
+ ir_build_switch_br(irb, scope, node, suspend_code, irb->exec->coro_suspend_block, 2, cases, const_bool_false, nullptr);
ir_set_cursor_at_end_and_append_block(irb, irb->exec->coro_suspend_block);
ir_build_coro_end(irb, scope, node);
@@ -15297,6 +15300,13 @@ static TypeTableEntry *ir_analyze_instruction_switch_br(IrAnalyze *ira,
if (type_is_invalid(target_value->value.type))
return ir_unreach_error(ira);
+ if (switch_br_instruction->switch_prongs_void != nullptr) {
+ if (type_is_invalid(switch_br_instruction->switch_prongs_void->other->value.type)) {
+ return ir_unreach_error(ira);
+ }
+ }
+
+
size_t case_count = switch_br_instruction->case_count;
bool is_comptime;
@@ -15387,7 +15397,7 @@ static TypeTableEntry *ir_analyze_instruction_switch_br(IrAnalyze *ira,
IrBasicBlock *new_else_block = ir_get_new_bb(ira, switch_br_instruction->else_block, &switch_br_instruction->base);
ir_build_switch_br_from(&ira->new_irb, &switch_br_instruction->base,
- target_value, new_else_block, case_count, cases, nullptr);
+ target_value, new_else_block, case_count, cases, nullptr, nullptr);
return ir_finish_anal(ira, ira->codegen->builtin_types.entry_unreachable);
}
@@ -19136,27 +19146,27 @@ static TypeTableEntry *ir_analyze_instruction_check_switch_prongs(IrAnalyze *ira
IrInstruction *start_value = range->start->other;
if (type_is_invalid(start_value->value.type))
return ira->codegen->builtin_types.entry_invalid;
+ IrInstruction *casted_start_value = ir_implicit_cast(ira, start_value, switch_type);
+ if (type_is_invalid(casted_start_value->value.type))
+ return ira->codegen->builtin_types.entry_invalid;
IrInstruction *end_value = range->end->other;
if (type_is_invalid(end_value->value.type))
return ira->codegen->builtin_types.entry_invalid;
+ IrInstruction *casted_end_value = ir_implicit_cast(ira, end_value, switch_type);
+ if (type_is_invalid(casted_end_value->value.type))
+ return ira->codegen->builtin_types.entry_invalid;
- ConstExprValue *start_val = ir_resolve_const(ira, start_value, UndefBad);
+ ConstExprValue *start_val = ir_resolve_const(ira, casted_start_value, UndefBad);
if (!start_val)
return ira->codegen->builtin_types.entry_invalid;
- ConstExprValue *end_val = ir_resolve_const(ira, end_value, UndefBad);
+ ConstExprValue *end_val = ir_resolve_const(ira, casted_end_value, UndefBad);
if (!end_val)
return ira->codegen->builtin_types.entry_invalid;
- if (start_val->type->id == TypeTableEntryIdEnum)
- return ira->codegen->builtin_types.entry_invalid;
assert(start_val->type->id == TypeTableEntryIdInt || start_val->type->id == TypeTableEntryIdComptimeInt);
-
- if (end_val->type->id == TypeTableEntryIdEnum)
- return ira->codegen->builtin_types.entry_invalid;
assert(end_val->type->id == TypeTableEntryIdInt || end_val->type->id == TypeTableEntryIdComptimeInt);
-
AstNode *prev_node = rangeset_add_range(&rs, &start_val->data.x_bigint, &end_val->data.x_bigint,
start_value->source_node);
if (prev_node != nullptr) {
From 6d793c0ea3679fe420199676e92e435c81617258 Mon Sep 17 00:00:00 2001
From: Andrew Kelley
Date: Fri, 6 Jul 2018 16:20:31 -0400
Subject: [PATCH 04/35] langref: add more internal links
---
doc/langref.html.in | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/doc/langref.html.in b/doc/langref.html.in
index 3cdcdc6e88..5c1cc130ac 100644
--- a/doc/langref.html.in
+++ b/doc/langref.html.in
@@ -679,7 +679,7 @@ fn divide(a: i32, b: i32) i32 {
{#header_open|Float Literals#}
Float literals have type comptime_float which is guaranteed to hold at least all possible values
- that the largest other floating point type can hold. Float literals implicitly cast to any other type.
+ that the largest other floating point type can hold. Float literals {#link|implicitly cast|Implicit Casts#} to any other type.
{#code_begin|syntax#}
const floating_point = 123.0E+77;
@@ -1604,7 +1604,7 @@ test "variable alignment" {
}
}
{#code_end#}
- In the same way that a *i32 can be implicitly cast to a
+ In the same way that a *i32 can be {#link|implicitly cast|Implicit Casts#} to a
*const i32, a pointer with a larger alignment can be implicitly
cast to a pointer with a smaller alignment, but not vice versa.
@@ -2968,7 +2968,7 @@ test "fn reflection" {
However right now it is hard coded to be a u16. See #768.
- You can implicitly cast an error from a subset to its superset:
+ You can {#link|implicitly cast|Implicit Casts#} an error from a subset to its superset:
{#code_begin|test#}
const std = @import("std");
@@ -3101,7 +3101,7 @@ test "parse u64" {
Within the function definition, you can see some return statements that return
an error, and at the bottom a return statement that returns a u64.
- Both types implicitly cast to error!u64.
+ Both types {#link|implicitly cast|Implicit Casts#} to error!u64.
What it looks like to use this function varies depending on what you're
From 1cf7511dc9d449473748675a5e734e81ea7c85c2 Mon Sep 17 00:00:00 2001
From: Andrew Kelley
Date: Fri, 6 Jul 2018 16:20:46 -0400
Subject: [PATCH 05/35] add compile error notes for where struct definitions
are
closes #1202
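As a rough sketch of the user-facing effect (it mirrors the "wrong same
named struct" test case added below): two identically named structs from
different files now get notes pointing at both declarations:

    // a.zig: pub const Foo = struct { x: i32 };
    // b.zig: pub const Foo = struct { z: f64 };
    const a = @import("a.zig");
    const b = @import("b.zig");

    fn bar(x: *b.Foo) void {}

    export fn entry() void {
        var a1: a.Foo = undefined;
        bar(&a1);
        // error: expected type '*Foo', found '*Foo'
        // note: Foo declared here   (a.zig)
        // note: Foo declared here   (b.zig)
    }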
---
src/analyze.cpp | 37 ++++++++++++++++++++++++++++++++
src/analyze.hpp | 1 +
src/ir.cpp | 47 +++++++++++++++++++++++++++++++----------
test/compile_errors.zig | 42 ++++++++++++++++++++++++++++++------
4 files changed, 110 insertions(+), 17 deletions(-)
diff --git a/src/analyze.cpp b/src/analyze.cpp
index ca582dfc4c..643a85634e 100644
--- a/src/analyze.cpp
+++ b/src/analyze.cpp
@@ -212,6 +212,43 @@ static uint8_t bits_needed_for_unsigned(uint64_t x) {
return (upper >= x) ? base : (base + 1);
}
+AstNode *type_decl_node(TypeTableEntry *type_entry) {
+ switch (type_entry->id) {
+ case TypeTableEntryIdInvalid:
+ zig_unreachable();
+ case TypeTableEntryIdStruct:
+ return type_entry->data.structure.decl_node;
+ case TypeTableEntryIdEnum:
+ return type_entry->data.enumeration.decl_node;
+ case TypeTableEntryIdUnion:
+ return type_entry->data.unionation.decl_node;
+ case TypeTableEntryIdOpaque:
+ case TypeTableEntryIdMetaType:
+ case TypeTableEntryIdVoid:
+ case TypeTableEntryIdBool:
+ case TypeTableEntryIdUnreachable:
+ case TypeTableEntryIdInt:
+ case TypeTableEntryIdFloat:
+ case TypeTableEntryIdPointer:
+ case TypeTableEntryIdArray:
+ case TypeTableEntryIdComptimeFloat:
+ case TypeTableEntryIdComptimeInt:
+ case TypeTableEntryIdUndefined:
+ case TypeTableEntryIdNull:
+ case TypeTableEntryIdOptional:
+ case TypeTableEntryIdErrorUnion:
+ case TypeTableEntryIdErrorSet:
+ case TypeTableEntryIdFn:
+ case TypeTableEntryIdNamespace:
+ case TypeTableEntryIdBlock:
+ case TypeTableEntryIdBoundFn:
+ case TypeTableEntryIdArgTuple:
+ case TypeTableEntryIdPromise:
+ return nullptr;
+ }
+ zig_unreachable();
+}
+
bool type_is_complete(TypeTableEntry *type_entry) {
switch (type_entry->id) {
case TypeTableEntryIdInvalid:
diff --git a/src/analyze.hpp b/src/analyze.hpp
index c2730197e2..5168509fe0 100644
--- a/src/analyze.hpp
+++ b/src/analyze.hpp
@@ -202,5 +202,6 @@ uint32_t get_coro_frame_align_bytes(CodeGen *g);
bool fn_type_can_fail(FnTypeId *fn_type_id);
bool type_can_fail(TypeTableEntry *type_entry);
bool fn_eval_cacheable(Scope *scope, TypeTableEntry *return_type);
+AstNode *type_decl_node(TypeTableEntry *type_entry);
#endif
diff --git a/src/ir.cpp b/src/ir.cpp
index 204ebb332a..3ad7c77645 100644
--- a/src/ir.cpp
+++ b/src/ir.cpp
@@ -82,6 +82,7 @@ struct ConstCastSliceMismatch;
struct ConstCastErrUnionErrSetMismatch;
struct ConstCastErrUnionPayloadMismatch;
struct ConstCastErrSetMismatch;
+struct ConstCastTypeMismatch;
struct ConstCastOnly {
ConstCastResultId id;
@@ -92,6 +93,7 @@ struct ConstCastOnly {
ConstCastOptionalMismatch *optional;
ConstCastErrUnionPayloadMismatch *error_union_payload;
ConstCastErrUnionErrSetMismatch *error_union_error_set;
+ ConstCastTypeMismatch *type_mismatch;
ConstCastOnly *return_type;
ConstCastOnly *async_allocator_type;
ConstCastOnly *null_wrap_ptr_child;
@@ -100,6 +102,11 @@ struct ConstCastOnly {
} data;
};
+struct ConstCastTypeMismatch {
+ TypeTableEntry *wanted_type;
+ TypeTableEntry *actual_type;
+};
+
struct ConstCastOptionalMismatch {
ConstCastOnly child;
TypeTableEntry *wanted_child;
@@ -8128,15 +8135,7 @@ static ConstCastOnly types_match_const_cast_only(IrAnalyze *ira, TypeTableEntry
}
// pointer const
- if (wanted_type->id == TypeTableEntryIdPointer &&
- actual_type->id == TypeTableEntryIdPointer &&
- (actual_type->data.pointer.ptr_len == wanted_type->data.pointer.ptr_len) &&
- (!actual_type->data.pointer.is_const || wanted_type->data.pointer.is_const) &&
- (!actual_type->data.pointer.is_volatile || wanted_type->data.pointer.is_volatile) &&
- actual_type->data.pointer.bit_offset == wanted_type->data.pointer.bit_offset &&
- actual_type->data.pointer.unaligned_bit_count == wanted_type->data.pointer.unaligned_bit_count &&
- actual_type->data.pointer.alignment >= wanted_type->data.pointer.alignment)
- {
+ if (wanted_type->id == TypeTableEntryIdPointer && actual_type->id == TypeTableEntryIdPointer) {
ConstCastOnly child = types_match_const_cast_only(ira, wanted_type->data.pointer.child_type,
actual_type->data.pointer.child_type, source_node, !wanted_type->data.pointer.is_const);
if (child.id != ConstCastResultIdOk) {
@@ -8145,8 +8144,17 @@ static ConstCastOnly types_match_const_cast_only(IrAnalyze *ira, TypeTableEntry
result.data.pointer_mismatch->child = child;
result.data.pointer_mismatch->wanted_child = wanted_type->data.pointer.child_type;
result.data.pointer_mismatch->actual_child = actual_type->data.pointer.child_type;
+ return result;
+ }
+ if ((actual_type->data.pointer.ptr_len == wanted_type->data.pointer.ptr_len) &&
+ (!actual_type->data.pointer.is_const || wanted_type->data.pointer.is_const) &&
+ (!actual_type->data.pointer.is_volatile || wanted_type->data.pointer.is_volatile) &&
+ actual_type->data.pointer.bit_offset == wanted_type->data.pointer.bit_offset &&
+ actual_type->data.pointer.unaligned_bit_count == wanted_type->data.pointer.unaligned_bit_count &&
+ actual_type->data.pointer.alignment >= wanted_type->data.pointer.alignment)
+ {
+ return result;
}
- return result;
}
// slice const
@@ -8341,6 +8349,9 @@ static ConstCastOnly types_match_const_cast_only(IrAnalyze *ira, TypeTableEntry
}
result.id = ConstCastResultIdType;
+ result.data.type_mismatch = allocate_nonzero<ConstCastTypeMismatch>(1);
+ result.data.type_mismatch->wanted_type = wanted_type;
+ result.data.type_mismatch->actual_type = actual_type;
return result;
}
@@ -10154,6 +10165,21 @@ static void report_recursive_error(IrAnalyze *ira, AstNode *source_node, ConstCa
report_recursive_error(ira, source_node, &cast_result->data.error_union_payload->child, msg);
break;
}
+ case ConstCastResultIdType: {
+ AstNode *wanted_decl_node = type_decl_node(cast_result->data.type_mismatch->wanted_type);
+ AstNode *actual_decl_node = type_decl_node(cast_result->data.type_mismatch->actual_type);
+ if (wanted_decl_node != nullptr) {
+ add_error_note(ira->codegen, parent_msg, wanted_decl_node,
+ buf_sprintf("%s declared here",
+ buf_ptr(&cast_result->data.type_mismatch->wanted_type->name)));
+ }
+ if (actual_decl_node != nullptr) {
+ add_error_note(ira->codegen, parent_msg, actual_decl_node,
+ buf_sprintf("%s declared here",
+ buf_ptr(&cast_result->data.type_mismatch->actual_type->name)));
+ }
+ break;
+ }
case ConstCastResultIdFnAlign: // TODO
case ConstCastResultIdFnCC: // TODO
case ConstCastResultIdFnVarArgs: // TODO
@@ -10163,7 +10189,6 @@ static void report_recursive_error(IrAnalyze *ira, AstNode *source_node, ConstCa
case ConstCastResultIdFnGenericArgCount: // TODO
case ConstCastResultIdFnArg: // TODO
case ConstCastResultIdFnArgNoAlias: // TODO
- case ConstCastResultIdType: // TODO
case ConstCastResultIdUnresolvedInferredErrSet: // TODO
case ConstCastResultIdAsyncAllocatorType: // TODO
case ConstCastResultIdNullWrapPtr: // TODO
diff --git a/test/compile_errors.zig b/test/compile_errors.zig
index 8bd5480395..d508c7c36c 100644
--- a/test/compile_errors.zig
+++ b/test/compile_errors.zig
@@ -1,6 +1,40 @@
const tests = @import("tests.zig");
pub fn addCases(cases: *tests.CompileErrorContext) void {
+ cases.addCase(x: {
+ const tc = cases.create(
+ "wrong same named struct",
+ \\const a = @import("a.zig");
+ \\const b = @import("b.zig");
+ \\
+ \\export fn entry() void {
+ \\ var a1: a.Foo = undefined;
+ \\ bar(&a1);
+ \\}
+ \\
+ \\fn bar(x: *b.Foo) void {}
+ ,
+ ".tmp_source.zig:6:10: error: expected type '*Foo', found '*Foo'",
+ ".tmp_source.zig:6:10: note: pointer type child 'Foo' cannot cast into pointer type child 'Foo'",
+ "a.zig:1:17: note: Foo declared here",
+ "b.zig:1:17: note: Foo declared here",
+ );
+
+ tc.addSourceFile("a.zig",
+ \\pub const Foo = struct {
+ \\ x: i32,
+ \\};
+ );
+
+ tc.addSourceFile("b.zig",
+ \\pub const Foo = struct {
+ \\ z: f64,
+ \\};
+ );
+
+ break :x tc;
+ });
+
cases.add(
"enum field value references enum",
\\pub const Foo = extern enum {
@@ -358,9 +392,7 @@ pub fn addCases(cases: *tests.CompileErrorContext) void {
".tmp_source.zig:3:14: note: other value is here",
);
-
- cases.add(
- "invalid cast from integral type to enum",
+ cases.add("invalid cast from integral type to enum",
\\const E = enum(usize) { One, Two };
\\
\\export fn entry() void {
@@ -372,9 +404,7 @@ pub fn addCases(cases: *tests.CompileErrorContext) void {
\\ E.One => {},
\\ }
\\}
- ,
- ".tmp_source.zig:9:10: error: expected type 'usize', found 'E'"
- );
+ , ".tmp_source.zig:9:10: error: expected type 'usize', found 'E'");
cases.add(
"range operator in switch used on error set",
From 4ad4cd26541258a84faf97e9fe07a69fadc57c66 Mon Sep 17 00:00:00 2001
From: Andrew Kelley
Date: Fri, 6 Jul 2018 17:27:44 -0400
Subject: [PATCH 06/35] fix iterating over a void slice
closes #1203
---
src/codegen.cpp | 24 ++++++++++++++++++------
test/cases/void.zig | 12 ++++++++++++
2 files changed, 30 insertions(+), 6 deletions(-)
diff --git a/src/codegen.cpp b/src/codegen.cpp
index 9c37c174d6..26ee106959 100644
--- a/src/codegen.cpp
+++ b/src/codegen.cpp
@@ -2992,18 +2992,26 @@ static LLVMValueRef ir_render_elem_ptr(CodeGen *g, IrExecutable *executable, IrI
return LLVMBuildInBoundsGEP(g->builder, array_ptr, indices, 1, "");
} else if (array_type->id == TypeTableEntryIdStruct) {
assert(array_type->data.structure.is_slice);
+ if (!type_has_bits(instruction->base.value.type)) {
+ if (safety_check_on) {
+ assert(LLVMGetTypeKind(LLVMTypeOf(array_ptr)) == LLVMIntegerTypeKind);
+ add_bounds_check(g, subscript_value, LLVMIntEQ, nullptr, LLVMIntULT, array_ptr);
+ }
+ return nullptr;
+ }
+
assert(LLVMGetTypeKind(LLVMTypeOf(array_ptr)) == LLVMPointerTypeKind);
assert(LLVMGetTypeKind(LLVMGetElementType(LLVMTypeOf(array_ptr))) == LLVMStructTypeKind);
if (safety_check_on) {
- size_t len_index = array_type->data.structure.fields[1].gen_index;
+ size_t len_index = array_type->data.structure.fields[slice_len_index].gen_index;
assert(len_index != SIZE_MAX);
LLVMValueRef len_ptr = LLVMBuildStructGEP(g->builder, array_ptr, (unsigned)len_index, "");
LLVMValueRef len = gen_load_untyped(g, len_ptr, 0, false, "");
add_bounds_check(g, subscript_value, LLVMIntEQ, nullptr, LLVMIntULT, len);
}
- size_t ptr_index = array_type->data.structure.fields[0].gen_index;
+ size_t ptr_index = array_type->data.structure.fields[slice_ptr_index].gen_index;
assert(ptr_index != SIZE_MAX);
LLVMValueRef ptr_ptr = LLVMBuildStructGEP(g->builder, array_ptr, (unsigned)ptr_index, "");
LLVMValueRef ptr = gen_load_untyped(g, ptr_ptr, 0, false, "");
@@ -3983,11 +3991,15 @@ static LLVMValueRef ir_render_slice(CodeGen *g, IrExecutable *executable, IrInst
add_bounds_check(g, start_val, LLVMIntEQ, nullptr, LLVMIntULE, end_val);
}
- LLVMValueRef ptr_field_ptr = LLVMBuildStructGEP(g->builder, tmp_struct_ptr, slice_ptr_index, "");
- LLVMValueRef slice_start_ptr = LLVMBuildInBoundsGEP(g->builder, array_ptr, &start_val, 1, "");
- gen_store_untyped(g, slice_start_ptr, ptr_field_ptr, 0, false);
+ if (type_has_bits(array_type)) {
+ size_t gen_ptr_index = instruction->base.value.type->data.structure.fields[slice_ptr_index].gen_index;
+ LLVMValueRef ptr_field_ptr = LLVMBuildStructGEP(g->builder, tmp_struct_ptr, gen_ptr_index, "");
+ LLVMValueRef slice_start_ptr = LLVMBuildInBoundsGEP(g->builder, array_ptr, &start_val, 1, "");
+ gen_store_untyped(g, slice_start_ptr, ptr_field_ptr, 0, false);
+ }
- LLVMValueRef len_field_ptr = LLVMBuildStructGEP(g->builder, tmp_struct_ptr, slice_len_index, "");
+ size_t gen_len_index = instruction->base.value.type->data.structure.fields[slice_len_index].gen_index;
+ LLVMValueRef len_field_ptr = LLVMBuildStructGEP(g->builder, tmp_struct_ptr, gen_len_index, "");
LLVMValueRef len_value = LLVMBuildNSWSub(g->builder, end_val, start_val, "");
gen_store_untyped(g, len_value, len_field_ptr, 0, false);
diff --git a/test/cases/void.zig b/test/cases/void.zig
index ef91690878..7121ac664b 100644
--- a/test/cases/void.zig
+++ b/test/cases/void.zig
@@ -16,3 +16,15 @@ test "compare void with void compile time known" {
assert(foo.a == {});
}
}
+
+test "iterate over a void slice" {
+ var j: usize = 0;
+ for (times(10)) |_, i| {
+ assert(i == j);
+ j += 1;
+ }
+}
+
+fn times(n: usize) []const void {
+ return ([*]void)(undefined)[0..n];
+}
From e19f0b5d9c26f4d309df9cfc84b7e5dc04b10ed5 Mon Sep 17 00:00:00 2001
From: Andrew Kelley
Date: Fri, 6 Jul 2018 18:24:09 -0400
Subject: [PATCH 07/35] remove outdated semantic analysis documentation
---
doc/semantic_analysis.md | 74 ----------------------------------------
1 file changed, 74 deletions(-)
delete mode 100644 doc/semantic_analysis.md
diff --git a/doc/semantic_analysis.md b/doc/semantic_analysis.md
deleted file mode 100644
index 6e860aac42..0000000000
--- a/doc/semantic_analysis.md
+++ /dev/null
@@ -1,74 +0,0 @@
-# How Semantic Analysis Works
-
-We start with a set of files. Typically the user only has one entry point file,
-which imports the other files they want to use. However, the compiler may
-choose to add more files to the compilation, for example bootstrap.zig which
-contains the code that calls main.
-
-Our goal now is to treat everything that is marked with the `export` keyword
-as a root node, and then parse and semantically analyze as little as possible
-in order to fulfill these exports.
-
-So, some parts of the code very well may have uncaught semantic errors, but as
-long as the code is not referenced in any way, the compiler will not complain
-because the code may as well not exist. This is similar to the fact that code
-excluded from compilation with an `#ifdef` in C is not analyzed. Avoiding
-analyzing unused code will save compilation time - one of Zig's goals.
-
-So, for each file, we iterate over the top level declarations. The set of top
-level declarations are:
-
- * Function Definition
- * Global Variable Declaration
- * Container Declaration (struct or enum)
- * Error Value Declaration
- * Use Declaration
-
-Each of these can have `export` attached to them except for error value
-declarations and use declarations.
-
-When we see a top level declaration during this iteration, we determine its
-unique name identifier within the file. For example, for a function definition,
-the unique name identifier is simply its name. Using this name we add the top
-level declaration to a map.
-
-If the top level declaration is exported, we add it to a set of exported top
-level identifiers.
-
-If the top level declaration is a use declaration, we add it to a set of use
-declarations.
-
-If the top level declaration is an error value declaration, we assign it a value
-and increment the count of error values.
-
-After this preliminary iteration over the top level declarations, we iterate
-over the use declarations and resolve them. To resolve a use declaration, we
-analyze the associated expression, verify that its type is the namespace type,
-and then add all the items from the namespace into the top level declaration
-map for the current file.
-
-To analyze an expression, we recurse the abstract syntax tree of the
-expression. Whenever we must look up a symbol, if the symbol exists already,
-we can use it. Otherwise, we look it up in the top level declaration map.
-If it exists, we can use it. Otherwise, we interrupt resolving this use
-declaration to resolve the next one. If a dependency loop is detected, emit
-an error. If all use declarations are resolved yet the symbol we need still
-does not exist, emit an error.
-
-To analyze an `@import` expression, find the referenced file, parse it, and
-add it to the set of files to perform semantic analysis on.
-
-Proceed through the rest of the use declarations the same way.
-
-If we make it through the use declarations without an error, then we have a
-complete map of all globals that exist in the current file.
-
-Next we iterate over the set of exported top level declarations.
-
-If it's a function definition, add it to the set of exported function
-definitions and resolve the function prototype only. Otherwise, resolve the
-top level declaration completely. This may involve recursively resolving other
-top level declarations that expressions depend on.
-
-Finally, iterate over the set of exported function definitions and analyze the
-bodies.
From d8295c188946b0f07d62420c2f08c940f70b03ac Mon Sep 17 00:00:00 2001
From: Andrew Kelley
Date: Sat, 7 Jul 2018 00:25:32 -0400
Subject: [PATCH 08/35] add @popCount intrinsic
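A brief, non-authoritative usage sketch of the new builtin, mirroring the
langref text and the behavior test added below; the result-type comment
follows from the documented rule rather than from this diff:

    const assert = @import("std").debug.assert;

    test "popCount sketch" {
        var x: u32 = 0xaa;
        assert(@popCount(x) == 4); // for u32 the result type is the smallest
                                   // unsigned int that can represent 32 (u6)
        var y: i16 = -1;
        assert(@popCount(y) == 16); // signed operands are counted in two's complement
        comptime {
            assert(@popCount(0xaa) == 4); // comptime_int operand yields comptime_int
        }
    }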
---
doc/langref.html.in | 15 +++++++--
src/all_types.hpp | 12 ++++++++
src/analyze.cpp | 4 +++
src/bigint.cpp | 31 +++++++++++++++++++
src/bigint.hpp | 2 ++
src/codegen.cpp | 21 ++++++++++++-
src/ir.cpp | 68 +++++++++++++++++++++++++++++++++++++++++
src/ir_print.cpp | 9 ++++++
test/behavior.zig | 7 +++--
test/cases/popcount.zig | 24 +++++++++++++++
test/compile_errors.zig | 18 +++++++++++
11 files changed, 205 insertions(+), 6 deletions(-)
create mode 100644 test/cases/popcount.zig
diff --git a/doc/langref.html.in b/doc/langref.html.in
index 5c1cc130ac..8eaffb64ad 100644
--- a/doc/langref.html.in
+++ b/doc/langref.html.in
@@ -5013,7 +5013,7 @@ comptime {
If x is zero, @clz returns T.bit_count.
-
+ {#see_also|@ctz|@popCount#}
{#header_close#}
{#header_open|@cmpxchgStrong#}
@cmpxchgStrong(comptime T: type, ptr: *T, expected_value: T, new_value: T, success_order: AtomicOrder, fail_order: AtomicOrder) ?T
@@ -5149,6 +5149,7 @@ test "main" {
If x is zero, @ctz returns T.bit_count.
+ {#see_also|@clz|@popCount#}
{#header_close#}
{#header_open|@divExact#}
@divExact(numerator: T, denominator: T) T
@@ -5631,6 +5632,16 @@ test "call foo" {
{#see_also|Root Source File#}
{#header_close#}
+ {#header_open|@popCount#}
+ @popCount(integer: var) var
+ Counts the number of bits set in an integer.
+
+ If integer is known at {#link|comptime#}, the return type is comptime_int.
+ Otherwise, the return type is an unsigned integer with the minimum number
+ of bits that can represent the bit count of the integer type.
+
+ {#see_also|@ctz|@clz#}
+ {#header_close#}
{#header_open|@ptrCast#}
@ptrCast(comptime DestType: type, value: var) DestType
@@ -7337,7 +7348,7 @@ hljs.registerLanguage("zig", function(t) {
a = t.IR + "\\s*\\(",
c = {
keyword: "const align var extern stdcallcc nakedcc volatile export pub noalias inline struct packed enum union break return try catch test continue unreachable comptime and or asm defer errdefer if else switch while for fn use bool f32 f64 void type noreturn error i8 u8 i16 u16 i32 u32 i64 u64 isize usize i8w u8w i16w i32w u32w i64w u64w isizew usizew c_short c_ushort c_int c_uint c_long c_ulong c_longlong c_ulonglong resume cancel await async orelse",
- built_in: "atomicLoad breakpoint returnAddress frameAddress fieldParentPtr setFloatMode IntType OpaqueType compileError compileLog setCold setRuntimeSafety setEvalBranchQuota offsetOf memcpy inlineCall setGlobalLinkage divTrunc divFloor enumTagName intToPtr ptrToInt panic ptrCast intCast floatCast intToFloat floatToInt boolToInt bytesToSlice sliceToBytes errSetCast bitCast rem mod memset sizeOf alignOf alignCast maxValue minValue memberCount memberName memberType typeOf addWithOverflow subWithOverflow mulWithOverflow shlWithOverflow shlExact shrExact cInclude cDefine cUndef ctz clz import cImport errorName embedFile cmpxchgStrong cmpxchgWeak fence divExact truncate atomicRmw sqrt field typeInfo typeName newStackCall errorToInt intToError enumToInt intToEnum",
+ built_in: "atomicLoad breakpoint returnAddress frameAddress fieldParentPtr setFloatMode IntType OpaqueType compileError compileLog setCold setRuntimeSafety setEvalBranchQuota offsetOf memcpy inlineCall setGlobalLinkage divTrunc divFloor enumTagName intToPtr ptrToInt panic ptrCast intCast floatCast intToFloat floatToInt boolToInt bytesToSlice sliceToBytes errSetCast bitCast rem mod memset sizeOf alignOf alignCast maxValue minValue memberCount memberName memberType typeOf addWithOverflow subWithOverflow mulWithOverflow shlWithOverflow shlExact shrExact cInclude cDefine cUndef ctz clz popCount import cImport errorName embedFile cmpxchgStrong cmpxchgWeak fence divExact truncate atomicRmw sqrt field typeInfo typeName newStackCall errorToInt intToError enumToInt intToEnum",
literal: "true false null undefined"
},
n = [e, t.CLCM, t.CBCM, s, r];
diff --git a/src/all_types.hpp b/src/all_types.hpp
index 4d97be468c..6dcf1894d8 100644
--- a/src/all_types.hpp
+++ b/src/all_types.hpp
@@ -1352,6 +1352,7 @@ enum BuiltinFnId {
BuiltinFnIdCompileLog,
BuiltinFnIdCtz,
BuiltinFnIdClz,
+ BuiltinFnIdPopCount,
BuiltinFnIdImport,
BuiltinFnIdCImport,
BuiltinFnIdErrName,
@@ -1477,6 +1478,7 @@ bool type_id_eql(TypeId a, TypeId b);
enum ZigLLVMFnId {
ZigLLVMFnIdCtz,
ZigLLVMFnIdClz,
+ ZigLLVMFnIdPopCount,
ZigLLVMFnIdOverflowArithmetic,
ZigLLVMFnIdFloor,
ZigLLVMFnIdCeil,
@@ -1499,6 +1501,9 @@ struct ZigLLVMFnKey {
struct {
uint32_t bit_count;
} clz;
+ struct {
+ uint32_t bit_count;
+ } pop_count;
struct {
uint32_t bit_count;
} floating;
@@ -2050,6 +2055,7 @@ enum IrInstructionId {
IrInstructionIdUnionTag,
IrInstructionIdClz,
IrInstructionIdCtz,
+ IrInstructionIdPopCount,
IrInstructionIdImport,
IrInstructionIdCImport,
IrInstructionIdCInclude,
@@ -2545,6 +2551,12 @@ struct IrInstructionClz {
IrInstruction *value;
};
+struct IrInstructionPopCount {
+ IrInstruction base;
+
+ IrInstruction *value;
+};
+
struct IrInstructionUnionTag {
IrInstruction base;
diff --git a/src/analyze.cpp b/src/analyze.cpp
index 643a85634e..9b60f7374a 100644
--- a/src/analyze.cpp
+++ b/src/analyze.cpp
@@ -5976,6 +5976,8 @@ uint32_t zig_llvm_fn_key_hash(ZigLLVMFnKey x) {
return (uint32_t)(x.data.ctz.bit_count) * (uint32_t)810453934;
case ZigLLVMFnIdClz:
return (uint32_t)(x.data.clz.bit_count) * (uint32_t)2428952817;
+ case ZigLLVMFnIdPopCount:
+ return (uint32_t)(x.data.clz.bit_count) * (uint32_t)101195049;
case ZigLLVMFnIdFloor:
return (uint32_t)(x.data.floating.bit_count) * (uint32_t)1899859168;
case ZigLLVMFnIdCeil:
@@ -5998,6 +6000,8 @@ bool zig_llvm_fn_key_eql(ZigLLVMFnKey a, ZigLLVMFnKey b) {
return a.data.ctz.bit_count == b.data.ctz.bit_count;
case ZigLLVMFnIdClz:
return a.data.clz.bit_count == b.data.clz.bit_count;
+ case ZigLLVMFnIdPopCount:
+ return a.data.pop_count.bit_count == b.data.pop_count.bit_count;
case ZigLLVMFnIdFloor:
case ZigLLVMFnIdCeil:
case ZigLLVMFnIdSqrt:
diff --git a/src/bigint.cpp b/src/bigint.cpp
index bb227a7c3d..bf18b9a1bf 100644
--- a/src/bigint.cpp
+++ b/src/bigint.cpp
@@ -1593,6 +1593,37 @@ void bigint_append_buf(Buf *buf, const BigInt *op, uint64_t base) {
}
}
+size_t bigint_popcount_unsigned(const BigInt *bi) {
+ assert(!bi->is_negative);
+ if (bi->digit_count == 0)
+ return 0;
+
+ size_t count = 0;
+ size_t bit_count = bi->digit_count * 64;
+ for (size_t i = 0; i < bit_count; i += 1) {
+ if (bit_at_index(bi, i))
+ count += 1;
+ }
+ return count;
+}
+
+size_t bigint_popcount_signed(const BigInt *bi, size_t bit_count) {
+ if (bit_count == 0)
+ return 0;
+ if (bi->digit_count == 0)
+ return 0;
+
+ BigInt twos_comp = {0};
+ to_twos_complement(&twos_comp, bi, bit_count);
+
+ size_t count = 0;
+ for (size_t i = 0; i < bit_count; i += 1) {
+ if (bit_at_index(&twos_comp, i))
+ count += 1;
+ }
+ return count;
+}
+
size_t bigint_ctz(const BigInt *bi, size_t bit_count) {
if (bit_count == 0)
return 0;
diff --git a/src/bigint.hpp b/src/bigint.hpp
index 9f044c8722..48b222a227 100644
--- a/src/bigint.hpp
+++ b/src/bigint.hpp
@@ -81,6 +81,8 @@ void bigint_append_buf(Buf *buf, const BigInt *op, uint64_t base);
size_t bigint_ctz(const BigInt *bi, size_t bit_count);
size_t bigint_clz(const BigInt *bi, size_t bit_count);
+size_t bigint_popcount_signed(const BigInt *bi, size_t bit_count);
+size_t bigint_popcount_unsigned(const BigInt *bi);
size_t bigint_bits_needed(const BigInt *op);
diff --git a/src/codegen.cpp b/src/codegen.cpp
index 26ee106959..54e2da7d61 100644
--- a/src/codegen.cpp
+++ b/src/codegen.cpp
@@ -3426,14 +3426,22 @@ static LLVMValueRef ir_render_unwrap_maybe(CodeGen *g, IrExecutable *executable,
static LLVMValueRef get_int_builtin_fn(CodeGen *g, TypeTableEntry *int_type, BuiltinFnId fn_id) {
ZigLLVMFnKey key = {};
const char *fn_name;
+ uint32_t n_args;
if (fn_id == BuiltinFnIdCtz) {
fn_name = "cttz";
+ n_args = 2;
key.id = ZigLLVMFnIdCtz;
key.data.ctz.bit_count = (uint32_t)int_type->data.integral.bit_count;
} else if (fn_id == BuiltinFnIdClz) {
fn_name = "ctlz";
+ n_args = 2;
key.id = ZigLLVMFnIdClz;
key.data.clz.bit_count = (uint32_t)int_type->data.integral.bit_count;
+ } else if (fn_id == BuiltinFnIdPopCount) {
+ fn_name = "ctpop";
+ n_args = 1;
+ key.id = ZigLLVMFnIdPopCount;
+ key.data.pop_count.bit_count = (uint32_t)int_type->data.integral.bit_count;
} else {
zig_unreachable();
}
@@ -3448,7 +3456,7 @@ static LLVMValueRef get_int_builtin_fn(CodeGen *g, TypeTableEntry *int_type, Bui
int_type->type_ref,
LLVMInt1Type(),
};
- LLVMTypeRef fn_type = LLVMFunctionType(int_type->type_ref, param_types, 2, false);
+ LLVMTypeRef fn_type = LLVMFunctionType(int_type->type_ref, param_types, n_args, false);
LLVMValueRef fn_val = LLVMAddFunction(g->module, llvm_name, fn_type);
assert(LLVMGetIntrinsicID(fn_val));
@@ -3481,6 +3489,14 @@ static LLVMValueRef ir_render_ctz(CodeGen *g, IrExecutable *executable, IrInstru
return gen_widen_or_shorten(g, false, int_type, instruction->base.value.type, wrong_size_int);
}
+static LLVMValueRef ir_render_pop_count(CodeGen *g, IrExecutable *executable, IrInstructionPopCount *instruction) {
+ TypeTableEntry *int_type = instruction->value->value.type;
+ LLVMValueRef fn_val = get_int_builtin_fn(g, int_type, BuiltinFnIdPopCount);
+ LLVMValueRef operand = ir_llvm_value(g, instruction->value);
+ LLVMValueRef wrong_size_int = LLVMBuildCall(g->builder, fn_val, &operand, 1, "");
+ return gen_widen_or_shorten(g, false, int_type, instruction->base.value.type, wrong_size_int);
+}
+
static LLVMValueRef ir_render_switch_br(CodeGen *g, IrExecutable *executable, IrInstructionSwitchBr *instruction) {
LLVMValueRef target_value = ir_llvm_value(g, instruction->target_value);
LLVMBasicBlockRef else_block = instruction->else_block->llvm_block;
@@ -4831,6 +4847,8 @@ static LLVMValueRef ir_render_instruction(CodeGen *g, IrExecutable *executable,
return ir_render_clz(g, executable, (IrInstructionClz *)instruction);
case IrInstructionIdCtz:
return ir_render_ctz(g, executable, (IrInstructionCtz *)instruction);
+ case IrInstructionIdPopCount:
+ return ir_render_pop_count(g, executable, (IrInstructionPopCount *)instruction);
case IrInstructionIdSwitchBr:
return ir_render_switch_br(g, executable, (IrInstructionSwitchBr *)instruction);
case IrInstructionIdPhi:
@@ -6342,6 +6360,7 @@ static void define_builtin_fns(CodeGen *g) {
create_builtin_fn(g, BuiltinFnIdCUndef, "cUndef", 1);
create_builtin_fn(g, BuiltinFnIdCtz, "ctz", 1);
create_builtin_fn(g, BuiltinFnIdClz, "clz", 1);
+ create_builtin_fn(g, BuiltinFnIdPopCount, "popCount", 1);
create_builtin_fn(g, BuiltinFnIdImport, "import", 1);
create_builtin_fn(g, BuiltinFnIdCImport, "cImport", 1);
create_builtin_fn(g, BuiltinFnIdErrName, "errorName", 1);
diff --git a/src/ir.cpp b/src/ir.cpp
index 3ad7c77645..98b1bd85ad 100644
--- a/src/ir.cpp
+++ b/src/ir.cpp
@@ -427,6 +427,10 @@ static constexpr IrInstructionId ir_instruction_id(IrInstructionCtz *) {
return IrInstructionIdCtz;
}
+static constexpr IrInstructionId ir_instruction_id(IrInstructionPopCount *) {
+ return IrInstructionIdPopCount;
+}
+
static constexpr IrInstructionId ir_instruction_id(IrInstructionUnionTag *) {
return IrInstructionIdUnionTag;
}
@@ -1725,6 +1729,15 @@ static IrInstruction *ir_build_ctz_from(IrBuilder *irb, IrInstruction *old_instr
return new_instruction;
}
+static IrInstruction *ir_build_pop_count(IrBuilder *irb, Scope *scope, AstNode *source_node, IrInstruction *value) {
+ IrInstructionPopCount *instruction = ir_build_instruction<IrInstructionPopCount>(irb, scope, source_node);
+ instruction->value = value;
+
+ ir_ref_instruction(value, irb->current_basic_block);
+
+ return &instruction->base;
+}
+
static IrInstruction *ir_build_switch_br(IrBuilder *irb, Scope *scope, AstNode *source_node, IrInstruction *target_value,
IrBasicBlock *else_block, size_t case_count, IrInstructionSwitchBrCase *cases, IrInstruction *is_comptime,
IrInstruction *switch_prongs_void)
@@ -3841,6 +3854,16 @@ static IrInstruction *ir_gen_builtin_fn_call(IrBuilder *irb, Scope *scope, AstNo
IrInstruction *ctz = ir_build_ctz(irb, scope, node, arg0_value);
return ir_lval_wrap(irb, scope, ctz, lval);
}
+ case BuiltinFnIdPopCount:
+ {
+ AstNode *arg0_node = node->data.fn_call_expr.params.at(0);
+ IrInstruction *arg0_value = ir_gen_node(irb, arg0_node, scope);
+ if (arg0_value == irb->codegen->invalid_instruction)
+ return arg0_value;
+
+ IrInstruction *instr = ir_build_pop_count(irb, scope, node, arg0_value);
+ return ir_lval_wrap(irb, scope, instr, lval);
+ }
case BuiltinFnIdClz:
{
AstNode *arg0_node = node->data.fn_call_expr.params.at(0);
@@ -15275,6 +15298,48 @@ static TypeTableEntry *ir_analyze_instruction_clz(IrAnalyze *ira, IrInstructionC
}
}
+static TypeTableEntry *ir_analyze_instruction_pop_count(IrAnalyze *ira, IrInstructionPopCount *instruction) {
+ IrInstruction *value = instruction->value->other;
+ if (type_is_invalid(value->value.type))
+ return ira->codegen->builtin_types.entry_invalid;
+
+ if (value->value.type->id != TypeTableEntryIdInt && value->value.type->id != TypeTableEntryIdComptimeInt) {
+ ir_add_error(ira, value,
+ buf_sprintf("expected integer type, found '%s'", buf_ptr(&value->value.type->name)));
+ return ira->codegen->builtin_types.entry_invalid;
+ }
+
+ if (instr_is_comptime(value)) {
+ ConstExprValue *val = ir_resolve_const(ira, value, UndefBad);
+ if (!val)
+ return ira->codegen->builtin_types.entry_invalid;
+ if (bigint_cmp_zero(&val->data.x_bigint) != CmpLT) {
+ size_t result = bigint_popcount_unsigned(&val->data.x_bigint);
+ ConstExprValue *out_val = ir_build_const_from(ira, &instruction->base);
+ bigint_init_unsigned(&out_val->data.x_bigint, result);
+ return ira->codegen->builtin_types.entry_num_lit_int;
+ }
+ if (value->value.type->id == TypeTableEntryIdComptimeInt) {
+ Buf *val_buf = buf_alloc();
+ bigint_append_buf(val_buf, &val->data.x_bigint, 10);
+ ir_add_error(ira, &instruction->base,
+ buf_sprintf("@popCount on negative %s value %s",
+ buf_ptr(&value->value.type->name), buf_ptr(val_buf)));
+ return ira->codegen->builtin_types.entry_invalid;
+ }
+ size_t result = bigint_popcount_signed(&val->data.x_bigint, value->value.type->data.integral.bit_count);
+ ConstExprValue *out_val = ir_build_const_from(ira, &instruction->base);
+ bigint_init_unsigned(&out_val->data.x_bigint, result);
+ return ira->codegen->builtin_types.entry_num_lit_int;
+ }
+
+ IrInstruction *result = ir_build_pop_count(&ira->new_irb, instruction->base.scope,
+ instruction->base.source_node, value);
+ result->value.type = get_smallest_unsigned_int_type(ira->codegen, value->value.type->data.integral.bit_count);
+ ir_link_new_instruction(result, &instruction->base);
+ return result->value.type;
+}
+
static IrInstruction *ir_analyze_union_tag(IrAnalyze *ira, IrInstruction *source_instr, IrInstruction *value) {
if (type_is_invalid(value->value.type))
return ira->codegen->invalid_instruction;
@@ -20534,6 +20599,8 @@ static TypeTableEntry *ir_analyze_instruction_nocast(IrAnalyze *ira, IrInstructi
return ir_analyze_instruction_clz(ira, (IrInstructionClz *)instruction);
case IrInstructionIdCtz:
return ir_analyze_instruction_ctz(ira, (IrInstructionCtz *)instruction);
+ case IrInstructionIdPopCount:
+ return ir_analyze_instruction_pop_count(ira, (IrInstructionPopCount *)instruction);
case IrInstructionIdSwitchBr:
return ir_analyze_instruction_switch_br(ira, (IrInstructionSwitchBr *)instruction);
case IrInstructionIdSwitchTarget:
@@ -20892,6 +20959,7 @@ bool ir_has_side_effects(IrInstruction *instruction) {
case IrInstructionIdUnwrapOptional:
case IrInstructionIdClz:
case IrInstructionIdCtz:
+ case IrInstructionIdPopCount:
case IrInstructionIdSwitchVar:
case IrInstructionIdSwitchTarget:
case IrInstructionIdUnionTag:
diff --git a/src/ir_print.cpp b/src/ir_print.cpp
index 5e5a71382c..780cf9e756 100644
--- a/src/ir_print.cpp
+++ b/src/ir_print.cpp
@@ -501,6 +501,12 @@ static void ir_print_ctz(IrPrint *irp, IrInstructionCtz *instruction) {
fprintf(irp->f, ")");
}
+static void ir_print_pop_count(IrPrint *irp, IrInstructionPopCount *instruction) {
+ fprintf(irp->f, "@popCount(");
+ ir_print_other_instruction(irp, instruction->value);
+ fprintf(irp->f, ")");
+}
+
static void ir_print_switch_br(IrPrint *irp, IrInstructionSwitchBr *instruction) {
fprintf(irp->f, "switch (");
ir_print_other_instruction(irp, instruction->target_value);
@@ -1425,6 +1431,9 @@ static void ir_print_instruction(IrPrint *irp, IrInstruction *instruction) {
case IrInstructionIdCtz:
ir_print_ctz(irp, (IrInstructionCtz *)instruction);
break;
+ case IrInstructionIdPopCount:
+ ir_print_pop_count(irp, (IrInstructionPopCount *)instruction);
+ break;
case IrInstructionIdClz:
ir_print_clz(irp, (IrInstructionClz *)instruction);
break;
diff --git a/test/behavior.zig b/test/behavior.zig
index d47eb8fd6c..450dded56c 100644
--- a/test/behavior.zig
+++ b/test/behavior.zig
@@ -8,17 +8,17 @@ comptime {
_ = @import("cases/atomics.zig");
_ = @import("cases/bitcast.zig");
_ = @import("cases/bool.zig");
+ _ = @import("cases/bugs/1111.zig");
_ = @import("cases/bugs/394.zig");
_ = @import("cases/bugs/655.zig");
_ = @import("cases/bugs/656.zig");
_ = @import("cases/bugs/828.zig");
_ = @import("cases/bugs/920.zig");
- _ = @import("cases/bugs/1111.zig");
_ = @import("cases/byval_arg_var.zig");
_ = @import("cases/cast.zig");
_ = @import("cases/const_slice_child.zig");
- _ = @import("cases/coroutines.zig");
_ = @import("cases/coroutine_await_struct.zig");
+ _ = @import("cases/coroutines.zig");
_ = @import("cases/defer.zig");
_ = @import("cases/enum.zig");
_ = @import("cases/enum_with_members.zig");
@@ -36,11 +36,12 @@ comptime {
_ = @import("cases/math.zig");
_ = @import("cases/merge_error_sets.zig");
_ = @import("cases/misc.zig");
- _ = @import("cases/optional.zig");
_ = @import("cases/namespace_depends_on_compile_var/index.zig");
_ = @import("cases/new_stack_call.zig");
_ = @import("cases/null.zig");
+ _ = @import("cases/optional.zig");
_ = @import("cases/pointers.zig");
+ _ = @import("cases/popcount.zig");
_ = @import("cases/pub_enum/index.zig");
_ = @import("cases/ref_var_in_if_after_if_2nd_switch_prong.zig");
_ = @import("cases/reflection.zig");
diff --git a/test/cases/popcount.zig b/test/cases/popcount.zig
new file mode 100644
index 0000000000..7dc7f28c0e
--- /dev/null
+++ b/test/cases/popcount.zig
@@ -0,0 +1,24 @@
+const assert = @import("std").debug.assert;
+
+test "@popCount" {
+ comptime testPopCount();
+ testPopCount();
+}
+
+fn testPopCount() void {
+ {
+ var x: u32 = 0xaa;
+ assert(@popCount(x) == 4);
+ }
+ {
+ var x: u32 = 0xaaaaaaaa;
+ assert(@popCount(x) == 16);
+ }
+ {
+ var x: i16 = -1;
+ assert(@popCount(x) == 16);
+ }
+ comptime {
+ assert(@popCount(0b11111111000110001100010000100001000011000011100101010001) == 24);
+ }
+}
diff --git a/test/compile_errors.zig b/test/compile_errors.zig
index d508c7c36c..9071f0ad7e 100644
--- a/test/compile_errors.zig
+++ b/test/compile_errors.zig
@@ -1,6 +1,24 @@
const tests = @import("tests.zig");
pub fn addCases(cases: *tests.CompileErrorContext) void {
+ cases.add(
+ "@popCount - non-integer",
+ \\export fn entry(x: f32) u32 {
+ \\ return @popCount(x);
+ \\}
+ ,
+ ".tmp_source.zig:2:22: error: expected integer type, found 'f32'",
+ );
+
+ cases.add(
+ "@popCount - negative comptime_int",
+ \\comptime {
+ \\ _ = @popCount(-1);
+ \\}
+ ,
+ ".tmp_source.zig:2:9: error: @popCount on negative comptime_int value -1",
+ );
+
cases.addCase(x: {
const tc = cases.create(
"wrong same named struct",
From eb326e15530dd6dca4ccbe7dbfde7bf048de813e Mon Sep 17 00:00:00 2001
From: Andrew Kelley
Date: Thu, 5 Jul 2018 15:09:02 -0400
Subject: [PATCH 09/35] M:N threading
* add std.atomic.QueueMpsc.isEmpty
* make std.debug.global_allocator thread-safe
* std.event.Loop: now you have to choose between
- initSingleThreaded
- initMultiThreaded (a usage sketch follows at the end of this message)
* std.event.Loop multiplexes coroutines onto kernel threads
* Remove std.event.Loop.stop. Instead the event loop run() function
returns once there are no pending coroutines.
* fix crash in ir.cpp for calling methods under some conditions
* small progress on the self-hosted compiler: analyzing top level declarations
* Introduce std.event.Lock for synchronizing coroutines
* introduce std.event.Locked(T) for data that only 1 coroutine should
modify at once.
* make the self hosted compiler use multi threaded event loop
* make std.heap.DirectAllocator thread-safe
See #174
TODO:
* call sched_getaffinity instead of hard coding thread pool size 4
* support for Windows and MacOS
* #1194
* #1197
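A rough sketch of the new initialization choice, based only on the
call-site change to src-self-hosted/main.zig in this patch; the wrapper
function and the exact signature of run() are assumptions, not part of
the diff:

    const std = @import("std");
    const event = std.event;

    fn startLoop(allocator: *std.mem.Allocator) !void {
        var loop: event.Loop = undefined;
        try loop.initMultiThreaded(allocator); // or initSingleThreaded for the old behavior
        // run() now returns once there are no pending coroutines; Loop.stop() is gone.
        loop.run();
    }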
---
src-self-hosted/main.zig | 5 +-
src-self-hosted/module.zig | 255 +++++++++++++++-
src/ir.cpp | 2 +-
std/atomic/queue_mpsc.zig | 17 ++
std/debug/index.zig | 7 +-
std/event.zig | 582 ++++++++++++++++++++++++++++++++-----
std/heap.zig | 30 +-
std/mem.zig | 2 +-
std/os/index.zig | 39 ++-
std/os/linux/index.zig | 8 +
10 files changed, 833 insertions(+), 114 deletions(-)
diff --git a/src-self-hosted/main.zig b/src-self-hosted/main.zig
index d17fc94c82..fe94a4460a 100644
--- a/src-self-hosted/main.zig
+++ b/src-self-hosted/main.zig
@@ -384,7 +384,8 @@ fn buildOutputType(allocator: *Allocator, args: []const []const u8, out_type: Mo
const zig_lib_dir = introspect.resolveZigLibDir(allocator) catch os.exit(1);
defer allocator.free(zig_lib_dir);
- var loop = try event.Loop.init(allocator);
+ var loop: event.Loop = undefined;
+ try loop.initMultiThreaded(allocator);
var module = try Module.create(
&loop,
@@ -493,8 +494,6 @@ async fn processBuildEvents(module: *Module, watch: bool) void {
switch (build_event) {
Module.Event.Ok => {
std.debug.warn("Build succeeded\n");
- // for now we stop after 1
- module.loop.stop();
return;
},
Module.Event.Error => |err| {
diff --git a/src-self-hosted/module.zig b/src-self-hosted/module.zig
index cf27c826c8..5ce1a7965a 100644
--- a/src-self-hosted/module.zig
+++ b/src-self-hosted/module.zig
@@ -2,6 +2,7 @@ const std = @import("std");
const os = std.os;
const io = std.io;
const mem = std.mem;
+const Allocator = mem.Allocator;
const Buffer = std.Buffer;
const llvm = @import("llvm.zig");
const c = @import("c.zig");
@@ -13,6 +14,7 @@ const ArrayList = std.ArrayList;
const errmsg = @import("errmsg.zig");
const ast = std.zig.ast;
const event = std.event;
+const assert = std.debug.assert;
pub const Module = struct {
loop: *event.Loop,
@@ -81,6 +83,8 @@ pub const Module = struct {
link_out_file: ?[]const u8,
events: *event.Channel(Event),
+ exported_symbol_names: event.Locked(Decl.Table),
+
// TODO handle some of these earlier and report them in a way other than error codes
pub const BuildError = error{
OutOfMemory,
@@ -232,6 +236,7 @@ pub const Module = struct {
.test_name_prefix = null,
.emit_file_type = Emit.Binary,
.link_out_file = null,
+ .exported_symbol_names = event.Locked(Decl.Table).init(loop, Decl.Table.init(loop.allocator)),
});
}
@@ -272,38 +277,91 @@ pub const Module = struct {
return;
};
await (async self.events.put(Event.Ok) catch unreachable);
+ // for now we stop after 1
+ return;
}
}
async fn addRootSrc(self: *Module) !void {
const root_src_path = self.root_src_path orelse @panic("TODO handle null root src path");
+ // TODO async/await os.path.real
const root_src_real_path = os.path.real(self.a(), root_src_path) catch |err| {
try printError("unable to get real path '{}': {}", root_src_path, err);
return err;
};
errdefer self.a().free(root_src_real_path);
+ // TODO async/await readFileAlloc()
const source_code = io.readFileAlloc(self.a(), root_src_real_path) catch |err| {
try printError("unable to open '{}': {}", root_src_real_path, err);
return err;
};
errdefer self.a().free(source_code);
- var tree = try std.zig.parse(self.a(), source_code);
- defer tree.deinit();
+ var parsed_file = ParsedFile{
+ .tree = try std.zig.parse(self.a(), source_code),
+ .realpath = root_src_real_path,
+ };
+ errdefer parsed_file.tree.deinit();
- //var it = tree.root_node.decls.iterator();
- //while (it.next()) |decl_ptr| {
- // const decl = decl_ptr.*;
- // switch (decl.id) {
- // ast.Node.Comptime => @panic("TODO"),
- // ast.Node.VarDecl => @panic("TODO"),
- // ast.Node.UseDecl => @panic("TODO"),
- // ast.Node.FnDef => @panic("TODO"),
- // ast.Node.TestDecl => @panic("TODO"),
- // else => unreachable,
- // }
- //}
+ const tree = &parsed_file.tree;
+
+ // create empty struct for it
+ const decls = try Scope.Decls.create(self.a(), null);
+ errdefer decls.destroy();
+
+ var it = tree.root_node.decls.iterator(0);
+ while (it.next()) |decl_ptr| {
+ const decl = decl_ptr.*;
+ switch (decl.id) {
+ ast.Node.Id.Comptime => @panic("TODO"),
+ ast.Node.Id.VarDecl => @panic("TODO"),
+ ast.Node.Id.FnProto => {
+ const fn_proto = @fieldParentPtr(ast.Node.FnProto, "base", decl);
+
+ const name = if (fn_proto.name_token) |name_token| tree.tokenSlice(name_token) else {
+ @panic("TODO add compile error");
+ //try self.addCompileError(
+ // &parsed_file,
+ // fn_proto.fn_token,
+ // fn_proto.fn_token + 1,
+ // "missing function name",
+ //);
+ continue;
+ };
+
+ const fn_decl = try self.a().create(Decl.Fn{
+ .base = Decl{
+ .id = Decl.Id.Fn,
+ .name = name,
+ .visib = parseVisibToken(tree, fn_proto.visib_token),
+ .resolution = Decl.Resolution.Unresolved,
+ },
+ .value = Decl.Fn.Val{ .Unresolved = {} },
+ .fn_proto = fn_proto,
+ });
+ errdefer self.a().destroy(fn_decl);
+
+ // TODO make this parallel
+ try await try async self.addTopLevelDecl(tree, &fn_decl.base);
+ },
+ ast.Node.Id.TestDecl => @panic("TODO"),
+ else => unreachable,
+ }
+ }
+ }
+
+ async fn addTopLevelDecl(self: *Module, tree: *ast.Tree, decl: *Decl) !void {
+ const is_export = decl.isExported(tree);
+
+ {
+ const exported_symbol_names = await try async self.exported_symbol_names.acquire();
+ defer exported_symbol_names.release();
+
+ if (try exported_symbol_names.value.put(decl.name, decl)) |other_decl| {
+ @panic("TODO report compile error");
+ }
+ }
}
pub fn link(self: *Module, out_file: ?[]const u8) !void {
@@ -350,3 +408,172 @@ fn printError(comptime format: []const u8, args: ...) !void {
const out_stream = &stderr_file_out_stream.stream;
try out_stream.print(format, args);
}
+
+fn parseVisibToken(tree: *ast.Tree, optional_token_index: ?ast.TokenIndex) Visib {
+ if (optional_token_index) |token_index| {
+ const token = tree.tokens.at(token_index);
+ assert(token.id == Token.Id.Keyword_pub);
+ return Visib.Pub;
+ } else {
+ return Visib.Private;
+ }
+}
+
+pub const Scope = struct {
+ id: Id,
+ parent: ?*Scope,
+
+ pub const Id = enum {
+ Decls,
+ Block,
+ };
+
+ pub const Decls = struct {
+ base: Scope,
+ table: Decl.Table,
+
+ pub fn create(a: *Allocator, parent: ?*Scope) !*Decls {
+ const self = try a.create(Decls{
+ .base = Scope{
+ .id = Id.Decls,
+ .parent = parent,
+ },
+ .table = undefined,
+ });
+ errdefer a.destroy(self);
+
+ self.table = Decl.Table.init(a);
+ errdefer self.table.deinit();
+
+ return self;
+ }
+
+ pub fn destroy(self: *Decls) void {
+ self.table.deinit();
+ self.table.allocator.destroy(self);
+ self.* = undefined;
+ }
+ };
+
+ pub const Block = struct {
+ base: Scope,
+ };
+};
+
+pub const Visib = enum {
+ Private,
+ Pub,
+};
+
+pub const Decl = struct {
+ id: Id,
+ name: []const u8,
+ visib: Visib,
+ resolution: Resolution,
+
+ pub const Table = std.HashMap([]const u8, *Decl, mem.hash_slice_u8, mem.eql_slice_u8);
+
+ pub fn isExported(base: *const Decl, tree: *ast.Tree) bool {
+ switch (base.id) {
+ Id.Fn => {
+ const fn_decl = @fieldParentPtr(Fn, "base", base);
+ return fn_decl.isExported(tree);
+ },
+ else => return false,
+ }
+ }
+
+ pub const Resolution = enum {
+ Unresolved,
+ InProgress,
+ Invalid,
+ Ok,
+ };
+
+ pub const Id = enum {
+ Var,
+ Fn,
+ CompTime,
+ };
+
+ pub const Var = struct {
+ base: Decl,
+ };
+
+ pub const Fn = struct {
+ base: Decl,
+ value: Val,
+ fn_proto: *const ast.Node.FnProto,
+
+ // TODO https://github.com/ziglang/zig/issues/683 and then make this anonymous
+ pub const Val = union {
+ Unresolved: void,
+ Ok: *Value.Fn,
+ };
+
+ pub fn externLibName(self: Fn, tree: *ast.Tree) ?[]const u8 {
+ return if (self.fn_proto.extern_export_inline_token) |tok_index| x: {
+ const token = tree.tokens.at(tok_index);
+ break :x switch (token.id) {
+ Token.Id.Extern => tree.tokenSlicePtr(token),
+ else => null,
+ };
+ } else null;
+ }
+
+ pub fn isExported(self: Fn, tree: *ast.Tree) bool {
+ if (self.fn_proto.extern_export_inline_token) |tok_index| {
+ const token = tree.tokens.at(tok_index);
+ return token.id == Token.Id.Keyword_export;
+ } else {
+ return false;
+ }
+ }
+ };
+
+ pub const CompTime = struct {
+ base: Decl,
+ };
+};
+
+pub const Value = struct {
+ pub const Fn = struct {};
+};
+
+pub const Type = struct {
+ id: Id,
+
+ pub const Id = enum {
+ Type,
+ Void,
+ Bool,
+ NoReturn,
+ Int,
+ Float,
+ Pointer,
+ Array,
+ Struct,
+ ComptimeFloat,
+ ComptimeInt,
+ Undefined,
+ Null,
+ Optional,
+ ErrorUnion,
+ ErrorSet,
+ Enum,
+ Union,
+ Fn,
+ Opaque,
+ Promise,
+ };
+
+ pub const Struct = struct {
+ base: Type,
+ decls: *Scope.Decls,
+ };
+};
+
+pub const ParsedFile = struct {
+ tree: ast.Tree,
+ realpath: []const u8,
+};
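
An illustrative sketch of the behavior the duplicate-export check in addTopLevelDecl relies on: std.HashMap.put hands back the previously stored entry when the key already exists. The snippet below is hypothetical, not part of the patch, and assumes put returns the old value the way the Module code above uses it:

    const std = @import("std");
    const mem = std.mem;
    const assert = std.debug.assert;

    test "HashMap.put surfaces the previous entry for duplicate keys" {
        var table = std.HashMap([]const u8, usize, mem.hash_slice_u8, mem.eql_slice_u8).init(std.debug.global_allocator);
        defer table.deinit();

        // first export of this name: nothing previously stored
        assert((try table.put("main", 1)) == null);

        // second export of the same name: put returns the earlier entry,
        // which is where the compile error would be reported
        if (try table.put("main", 2)) |previous_entry| {} else {
            unreachable;
        }
    }
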
diff --git a/src/ir.cpp b/src/ir.cpp
index 98b1bd85ad..3fc8306339 100644
--- a/src/ir.cpp
+++ b/src/ir.cpp
@@ -13278,7 +13278,7 @@ static TypeTableEntry *ir_analyze_instruction_call(IrAnalyze *ira, IrInstruction
FnTableEntry *fn_table_entry = fn_ref->value.data.x_bound_fn.fn;
IrInstruction *first_arg_ptr = fn_ref->value.data.x_bound_fn.first_arg;
return ir_analyze_fn_call(ira, call_instruction, fn_table_entry, fn_table_entry->type_entry,
- nullptr, first_arg_ptr, is_comptime, call_instruction->fn_inline);
+ fn_ref, first_arg_ptr, is_comptime, call_instruction->fn_inline);
} else {
ir_add_error_node(ira, fn_ref->source_node,
buf_sprintf("type '%s' not a function", buf_ptr(&fn_ref->value.type->name)));
diff --git a/std/atomic/queue_mpsc.zig b/std/atomic/queue_mpsc.zig
index 8030565d7a..bc0a94258b 100644
--- a/std/atomic/queue_mpsc.zig
+++ b/std/atomic/queue_mpsc.zig
@@ -15,6 +15,8 @@ pub fn QueueMpsc(comptime T: type) type {
pub const Node = std.atomic.Stack(T).Node;
+ /// Not thread-safe. The call to init() must complete before any other functions are called.
+ /// No deinitialization required.
pub fn init() Self {
return Self{
.inboxes = []std.atomic.Stack(T){
@@ -26,12 +28,15 @@ pub fn QueueMpsc(comptime T: type) type {
};
}
+ /// Fully thread-safe. put() may be called from any thread at any time.
pub fn put(self: *Self, node: *Node) void {
const inbox_index = @atomicLoad(usize, &self.inbox_index, AtomicOrder.SeqCst);
const inbox = &self.inboxes[inbox_index];
inbox.push(node);
}
+ /// Must be called by only 1 consumer at a time. Every call to get() and isEmpty() must complete before
+ /// the next call to get().
pub fn get(self: *Self) ?*Node {
if (self.outbox.pop()) |node| {
return node;
@@ -43,6 +48,18 @@ pub fn QueueMpsc(comptime T: type) type {
}
return self.outbox.pop();
}
+
+ /// Must be called by only 1 consumer at a time. Every call to get() and isEmpty() must complete before
+ /// the next call to isEmpty().
+ pub fn isEmpty(self: *Self) bool {
+ if (!self.outbox.isEmpty()) return false;
+ const prev_inbox_index = @atomicRmw(usize, &self.inbox_index, AtomicRmwOp.Xor, 0x1, AtomicOrder.SeqCst);
+ const prev_inbox = &self.inboxes[prev_inbox_index];
+ while (prev_inbox.pop()) |node| {
+ self.outbox.push(node);
+ }
+ return self.outbox.isEmpty();
+ }
};
}
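
A single-threaded sketch of the contract documented above: producers may call put() freely, while a single consumer owns get() and isEmpty() (illustrative only, not part of the patch):

    const std = @import("std");
    const assert = std.debug.assert;

    test "QueueMpsc put/get/isEmpty from one thread" {
        const Q = std.atomic.QueueMpsc(i32);
        var queue = Q.init();
        assert(queue.isEmpty());

        var node = Q.Node{
            .next = undefined,
            .data = 42,
        };
        queue.put(&node);
        assert(!queue.isEmpty());

        const popped = queue.get() orelse unreachable;
        assert(popped.data == 42);
        assert(queue.isEmpty());
    }
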
diff --git a/std/debug/index.zig b/std/debug/index.zig
index 57b2dfc300..a5e1c313f0 100644
--- a/std/debug/index.zig
+++ b/std/debug/index.zig
@@ -11,6 +11,11 @@ const builtin = @import("builtin");
pub const FailingAllocator = @import("failing_allocator.zig").FailingAllocator;
+pub const runtime_safety = switch (builtin.mode) {
+ builtin.Mode.Debug, builtin.Mode.ReleaseSafe => true,
+ builtin.Mode.ReleaseFast, builtin.Mode.ReleaseSmall => false,
+};
+
/// Tries to write to stderr, unbuffered, and ignores any error returned.
/// Does not append a newline.
/// TODO atomic/multithread support
@@ -1098,7 +1103,7 @@ fn readILeb128(in_stream: var) !i64 {
/// This should only be used in temporary test programs.
pub const global_allocator = &global_fixed_allocator.allocator;
-var global_fixed_allocator = std.heap.FixedBufferAllocator.init(global_allocator_mem[0..]);
+var global_fixed_allocator = std.heap.ThreadSafeFixedBufferAllocator.init(global_allocator_mem[0..]);
var global_allocator_mem: [100 * 1024]u8 = undefined;
// TODO make thread safe
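
A small sketch of gating extra checks on the new std.debug.runtime_safety constant; checkedAdd is a made-up helper, not part of the patch:

    const std = @import("std");

    fn checkedAdd(a: u32, b: u32) u32 {
        if (std.debug.runtime_safety) {
            // Debug / ReleaseSafe: pay for an explicit overflow check
            var result: u32 = undefined;
            if (@addWithOverflow(u32, a, b, &result)) @panic("u32 overflow");
            return result;
        }
        // ReleaseFast / ReleaseSmall: wrap silently
        return a +% b;
    }

    test "runtime_safety gates extra checks" {
        std.debug.assert(checkedAdd(1, 2) == 3);
    }
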
diff --git a/std/event.zig b/std/event.zig
index c6ac04a9d0..2d69d0cb16 100644
--- a/std/event.zig
+++ b/std/event.zig
@@ -11,53 +11,69 @@ pub const TcpServer = struct {
handleRequestFn: async<*mem.Allocator> fn (*TcpServer, *const std.net.Address, *const std.os.File) void,
loop: *Loop,
- sockfd: i32,
+ sockfd: ?i32,
accept_coro: ?promise,
listen_address: std.net.Address,
waiting_for_emfile_node: PromiseNode,
+ listen_resume_node: event.Loop.ResumeNode,
const PromiseNode = std.LinkedList(promise).Node;
- pub fn init(loop: *Loop) !TcpServer {
- const sockfd = try std.os.posixSocket(posix.AF_INET, posix.SOCK_STREAM | posix.SOCK_CLOEXEC | posix.SOCK_NONBLOCK, posix.PROTO_tcp);
- errdefer std.os.close(sockfd);
-
+ pub fn init(loop: *Loop) TcpServer {
// TODO can't initialize handler coroutine here because we need well defined copy elision
return TcpServer{
.loop = loop,
- .sockfd = sockfd,
+ .sockfd = null,
.accept_coro = null,
.handleRequestFn = undefined,
.waiting_for_emfile_node = undefined,
.listen_address = undefined,
+ .listen_resume_node = event.Loop.ResumeNode{
+ .id = event.Loop.ResumeNode.Id.Basic,
+ .handle = undefined,
+ },
};
}
- pub fn listen(self: *TcpServer, address: *const std.net.Address, handleRequestFn: async<*mem.Allocator> fn (*TcpServer, *const std.net.Address, *const std.os.File) void) !void {
+ pub fn listen(
+ self: *TcpServer,
+ address: *const std.net.Address,
+ handleRequestFn: async<*mem.Allocator> fn (*TcpServer, *const std.net.Address, *const std.os.File) void,
+ ) !void {
self.handleRequestFn = handleRequestFn;
- try std.os.posixBind(self.sockfd, &address.os_addr);
- try std.os.posixListen(self.sockfd, posix.SOMAXCONN);
- self.listen_address = std.net.Address.initPosix(try std.os.posixGetSockName(self.sockfd));
+ const sockfd = try std.os.posixSocket(posix.AF_INET, posix.SOCK_STREAM | posix.SOCK_CLOEXEC | posix.SOCK_NONBLOCK, posix.PROTO_tcp);
+ errdefer std.os.close(sockfd);
+ self.sockfd = sockfd;
+
+ try std.os.posixBind(sockfd, &address.os_addr);
+ try std.os.posixListen(sockfd, posix.SOMAXCONN);
+ self.listen_address = std.net.Address.initPosix(try std.os.posixGetSockName(sockfd));
self.accept_coro = try async TcpServer.handler(self);
errdefer cancel self.accept_coro.?;
- try self.loop.addFd(self.sockfd, self.accept_coro.?);
- errdefer self.loop.removeFd(self.sockfd);
+ self.listen_resume_node.handle = self.accept_coro.?;
+ try self.loop.addFd(sockfd, &self.listen_resume_node);
+ errdefer self.loop.removeFd(sockfd);
+ }
+
+ /// Stop listening
+ pub fn close(self: *TcpServer) void {
+ self.loop.removeFd(self.sockfd.?);
+ std.os.close(self.sockfd.?);
}
pub fn deinit(self: *TcpServer) void {
- self.loop.removeFd(self.sockfd);
if (self.accept_coro) |accept_coro| cancel accept_coro;
- std.os.close(self.sockfd);
+ if (self.sockfd) |sockfd| std.os.close(sockfd);
}
pub async fn handler(self: *TcpServer) void {
while (true) {
var accepted_addr: std.net.Address = undefined;
- if (std.os.posixAccept(self.sockfd, &accepted_addr.os_addr, posix.SOCK_NONBLOCK | posix.SOCK_CLOEXEC)) |accepted_fd| {
+ if (std.os.posixAccept(self.sockfd.?, &accepted_addr.os_addr, posix.SOCK_NONBLOCK | posix.SOCK_CLOEXEC)) |accepted_fd| {
var socket = std.os.File.openHandle(accepted_fd);
_ = async self.handleRequestFn(self, accepted_addr, socket) catch |err| switch (err) {
error.OutOfMemory => {
@@ -95,32 +111,65 @@ pub const TcpServer = struct {
pub const Loop = struct {
allocator: *mem.Allocator,
- keep_running: bool,
next_tick_queue: std.atomic.QueueMpsc(promise),
os_data: OsData,
-
- const OsData = switch (builtin.os) {
- builtin.Os.linux => struct {
- epollfd: i32,
- },
- else => struct {},
- };
+ dispatch_lock: u8, // TODO make this a bool
+ pending_event_count: usize,
+ extra_threads: []*std.os.Thread,
+ final_resume_node: ResumeNode,
pub const NextTickNode = std.atomic.QueueMpsc(promise).Node;
+ pub const ResumeNode = struct {
+ id: Id,
+ handle: promise,
+
+ pub const Id = enum {
+ Basic,
+ Stop,
+ EventFd,
+ };
+
+ pub const EventFd = struct {
+ base: ResumeNode,
+ eventfd: i32,
+ };
+ };
+
+ /// After initialization, call run().
+ /// TODO copy elision / named return values so that the threads referencing *Loop
+ /// have the correct pointer value.
+ fn initSingleThreaded(self: *Loop, allocator: *mem.Allocator) !void {
+ return self.initInternal(allocator, 1);
+ }
+
/// The allocator must be thread-safe because we use it for multiplexing
/// coroutines onto kernel threads.
- pub fn init(allocator: *mem.Allocator) !Loop {
- var self = Loop{
- .keep_running = true,
+ /// After initialization, call run().
+ /// TODO copy elision / named return values so that the threads referencing *Loop
+ /// have the correct pointer value.
+ fn initMultiThreaded(self: *Loop, allocator: *mem.Allocator) !void {
+ // TODO check the actual cpu core count
+ return self.initInternal(allocator, 4);
+ }
+
+ /// Thread count is the total thread count. The thread pool size will be
+ /// max(thread_count - 1, 0)
+ fn initInternal(self: *Loop, allocator: *mem.Allocator, thread_count: usize) !void {
+ self.* = Loop{
+ .pending_event_count = 0,
.allocator = allocator,
.os_data = undefined,
.next_tick_queue = std.atomic.QueueMpsc(promise).init(),
+ .dispatch_lock = 1, // start locked so threads go directly into epoll wait
+ .extra_threads = undefined,
+ .final_resume_node = ResumeNode{
+ .id = ResumeNode.Id.Stop,
+ .handle = undefined,
+ },
};
- try self.initOsData();
+ try self.initOsData(thread_count);
errdefer self.deinitOsData();
-
- return self;
}
/// must call stop before deinit
@@ -128,13 +177,70 @@ pub const Loop = struct {
self.deinitOsData();
}
- const InitOsDataError = std.os.LinuxEpollCreateError;
+ const InitOsDataError = std.os.LinuxEpollCreateError || mem.Allocator.Error || std.os.LinuxEventFdError ||
+ std.os.SpawnThreadError || std.os.LinuxEpollCtlError;
- fn initOsData(self: *Loop) InitOsDataError!void {
+ const wakeup_bytes = []u8{0x1} ** 8;
+
+ fn initOsData(self: *Loop, thread_count: usize) InitOsDataError!void {
switch (builtin.os) {
builtin.Os.linux => {
- self.os_data.epollfd = try std.os.linuxEpollCreate(std.os.linux.EPOLL_CLOEXEC);
+ const extra_thread_count = thread_count - 1;
+ self.os_data.available_eventfd_resume_nodes = std.atomic.Stack(ResumeNode.EventFd).init();
+ self.os_data.eventfd_resume_nodes = try self.allocator.alloc(
+ std.atomic.Stack(ResumeNode.EventFd).Node,
+ extra_thread_count,
+ );
+ errdefer self.allocator.free(self.os_data.eventfd_resume_nodes);
+
+ errdefer {
+ while (self.os_data.available_eventfd_resume_nodes.pop()) |node| std.os.close(node.data.eventfd);
+ }
+ for (self.os_data.eventfd_resume_nodes) |*eventfd_node| {
+ eventfd_node.* = std.atomic.Stack(ResumeNode.EventFd).Node{
+ .data = ResumeNode.EventFd{
+ .base = ResumeNode{
+ .id = ResumeNode.Id.EventFd,
+ .handle = undefined,
+ },
+ .eventfd = try std.os.linuxEventFd(1, posix.EFD_CLOEXEC | posix.EFD_NONBLOCK),
+ },
+ .next = undefined,
+ };
+ self.os_data.available_eventfd_resume_nodes.push(eventfd_node);
+ }
+
+ self.os_data.epollfd = try std.os.linuxEpollCreate(posix.EPOLL_CLOEXEC);
errdefer std.os.close(self.os_data.epollfd);
+
+ self.os_data.final_eventfd = try std.os.linuxEventFd(0, posix.EFD_CLOEXEC | posix.EFD_NONBLOCK);
+ errdefer std.os.close(self.os_data.final_eventfd);
+
+ self.os_data.final_eventfd_event = posix.epoll_event{
+ .events = posix.EPOLLIN,
+ .data = posix.epoll_data{ .ptr = @ptrToInt(&self.final_resume_node) },
+ };
+ try std.os.linuxEpollCtl(
+ self.os_data.epollfd,
+ posix.EPOLL_CTL_ADD,
+ self.os_data.final_eventfd,
+ &self.os_data.final_eventfd_event,
+ );
+ self.extra_threads = try self.allocator.alloc(*std.os.Thread, extra_thread_count);
+ errdefer self.allocator.free(self.extra_threads);
+
+ var extra_thread_index: usize = 0;
+ errdefer {
+ while (extra_thread_index != 0) {
+ extra_thread_index -= 1;
+ // writing 8 bytes to an eventfd cannot fail
+ std.os.posixWrite(self.os_data.final_eventfd, wakeup_bytes) catch unreachable;
+ self.extra_threads[extra_thread_index].wait();
+ }
+ }
+ while (extra_thread_index < extra_thread_count) : (extra_thread_index += 1) {
+ self.extra_threads[extra_thread_index] = try std.os.spawnThread(self, workerRun);
+ }
},
else => {},
}
@@ -142,65 +248,154 @@ pub const Loop = struct {
fn deinitOsData(self: *Loop) void {
switch (builtin.os) {
- builtin.Os.linux => std.os.close(self.os_data.epollfd),
+ builtin.Os.linux => {
+ std.os.close(self.os_data.final_eventfd);
+ while (self.os_data.available_eventfd_resume_nodes.pop()) |node| std.os.close(node.data.eventfd);
+ std.os.close(self.os_data.epollfd);
+ self.allocator.free(self.os_data.eventfd_resume_nodes);
+ self.allocator.free(self.extra_threads);
+ },
else => {},
}
}
- pub fn addFd(self: *Loop, fd: i32, prom: promise) !void {
+ /// resume_node must live longer than the promise that it holds a reference to.
+ pub fn addFd(self: *Loop, fd: i32, resume_node: *ResumeNode) !void {
+ _ = @atomicRmw(usize, &self.pending_event_count, AtomicRmwOp.Add, 1, AtomicOrder.SeqCst);
+ errdefer {
+ _ = @atomicRmw(usize, &self.pending_event_count, AtomicRmwOp.Sub, 1, AtomicOrder.SeqCst);
+ }
+ try self.addFdNoCounter(fd, resume_node);
+ }
+
+ fn addFdNoCounter(self: *Loop, fd: i32, resume_node: *ResumeNode) !void {
var ev = std.os.linux.epoll_event{
.events = std.os.linux.EPOLLIN | std.os.linux.EPOLLOUT | std.os.linux.EPOLLET,
- .data = std.os.linux.epoll_data{ .ptr = @ptrToInt(prom) },
+ .data = std.os.linux.epoll_data{ .ptr = @ptrToInt(resume_node) },
};
try std.os.linuxEpollCtl(self.os_data.epollfd, std.os.linux.EPOLL_CTL_ADD, fd, &ev);
}
pub fn removeFd(self: *Loop, fd: i32) void {
+ self.removeFdNoCounter(fd);
+ _ = @atomicRmw(usize, &self.pending_event_count, AtomicRmwOp.Sub, 1, AtomicOrder.SeqCst);
+ }
+
+ fn removeFdNoCounter(self: *Loop, fd: i32) void {
std.os.linuxEpollCtl(self.os_data.epollfd, std.os.linux.EPOLL_CTL_DEL, fd, undefined) catch {};
}
- async fn waitFd(self: *Loop, fd: i32) !void {
+
+ pub async fn waitFd(self: *Loop, fd: i32) !void {
defer self.removeFd(fd);
+ var resume_node = ResumeNode{
+ .id = ResumeNode.Id.Basic,
+ .handle = undefined,
+ };
suspend |p| {
- try self.addFd(fd, p);
+ resume_node.handle = p;
+ try self.addFd(fd, &resume_node);
}
+ var a = &resume_node; // TODO better way to explicitly put memory in coro frame
}
- pub fn stop(self: *Loop) void {
- // TODO make atomic
- self.keep_running = false;
- // TODO activate an fd in the epoll set which should cancel all the promises
- }
-
- /// bring your own linked list node. this means it can't fail.
+ /// Bring your own linked list node. This means it can't fail.
pub fn onNextTick(self: *Loop, node: *NextTickNode) void {
+ _ = @atomicRmw(usize, &self.pending_event_count, AtomicRmwOp.Add, 1, AtomicOrder.SeqCst);
self.next_tick_queue.put(node);
}
pub fn run(self: *Loop) void {
- while (self.keep_running) {
- // TODO multiplex the next tick queue and the epoll event results onto a thread pool
- while (self.next_tick_queue.get()) |node| {
- resume node.data;
- }
- if (!self.keep_running) break;
-
- self.dispatchOsEvents();
+ _ = @atomicRmw(u8, &self.dispatch_lock, AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst);
+ self.workerRun();
+ for (self.extra_threads) |extra_thread| {
+ extra_thread.wait();
}
}
- fn dispatchOsEvents(self: *Loop) void {
- switch (builtin.os) {
- builtin.Os.linux => {
- var events: [16]std.os.linux.epoll_event = undefined;
- const count = std.os.linuxEpollWait(self.os_data.epollfd, events[0..], -1);
- for (events[0..count]) |ev| {
- const p = @intToPtr(promise, ev.data.ptr);
- resume p;
+ fn workerRun(self: *Loop) void {
+ start_over: while (true) {
+ if (@atomicRmw(u8, &self.dispatch_lock, AtomicRmwOp.Xchg, 1, AtomicOrder.SeqCst) == 0) {
+ while (self.next_tick_queue.get()) |next_tick_node| {
+ const handle = next_tick_node.data;
+ if (self.next_tick_queue.isEmpty()) {
+ // last node, just resume it
+ _ = @atomicRmw(u8, &self.dispatch_lock, AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst);
+ resume handle;
+ _ = @atomicRmw(usize, &self.pending_event_count, AtomicRmwOp.Sub, 1, AtomicOrder.SeqCst);
+ continue :start_over;
+ }
+
+ // non-last node, stick it in the epoll set so that
+ // other threads can get to it
+ if (self.os_data.available_eventfd_resume_nodes.pop()) |resume_stack_node| {
+ const eventfd_node = &resume_stack_node.data;
+ eventfd_node.base.handle = handle;
+ // the pending count is already accounted for
+ self.addFdNoCounter(eventfd_node.eventfd, &eventfd_node.base) catch |_| {
+ // fine, we didn't need it anyway
+ _ = @atomicRmw(u8, &self.dispatch_lock, AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst);
+ self.os_data.available_eventfd_resume_nodes.push(resume_stack_node);
+ resume handle;
+ _ = @atomicRmw(usize, &self.pending_event_count, AtomicRmwOp.Sub, 1, AtomicOrder.SeqCst);
+ continue :start_over;
+ };
+ } else {
+ // threads are too busy, can't add another eventfd to wake one up
+ _ = @atomicRmw(u8, &self.dispatch_lock, AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst);
+ resume handle;
+ _ = @atomicRmw(usize, &self.pending_event_count, AtomicRmwOp.Sub, 1, AtomicOrder.SeqCst);
+ continue :start_over;
+ }
}
- },
- else => {},
+
+ const pending_event_count = @atomicLoad(usize, &self.pending_event_count, AtomicOrder.SeqCst);
+ if (pending_event_count == 0) {
+ // cause all the threads to stop
+ // writing 8 bytes to an eventfd cannot fail
+ std.os.posixWrite(self.os_data.final_eventfd, wakeup_bytes) catch unreachable;
+ return;
+ }
+
+ _ = @atomicRmw(u8, &self.dispatch_lock, AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst);
+ }
+
+ // only process 1 event so we don't steal from other threads
+ var events: [1]std.os.linux.epoll_event = undefined;
+ const count = std.os.linuxEpollWait(self.os_data.epollfd, events[0..], -1);
+ for (events[0..count]) |ev| {
+ const resume_node = @intToPtr(*ResumeNode, ev.data.ptr);
+ const handle = resume_node.handle;
+ const resume_node_id = resume_node.id;
+ switch (resume_node_id) {
+ ResumeNode.Id.Basic => {},
+ ResumeNode.Id.Stop => return,
+ ResumeNode.Id.EventFd => {
+ const event_fd_node = @fieldParentPtr(ResumeNode.EventFd, "base", resume_node);
+ self.removeFdNoCounter(event_fd_node.eventfd);
+ const stack_node = @fieldParentPtr(std.atomic.Stack(ResumeNode.EventFd).Node, "data", event_fd_node);
+ self.os_data.available_eventfd_resume_nodes.push(stack_node);
+ },
+ }
+ resume handle;
+ if (resume_node_id == ResumeNode.Id.EventFd) {
+ _ = @atomicRmw(usize, &self.pending_event_count, AtomicRmwOp.Sub, 1, AtomicOrder.SeqCst);
+ }
+ }
}
}
+
+ const OsData = switch (builtin.os) {
+ builtin.Os.linux => struct {
+ epollfd: i32,
+ // pre-allocated eventfds. all permanently active.
+ // this is how we send promises to be resumed on other threads.
+ available_eventfd_resume_nodes: std.atomic.Stack(ResumeNode.EventFd),
+ eventfd_resume_nodes: []std.atomic.Stack(ResumeNode.EventFd).Node,
+ final_eventfd: i32,
+ final_eventfd_event: posix.epoll_event,
+ },
+ else => struct {},
+ };
};
/// many producer, many consumer, thread-safe, lock-free, runtime configurable buffer size
@@ -304,9 +499,7 @@ pub fn Channel(comptime T: type) type {
// TODO integrate this function with named return values
// so we can get rid of this extra result copy
var result: T = undefined;
- var debug_handle: usize = undefined;
suspend |handle| {
- debug_handle = @ptrToInt(handle);
var my_tick_node = Loop.NextTickNode{
.next = undefined,
.data = handle,
@@ -438,9 +631,8 @@ test "listen on a port, send bytes, receive bytes" {
const self = @fieldParentPtr(Self, "tcp_server", tcp_server);
var socket = _socket.*; // TODO https://github.com/ziglang/zig/issues/733
defer socket.close();
- const next_handler = async errorableHandler(self, _addr, socket) catch |err| switch (err) {
- error.OutOfMemory => @panic("unable to handle connection: out of memory"),
- };
+ // TODO guarantee elision of this allocation
+ const next_handler = async errorableHandler(self, _addr, socket) catch unreachable;
(await next_handler) catch |err| {
std.debug.panic("unable to handle connection: {}\n", err);
};
@@ -461,17 +653,18 @@ test "listen on a port, send bytes, receive bytes" {
const ip4addr = std.net.parseIp4("127.0.0.1") catch unreachable;
const addr = std.net.Address.initIp4(ip4addr, 0);
- var loop = try Loop.init(std.debug.global_allocator);
- var server = MyServer{ .tcp_server = try TcpServer.init(&loop) };
+ var loop: Loop = undefined;
+ try loop.initSingleThreaded(std.debug.global_allocator);
+ var server = MyServer{ .tcp_server = TcpServer.init(&loop) };
defer server.tcp_server.deinit();
try server.tcp_server.listen(addr, MyServer.handler);
- const p = try async doAsyncTest(&loop, server.tcp_server.listen_address);
+ const p = try async doAsyncTest(&loop, server.tcp_server.listen_address, &server.tcp_server);
defer cancel p;
loop.run();
}
-async fn doAsyncTest(loop: *Loop, address: *const std.net.Address) void {
+async fn doAsyncTest(loop: *Loop, address: *const std.net.Address, server: *TcpServer) void {
errdefer @panic("test failure");
var socket_file = try await try async event.connect(loop, address);
@@ -481,7 +674,7 @@ async fn doAsyncTest(loop: *Loop, address: *const std.net.Address) void {
const amt_read = try socket_file.read(buf[0..]);
const msg = buf[0..amt_read];
assert(mem.eql(u8, msg, "hello from server\n"));
- loop.stop();
+ server.close();
}
test "std.event.Channel" {
@@ -490,7 +683,9 @@ test "std.event.Channel" {
const allocator = &da.allocator;
- var loop = try Loop.init(allocator);
+ var loop: Loop = undefined;
+ // TODO make a multi threaded test
+ try loop.initSingleThreaded(allocator);
defer loop.deinit();
const channel = try Channel(i32).create(&loop, 0);
@@ -515,11 +710,248 @@ async fn testChannelGetter(loop: *Loop, channel: *Channel(i32)) void {
const value2_promise = try async channel.get();
const value2 = await value2_promise;
assert(value2 == 4567);
-
- loop.stop();
}
async fn testChannelPutter(channel: *Channel(i32)) void {
await (async channel.put(1234) catch @panic("out of memory"));
await (async channel.put(4567) catch @panic("out of memory"));
}
+
+/// Thread-safe async/await lock.
+/// Does not make any syscalls - coroutines which are waiting for the lock are suspended, and
+/// are resumed when the lock is released, in order.
+pub const Lock = struct {
+ loop: *Loop,
+ shared_bit: u8, // TODO make this a bool
+ queue: Queue,
+ queue_empty_bit: u8, // TODO make this a bool
+
+ const Queue = std.atomic.QueueMpsc(promise);
+
+ pub const Held = struct {
+ lock: *Lock,
+
+ pub fn release(self: Held) void {
+ // Resume the next item from the queue.
+ if (self.lock.queue.get()) |node| {
+ self.lock.loop.onNextTick(node);
+ return;
+ }
+
+ // We need to release the lock.
+ _ = @atomicRmw(u8, &self.lock.queue_empty_bit, AtomicRmwOp.Xchg, 1, AtomicOrder.SeqCst);
+ _ = @atomicRmw(u8, &self.lock.shared_bit, AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst);
+
+ // There might be a queue item. If we know the queue is empty, we can be done,
+ // because the other actor will try to obtain the lock.
+ // But if there's a queue item, we are the actor which must loop and attempt
+ // to grab the lock again.
+ if (@atomicLoad(u8, &self.lock.queue_empty_bit, AtomicOrder.SeqCst) == 1) {
+ return;
+ }
+
+ while (true) {
+ const old_bit = @atomicRmw(u8, &self.lock.shared_bit, AtomicRmwOp.Xchg, 1, AtomicOrder.SeqCst);
+ if (old_bit != 0) {
+ // We did not obtain the lock. Great, the queue is someone else's problem.
+ return;
+ }
+
+ // Resume the next item from the queue.
+ if (self.lock.queue.get()) |node| {
+ self.lock.loop.onNextTick(node);
+ return;
+ }
+
+ // Release the lock again.
+ _ = @atomicRmw(u8, &self.lock.queue_empty_bit, AtomicRmwOp.Xchg, 1, AtomicOrder.SeqCst);
+ _ = @atomicRmw(u8, &self.lock.shared_bit, AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst);
+
+ // Find out if we can be done.
+ if (@atomicLoad(u8, &self.lock.queue_empty_bit, AtomicOrder.SeqCst) == 1) {
+ return;
+ }
+ }
+ }
+ };
+
+ pub fn init(loop: *Loop) Lock {
+ return Lock{
+ .loop = loop,
+ .shared_bit = 0,
+ .queue = Queue.init(),
+ .queue_empty_bit = 1,
+ };
+ }
+
+ /// Must be called when not locked. Not thread safe.
+ /// All calls to acquire() and release() must complete before calling deinit().
+ pub fn deinit(self: *Lock) void {
+ assert(self.shared_bit == 0);
+ while (self.queue.get()) |node| cancel node.data;
+ }
+
+ pub async fn acquire(self: *Lock) Held {
+ var my_tick_node: Loop.NextTickNode = undefined;
+
+ s: suspend |handle| {
+ my_tick_node.data = handle;
+ self.queue.put(&my_tick_node);
+
+ // At this point, we are in the queue, so we might have already been resumed and this coroutine
+ // frame might be destroyed. For the rest of the suspend block we cannot access the coroutine frame.
+
+ // We set this bit so that later we can rely on the fact that if queue_empty_bit is 1, some actor
+ // will attempt to grab the lock.
+ _ = @atomicRmw(u8, &self.queue_empty_bit, AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst);
+
+ while (true) {
+ const old_bit = @atomicRmw(u8, &self.shared_bit, AtomicRmwOp.Xchg, 1, AtomicOrder.SeqCst);
+ if (old_bit != 0) {
+ // We did not obtain the lock. Trust that our queue entry will resume us, and allow
+ // suspend to complete.
+ break;
+ }
+ // We got the lock. However we might have already been resumed from the queue.
+ if (self.queue.get()) |node| {
+ // Whether this node is us or someone else, we tail resume it.
+ resume node.data;
+ break;
+ } else {
+ // We already got resumed, and there are none left in the queue, which means that
+ // we aren't even supposed to hold the lock right now.
+ _ = @atomicRmw(u8, &self.queue_empty_bit, AtomicRmwOp.Xchg, 1, AtomicOrder.SeqCst);
+ _ = @atomicRmw(u8, &self.shared_bit, AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst);
+
+ // There might be a queue item. If we know the queue is empty, we can be done,
+ // because the other actor will try to obtain the lock.
+ // But if there's a queue item, we are the actor which must loop and attempt
+ // to grab the lock again.
+ if (@atomicLoad(u8, &self.queue_empty_bit, AtomicOrder.SeqCst) == 1) {
+ break;
+ } else {
+ continue;
+ }
+ }
+ unreachable;
+ }
+ }
+
+ // TODO this workaround to force my_tick_node to be in the coroutine frame should
+ // not be necessary
+ var trash1 = &my_tick_node;
+
+ return Held{ .lock = self };
+ }
+};
+
+/// Thread-safe async/await lock that protects one piece of data.
+/// Does not make any syscalls - coroutines which are waiting for the lock are suspended, and
+/// are resumed when the lock is released, in order.
+pub fn Locked(comptime T: type) type {
+ return struct {
+ lock: Lock,
+ private_data: T,
+
+ const Self = this;
+
+ pub const HeldLock = struct {
+ value: *T,
+ held: Lock.Held,
+
+ pub fn release(self: HeldLock) void {
+ self.held.release();
+ }
+ };
+
+ pub fn init(loop: *Loop, data: T) Self {
+ return Self{
+ .lock = Lock.init(loop),
+ .private_data = data,
+ };
+ }
+
+ pub fn deinit(self: *Self) void {
+ self.lock.deinit();
+ }
+
+ pub async fn acquire(self: *Self) HeldLock {
+ return HeldLock{
+ // TODO guaranteed allocation elision
+ .held = await (async self.lock.acquire() catch unreachable),
+ .value = &self.private_data,
+ };
+ }
+ };
+}
+
+test "std.event.Lock" {
+ var da = std.heap.DirectAllocator.init();
+ defer da.deinit();
+
+ const allocator = &da.allocator;
+
+ var loop: Loop = undefined;
+ try loop.initMultiThreaded(allocator);
+ defer loop.deinit();
+
+ var lock = Lock.init(&loop);
+ defer lock.deinit();
+
+ const handle = try async testLock(&loop, &lock);
+ defer cancel handle;
+ loop.run();
+
+ assert(mem.eql(i32, shared_test_data, [1]i32{3 * 10} ** 10));
+}
+
+async fn testLock(loop: *Loop, lock: *Lock) void {
+ const handle1 = async lockRunner(lock) catch @panic("out of memory");
+ var tick_node1 = Loop.NextTickNode{
+ .next = undefined,
+ .data = handle1,
+ };
+ loop.onNextTick(&tick_node1);
+
+ const handle2 = async lockRunner(lock) catch @panic("out of memory");
+ var tick_node2 = Loop.NextTickNode{
+ .next = undefined,
+ .data = handle2,
+ };
+ loop.onNextTick(&tick_node2);
+
+ const handle3 = async lockRunner(lock) catch @panic("out of memory");
+ var tick_node3 = Loop.NextTickNode{
+ .next = undefined,
+ .data = handle3,
+ };
+ loop.onNextTick(&tick_node3);
+
+ await handle1;
+ await handle2;
+ await handle3;
+
+ // TODO this is to force tick node memory to be in the coro frame
+ // there should be a way to make it explicit where the memory is
+ var a = &tick_node1;
+ var b = &tick_node2;
+ var c = &tick_node3;
+}
+
+var shared_test_data = [1]i32{0} ** 10;
+var shared_test_index: usize = 0;
+
+async fn lockRunner(lock: *Lock) void {
+ suspend; // resumed by onNextTick
+
+ var i: usize = 0;
+ while (i < 10) : (i += 1) {
+ const handle = await (async lock.acquire() catch @panic("out of memory"));
+ defer handle.release();
+
+ shared_test_index = 0;
+ while (shared_test_index < shared_test_data.len) : (shared_test_index += 1) {
+ shared_test_data[shared_test_index] = shared_test_data[shared_test_index] + 1;
+ }
+ }
+}
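
A sketch of how the Locked(T) wrapper is meant to be used, mirroring the way Module guards exported_symbol_names; bumpSharedCounter is a hypothetical coroutine that would sit next to the tests above in std/event.zig, not part of the patch:

    async fn bumpSharedCounter(counter: *Locked(i32)) void {
        // TODO guaranteed allocation elision, same caveat as Locked.acquire above
        const held = await (async counter.acquire() catch @panic("out of memory"));
        defer held.release();
        held.value.* += 1;
    }
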
diff --git a/std/heap.zig b/std/heap.zig
index 2e02733da1..bcace34afe 100644
--- a/std/heap.zig
+++ b/std/heap.zig
@@ -38,7 +38,7 @@ fn cFree(self: *Allocator, old_mem: []u8) void {
}
/// This allocator makes a syscall directly for every allocation and free.
-/// TODO make this thread-safe. The windows implementation will need some atomics.
+/// Thread-safe and lock-free.
pub const DirectAllocator = struct {
allocator: Allocator,
heap_handle: ?HeapHandle,
@@ -74,34 +74,34 @@ pub const DirectAllocator = struct {
const alloc_size = if (alignment <= os.page_size) n else n + alignment;
const addr = p.mmap(null, alloc_size, p.PROT_READ | p.PROT_WRITE, p.MAP_PRIVATE | p.MAP_ANONYMOUS, -1, 0);
if (addr == p.MAP_FAILED) return error.OutOfMemory;
-
if (alloc_size == n) return @intToPtr([*]u8, addr)[0..n];
- var aligned_addr = addr & ~usize(alignment - 1);
- aligned_addr += alignment;
+ const aligned_addr = (addr & ~usize(alignment - 1)) + alignment;
- //We can unmap the unused portions of our mmap, but we must only
- // pass munmap bytes that exist outside our allocated pages or it
- // will happily eat us too
+ // We can unmap the unused portions of our mmap, but we must only
+ // pass munmap bytes that exist outside our allocated pages or it
+ // will happily eat us too.
- //Since alignment > page_size, we are by definition on a page boundry
+ // Since alignment > page_size, we are by definition on a page boundary.
const unused_start = addr;
const unused_len = aligned_addr - 1 - unused_start;
- var err = p.munmap(unused_start, unused_len);
- debug.assert(p.getErrno(err) == 0);
+ const err = p.munmap(unused_start, unused_len);
+ assert(p.getErrno(err) == 0);
- //It is impossible that there is an unoccupied page at the top of our
- // mmap.
+ // It is impossible that there is an unoccupied page at the top of our
+ // mmap.
return @intToPtr([*]u8, aligned_addr)[0..n];
},
Os.windows => {
const amt = n + alignment + @sizeOf(usize);
- const heap_handle = self.heap_handle orelse blk: {
+ const optional_heap_handle = @atomicLoad(?HeapHandle, &self.heap_handle, builtin.AtomicOrder.SeqCst);
+ const heap_handle = optional_heap_handle orelse blk: {
const hh = os.windows.HeapCreate(os.windows.HEAP_NO_SERIALIZE, amt, 0) orelse return error.OutOfMemory;
- self.heap_handle = hh;
- break :blk hh;
+ const other_hh = @cmpxchgStrong(?HeapHandle, &self.heap_handle, null, hh, builtin.AtomicOrder.SeqCst, builtin.AtomicOrder.SeqCst) orelse break :blk hh;
+ _ = os.windows.HeapDestroy(hh);
+ break :blk other_hh;
};
const ptr = os.windows.HeapAlloc(heap_handle, 0, amt) orelse return error.OutOfMemory;
const root_addr = @ptrToInt(ptr);
diff --git a/std/mem.zig b/std/mem.zig
index b52d3e9f68..555e1e249d 100644
--- a/std/mem.zig
+++ b/std/mem.zig
@@ -6,7 +6,7 @@ const builtin = @import("builtin");
const mem = this;
pub const Allocator = struct {
- const Error = error{OutOfMemory};
+ pub const Error = error{OutOfMemory};
/// Allocate byte_count bytes and return them in a slice, with the
/// slice's pointer aligned at least to alignment bytes.
diff --git a/std/os/index.zig b/std/os/index.zig
index 52b36c351c..74a1b64f6e 100644
--- a/std/os/index.zig
+++ b/std/os/index.zig
@@ -2309,6 +2309,30 @@ pub fn linuxEpollWait(epfd: i32, events: []linux.epoll_event, timeout: i32) usiz
}
}
+pub const LinuxEventFdError = error{
+ InvalidFlagValue,
+ SystemResources,
+ ProcessFdQuotaExceeded,
+ SystemFdQuotaExceeded,
+
+ Unexpected,
+};
+
+pub fn linuxEventFd(initval: u32, flags: u32) LinuxEventFdError!i32 {
+ const rc = posix.eventfd(initval, flags);
+ const err = posix.getErrno(rc);
+ switch (err) {
+ 0 => return @intCast(i32, rc),
+ else => return unexpectedErrorPosix(err),
+
+ posix.EINVAL => return LinuxEventFdError.InvalidFlagValue,
+ posix.EMFILE => return LinuxEventFdError.ProcessFdQuotaExceeded,
+ posix.ENFILE => return LinuxEventFdError.SystemFdQuotaExceeded,
+ posix.ENODEV => return LinuxEventFdError.SystemResources,
+ posix.ENOMEM => return LinuxEventFdError.SystemResources,
+ }
+}
+
pub const PosixGetSockNameError = error{
/// Insufficient resources were available in the system to perform the operation.
SystemResources,
@@ -2605,10 +2629,17 @@ pub fn spawnThread(context: var, comptime startFn: var) SpawnThreadError!*Thread
const MainFuncs = struct {
extern fn linuxThreadMain(ctx_addr: usize) u8 {
- if (@sizeOf(Context) == 0) {
- return startFn({});
- } else {
- return startFn(@intToPtr(*const Context, ctx_addr).*);
+ const arg = if (@sizeOf(Context) == 0) {} else @intToPtr(*const Context, ctx_addr).*;
+
+ switch (@typeId(@typeOf(startFn).ReturnType)) {
+ builtin.TypeId.Int => {
+ return startFn(arg);
+ },
+ builtin.TypeId.Void => {
+ startFn(arg);
+ return 0;
+ },
+ else => @compileError("expected return type of startFn to be 'u8', 'noreturn', 'void', or '!void'"),
}
}
extern fn posixThreadMain(ctx: ?*c_void) ?*c_void {
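
With the return-type switch above, spawnThread accepts start functions that return void as well as u8. A minimal sketch (illustrative only, not part of the patch):

    const std = @import("std");
    const builtin = @import("builtin");
    const assert = std.debug.assert;

    fn bumpOnce(ctx: *i32) void {
        _ = @atomicRmw(i32, ctx, builtin.AtomicRmwOp.Add, 1, builtin.AtomicOrder.SeqCst);
    }

    test "spawnThread with a void-returning start function" {
        var shared: i32 = 0;
        const thread = try std.os.spawnThread(&shared, bumpOnce);
        thread.wait();
        assert(shared == 1);
    }
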
diff --git a/std/os/linux/index.zig b/std/os/linux/index.zig
index 65aa659c82..1c15be4887 100644
--- a/std/os/linux/index.zig
+++ b/std/os/linux/index.zig
@@ -523,6 +523,10 @@ pub const CLONE_NEWPID = 0x20000000;
pub const CLONE_NEWNET = 0x40000000;
pub const CLONE_IO = 0x80000000;
+pub const EFD_SEMAPHORE = 1;
+pub const EFD_CLOEXEC = O_CLOEXEC;
+pub const EFD_NONBLOCK = O_NONBLOCK;
+
pub const MS_RDONLY = 1;
pub const MS_NOSUID = 2;
pub const MS_NODEV = 4;
@@ -1221,6 +1225,10 @@ pub fn epoll_wait(epoll_fd: i32, events: [*]epoll_event, maxevents: u32, timeout
return syscall4(SYS_epoll_wait, @intCast(usize, epoll_fd), @ptrToInt(events), @intCast(usize, maxevents), @intCast(usize, timeout));
}
+pub fn eventfd(count: u32, flags: u32) usize {
+ return syscall2(SYS_eventfd2, count, flags);
+}
+
pub fn timerfd_create(clockid: i32, flags: u32) usize {
return syscall2(SYS_timerfd_create, @intCast(usize, clockid), @intCast(usize, flags));
}
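
A minimal sketch of the new eventfd plumbing, mirroring how the event loop wakes its worker threads with a single 8-byte write (Linux only; illustrative, not part of the patch):

    const std = @import("std");
    const builtin = @import("builtin");

    test "eventfd create and wake" {
        if (builtin.os != builtin.Os.linux) return;

        const fd = try std.os.linuxEventFd(0, std.os.linux.EFD_CLOEXEC | std.os.linux.EFD_NONBLOCK);
        defer std.os.close(fd);

        // the 8 bytes are a host-endian u64 added to the eventfd counter;
        // writing 8 bytes to an eventfd cannot fail
        const wakeup_bytes = []u8{0x1} ** 8;
        std.os.posixWrite(fd, wakeup_bytes) catch unreachable;
    }
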
From 57f36c420124b3b65d3036f10c4e8c675be29cf4 Mon Sep 17 00:00:00 2001
From: Andrew Kelley
Date: Thu, 5 Jul 2018 18:16:46 -0400
Subject: [PATCH 10/35] std.event.Loop: use EPOLLONESHOT to save 1 syscall
when a thread pool worker accepts a coroutine to resume
---
std/event.zig | 24 ++++++++++++++++--------
1 file changed, 16 insertions(+), 8 deletions(-)
diff --git a/std/event.zig b/std/event.zig
index 2d69d0cb16..5fd87b8fdd 100644
--- a/std/event.zig
+++ b/std/event.zig
@@ -132,6 +132,7 @@ pub const Loop = struct {
pub const EventFd = struct {
base: ResumeNode,
+ epoll_op: u32,
eventfd: i32,
};
};
@@ -204,6 +205,7 @@ pub const Loop = struct {
.handle = undefined,
},
.eventfd = try std.os.linuxEventFd(1, posix.EFD_CLOEXEC | posix.EFD_NONBLOCK),
+ .epoll_op = posix.EPOLL_CTL_ADD,
},
.next = undefined,
};
@@ -265,15 +267,20 @@ pub const Loop = struct {
errdefer {
_ = @atomicRmw(usize, &self.pending_event_count, AtomicRmwOp.Sub, 1, AtomicOrder.SeqCst);
}
- try self.addFdNoCounter(fd, resume_node);
+ try self.modFd(
+ fd,
+ posix.EPOLL_CTL_ADD,
+ std.os.linux.EPOLLIN | std.os.linux.EPOLLOUT | std.os.linux.EPOLLET,
+ resume_node,
+ );
}
- fn addFdNoCounter(self: *Loop, fd: i32, resume_node: *ResumeNode) !void {
+ pub fn modFd(self: *Loop, fd: i32, op: u32, events: u32, resume_node: *ResumeNode) !void {
var ev = std.os.linux.epoll_event{
- .events = std.os.linux.EPOLLIN | std.os.linux.EPOLLOUT | std.os.linux.EPOLLET,
+ .events = events,
.data = std.os.linux.epoll_data{ .ptr = @ptrToInt(resume_node) },
};
- try std.os.linuxEpollCtl(self.os_data.epollfd, std.os.linux.EPOLL_CTL_ADD, fd, &ev);
+ try std.os.linuxEpollCtl(self.os_data.epollfd, op, fd, &ev);
}
pub fn removeFd(self: *Loop, fd: i32) void {
@@ -331,7 +338,8 @@ pub const Loop = struct {
const eventfd_node = &resume_stack_node.data;
eventfd_node.base.handle = handle;
// the pending count is already accounted for
- self.addFdNoCounter(eventfd_node.eventfd, &eventfd_node.base) catch |_| {
+ const epoll_events = posix.EPOLLONESHOT | std.os.linux.EPOLLIN | std.os.linux.EPOLLOUT | std.os.linux.EPOLLET;
+ self.modFd(eventfd_node.eventfd, eventfd_node.epoll_op, epoll_events, &eventfd_node.base) catch |_| {
// fine, we didn't need it anyway
_ = @atomicRmw(u8, &self.dispatch_lock, AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst);
self.os_data.available_eventfd_resume_nodes.push(resume_stack_node);
@@ -371,7 +379,7 @@ pub const Loop = struct {
ResumeNode.Id.Stop => return,
ResumeNode.Id.EventFd => {
const event_fd_node = @fieldParentPtr(ResumeNode.EventFd, "base", resume_node);
- self.removeFdNoCounter(event_fd_node.eventfd);
+ event_fd_node.epoll_op = posix.EPOLL_CTL_MOD;
const stack_node = @fieldParentPtr(std.atomic.Stack(ResumeNode.EventFd).Node, "data", event_fd_node);
self.os_data.available_eventfd_resume_nodes.push(stack_node);
},
@@ -902,7 +910,7 @@ test "std.event.Lock" {
defer cancel handle;
loop.run();
- assert(mem.eql(i32, shared_test_data, [1]i32{3 * 10} ** 10));
+ assert(mem.eql(i32, shared_test_data, [1]i32{3 * @intCast(i32, shared_test_data.len)} ** shared_test_data.len));
}
async fn testLock(loop: *Loop, lock: *Lock) void {
@@ -945,7 +953,7 @@ async fn lockRunner(lock: *Lock) void {
suspend; // resumed by onNextTick
var i: usize = 0;
- while (i < 10) : (i += 1) {
+ while (i < shared_test_data.len) : (i += 1) {
const handle = await (async lock.acquire() catch @panic("out of memory"));
defer handle.release();
From c15a6fa9d0e11398f65e8ecc1903e07f4c57add6 Mon Sep 17 00:00:00 2001
From: Andrew Kelley
Date: Sat, 7 Jul 2018 01:23:18 -0400
Subject: [PATCH 11/35] add std.os.cpuCount and have std.event.Loop use it for
thread pool size
---
std/event.zig | 4 +--
std/heap.zig | 67 ++++++++++++++++++++++++++++++++++++++++++
std/os/index.zig | 39 ++++++++++++++++++++++++
std/os/linux/index.zig | 4 +++
std/os/test.zig | 5 ++++
5 files changed, 117 insertions(+), 2 deletions(-)
diff --git a/std/event.zig b/std/event.zig
index 5fd87b8fdd..f0c45f61bc 100644
--- a/std/event.zig
+++ b/std/event.zig
@@ -150,8 +150,8 @@ pub const Loop = struct {
/// TODO copy elision / named return values so that the threads referencing *Loop
/// have the correct pointer value.
fn initMultiThreaded(self: *Loop, allocator: *mem.Allocator) !void {
- // TODO check the actual cpu core count
- return self.initInternal(allocator, 4);
+ const core_count = try std.os.cpuCount(allocator);
+ return self.initInternal(allocator, core_count);
}
/// Thread count is the total thread count. The thread pool size will be
diff --git a/std/heap.zig b/std/heap.zig
index bcace34afe..6d3fd05cdb 100644
--- a/std/heap.zig
+++ b/std/heap.zig
@@ -361,6 +361,73 @@ pub const ThreadSafeFixedBufferAllocator = struct {
fn free(allocator: *Allocator, bytes: []u8) void {}
};
+pub fn stackFallback(comptime size: usize, fallback_allocator: *Allocator) StackFallbackAllocator(size) {
+ return StackFallbackAllocator(size){
+ .buffer = undefined,
+ .fallback_allocator = fallback_allocator,
+ .fixed_buffer_allocator = undefined,
+ .allocator = Allocator{
+ .allocFn = StackFallbackAllocator(size).alloc,
+ .reallocFn = StackFallbackAllocator(size).realloc,
+ .freeFn = StackFallbackAllocator(size).free,
+ },
+ };
+}
+
+pub fn StackFallbackAllocator(comptime size: usize) type {
+ return struct {
+ const Self = this;
+
+ buffer: [size]u8,
+ allocator: Allocator,
+ fallback_allocator: *Allocator,
+ fixed_buffer_allocator: FixedBufferAllocator,
+
+ pub fn get(self: *Self) *Allocator {
+ self.fixed_buffer_allocator = FixedBufferAllocator.init(self.buffer[0..]);
+ return &self.allocator;
+ }
+
+ fn alloc(allocator: *Allocator, n: usize, alignment: u29) ![]u8 {
+ const self = @fieldParentPtr(Self, "allocator", allocator);
+ return FixedBufferAllocator.alloc(&self.fixed_buffer_allocator.allocator, n, alignment) catch
+ self.fallback_allocator.allocFn(self.fallback_allocator, n, alignment);
+ }
+
+ fn realloc(allocator: *Allocator, old_mem: []u8, new_size: usize, alignment: u29) ![]u8 {
+ const self = @fieldParentPtr(Self, "allocator", allocator);
+ const in_buffer = @ptrToInt(old_mem.ptr) >= @ptrToInt(&self.buffer) and
+ @ptrToInt(old_mem.ptr) < @ptrToInt(&self.buffer) + self.buffer.len;
+ if (in_buffer) {
+ return FixedBufferAllocator.realloc(
+ &self.fixed_buffer_allocator.allocator,
+ old_mem,
+ new_size,
+ alignment,
+ ) catch {
+ const result = try self.fallback_allocator.allocFn(
+ self.fallback_allocator,
+ new_size,
+ alignment,
+ );
+ mem.copy(u8, result, old_mem);
+ return result;
+ };
+ }
+ return self.fallback_allocator.reallocFn(self.fallback_allocator, old_mem, new_size, alignment);
+ }
+
+ fn free(allocator: *Allocator, bytes: []u8) void {
+ const self = @fieldParentPtr(Self, "allocator", allocator);
+ const in_buffer = @ptrToInt(bytes.ptr) >= @ptrToInt(&self.buffer) and
+ @ptrToInt(bytes.ptr) < @ptrToInt(&self.buffer) + self.buffer.len;
+ if (!in_buffer) {
+ return self.fallback_allocator.freeFn(self.fallback_allocator, bytes);
+ }
+ }
+ };
+}
+
test "c_allocator" {
if (builtin.link_libc) {
var slice = c_allocator.alloc(u8, 50) catch return;
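
A usage sketch for the stackFallback helper above (the pattern cpuCount uses in the next file): allocations that fit come from the fixed stack buffer, larger ones fall through to the wrapped allocator. Illustrative only, not part of the patch:

    const std = @import("std");
    const assert = std.debug.assert;

    test "stackFallback small and large allocations" {
        var stack_alloc = std.heap.stackFallback(64, std.debug.global_allocator);
        const allocator = stack_alloc.get();

        const small = try allocator.alloc(u8, 16); // fits in the 64-byte stack buffer
        defer allocator.free(small);

        const big = try allocator.alloc(u8, 1024); // falls back to global_allocator
        defer allocator.free(big);

        assert(small.len == 16 and big.len == 1024);
    }
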
diff --git a/std/os/index.zig b/std/os/index.zig
index 74a1b64f6e..c36aae91da 100644
--- a/std/os/index.zig
+++ b/std/os/index.zig
@@ -2748,3 +2748,42 @@ pub fn posixFStat(fd: i32) !posix.Stat {
return stat;
}
+
+pub const CpuCountError = error{
+ OutOfMemory,
+ PermissionDenied,
+ Unexpected,
+};
+
+pub fn cpuCount(fallback_allocator: *mem.Allocator) CpuCountError!usize {
+ const usize_count = 16;
+ const allocator = std.heap.stackFallback(usize_count * @sizeOf(usize), fallback_allocator).get();
+
+ var set = try allocator.alloc(usize, usize_count);
+ defer allocator.free(set);
+
+ while (true) {
+ const rc = posix.sched_getaffinity(0, set);
+ const err = posix.getErrno(rc);
+ switch (err) {
+ 0 => {
+ if (rc < set.len * @sizeOf(usize)) {
+ const result = set[0 .. rc / @sizeOf(usize)];
+ var sum: usize = 0;
+ for (result) |x| {
+ sum += @popCount(x);
+ }
+ return sum;
+ } else {
+ set = try allocator.realloc(usize, set, set.len * 2);
+ continue;
+ }
+ },
+ posix.EFAULT => unreachable,
+ posix.EINVAL => unreachable,
+ posix.EPERM => return CpuCountError.PermissionDenied,
+ posix.ESRCH => unreachable,
+ else => return os.unexpectedErrorPosix(err),
+ }
+ }
+}
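
The heart of cpuCount above is counting set bits in the affinity mask that sched_getaffinity fills in. A tiny worked example of that step (illustrative only, not part of the patch):

    const std = @import("std");

    test "popcount over a hand-made affinity mask" {
        // CPUs 0, 1, 2 and 5 online: 0b100111 has four bits set
        const mask = []usize{0b100111};
        var sum: usize = 0;
        for (mask) |x| {
            sum += @popCount(x);
        }
        std.debug.assert(sum == 4);
    }
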
diff --git a/std/os/linux/index.zig b/std/os/linux/index.zig
index 1c15be4887..69bc30bad0 100644
--- a/std/os/linux/index.zig
+++ b/std/os/linux/index.zig
@@ -1197,6 +1197,10 @@ pub fn fremovexattr(fd: usize, name: [*]const u8) usize {
return syscall2(SYS_fremovexattr, fd, @ptrToInt(name));
}
+pub fn sched_getaffinity(pid: i32, set: []usize) usize {
+ return syscall3(SYS_sched_getaffinity, @bitCast(usize, isize(pid)), set.len * @sizeOf(usize), @ptrToInt(set.ptr));
+}
+
pub const epoll_data = packed union {
ptr: usize,
fd: i32,
diff --git a/std/os/test.zig b/std/os/test.zig
index 5a977a569a..52e6ffdc1c 100644
--- a/std/os/test.zig
+++ b/std/os/test.zig
@@ -58,3 +58,8 @@ fn start2(ctx: *i32) u8 {
_ = @atomicRmw(i32, ctx, AtomicRmwOp.Add, 1, AtomicOrder.SeqCst);
return 0;
}
+
+test "cpu count" {
+ const cpu_count = try std.os.cpuCount(a);
+ assert(cpu_count >= 1);
+}
From ced3aae3b2371479c01b4abba42c751697185d7b Mon Sep 17 00:00:00 2001
From: Andrew Kelley
Date: Sat, 7 Jul 2018 20:31:50 -0400
Subject: [PATCH 12/35] cleaner output from zig build when there are compile
errors
---
std/debug/index.zig | 62 ++++++++++++++++++++++++------------
std/special/build_runner.zig | 9 ++++--
2 files changed, 47 insertions(+), 24 deletions(-)
diff --git a/std/debug/index.zig b/std/debug/index.zig
index 57b2dfc300..0e2a3a8d39 100644
--- a/std/debug/index.zig
+++ b/std/debug/index.zig
@@ -156,7 +156,7 @@ pub fn writeStackTrace(stack_trace: *const builtin.StackTrace, out_stream: var,
frame_index = (frame_index + 1) % stack_trace.instruction_addresses.len;
}) {
const return_address = stack_trace.instruction_addresses[frame_index];
- try printSourceAtAddress(debug_info, out_stream, return_address);
+ try printSourceAtAddress(debug_info, out_stream, return_address, tty_color);
}
}
@@ -189,13 +189,11 @@ pub fn writeCurrentStackTrace(out_stream: var, allocator: *mem.Allocator, debug_
}
},
}
- try printSourceAtAddress(debug_info, out_stream, return_address);
+ try printSourceAtAddress(debug_info, out_stream, return_address, tty_color);
}
}
-fn printSourceAtAddress(debug_info: *ElfStackTrace, out_stream: var, address: usize) !void {
- const ptr_hex = "0x{x}";
-
+fn printSourceAtAddress(debug_info: *ElfStackTrace, out_stream: var, address: usize, tty_color: bool) !void {
switch (builtin.os) {
builtin.Os.windows => return error.UnsupportedDebugInfo,
builtin.Os.macosx => {
@@ -209,36 +207,58 @@ fn printSourceAtAddress(debug_info: *ElfStackTrace, out_stream: var, address: us
.address = address,
};
const symbol = debug_info.symbol_table.search(address) orelse &unknown;
- try out_stream.print(WHITE ++ "{}" ++ RESET ++ ": " ++ DIM ++ ptr_hex ++ " in ??? (???)" ++ RESET ++ "\n", symbol.name, address);
+ try out_stream.print(WHITE ++ "{}" ++ RESET ++ ": " ++ DIM ++ "0x{x}" ++ " in ??? (???)" ++ RESET ++ "\n", symbol.name, address);
},
else => {
const compile_unit = findCompileUnit(debug_info, address) catch {
- try out_stream.print("???:?:?: " ++ DIM ++ ptr_hex ++ " in ??? (???)" ++ RESET ++ "\n ???\n\n", address);
+ if (tty_color) {
+ try out_stream.print("???:?:?: " ++ DIM ++ "0x{x} in ??? (???)" ++ RESET ++ "\n ???\n\n", address);
+ } else {
+ try out_stream.print("???:?:?: 0x{x} in ??? (???)\n ???\n\n", address);
+ }
return;
};
const compile_unit_name = try compile_unit.die.getAttrString(debug_info, DW.AT_name);
if (getLineNumberInfo(debug_info, compile_unit, address - 1)) |line_info| {
defer line_info.deinit();
- try out_stream.print(WHITE ++ "{}:{}:{}" ++ RESET ++ ": " ++ DIM ++ ptr_hex ++ " in ??? ({})" ++ RESET ++ "\n", line_info.file_name, line_info.line, line_info.column, address, compile_unit_name);
- if (printLineFromFile(debug_info.allocator(), out_stream, line_info)) {
- if (line_info.column == 0) {
- try out_stream.write("\n");
- } else {
- {
- var col_i: usize = 1;
- while (col_i < line_info.column) : (col_i += 1) {
- try out_stream.writeByte(' ');
+ if (tty_color) {
+ try out_stream.print(
+ WHITE ++ "{}:{}:{}" ++ RESET ++ ": " ++ DIM ++ "0x{x} in ??? ({})" ++ RESET ++ "\n",
+ line_info.file_name,
+ line_info.line,
+ line_info.column,
+ address,
+ compile_unit_name,
+ );
+ if (printLineFromFile(debug_info.allocator(), out_stream, line_info)) {
+ if (line_info.column == 0) {
+ try out_stream.write("\n");
+ } else {
+ {
+ var col_i: usize = 1;
+ while (col_i < line_info.column) : (col_i += 1) {
+ try out_stream.writeByte(' ');
+ }
}
+ try out_stream.write(GREEN ++ "^" ++ RESET ++ "\n");
}
- try out_stream.write(GREEN ++ "^" ++ RESET ++ "\n");
+ } else |err| switch (err) {
+ error.EndOfFile => {},
+ else => return err,
}
- } else |err| switch (err) {
- error.EndOfFile => {},
- else => return err,
+ } else {
+ try out_stream.print(
+ "{}:{}:{}: 0x{x} in ??? ({})\n",
+ line_info.file_name,
+ line_info.line,
+ line_info.column,
+ address,
+ compile_unit_name,
+ );
}
} else |err| switch (err) {
error.MissingDebugInfo, error.InvalidDebugInfo => {
- try out_stream.print(ptr_hex ++ " in ??? ({})\n", address, compile_unit_name);
+ try out_stream.print("0x{x} in ??? ({})\n", address, compile_unit_name);
},
else => return err,
}
diff --git a/std/special/build_runner.zig b/std/special/build_runner.zig
index e4f04df6d0..2f073b3e98 100644
--- a/std/special/build_runner.zig
+++ b/std/special/build_runner.zig
@@ -122,10 +122,13 @@ pub fn main() !void {
return usageAndErr(&builder, true, try stderr_stream);
builder.make(targets.toSliceConst()) catch |err| {
- if (err == error.InvalidStepName) {
- return usageAndErr(&builder, true, try stderr_stream);
+ switch (err) {
+ error.InvalidStepName => {
+ return usageAndErr(&builder, true, try stderr_stream);
+ },
+ error.UncleanExit => os.exit(1),
+ else => return err,
}
- return err;
};
}
From 410b4d9bdf8abb8dad2ac2e11038fe492b8be869 Mon Sep 17 00:00:00 2001
From: Josh Wolfe
Date: Sun, 8 Jul 2018 00:00:05 -0400
Subject: [PATCH 13/35] builder.addBuildOption
---
std/build.zig | 18 ++++++++++++++++++
1 file changed, 18 insertions(+)
diff --git a/std/build.zig b/std/build.zig
index 99de9b5197..24fa85383a 100644
--- a/std/build.zig
+++ b/std/build.zig
@@ -814,6 +814,7 @@ pub const LibExeObjStep = struct {
out_h_filename: []const u8,
assembly_files: ArrayList([]const u8),
packages: ArrayList(Pkg),
+ build_options_contents: std.Buffer,
// C only stuff
source_files: ArrayList([]const u8),
@@ -905,6 +906,7 @@ pub const LibExeObjStep = struct {
.lib_paths = ArrayList([]const u8).init(builder.allocator),
.object_src = undefined,
.disable_libc = true,
+ .build_options_contents = std.Buffer.initSize(builder.allocator, 0) catch unreachable,
};
self.computeOutFileNames();
return self;
@@ -945,6 +947,7 @@ pub const LibExeObjStep = struct {
.out_h_filename = undefined,
.assembly_files = undefined,
.packages = undefined,
+ .build_options_contents = undefined,
};
self.computeOutFileNames();
return self;
@@ -1096,6 +1099,12 @@ pub const LibExeObjStep = struct {
self.include_dirs.append(self.builder.cache_root) catch unreachable;
}
+ pub fn addBuildOption(self: *LibExeObjStep, comptime T: type, name: []const u8, value: T) void {
+ assert(self.is_zig);
+ const out = &std.io.BufferOutStream.init(&self.build_options_contents).stream;
+ out.print("pub const {} = {};\n", name, value) catch unreachable;
+ }
+
pub fn addIncludeDir(self: *LibExeObjStep, path: []const u8) void {
self.include_dirs.append(path) catch unreachable;
}
@@ -1155,6 +1164,15 @@ pub const LibExeObjStep = struct {
zig_args.append(builder.pathFromRoot(root_src)) catch unreachable;
}
+ if (self.build_options_contents.len() > 0) {
+ const build_options_file = try os.path.join(builder.allocator, builder.cache_root, builder.fmt("{}_build_options.zig", self.name));
+ try std.io.writeFile(builder.allocator, build_options_file, self.build_options_contents.toSliceConst());
+ try zig_args.append("--pkg-begin");
+ try zig_args.append("build_options");
+ try zig_args.append(builder.pathFromRoot(build_options_file));
+ try zig_args.append("--pkg-end");
+ }
+
for (self.object_files.toSliceConst()) |object_file| {
zig_args.append("--object") catch unreachable;
zig_args.append(builder.pathFromRoot(object_file)) catch unreachable;
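
A sketch of how addBuildOption is consumed end to end; the option name and value are made up for illustration and are not part of the patch:

    // build.zig
    const Builder = @import("std").build.Builder;

    pub fn build(b: *Builder) void {
        const exe = b.addExecutable("app", "src/main.zig");
        // generates `pub const max_clients = 16;` in the build_options package
        exe.addBuildOption(usize, "max_clients", 16);
        b.default_step.dependOn(&exe.step);
    }

    // src/main.zig
    const std = @import("std");
    const build_options = @import("build_options");

    pub fn main() void {
        std.debug.warn("max_clients = {}\n", build_options.max_clients);
    }
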
From 50d70d5f498470790f6d58b5e3018e0d89c2c9f3 Mon Sep 17 00:00:00 2001
From: Andrew Kelley
Date: Sun, 8 Jul 2018 02:43:30 -0400
Subject: [PATCH 14/35] tests passing with kqueue on macos
---
std/c/darwin.zig | 56 ++++++++++
std/event.zig | 277 +++++++++++++++++++++++++++++++++++-----------
std/os/darwin.zig | 131 ++++++++++++++++++++++
std/os/index.zig | 56 ++++++++++
4 files changed, 454 insertions(+), 66 deletions(-)
diff --git a/std/c/darwin.zig b/std/c/darwin.zig
index e3b53d9bea..cbaa2f6811 100644
--- a/std/c/darwin.zig
+++ b/std/c/darwin.zig
@@ -6,6 +6,13 @@ pub extern "c" fn __getdirentries64(fd: c_int, buf_ptr: [*]u8, buf_len: usize, b
pub extern "c" fn mach_absolute_time() u64;
pub extern "c" fn mach_timebase_info(tinfo: ?*mach_timebase_info_data) void;
+pub extern "c" fn kqueue() c_int;
+pub extern "c" fn kevent(kq: c_int, changelist: [*]const Kevent, nchanges: c_int,
+ eventlist: [*]Kevent, nevents: c_int, timeout: ?*const timespec) c_int;
+
+pub extern "c" fn kevent64(kq: c_int, changelist: [*]const kevent64_s, nchanges: c_int,
+ eventlist: [*]kevent64_s, nevents: c_int, flags: c_uint, timeout: ?*const timespec) c_int;
+
pub use @import("../os/darwin_errno.zig");
pub const _errno = __error;
@@ -86,3 +93,52 @@ pub const pthread_attr_t = extern struct {
__sig: c_long,
__opaque: [56]u8,
};
+
+/// Renamed from `kevent` to `Kevent` to avoid conflict with function name.
+pub const Kevent = extern struct {
+ ident: usize,
+ filter: i16,
+ flags: u16,
+ fflags: u32,
+ data: isize,
+ udata: usize,
+};
+
+// sys/types.h on macos uses #pragma pack(4) so these checks are
+// to make sure the struct is laid out the same. These values were
+// produced from C code using the offsetof macro.
+const std = @import("../index.zig");
+const assert = std.debug.assert;
+
+comptime {
+ assert(@offsetOf(Kevent, "ident") == 0);
+ assert(@offsetOf(Kevent, "filter") == 8);
+ assert(@offsetOf(Kevent, "flags") == 10);
+ assert(@offsetOf(Kevent, "fflags") == 12);
+ assert(@offsetOf(Kevent, "data") == 16);
+ assert(@offsetOf(Kevent, "udata") == 24);
+}
+
+pub const kevent64_s = extern struct {
+ ident: u64,
+ filter: i16,
+ flags: u16,
+ fflags: u32,
+ data: i64,
+ udata: u64,
+ ext: [2]u64,
+};
+
+// sys/types.h on macos uses #pragma pack() so these checks are
+// to make sure the struct is laid out the same. These values were
+// produced from C code using the offsetof macro.
+comptime {
+ assert(@offsetOf(kevent64_s, "ident") == 0);
+ assert(@offsetOf(kevent64_s, "filter") == 8);
+ assert(@offsetOf(kevent64_s, "flags") == 10);
+ assert(@offsetOf(kevent64_s, "fflags") == 12);
+ assert(@offsetOf(kevent64_s, "data") == 16);
+ assert(@offsetOf(kevent64_s, "udata") == 24);
+ assert(@offsetOf(kevent64_s, "ext") == 32);
+}
+
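
The comptime blocks above pin the Zig struct layout against the C headers so that any drift fails the build instead of corrupting kevent arguments. A generic sketch of the same technique (Example is a made-up struct, not part of the patch):

    const assert = @import("std").debug.assert;

    const Example = extern struct {
        a: u64,
        b: u16,
        c: u32,
    };

    // C layout: a at offset 0, b at 8, two bytes of padding, c at 12
    comptime {
        assert(@offsetOf(Example, "a") == 0);
        assert(@offsetOf(Example, "b") == 8);
        assert(@offsetOf(Example, "c") == 12);
        assert(@sizeOf(Example) == 16);
    }
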
diff --git a/std/event.zig b/std/event.zig
index f0c45f61bc..12aa2a3fc7 100644
--- a/std/event.zig
+++ b/std/event.zig
@@ -118,6 +118,11 @@ pub const Loop = struct {
extra_threads: []*std.os.Thread,
final_resume_node: ResumeNode,
+ // pre-allocated eventfds. all permanently active.
+ // this is how we send promises to be resumed on other threads.
+ available_eventfd_resume_nodes: std.atomic.Stack(ResumeNode.EventFd),
+ eventfd_resume_nodes: []std.atomic.Stack(ResumeNode.EventFd).Node,
+
pub const NextTickNode = std.atomic.QueueMpsc(promise).Node;
pub const ResumeNode = struct {
@@ -130,10 +135,17 @@ pub const Loop = struct {
EventFd,
};
- pub const EventFd = struct {
- base: ResumeNode,
- epoll_op: u32,
- eventfd: i32,
+ pub const EventFd = switch (builtin.os) {
+ builtin.Os.macosx => struct {
+ base: ResumeNode,
+ kevent: posix.Kevent,
+ },
+ builtin.Os.linux => struct {
+ base: ResumeNode,
+ epoll_op: u32,
+ eventfd: i32,
+ },
+ else => @compileError("unsupported OS"),
};
};
@@ -168,36 +180,41 @@ pub const Loop = struct {
.id = ResumeNode.Id.Stop,
.handle = undefined,
},
+ .available_eventfd_resume_nodes = std.atomic.Stack(ResumeNode.EventFd).init(),
+ .eventfd_resume_nodes = undefined,
};
- try self.initOsData(thread_count);
+ const extra_thread_count = thread_count - 1;
+ self.eventfd_resume_nodes = try self.allocator.alloc(
+ std.atomic.Stack(ResumeNode.EventFd).Node,
+ extra_thread_count,
+ );
+ errdefer self.allocator.free(self.eventfd_resume_nodes);
+
+ self.extra_threads = try self.allocator.alloc(*std.os.Thread, extra_thread_count);
+ errdefer self.allocator.free(self.extra_threads);
+
+ try self.initOsData(extra_thread_count);
errdefer self.deinitOsData();
}
/// must call stop before deinit
pub fn deinit(self: *Loop) void {
self.deinitOsData();
+ self.allocator.free(self.extra_threads);
}
const InitOsDataError = std.os.LinuxEpollCreateError || mem.Allocator.Error || std.os.LinuxEventFdError ||
- std.os.SpawnThreadError || std.os.LinuxEpollCtlError;
+ std.os.SpawnThreadError || std.os.LinuxEpollCtlError || std.os.BsdKEventError;
const wakeup_bytes = []u8{0x1} ** 8;
- fn initOsData(self: *Loop, thread_count: usize) InitOsDataError!void {
+ fn initOsData(self: *Loop, extra_thread_count: usize) InitOsDataError!void {
switch (builtin.os) {
builtin.Os.linux => {
- const extra_thread_count = thread_count - 1;
- self.os_data.available_eventfd_resume_nodes = std.atomic.Stack(ResumeNode.EventFd).init();
- self.os_data.eventfd_resume_nodes = try self.allocator.alloc(
- std.atomic.Stack(ResumeNode.EventFd).Node,
- extra_thread_count,
- );
- errdefer self.allocator.free(self.os_data.eventfd_resume_nodes);
-
errdefer {
- while (self.os_data.available_eventfd_resume_nodes.pop()) |node| std.os.close(node.data.eventfd);
+ while (self.available_eventfd_resume_nodes.pop()) |node| std.os.close(node.data.eventfd);
}
- for (self.os_data.eventfd_resume_nodes) |*eventfd_node| {
+ for (self.eventfd_resume_nodes) |*eventfd_node| {
eventfd_node.* = std.atomic.Stack(ResumeNode.EventFd).Node{
.data = ResumeNode.EventFd{
.base = ResumeNode{
@@ -209,7 +226,7 @@ pub const Loop = struct {
},
.next = undefined,
};
- self.os_data.available_eventfd_resume_nodes.push(eventfd_node);
+ self.available_eventfd_resume_nodes.push(eventfd_node);
}
self.os_data.epollfd = try std.os.linuxEpollCreate(posix.EPOLL_CLOEXEC);
@@ -228,15 +245,84 @@ pub const Loop = struct {
self.os_data.final_eventfd,
&self.os_data.final_eventfd_event,
);
- self.extra_threads = try self.allocator.alloc(*std.os.Thread, extra_thread_count);
- errdefer self.allocator.free(self.extra_threads);
var extra_thread_index: usize = 0;
errdefer {
+ // writing 8 bytes to an eventfd cannot fail
+ std.os.posixWrite(self.os_data.final_eventfd, wakeup_bytes) catch unreachable;
+ while (extra_thread_index != 0) {
+ extra_thread_index -= 1;
+ self.extra_threads[extra_thread_index].wait();
+ }
+ }
+ while (extra_thread_index < extra_thread_count) : (extra_thread_index += 1) {
+ self.extra_threads[extra_thread_index] = try std.os.spawnThread(self, workerRun);
+ }
+ },
+ builtin.Os.macosx => {
+ self.os_data.kqfd = try std.os.bsdKQueue();
+ errdefer std.os.close(self.os_data.kqfd);
+
+ self.os_data.kevents = try self.allocator.alloc(posix.Kevent, extra_thread_count);
+ errdefer self.allocator.free(self.os_data.kevents);
+
+ const eventlist = ([*]posix.Kevent)(undefined)[0..0];
+
+ for (self.eventfd_resume_nodes) |*eventfd_node, i| {
+ eventfd_node.* = std.atomic.Stack(ResumeNode.EventFd).Node{
+ .data = ResumeNode.EventFd{
+ .base = ResumeNode{
+ .id = ResumeNode.Id.EventFd,
+ .handle = undefined,
+ },
+ // this one is for sending events
+ .kevent = posix.Kevent {
+ .ident = i,
+ .filter = posix.EVFILT_USER,
+ .flags = posix.EV_CLEAR|posix.EV_ADD|posix.EV_DISABLE,
+ .fflags = 0,
+ .data = 0,
+ .udata = @ptrToInt(&eventfd_node.data.base),
+ },
+ },
+ .next = undefined,
+ };
+ self.available_eventfd_resume_nodes.push(eventfd_node);
+ const kevent_array = (*[1]posix.Kevent)(&eventfd_node.data.kevent);
+ _ = try std.os.bsdKEvent(self.os_data.kqfd, kevent_array, eventlist, null);
+ eventfd_node.data.kevent.flags = posix.EV_CLEAR|posix.EV_ENABLE;
+ eventfd_node.data.kevent.fflags = posix.NOTE_TRIGGER;
+ // this one is for waiting for events
+ self.os_data.kevents[i] = posix.Kevent {
+ .ident = i,
+ .filter = posix.EVFILT_USER,
+ .flags = 0,
+ .fflags = 0,
+ .data = 0,
+ .udata = @ptrToInt(&eventfd_node.data.base),
+ };
+ }
+
+ // Pre-add so that we cannot get error.SystemResources
+ // later when we try to activate it.
+ self.os_data.final_kevent = posix.Kevent{
+ .ident = extra_thread_count,
+ .filter = posix.EVFILT_USER,
+ .flags = posix.EV_ADD | posix.EV_DISABLE,
+ .fflags = 0,
+ .data = 0,
+ .udata = @ptrToInt(&self.final_resume_node),
+ };
+ const kevent_array = (*[1]posix.Kevent)(&self.os_data.final_kevent);
+ _ = try std.os.bsdKEvent(self.os_data.kqfd, kevent_array, eventlist, null);
+ self.os_data.final_kevent.flags = posix.EV_ENABLE;
+ self.os_data.final_kevent.fflags = posix.NOTE_TRIGGER;
+
+ var extra_thread_index: usize = 0;
+ errdefer {
+ _ = std.os.bsdKEvent(self.os_data.kqfd, kevent_array, eventlist, null) catch unreachable;
while (extra_thread_index != 0) {
extra_thread_index -= 1;
- // writing 8 bytes to an eventfd cannot fail
- std.os.posixWrite(self.os_data.final_eventfd, wakeup_bytes) catch unreachable;
self.extra_threads[extra_thread_index].wait();
}
}
@@ -252,10 +338,12 @@ pub const Loop = struct {
switch (builtin.os) {
builtin.Os.linux => {
std.os.close(self.os_data.final_eventfd);
- while (self.os_data.available_eventfd_resume_nodes.pop()) |node| std.os.close(node.data.eventfd);
+ while (self.available_eventfd_resume_nodes.pop()) |node| std.os.close(node.data.eventfd);
std.os.close(self.os_data.epollfd);
- self.allocator.free(self.os_data.eventfd_resume_nodes);
- self.allocator.free(self.extra_threads);
+ self.allocator.free(self.eventfd_resume_nodes);
+ },
+ builtin.Os.macosx => {
+ self.allocator.free(self.os_data.kevents);
},
else => {},
}
@@ -332,21 +420,38 @@ pub const Loop = struct {
continue :start_over;
}
- // non-last node, stick it in the epoll set so that
+ // non-last node, stick it in the epoll/kqueue set so that
// other threads can get to it
- if (self.os_data.available_eventfd_resume_nodes.pop()) |resume_stack_node| {
+ if (self.available_eventfd_resume_nodes.pop()) |resume_stack_node| {
const eventfd_node = &resume_stack_node.data;
eventfd_node.base.handle = handle;
- // the pending count is already accounted for
- const epoll_events = posix.EPOLLONESHOT | std.os.linux.EPOLLIN | std.os.linux.EPOLLOUT | std.os.linux.EPOLLET;
- self.modFd(eventfd_node.eventfd, eventfd_node.epoll_op, epoll_events, &eventfd_node.base) catch |_| {
- // fine, we didn't need it anyway
- _ = @atomicRmw(u8, &self.dispatch_lock, AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst);
- self.os_data.available_eventfd_resume_nodes.push(resume_stack_node);
- resume handle;
- _ = @atomicRmw(usize, &self.pending_event_count, AtomicRmwOp.Sub, 1, AtomicOrder.SeqCst);
- continue :start_over;
- };
+ switch (builtin.os) {
+ builtin.Os.macosx => {
+ const kevent_array = (*[1]posix.Kevent)(&eventfd_node.kevent);
+ const eventlist = ([*]posix.Kevent)(undefined)[0..0];
+ _ = std.os.bsdKEvent(self.os_data.kqfd, kevent_array, eventlist, null) catch |_| {
+ // fine, we didn't need it anyway
+ _ = @atomicRmw(u8, &self.dispatch_lock, AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst);
+ self.available_eventfd_resume_nodes.push(resume_stack_node);
+ resume handle;
+ _ = @atomicRmw(usize, &self.pending_event_count, AtomicRmwOp.Sub, 1, AtomicOrder.SeqCst);
+ continue :start_over;
+ };
+ },
+ builtin.Os.linux => {
+ // the pending count is already accounted for
+ const epoll_events = posix.EPOLLONESHOT | std.os.linux.EPOLLIN | std.os.linux.EPOLLOUT | std.os.linux.EPOLLET;
+ self.modFd(eventfd_node.eventfd, eventfd_node.epoll_op, epoll_events, &eventfd_node.base) catch |_| {
+ // fine, we didn't need it anyway
+ _ = @atomicRmw(u8, &self.dispatch_lock, AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst);
+ self.available_eventfd_resume_nodes.push(resume_stack_node);
+ resume handle;
+ _ = @atomicRmw(usize, &self.pending_event_count, AtomicRmwOp.Sub, 1, AtomicOrder.SeqCst);
+ continue :start_over;
+ };
+ },
+ else => @compileError("unsupported OS"),
+ }
} else {
// threads are too busy, can't add another eventfd to wake one up
_ = @atomicRmw(u8, &self.dispatch_lock, AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst);
@@ -359,35 +464,74 @@ pub const Loop = struct {
const pending_event_count = @atomicLoad(usize, &self.pending_event_count, AtomicOrder.SeqCst);
if (pending_event_count == 0) {
// cause all the threads to stop
- // writing 8 bytes to an eventfd cannot fail
- std.os.posixWrite(self.os_data.final_eventfd, wakeup_bytes) catch unreachable;
- return;
+ switch (builtin.os) {
+ builtin.Os.linux => {
+ // writing 8 bytes to an eventfd cannot fail
+ std.os.posixWrite(self.os_data.final_eventfd, wakeup_bytes) catch unreachable;
+ return;
+ },
+ builtin.Os.macosx => {
+ const final_kevent = (*[1]posix.Kevent)(&self.os_data.final_kevent);
+ const eventlist = ([*]posix.Kevent)(undefined)[0..0];
+ // cannot fail because we already added it and this just enables it
+ _ = std.os.bsdKEvent(self.os_data.kqfd, final_kevent, eventlist, null) catch unreachable;
+ return;
+ },
+ else => @compileError("unsupported OS"),
+ }
}
_ = @atomicRmw(u8, &self.dispatch_lock, AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst);
}
- // only process 1 event so we don't steal from other threads
- var events: [1]std.os.linux.epoll_event = undefined;
- const count = std.os.linuxEpollWait(self.os_data.epollfd, events[0..], -1);
- for (events[0..count]) |ev| {
- const resume_node = @intToPtr(*ResumeNode, ev.data.ptr);
- const handle = resume_node.handle;
- const resume_node_id = resume_node.id;
- switch (resume_node_id) {
- ResumeNode.Id.Basic => {},
- ResumeNode.Id.Stop => return,
- ResumeNode.Id.EventFd => {
- const event_fd_node = @fieldParentPtr(ResumeNode.EventFd, "base", resume_node);
- event_fd_node.epoll_op = posix.EPOLL_CTL_MOD;
- const stack_node = @fieldParentPtr(std.atomic.Stack(ResumeNode.EventFd).Node, "data", event_fd_node);
- self.os_data.available_eventfd_resume_nodes.push(stack_node);
- },
- }
- resume handle;
- if (resume_node_id == ResumeNode.Id.EventFd) {
- _ = @atomicRmw(usize, &self.pending_event_count, AtomicRmwOp.Sub, 1, AtomicOrder.SeqCst);
- }
+ switch (builtin.os) {
+ builtin.Os.linux => {
+ // only process 1 event so we don't steal from other threads
+ var events: [1]std.os.linux.epoll_event = undefined;
+ const count = std.os.linuxEpollWait(self.os_data.epollfd, events[0..], -1);
+ for (events[0..count]) |ev| {
+ const resume_node = @intToPtr(*ResumeNode, ev.data.ptr);
+ const handle = resume_node.handle;
+ const resume_node_id = resume_node.id;
+ switch (resume_node_id) {
+ ResumeNode.Id.Basic => {},
+ ResumeNode.Id.Stop => return,
+ ResumeNode.Id.EventFd => {
+ const event_fd_node = @fieldParentPtr(ResumeNode.EventFd, "base", resume_node);
+ event_fd_node.epoll_op = posix.EPOLL_CTL_MOD;
+ const stack_node = @fieldParentPtr(std.atomic.Stack(ResumeNode.EventFd).Node, "data", event_fd_node);
+ self.available_eventfd_resume_nodes.push(stack_node);
+ },
+ }
+ resume handle;
+ if (resume_node_id == ResumeNode.Id.EventFd) {
+ _ = @atomicRmw(usize, &self.pending_event_count, AtomicRmwOp.Sub, 1, AtomicOrder.SeqCst);
+ }
+ }
+ },
+ builtin.Os.macosx => {
+ var eventlist: [1]posix.Kevent = undefined;
+ const count = std.os.bsdKEvent(self.os_data.kqfd, self.os_data.kevents, eventlist[0..], null) catch unreachable;
+ for (eventlist[0..count]) |ev| {
+ const resume_node = @intToPtr(*ResumeNode, ev.udata);
+ const handle = resume_node.handle;
+ const resume_node_id = resume_node.id;
+ switch (resume_node_id) {
+ ResumeNode.Id.Basic => {},
+ ResumeNode.Id.Stop => return,
+ ResumeNode.Id.EventFd => {
+ const event_fd_node = @fieldParentPtr(ResumeNode.EventFd, "base", resume_node);
+ const stack_node = @fieldParentPtr(std.atomic.Stack(ResumeNode.EventFd).Node, "data", event_fd_node);
+ self.available_eventfd_resume_nodes.push(stack_node);
+ },
+ }
+ resume handle;
+ if (resume_node_id == ResumeNode.Id.EventFd) {
+ _ = @atomicRmw(usize, &self.pending_event_count, AtomicRmwOp.Sub, 1, AtomicOrder.SeqCst);
+ }
+ }
+ },
+ else => @compileError("unsupported OS"),
}
}
}
@@ -395,12 +539,13 @@ pub const Loop = struct {
const OsData = switch (builtin.os) {
builtin.Os.linux => struct {
epollfd: i32,
- // pre-allocated eventfds. all permanently active.
- // this is how we send promises to be resumed on other threads.
- available_eventfd_resume_nodes: std.atomic.Stack(ResumeNode.EventFd),
- eventfd_resume_nodes: []std.atomic.Stack(ResumeNode.EventFd).Node,
final_eventfd: i32,
- final_eventfd_event: posix.epoll_event,
+ final_eventfd_event: std.os.linux.epoll_event,
+ },
+ builtin.Os.macosx => struct {
+ kqfd: i32,
+ final_kevent: posix.Kevent,
+ kevents: []posix.Kevent,
},
else => struct {},
};
diff --git a/std/os/darwin.zig b/std/os/darwin.zig
index 15e5608343..7921d1b6f0 100644
--- a/std/os/darwin.zig
+++ b/std/os/darwin.zig
@@ -264,6 +264,119 @@ pub const SIGUSR1 = 30;
/// user defined signal 2
pub const SIGUSR2 = 31;
+pub const KEVENT_FLAG_NONE = 0x000; /// no flag value
+pub const KEVENT_FLAG_IMMEDIATE = 0x001; /// immediate timeout
+pub const KEVENT_FLAG_ERROR_EVENTS = 0x002; /// output events only include change
+
+pub const EV_ADD = 0x0001; /// add event to kq (implies enable)
+pub const EV_DELETE = 0x0002; /// delete event from kq
+pub const EV_ENABLE = 0x0004; /// enable event
+pub const EV_DISABLE = 0x0008; /// disable event (not reported)
+
+pub const EV_ONESHOT = 0x0010; /// only report one occurrence
+pub const EV_CLEAR = 0x0020; /// clear event state after reporting
+
+/// force immediate event output
+/// ... with or without EV_ERROR
+/// ... use KEVENT_FLAG_ERROR_EVENTS
+/// on syscalls supporting flags
+pub const EV_RECEIPT = 0x0040;
+
+pub const EV_DISPATCH = 0x0080; /// disable event after reporting
+pub const EV_UDATA_SPECIFIC = 0x0100; /// unique kevent per udata value
+
+/// ... in combination with EV_DELETE
+/// will defer delete until udata-specific
+/// event enabled. EINPROGRESS will be
+/// returned to indicate the deferral
+pub const EV_DISPATCH2 = EV_DISPATCH | EV_UDATA_SPECIFIC;
+
+/// report that source has vanished
+/// ... only valid with EV_DISPATCH2
+pub const EV_VANISHED = 0x0200;
+
+pub const EV_SYSFLAGS = 0xF000; /// reserved by system
+pub const EV_FLAG0 = 0x1000; /// filter-specific flag
+pub const EV_FLAG1 = 0x2000; /// filter-specific flag
+pub const EV_EOF = 0x8000; /// EOF detected
+pub const EV_ERROR = 0x4000; /// error, data contains errno
+
+pub const EV_POLL = EV_FLAG0;
+pub const EV_OOBAND = EV_FLAG1;
+
+pub const EVFILT_READ = -1;
+pub const EVFILT_WRITE = -2;
+pub const EVFILT_AIO = -3; /// attached to aio requests
+pub const EVFILT_VNODE = -4; /// attached to vnodes
+pub const EVFILT_PROC = -5; /// attached to struct proc
+pub const EVFILT_SIGNAL = -6; /// attached to struct proc
+pub const EVFILT_TIMER = -7; /// timers
+pub const EVFILT_MACHPORT = -8; /// Mach portsets
+pub const EVFILT_FS = -9; /// Filesystem events
+pub const EVFILT_USER = -10; /// User events
+pub const EVFILT_VM = -12; /// Virtual memory events
+
+pub const EVFILT_EXCEPT = -15; /// Exception events
+
+pub const EVFILT_SYSCOUNT = 17;
+
+/// On input, NOTE_TRIGGER causes the event to be triggered for output.
+pub const NOTE_TRIGGER = 0x01000000;
+
+pub const NOTE_FFNOP = 0x00000000; /// ignore input fflags
+pub const NOTE_FFAND = 0x40000000; /// and fflags
+pub const NOTE_FFOR = 0x80000000; /// or fflags
+pub const NOTE_FFCOPY = 0xc0000000; /// copy fflags
+pub const NOTE_FFCTRLMASK = 0xc0000000; /// mask for operations
+pub const NOTE_FFLAGSMASK = 0x00ffffff;
+
+pub const NOTE_LOWAT = 0x00000001; /// low water mark
+
+pub const NOTE_OOB = 0x00000002; /// OOB data
+
+pub const NOTE_DELETE = 0x00000001; /// vnode was removed
+pub const NOTE_WRITE = 0x00000002; /// data contents changed
+pub const NOTE_EXTEND = 0x00000004; /// size increased
+pub const NOTE_ATTRIB = 0x00000008; /// attributes changed
+pub const NOTE_LINK = 0x00000010; /// link count changed
+pub const NOTE_RENAME = 0x00000020; /// vnode was renamed
+pub const NOTE_REVOKE = 0x00000040; /// vnode access was revoked
+pub const NOTE_NONE = 0x00000080; /// No specific vnode event: to test for EVFILT_READ activation
+pub const NOTE_FUNLOCK = 0x00000100; /// vnode was unlocked by flock(2)
+
+pub const NOTE_EXIT = 0x80000000; /// process exited
+pub const NOTE_FORK = 0x40000000; /// process forked
+pub const NOTE_EXEC = 0x20000000; /// process exec'd
+pub const NOTE_SIGNAL = 0x08000000; /// shared with EVFILT_SIGNAL
+pub const NOTE_EXITSTATUS = 0x04000000; /// exit status to be returned, valid for child process only
+pub const NOTE_EXIT_DETAIL = 0x02000000; /// provide details on reasons for exit
+
+pub const NOTE_PDATAMASK = 0x000fffff; /// mask for signal & exit status
+pub const NOTE_PCTRLMASK = (~NOTE_PDATAMASK);
+
+pub const NOTE_EXIT_DETAIL_MASK = 0x00070000;
+pub const NOTE_EXIT_DECRYPTFAIL = 0x00010000;
+pub const NOTE_EXIT_MEMORY = 0x00020000;
+pub const NOTE_EXIT_CSERROR = 0x00040000;
+
+
+pub const NOTE_VM_PRESSURE = 0x80000000; /// will react on memory pressure
+pub const NOTE_VM_PRESSURE_TERMINATE = 0x40000000; /// will quit on memory pressure, possibly after cleaning up dirty state
+pub const NOTE_VM_PRESSURE_SUDDEN_TERMINATE = 0x20000000; /// will quit immediately on memory pressure
+pub const NOTE_VM_ERROR = 0x10000000; /// there was an error
+
+pub const NOTE_SECONDS = 0x00000001; /// data is seconds
+pub const NOTE_USECONDS = 0x00000002; /// data is microseconds
+pub const NOTE_NSECONDS = 0x00000004; /// data is nanoseconds
+pub const NOTE_ABSOLUTE = 0x00000008; /// absolute timeout
+
+pub const NOTE_LEEWAY = 0x00000010; /// ext[1] holds leeway for power aware timers
+pub const NOTE_CRITICAL = 0x00000020; /// system does minimal timer coalescing
+pub const NOTE_BACKGROUND = 0x00000040; /// system does maximum timer coalescing
+pub const NOTE_MACH_CONTINUOUS_TIME = 0x00000080;
+pub const NOTE_MACHTIME = 0x00000100; /// data is mach absolute time units
+
+
fn wstatus(x: i32) i32 {
return x & 0o177;
}
@@ -385,6 +498,20 @@ pub fn getdirentries64(fd: i32, buf_ptr: [*]u8, buf_len: usize, basep: *i64) usi
return errnoWrap(@bitCast(isize, c.__getdirentries64(fd, buf_ptr, buf_len, basep)));
}
+pub fn kqueue() usize {
+ return errnoWrap(c.kqueue());
+}
+
+pub fn kevent(kq: i32, changelist: []const Kevent, eventlist: []Kevent, timeout: ?*const timespec) usize {
+ return errnoWrap(c.kevent(kq, changelist.ptr, @intCast(c_int, changelist.len), eventlist.ptr, @intCast(c_int, eventlist.len), timeout,));
+}
+
+pub fn kevent64(kq: i32, changelist: []const kevent64_s, eventlist: []kevent64_s, flags: u32,
+ timeout: ?*const timespec) usize
+{
+ return errnoWrap(c.kevent64(kq, changelist.ptr, changelist.len, eventlist.ptr, eventlist.len, flags, timeout));
+}
+
pub fn mkdir(path: [*]const u8, mode: u32) usize {
return errnoWrap(c.mkdir(path, mode));
}
@@ -474,6 +601,10 @@ pub const dirent = c.dirent;
pub const sa_family_t = c.sa_family_t;
pub const sockaddr = c.sockaddr;
+/// Renamed from `kevent` to `Kevent` to avoid conflict with the syscall.
+pub const Kevent = c.Kevent;
+pub const kevent64_s = c.kevent64_s;
+
/// Renamed from `sigaction` to `Sigaction` to avoid conflict with the syscall.
pub const Sigaction = struct {
handler: extern fn (i32) void,
diff --git a/std/os/index.zig b/std/os/index.zig
index c36aae91da..15594edcc8 100644
--- a/std/os/index.zig
+++ b/std/os/index.zig
@@ -2787,3 +2787,59 @@ pub fn cpuCount(fallback_allocator: *mem.Allocator) CpuCountError!usize {
}
}
}
+
+pub const BsdKQueueError = error {
+ /// The per-process limit on the number of open file descriptors has been reached.
+ ProcessFdQuotaExceeded,
+
+ /// The system-wide limit on the total number of open files has been reached.
+ SystemFdQuotaExceeded,
+
+ Unexpected,
+};
+
+pub fn bsdKQueue() BsdKQueueError!i32 {
+ const rc = posix.kqueue();
+ const err = posix.getErrno(rc);
+ switch (err) {
+ 0 => return @intCast(i32, rc),
+ posix.EMFILE => return BsdKQueueError.ProcessFdQuotaExceeded,
+ posix.ENFILE => return BsdKQueueError.SystemFdQuotaExceeded,
+ else => return unexpectedErrorPosix(err),
+ }
+}
+
+pub const BsdKEventError = error {
+ /// The process does not have permission to register a filter.
+ AccessDenied,
+
+ /// The event could not be found to be modified or deleted.
+ EventNotFound,
+
+ /// No memory was available to register the event.
+ SystemResources,
+
+ /// The specified process to attach to does not exist.
+ ProcessNotFound,
+};
+
+pub fn bsdKEvent(kq: i32, changelist: []const posix.Kevent, eventlist: []posix.Kevent,
+ timeout: ?*const posix.timespec) BsdKEventError!usize
+{
+ while (true) {
+ const rc = posix.kevent(kq, changelist, eventlist, timeout);
+ const err = posix.getErrno(rc);
+ switch (err) {
+ 0 => return rc,
+ posix.EACCES => return BsdKEventError.AccessDenied,
+ posix.EFAULT => unreachable,
+ posix.EBADF => unreachable,
+ posix.EINTR => continue,
+ posix.EINVAL => unreachable,
+ posix.ENOENT => return BsdKEventError.EventNotFound,
+ posix.ENOMEM => return BsdKEventError.SystemResources,
+ posix.ESRCH => return BsdKEventError.ProcessNotFound,
+ else => unreachable,
+ }
+ }
+}
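Taken together, the new wrappers give user code a small surface for kqueue-based wakeups: open a queue with bsdKQueue, register a disabled EVFILT_USER event with bsdKEvent, and later re-submit it with NOTE_TRIGGER to wake a waiter. A minimal sketch of that flow, assuming the same 2018-era pointer-cast syntax the loop uses above:

    const std = @import("std");
    const posix = std.os.posix;

    fn userEventRoundTrip() !void {
        const kqfd = try std.os.bsdKQueue();
        defer std.os.close(kqfd);

        const empty = ([*]posix.Kevent)(undefined)[0..0];

        // Register a user event, initially disabled, the same way the loop
        // pre-adds its resume nodes.
        var kev = posix.Kevent{
            .ident = 0,
            .filter = posix.EVFILT_USER,
            .flags = posix.EV_ADD | posix.EV_CLEAR | posix.EV_DISABLE,
            .fflags = 0,
            .data = 0,
            .udata = 0,
        };
        _ = try std.os.bsdKEvent(kqfd, (*[1]posix.Kevent)(&kev), empty, null);

        // Trigger it later (possibly from another thread) by enabling it
        // with NOTE_TRIGGER.
        kev.flags = posix.EV_CLEAR | posix.EV_ENABLE;
        kev.fflags = posix.NOTE_TRIGGER;
        _ = try std.os.bsdKEvent(kqfd, (*[1]posix.Kevent)(&kev), empty, null);

        // A waiter sees the event come back out of the queue.
        var out: [1]posix.Kevent = undefined;
        const n = try std.os.bsdKEvent(kqfd, empty, out[0..], null);
        std.debug.assert(n == 1);
    }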
From 04d3da4bd1d5b8922d3f161c92c6185f33961523 Mon Sep 17 00:00:00 2001
From: Andrew Kelley
Date: Mon, 9 Jul 2018 01:08:33 -0400
Subject: [PATCH 15/35] std.os.cpuCount implementation for macos
---
std/c/darwin.zig | 4 +++
std/os/darwin.zig | 12 ++++++++
std/os/index.zig | 74 ++++++++++++++++++++++++++++++-----------------
3 files changed, 64 insertions(+), 26 deletions(-)
diff --git a/std/c/darwin.zig b/std/c/darwin.zig
index cbaa2f6811..c7e18d94cc 100644
--- a/std/c/darwin.zig
+++ b/std/c/darwin.zig
@@ -13,6 +13,10 @@ pub extern "c" fn kevent(kq: c_int, changelist: [*]const Kevent, nchanges: c_int
pub extern "c" fn kevent64(kq: c_int, changelist: [*]const kevent64_s, nchanges: c_int,
eventlist: [*]kevent64_s, nevents: c_int, flags: c_uint, timeout: ?*const timespec) c_int;
+pub extern "c" fn sysctl(name: [*]c_int, namelen: c_uint, oldp: ?*c_void, oldlenp: ?*usize, newp: ?*c_void, newlen: usize) c_int;
+pub extern "c" fn sysctlbyname(name: [*]const u8, oldp: ?*c_void, oldlenp: ?*usize, newp: ?*c_void, newlen: usize) c_int;
+pub extern "c" fn sysctlnametomib(name: [*]const u8, mibp: ?*c_int, sizep: ?*usize) c_int;
+
pub use @import("../os/darwin_errno.zig");
pub const _errno = __error;
diff --git a/std/os/darwin.zig b/std/os/darwin.zig
index 7921d1b6f0..fc933b7f47 100644
--- a/std/os/darwin.zig
+++ b/std/os/darwin.zig
@@ -520,6 +520,18 @@ pub fn symlink(existing: [*]const u8, new: [*]const u8) usize {
return errnoWrap(c.symlink(existing, new));
}
+pub fn sysctl(name: [*]c_int, namelen: c_uint, oldp: ?*c_void, oldlenp: ?*usize, newp: ?*c_void, newlen: usize) usize {
+ return errnoWrap(c.sysctl(name, namelen, oldp, oldlenp, newp, newlen));
+}
+
+pub fn sysctlbyname(name: [*]const u8, oldp: ?*c_void, oldlenp: ?*usize, newp: ?*c_void, newlen: usize) usize {
+ return errnoWrap(c.sysctlbyname(name, oldp, oldlenp, newp, newlen));
+}
+
+pub fn sysctlnametomib(name: [*]const u8, mibp: ?*c_int, sizep: ?*usize) usize {
+    return errnoWrap(c.sysctlnametomib(name, mibp, sizep));
+}
+
pub fn rename(old: [*]const u8, new: [*]const u8) usize {
return errnoWrap(c.rename(old, new));
}
diff --git a/std/os/index.zig b/std/os/index.zig
index 15594edcc8..0d0f63a066 100644
--- a/std/os/index.zig
+++ b/std/os/index.zig
@@ -2756,35 +2756,57 @@ pub const CpuCountError = error{
};
pub fn cpuCount(fallback_allocator: *mem.Allocator) CpuCountError!usize {
- const usize_count = 16;
- const allocator = std.heap.stackFallback(usize_count * @sizeOf(usize), fallback_allocator).get();
+ switch (builtin.os) {
+ builtin.Os.macosx => {
+ var count: c_int = undefined;
+ var count_len: usize = @sizeOf(c_int);
+ const rc = posix.sysctlbyname(c"hw.ncpu", @ptrCast(*c_void, &count), &count_len, null, 0);
+ const err = posix.getErrno(rc);
+ switch (err) {
+ 0 => return @intCast(usize, count),
+ posix.EFAULT => unreachable,
+ posix.EINVAL => unreachable,
+ posix.ENOMEM => return CpuCountError.OutOfMemory,
+ posix.ENOTDIR => unreachable,
+ posix.EISDIR => unreachable,
+ posix.ENOENT => unreachable,
+ posix.EPERM => unreachable,
+ else => return os.unexpectedErrorPosix(err),
+ }
+ },
+ builtin.Os.linux => {
+ const usize_count = 16;
+ const allocator = std.heap.stackFallback(usize_count * @sizeOf(usize), fallback_allocator).get();
- var set = try allocator.alloc(usize, usize_count);
- defer allocator.free(set);
+ var set = try allocator.alloc(usize, usize_count);
+ defer allocator.free(set);
- while (true) {
- const rc = posix.sched_getaffinity(0, set);
- const err = posix.getErrno(rc);
- switch (err) {
- 0 => {
- if (rc < set.len * @sizeOf(usize)) {
- const result = set[0 .. rc / @sizeOf(usize)];
- var sum: usize = 0;
- for (result) |x| {
- sum += @popCount(x);
- }
- return sum;
- } else {
- set = try allocator.realloc(usize, set, set.len * 2);
- continue;
+ while (true) {
+ const rc = posix.sched_getaffinity(0, set);
+ const err = posix.getErrno(rc);
+ switch (err) {
+ 0 => {
+ if (rc < set.len * @sizeOf(usize)) {
+ const result = set[0 .. rc / @sizeOf(usize)];
+ var sum: usize = 0;
+ for (result) |x| {
+ sum += @popCount(x);
+ }
+ return sum;
+ } else {
+ set = try allocator.realloc(usize, set, set.len * 2);
+ continue;
+ }
+ },
+ posix.EFAULT => unreachable,
+ posix.EINVAL => unreachable,
+ posix.EPERM => return CpuCountError.PermissionDenied,
+ posix.ESRCH => unreachable,
+ else => return os.unexpectedErrorPosix(err),
}
- },
- posix.EFAULT => unreachable,
- posix.EINVAL => unreachable,
- posix.EPERM => return CpuCountError.PermissionDenied,
- posix.ESRCH => unreachable,
- else => return os.unexpectedErrorPosix(err),
- }
+ }
+ },
+ else => @compileError("unsupported OS"),
}
}
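The macOS branch answers the query entirely through sysctlbyname("hw.ncpu") and never touches the fallback allocator, so callers keep passing whatever allocator they already use. A small usage sketch, assuming std.debug.global_allocator is available as in the test suite:

    const std = @import("std");

    test "cpu count is at least one" {
        // On macOS the allocator argument is unused; on Linux it backs the
        // affinity-set buffer.
        const n = try std.os.cpuCount(std.debug.global_allocator);
        std.debug.assert(n >= 1);
    }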
From 82e9190d0939a7f71df3d602e381b0ec7cccb561 Mon Sep 17 00:00:00 2001
From: Marc Tiehuis
Date: Mon, 9 Jul 2018 17:14:04 +1200
Subject: [PATCH 16/35] Update zig.parser benchmark program
---
std/zig/bench.zig | 14 ++++++--------
1 file changed, 6 insertions(+), 8 deletions(-)
diff --git a/std/zig/bench.zig b/std/zig/bench.zig
index 59392889a6..630f6b2233 100644
--- a/std/zig/bench.zig
+++ b/std/zig/bench.zig
@@ -19,20 +19,18 @@ pub fn main() !void {
}
const end = timer.read();
memory_used /= iterations;
- const elapsed_s = f64(end - start) / std.os.time.ns_per_s;
- const bytes_per_sec = f64(source.len * iterations) / elapsed_s;
+ const elapsed_s = @intToFloat(f64, end - start) / std.os.time.ns_per_s;
+ const bytes_per_sec = @intToFloat(f64, source.len * iterations) / elapsed_s;
const mb_per_sec = bytes_per_sec / (1024 * 1024);
var stdout_file = try std.io.getStdOut();
- const stdout = *std.io.FileOutStream.init(*stdout_file).stream;
- try stdout.print("{.3} MB/s, {} KB used \n", mb_per_sec, memory_used / 1024);
+ const stdout = &std.io.FileOutStream.init(&stdout_file).stream;
+ try stdout.print("{.3} MiB/s, {} KiB used \n", mb_per_sec, memory_used / 1024);
}
fn testOnce() usize {
var fixed_buf_alloc = std.heap.FixedBufferAllocator.init(fixed_buffer_mem[0..]);
- var allocator = *fixed_buf_alloc.allocator;
- var tokenizer = Tokenizer.init(source);
- var parser = Parser.init(*tokenizer, allocator, "(memory buffer)");
- _ = parser.parse() catch @panic("parse failure");
+ var allocator = &fixed_buf_alloc.allocator;
+ _ = std.zig.parse(allocator, source) catch @panic("parse failure");
return fixed_buf_alloc.end_index;
}
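Besides collapsing the Tokenizer/Parser pair into the single std.zig.parse entry point, the benchmark moves off the old f64(x) cast syntax onto @intToFloat. The throughput arithmetic reduces to something like this sketch (the function name is illustrative):

    const std = @import("std");

    fn mibPerSec(bytes: usize, elapsed_ns: u64) f64 {
        // ns_per_s is the same constant the benchmark divides by.
        const elapsed_s = @intToFloat(f64, elapsed_ns) / std.os.time.ns_per_s;
        return @intToFloat(f64, bytes) / elapsed_s / (1024 * 1024);
    }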
From 3ba451778fde48a5463180deea6d6539f91e1303 Mon Sep 17 00:00:00 2001
From: Andrew Kelley
Date: Mon, 9 Jul 2018 01:22:36 -0400
Subject: [PATCH 17/35] fix regressions on linux
---
std/event.zig | 30 +++++++++++++++++-------------
1 file changed, 17 insertions(+), 13 deletions(-)
diff --git a/std/event.zig b/std/event.zig
index 12aa2a3fc7..3fad81a78b 100644
--- a/std/event.zig
+++ b/std/event.zig
@@ -136,10 +136,7 @@ pub const Loop = struct {
};
pub const EventFd = switch (builtin.os) {
- builtin.Os.macosx => struct {
- base: ResumeNode,
- kevent: posix.Kevent,
- },
+ builtin.Os.macosx => MacOsEventFd,
builtin.Os.linux => struct {
base: ResumeNode,
epoll_op: u32,
@@ -147,6 +144,11 @@ pub const Loop = struct {
},
else => @compileError("unsupported OS"),
};
+
+ const MacOsEventFd = struct {
+ base: ResumeNode,
+ kevent: posix.Kevent,
+ };
};
/// After initialization, call run().
@@ -276,10 +278,10 @@ pub const Loop = struct {
.handle = undefined,
},
// this one is for sending events
- .kevent = posix.Kevent {
+ .kevent = posix.Kevent{
.ident = i,
.filter = posix.EVFILT_USER,
- .flags = posix.EV_CLEAR|posix.EV_ADD|posix.EV_DISABLE,
+ .flags = posix.EV_CLEAR | posix.EV_ADD | posix.EV_DISABLE,
.fflags = 0,
.data = 0,
.udata = @ptrToInt(&eventfd_node.data.base),
@@ -290,10 +292,10 @@ pub const Loop = struct {
self.available_eventfd_resume_nodes.push(eventfd_node);
const kevent_array = (*[1]posix.Kevent)(&eventfd_node.data.kevent);
_ = try std.os.bsdKEvent(self.os_data.kqfd, kevent_array, eventlist, null);
- eventfd_node.data.kevent.flags = posix.EV_CLEAR|posix.EV_ENABLE;
+ eventfd_node.data.kevent.flags = posix.EV_CLEAR | posix.EV_ENABLE;
eventfd_node.data.kevent.fflags = posix.NOTE_TRIGGER;
// this one is for waiting for events
- self.os_data.kevents[i] = posix.Kevent {
+ self.os_data.kevents[i] = posix.Kevent{
.ident = i,
.filter = posix.EVFILT_USER,
.flags = 0,
@@ -542,13 +544,15 @@ pub const Loop = struct {
final_eventfd: i32,
final_eventfd_event: std.os.linux.epoll_event,
},
- builtin.Os.macosx => struct {
- kqfd: i32,
- final_kevent: posix.Kevent,
- kevents: []posix.Kevent,
- },
+ builtin.Os.macosx => MacOsData,
else => struct {},
};
+
+ const MacOsData = struct {
+ kqfd: i32,
+ final_kevent: posix.Kevent,
+        kevents: []posix.Kevent,
+ };
};
/// many producer, many consumer, thread-safe, lock-free, runtime configurable buffer size
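The fix also hoists the macOS-specific struct bodies out of the builtin.os switch expressions into named declarations (MacOsEventFd, MacOsData), which keeps each switch arm a simple type reference. A reduced sketch of that pattern, with illustrative names:

    const builtin = @import("builtin");

    const OsData = switch (builtin.os) {
        builtin.Os.linux => LinuxData,
        builtin.Os.macosx => MacData,
        else => @compileError("unsupported OS"),
    };

    // Top-level declarations are order-independent, so the named structs
    // can follow the switch that refers to them.
    const LinuxData = struct {
        epollfd: i32,
    };

    const MacData = struct {
        kqfd: i32,
    };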
From a0c564d7621c1bf3f83be59d6f91056b0cfe1e16 Mon Sep 17 00:00:00 2001
From: Andrew Kelley
Date: Mon, 9 Jul 2018 01:23:47 -0400
Subject: [PATCH 18/35] zig fmt
---
std/c/darwin.zig | 22 +-
std/os/darwin.zig | 260 ++++++++++++++-----
std/os/index.zig | 13 +-
std/special/compiler_rt/extendXfYf2_test.zig | 40 +--
4 files changed, 233 insertions(+), 102 deletions(-)
diff --git a/std/c/darwin.zig b/std/c/darwin.zig
index c7e18d94cc..133ef62f05 100644
--- a/std/c/darwin.zig
+++ b/std/c/darwin.zig
@@ -7,11 +7,24 @@ pub extern "c" fn mach_absolute_time() u64;
pub extern "c" fn mach_timebase_info(tinfo: ?*mach_timebase_info_data) void;
pub extern "c" fn kqueue() c_int;
-pub extern "c" fn kevent(kq: c_int, changelist: [*]const Kevent, nchanges: c_int,
- eventlist: [*]Kevent, nevents: c_int, timeout: ?*const timespec) c_int;
+pub extern "c" fn kevent(
+ kq: c_int,
+ changelist: [*]const Kevent,
+ nchanges: c_int,
+ eventlist: [*]Kevent,
+ nevents: c_int,
+ timeout: ?*const timespec,
+) c_int;
-pub extern "c" fn kevent64(kq: c_int, changelist: [*]const kevent64_s, nchanges: c_int,
- eventlist: [*]kevent64_s, nevents: c_int, flags: c_uint, timeout: ?*const timespec) c_int;
+pub extern "c" fn kevent64(
+ kq: c_int,
+ changelist: [*]const kevent64_s,
+ nchanges: c_int,
+ eventlist: [*]kevent64_s,
+ nevents: c_int,
+ flags: c_uint,
+ timeout: ?*const timespec,
+) c_int;
pub extern "c" fn sysctl(name: [*]c_int, namelen: c_uint, oldp: ?*c_void, oldlenp: ?*usize, newp: ?*c_void, newlen: usize) c_int;
pub extern "c" fn sysctlbyname(name: [*]const u8, oldp: ?*c_void, oldlenp: ?*usize, newp: ?*c_void, newlen: usize) c_int;
@@ -145,4 +158,3 @@ comptime {
assert(@offsetOf(kevent64_s, "udata") == 24);
assert(@offsetOf(kevent64_s, "ext") == 32);
}
-
diff --git a/std/os/darwin.zig b/std/os/darwin.zig
index fc933b7f47..4134e382fc 100644
--- a/std/os/darwin.zig
+++ b/std/os/darwin.zig
@@ -264,17 +264,32 @@ pub const SIGUSR1 = 30;
/// user defined signal 2
pub const SIGUSR2 = 31;
-pub const KEVENT_FLAG_NONE = 0x000; /// no flag value
-pub const KEVENT_FLAG_IMMEDIATE = 0x001; /// immediate timeout
-pub const KEVENT_FLAG_ERROR_EVENTS = 0x002; /// output events only include change
+/// no flag value
+pub const KEVENT_FLAG_NONE = 0x000;
-pub const EV_ADD = 0x0001; /// add event to kq (implies enable)
-pub const EV_DELETE = 0x0002; /// delete event from kq
-pub const EV_ENABLE = 0x0004; /// enable event
-pub const EV_DISABLE = 0x0008; /// disable event (not reported)
+/// immediate timeout
+pub const KEVENT_FLAG_IMMEDIATE = 0x001;
-pub const EV_ONESHOT = 0x0010; /// only report one occurrence
-pub const EV_CLEAR = 0x0020; /// clear event state after reporting
+/// output events only include change
+pub const KEVENT_FLAG_ERROR_EVENTS = 0x002;
+
+/// add event to kq (implies enable)
+pub const EV_ADD = 0x0001;
+
+/// delete event from kq
+pub const EV_DELETE = 0x0002;
+
+/// enable event
+pub const EV_ENABLE = 0x0004;
+
+/// disable event (not reported)
+pub const EV_DISABLE = 0x0008;
+
+/// only report one occurrence
+pub const EV_ONESHOT = 0x0010;
+
+/// clear event state after reporting
+pub const EV_CLEAR = 0x0020;
/// force immediate event output
/// ... with or without EV_ERROR
@@ -282,8 +297,11 @@ pub const EV_CLEAR = 0x0020; /// clear event state after reporting
/// on syscalls supporting flags
pub const EV_RECEIPT = 0x0040;
-pub const EV_DISPATCH = 0x0080; /// disable event after reporting
-pub const EV_UDATA_SPECIFIC = 0x0100; /// unique kevent per udata value
+/// disable event after reporting
+pub const EV_DISPATCH = 0x0080;
+
+/// unique kevent per udata value
+pub const EV_UDATA_SPECIFIC = 0x0100;
/// ... in combination with EV_DELETE
/// will defer delete until udata-specific
@@ -291,91 +309,178 @@ pub const EV_UDATA_SPECIFIC = 0x0100; /// unique kevent per udata value
/// returned to indicate the deferral
pub const EV_DISPATCH2 = EV_DISPATCH | EV_UDATA_SPECIFIC;
-/// report that source has vanished
+/// report that source has vanished
/// ... only valid with EV_DISPATCH2
pub const EV_VANISHED = 0x0200;
-pub const EV_SYSFLAGS = 0xF000; /// reserved by system
-pub const EV_FLAG0 = 0x1000; /// filter-specific flag
-pub const EV_FLAG1 = 0x2000; /// filter-specific flag
-pub const EV_EOF = 0x8000; /// EOF detected
-pub const EV_ERROR = 0x4000; /// error, data contains errno
+/// reserved by system
+pub const EV_SYSFLAGS = 0xF000;
+
+/// filter-specific flag
+pub const EV_FLAG0 = 0x1000;
+
+/// filter-specific flag
+pub const EV_FLAG1 = 0x2000;
+
+/// EOF detected
+pub const EV_EOF = 0x8000;
+
+/// error, data contains errno
+pub const EV_ERROR = 0x4000;
pub const EV_POLL = EV_FLAG0;
pub const EV_OOBAND = EV_FLAG1;
pub const EVFILT_READ = -1;
pub const EVFILT_WRITE = -2;
-pub const EVFILT_AIO = -3; /// attached to aio requests
-pub const EVFILT_VNODE = -4; /// attached to vnodes
-pub const EVFILT_PROC = -5; /// attached to struct proc
-pub const EVFILT_SIGNAL = -6; /// attached to struct proc
-pub const EVFILT_TIMER = -7; /// timers
-pub const EVFILT_MACHPORT = -8; /// Mach portsets
-pub const EVFILT_FS = -9; /// Filesystem events
-pub const EVFILT_USER = -10; /// User events
-pub const EVFILT_VM = -12; /// Virtual memory events
-pub const EVFILT_EXCEPT = -15; /// Exception events
+/// attached to aio requests
+pub const EVFILT_AIO = -3;
+
+/// attached to vnodes
+pub const EVFILT_VNODE = -4;
+
+/// attached to struct proc
+pub const EVFILT_PROC = -5;
+
+/// attached to struct proc
+pub const EVFILT_SIGNAL = -6;
+
+/// timers
+pub const EVFILT_TIMER = -7;
+
+/// Mach portsets
+pub const EVFILT_MACHPORT = -8;
+
+/// Filesystem events
+pub const EVFILT_FS = -9;
+
+/// User events
+pub const EVFILT_USER = -10;
+
+/// Virtual memory events
+pub const EVFILT_VM = -12;
+
+/// Exception events
+pub const EVFILT_EXCEPT = -15;
pub const EVFILT_SYSCOUNT = 17;
/// On input, NOTE_TRIGGER causes the event to be triggered for output.
pub const NOTE_TRIGGER = 0x01000000;
-pub const NOTE_FFNOP = 0x00000000; /// ignore input fflags
-pub const NOTE_FFAND = 0x40000000; /// and fflags
-pub const NOTE_FFOR = 0x80000000; /// or fflags
-pub const NOTE_FFCOPY = 0xc0000000; /// copy fflags
-pub const NOTE_FFCTRLMASK = 0xc0000000; /// mask for operations
+/// ignore input fflags
+pub const NOTE_FFNOP = 0x00000000;
+
+/// and fflags
+pub const NOTE_FFAND = 0x40000000;
+
+/// or fflags
+pub const NOTE_FFOR = 0x80000000;
+
+/// copy fflags
+pub const NOTE_FFCOPY = 0xc0000000;
+
+/// mask for operations
+pub const NOTE_FFCTRLMASK = 0xc0000000;
pub const NOTE_FFLAGSMASK = 0x00ffffff;
-pub const NOTE_LOWAT = 0x00000001; /// low water mark
+/// low water mark
+pub const NOTE_LOWAT = 0x00000001;
-pub const NOTE_OOB = 0x00000002; /// OOB data
+/// OOB data
+pub const NOTE_OOB = 0x00000002;
-pub const NOTE_DELETE = 0x00000001; /// vnode was removed
-pub const NOTE_WRITE = 0x00000002; /// data contents changed
-pub const NOTE_EXTEND = 0x00000004; /// size increased
-pub const NOTE_ATTRIB = 0x00000008; /// attributes changed
-pub const NOTE_LINK = 0x00000010; /// link count changed
-pub const NOTE_RENAME = 0x00000020; /// vnode was renamed
-pub const NOTE_REVOKE = 0x00000040; /// vnode access was revoked
-pub const NOTE_NONE = 0x00000080; /// No specific vnode event: to test for EVFILT_READ activation
-pub const NOTE_FUNLOCK = 0x00000100; /// vnode was unlocked by flock(2)
+/// vnode was removed
+pub const NOTE_DELETE = 0x00000001;
-pub const NOTE_EXIT = 0x80000000; /// process exited
-pub const NOTE_FORK = 0x40000000; /// process forked
-pub const NOTE_EXEC = 0x20000000; /// process exec'd
-pub const NOTE_SIGNAL = 0x08000000; /// shared with EVFILT_SIGNAL
-pub const NOTE_EXITSTATUS = 0x04000000; /// exit status to be returned, valid for child process only
-pub const NOTE_EXIT_DETAIL = 0x02000000; /// provide details on reasons for exit
+/// data contents changed
+pub const NOTE_WRITE = 0x00000002;
-pub const NOTE_PDATAMASK = 0x000fffff; /// mask for signal & exit status
-pub const NOTE_PCTRLMASK = (~NOTE_PDATAMASK);
+/// size increased
+pub const NOTE_EXTEND = 0x00000004;
-pub const NOTE_EXIT_DETAIL_MASK = 0x00070000;
-pub const NOTE_EXIT_DECRYPTFAIL = 0x00010000;
-pub const NOTE_EXIT_MEMORY = 0x00020000;
-pub const NOTE_EXIT_CSERROR = 0x00040000;
+/// attributes changed
+pub const NOTE_ATTRIB = 0x00000008;
+/// link count changed
+pub const NOTE_LINK = 0x00000010;
-pub const NOTE_VM_PRESSURE = 0x80000000; /// will react on memory pressure
-pub const NOTE_VM_PRESSURE_TERMINATE = 0x40000000; /// will quit on memory pressure, possibly after cleaning up dirty state
-pub const NOTE_VM_PRESSURE_SUDDEN_TERMINATE = 0x20000000; /// will quit immediately on memory pressure
-pub const NOTE_VM_ERROR = 0x10000000; /// there was an error
+/// vnode was renamed
+pub const NOTE_RENAME = 0x00000020;
-pub const NOTE_SECONDS = 0x00000001; /// data is seconds
-pub const NOTE_USECONDS = 0x00000002; /// data is microseconds
-pub const NOTE_NSECONDS = 0x00000004; /// data is nanoseconds
-pub const NOTE_ABSOLUTE = 0x00000008; /// absolute timeout
+/// vnode access was revoked
+pub const NOTE_REVOKE = 0x00000040;
-pub const NOTE_LEEWAY = 0x00000010; /// ext[1] holds leeway for power aware timers
-pub const NOTE_CRITICAL = 0x00000020; /// system does minimal timer coalescing
-pub const NOTE_BACKGROUND = 0x00000040; /// system does maximum timer coalescing
-pub const NOTE_MACH_CONTINUOUS_TIME = 0x00000080;
-pub const NOTE_MACHTIME = 0x00000100; /// data is mach absolute time units
+/// No specific vnode event: to test for EVFILT_READ activation
+pub const NOTE_NONE = 0x00000080;
+/// vnode was unlocked by flock(2)
+pub const NOTE_FUNLOCK = 0x00000100;
+
+/// process exited
+pub const NOTE_EXIT = 0x80000000;
+
+/// process forked
+pub const NOTE_FORK = 0x40000000;
+
+/// process exec'd
+pub const NOTE_EXEC = 0x20000000;
+
+/// shared with EVFILT_SIGNAL
+pub const NOTE_SIGNAL = 0x08000000;
+
+/// exit status to be returned, valid for child process only
+pub const NOTE_EXITSTATUS = 0x04000000;
+
+/// provide details on reasons for exit
+pub const NOTE_EXIT_DETAIL = 0x02000000;
+
+/// mask for signal & exit status
+pub const NOTE_PDATAMASK = 0x000fffff;
+pub const NOTE_PCTRLMASK = (~NOTE_PDATAMASK);
+
+pub const NOTE_EXIT_DETAIL_MASK = 0x00070000;
+pub const NOTE_EXIT_DECRYPTFAIL = 0x00010000;
+pub const NOTE_EXIT_MEMORY = 0x00020000;
+pub const NOTE_EXIT_CSERROR = 0x00040000;
+
+/// will react on memory pressure
+pub const NOTE_VM_PRESSURE = 0x80000000;
+
+/// will quit on memory pressure, possibly after cleaning up dirty state
+pub const NOTE_VM_PRESSURE_TERMINATE = 0x40000000;
+
+/// will quit immediately on memory pressure
+pub const NOTE_VM_PRESSURE_SUDDEN_TERMINATE = 0x20000000;
+
+/// there was an error
+pub const NOTE_VM_ERROR = 0x10000000;
+
+/// data is seconds
+pub const NOTE_SECONDS = 0x00000001;
+
+/// data is microseconds
+pub const NOTE_USECONDS = 0x00000002;
+
+/// data is nanoseconds
+pub const NOTE_NSECONDS = 0x00000004;
+
+/// absolute timeout
+pub const NOTE_ABSOLUTE = 0x00000008;
+
+/// ext[1] holds leeway for power aware timers
+pub const NOTE_LEEWAY = 0x00000010;
+
+/// system does minimal timer coalescing
+pub const NOTE_CRITICAL = 0x00000020;
+
+/// system does maximum timer coalescing
+pub const NOTE_BACKGROUND = 0x00000040;
+pub const NOTE_MACH_CONTINUOUS_TIME = 0x00000080;
+
+/// data is mach absolute time units
+pub const NOTE_MACHTIME = 0x00000100;
fn wstatus(x: i32) i32 {
return x & 0o177;
@@ -503,12 +608,23 @@ pub fn kqueue() usize {
}
pub fn kevent(kq: i32, changelist: []const Kevent, eventlist: []Kevent, timeout: ?*const timespec) usize {
- return errnoWrap(c.kevent(kq, changelist.ptr, @intCast(c_int, changelist.len), eventlist.ptr, @intCast(c_int, eventlist.len), timeout,));
+ return errnoWrap(c.kevent(
+ kq,
+ changelist.ptr,
+ @intCast(c_int, changelist.len),
+ eventlist.ptr,
+ @intCast(c_int, eventlist.len),
+ timeout,
+ ));
}
-pub fn kevent64(kq: i32, changelist: []const kevent64_s, eventlist: []kevent64_s, flags: u32,
- timeout: ?*const timespec) usize
-{
+pub fn kevent64(
+ kq: i32,
+ changelist: []const kevent64_s,
+ eventlist: []kevent64_s,
+ flags: u32,
+ timeout: ?*const timespec,
+) usize {
return errnoWrap(c.kevent64(kq, changelist.ptr, changelist.len, eventlist.ptr, eventlist.len, flags, timeout));
}
diff --git a/std/os/index.zig b/std/os/index.zig
index 0d0f63a066..021a29e3d5 100644
--- a/std/os/index.zig
+++ b/std/os/index.zig
@@ -2810,7 +2810,7 @@ pub fn cpuCount(fallback_allocator: *mem.Allocator) CpuCountError!usize {
}
}
-pub const BsdKQueueError = error {
+pub const BsdKQueueError = error{
/// The per-process limit on the number of open file descriptors has been reached.
ProcessFdQuotaExceeded,
@@ -2831,7 +2831,7 @@ pub fn bsdKQueue() BsdKQueueError!i32 {
}
}
-pub const BsdKEventError = error {
+pub const BsdKEventError = error{
/// The process does not have permission to register a filter.
AccessDenied,
@@ -2845,9 +2845,12 @@ pub const BsdKEventError = error {
ProcessNotFound,
};
-pub fn bsdKEvent(kq: i32, changelist: []const posix.Kevent, eventlist: []posix.Kevent,
- timeout: ?*const posix.timespec) BsdKEventError!usize
-{
+pub fn bsdKEvent(
+ kq: i32,
+ changelist: []const posix.Kevent,
+ eventlist: []posix.Kevent,
+ timeout: ?*const posix.timespec,
+) BsdKEventError!usize {
while (true) {
const rc = posix.kevent(kq, changelist, eventlist, timeout);
const err = posix.getErrno(rc);
diff --git a/std/special/compiler_rt/extendXfYf2_test.zig b/std/special/compiler_rt/extendXfYf2_test.zig
index 185c83a0ef..9969607011 100644
--- a/std/special/compiler_rt/extendXfYf2_test.zig
+++ b/std/special/compiler_rt/extendXfYf2_test.zig
@@ -31,7 +31,7 @@ fn test__extendhfsf2(a: u16, expected: u32) void {
if (rep == expected) {
if (rep & 0x7fffffff > 0x7f800000) {
- return; // NaN is always unequal.
+ return; // NaN is always unequal.
}
if (x == @bitCast(f32, expected)) {
return;
@@ -86,33 +86,33 @@ test "extenddftf2" {
}
test "extendhfsf2" {
- test__extendhfsf2(0x7e00, 0x7fc00000); // qNaN
- test__extendhfsf2(0x7f00, 0x7fe00000); // sNaN
- test__extendhfsf2(0x7c01, 0x7f802000); // sNaN
+ test__extendhfsf2(0x7e00, 0x7fc00000); // qNaN
+ test__extendhfsf2(0x7f00, 0x7fe00000); // sNaN
+ test__extendhfsf2(0x7c01, 0x7f802000); // sNaN
- test__extendhfsf2(0, 0); // 0
- test__extendhfsf2(0x8000, 0x80000000); // -0
+ test__extendhfsf2(0, 0); // 0
+ test__extendhfsf2(0x8000, 0x80000000); // -0
- test__extendhfsf2(0x7c00, 0x7f800000); // inf
- test__extendhfsf2(0xfc00, 0xff800000); // -inf
+ test__extendhfsf2(0x7c00, 0x7f800000); // inf
+ test__extendhfsf2(0xfc00, 0xff800000); // -inf
- test__extendhfsf2(0x0001, 0x33800000); // denormal (min), 2**-24
- test__extendhfsf2(0x8001, 0xb3800000); // denormal (min), -2**-24
+ test__extendhfsf2(0x0001, 0x33800000); // denormal (min), 2**-24
+ test__extendhfsf2(0x8001, 0xb3800000); // denormal (min), -2**-24
- test__extendhfsf2(0x03ff, 0x387fc000); // denormal (max), 2**-14 - 2**-24
- test__extendhfsf2(0x83ff, 0xb87fc000); // denormal (max), -2**-14 + 2**-24
+ test__extendhfsf2(0x03ff, 0x387fc000); // denormal (max), 2**-14 - 2**-24
+ test__extendhfsf2(0x83ff, 0xb87fc000); // denormal (max), -2**-14 + 2**-24
- test__extendhfsf2(0x0400, 0x38800000); // normal (min), 2**-14
- test__extendhfsf2(0x8400, 0xb8800000); // normal (min), -2**-14
+ test__extendhfsf2(0x0400, 0x38800000); // normal (min), 2**-14
+ test__extendhfsf2(0x8400, 0xb8800000); // normal (min), -2**-14
- test__extendhfsf2(0x7bff, 0x477fe000); // normal (max), 65504
- test__extendhfsf2(0xfbff, 0xc77fe000); // normal (max), -65504
+ test__extendhfsf2(0x7bff, 0x477fe000); // normal (max), 65504
+ test__extendhfsf2(0xfbff, 0xc77fe000); // normal (max), -65504
- test__extendhfsf2(0x3c01, 0x3f802000); // normal, 1 + 2**-10
- test__extendhfsf2(0xbc01, 0xbf802000); // normal, -1 - 2**-10
+ test__extendhfsf2(0x3c01, 0x3f802000); // normal, 1 + 2**-10
+ test__extendhfsf2(0xbc01, 0xbf802000); // normal, -1 - 2**-10
- test__extendhfsf2(0x3555, 0x3eaaa000); // normal, approx. 1/3
- test__extendhfsf2(0xb555, 0xbeaaa000); // normal, approx. -1/3
+ test__extendhfsf2(0x3555, 0x3eaaa000); // normal, approx. 1/3
+ test__extendhfsf2(0xb555, 0xbeaaa000); // normal, approx. -1/3
}
test "extendsftf2" {
From 42ba06133aec995feec3ea24ee7fbbc40d7ac2ca Mon Sep 17 00:00:00 2001
From: Andrew Kelley
Date: Mon, 9 Jul 2018 10:33:12 -0400
Subject: [PATCH 19/35] std.Hashmap - don't use catch unreachable in tests
---
std/hash_map.zig | 20 ++++++++++----------
1 file changed, 10 insertions(+), 10 deletions(-)
diff --git a/std/hash_map.zig b/std/hash_map.zig
index 3bd03d4f28..cebd5272c0 100644
--- a/std/hash_map.zig
+++ b/std/hash_map.zig
@@ -259,14 +259,14 @@ test "basic hash map usage" {
var map = HashMap(i32, i32, hash_i32, eql_i32).init(&direct_allocator.allocator);
defer map.deinit();
- assert((map.put(1, 11) catch unreachable) == null);
- assert((map.put(2, 22) catch unreachable) == null);
- assert((map.put(3, 33) catch unreachable) == null);
- assert((map.put(4, 44) catch unreachable) == null);
- assert((map.put(5, 55) catch unreachable) == null);
+ assert((try map.put(1, 11)) == null);
+ assert((try map.put(2, 22)) == null);
+ assert((try map.put(3, 33)) == null);
+ assert((try map.put(4, 44)) == null);
+ assert((try map.put(5, 55)) == null);
- assert((map.put(5, 66) catch unreachable).? == 55);
- assert((map.put(5, 55) catch unreachable).? == 66);
+ assert((try map.put(5, 66)).? == 55);
+ assert((try map.put(5, 55)).? == 66);
assert(map.contains(2));
assert(map.get(2).?.value == 22);
@@ -282,9 +282,9 @@ test "iterator hash map" {
var reset_map = HashMap(i32, i32, hash_i32, eql_i32).init(&direct_allocator.allocator);
defer reset_map.deinit();
- assert((reset_map.put(1, 11) catch unreachable) == null);
- assert((reset_map.put(2, 22) catch unreachable) == null);
- assert((reset_map.put(3, 33) catch unreachable) == null);
+ assert((try reset_map.put(1, 11)) == null);
+ assert((try reset_map.put(2, 22)) == null);
+ assert((try reset_map.put(3, 33)) == null);
var keys = []i32{
1,
From 9eb51e20ed1a040a617541303db760f80ffd3aa1 Mon Sep 17 00:00:00 2001
From: Andrew Kelley
Date: Mon, 9 Jul 2018 10:43:29 -0400
Subject: [PATCH 20/35] fix crash on @ptrToInt of a *void
closes #1192
---
src/ir.cpp | 6 ++++++
test/compile_errors.zig | 9 +++++++++
2 files changed, 15 insertions(+)
diff --git a/src/ir.cpp b/src/ir.cpp
index 98b1bd85ad..5e4c847e14 100644
--- a/src/ir.cpp
+++ b/src/ir.cpp
@@ -19796,6 +19796,12 @@ static TypeTableEntry *ir_analyze_instruction_ptr_to_int(IrAnalyze *ira, IrInstr
return ira->codegen->builtin_types.entry_invalid;
}
+ if (!type_has_bits(target->value.type)) {
+ ir_add_error(ira, target,
+ buf_sprintf("pointer to size 0 type has no address"));
+ return ira->codegen->builtin_types.entry_invalid;
+ }
+
if (instr_is_comptime(target)) {
ConstExprValue *val = ir_resolve_const(ira, target, UndefBad);
if (!val)
diff --git a/test/compile_errors.zig b/test/compile_errors.zig
index 9071f0ad7e..4ed65e449d 100644
--- a/test/compile_errors.zig
+++ b/test/compile_errors.zig
@@ -1,6 +1,15 @@
const tests = @import("tests.zig");
pub fn addCases(cases: *tests.CompileErrorContext) void {
+ cases.add(
+ "@ptrToInt on *void",
+ \\export fn entry() bool {
+ \\ return @ptrToInt(&{}) == @ptrToInt(&{});
+ \\}
+ ,
+ ".tmp_source.zig:2:23: error: pointer to size 0 type has no address",
+ );
+
cases.add(
"@popCount - non-integer",
\\export fn entry(x: f32) u32 {
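The new check only fires when the pointee has no runtime bits (type_has_bits is false), so ordinary pointers keep working. A hedged illustration of the distinction:

    export fn entry() bool {
        var x: i32 = 1;
        var y: i32 = 2;
        // *i32 has runtime bits, so taking addresses here stays legal;
        // only pointers like *void or *struct{} hit the new error.
        return @ptrToInt(&x) == @ptrToInt(&y);
    }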
From 2ee67b7642cfeef36d8ebbc08080202b5b1d1958 Mon Sep 17 00:00:00 2001
From: Andrew Kelley
Date: Mon, 9 Jul 2018 11:13:29 -0400
Subject: [PATCH 21/35] langref: docs for invalid error set cast and incorrect
pointer alignment
also add compile-time detection of incorrect pointer alignment for
pointers that were constructed with `@intToPtr`.
---
doc/langref.html.in | 54 ++++++++++++++++++++++++++++++++++++++---
src/ir.cpp | 9 +++++++
test/compile_errors.zig | 10 ++++++++
3 files changed, 70 insertions(+), 3 deletions(-)
diff --git a/doc/langref.html.in b/doc/langref.html.in
index 8eaffb64ad..16e9023f26 100644
--- a/doc/langref.html.in
+++ b/doc/langref.html.in
@@ -6649,12 +6649,60 @@ pub fn main() void {
{#header_close#}
{#header_open|Invalid Error Set Cast#}
- TODO
+ At compile-time:
+ {#code_begin|test_err|error.B not a member of error set 'Set2'#}
+const Set1 = error{
+ A,
+ B,
+};
+const Set2 = error{
+ A,
+ C,
+};
+comptime {
+ _ = @errSetCast(Set2, Set1.B);
+}
+ {#code_end#}
+ At runtime:
+ {#code_begin|exe_err#}
+const Set1 = error{
+ A,
+ B,
+};
+const Set2 = error{
+ A,
+ C,
+};
+pub fn main() void {
+ _ = foo(Set1.B);
+}
+fn foo(set1: Set1) Set2 {
+ return @errSetCast(Set2, set1);
+}
+ {#code_end#}
{#header_close#}
{#header_open|Incorrect Pointer Alignment#}
- TODO
-
+ At compile-time:
+ {#code_begin|test_err|pointer address 0x1 is not aligned to 4 bytes#}
+comptime {
+ const ptr = @intToPtr(*i32, 0x1);
+ const aligned = @alignCast(4, ptr);
+}
+ {#code_end#}
+ At runtime:
+ {#code_begin|exe_err#}
+pub fn main() !void {
+ var array align(4) = []u32{ 0x11111111, 0x11111111 };
+ const bytes = @sliceToBytes(array[0..]);
+ if (foo(bytes) != 0x11111111) return error.Wrong;
+}
+fn foo(bytes: []u8) u32 {
+ const slice4 = bytes[1..5];
+ const int_slice = @bytesToSlice(u32, @alignCast(4, slice4));
+ return int_slice[0];
+}
+ {#code_end#}
{#header_close#}
{#header_open|Wrong Union Field Access#}
TODO
diff --git a/src/ir.cpp b/src/ir.cpp
index 5e4c847e14..dcd39ccfe5 100644
--- a/src/ir.cpp
+++ b/src/ir.cpp
@@ -19370,6 +19370,15 @@ static IrInstruction *ir_align_cast(IrAnalyze *ira, IrInstruction *target, uint3
if (!val)
return ira->codegen->invalid_instruction;
+ if (val->data.x_ptr.special == ConstPtrSpecialHardCodedAddr &&
+ val->data.x_ptr.data.hard_coded_addr.addr % align_bytes != 0)
+ {
+ ir_add_error(ira, target,
+ buf_sprintf("pointer address 0x%lx is not aligned to %" PRIu32 " bytes",
+ val->data.x_ptr.data.hard_coded_addr.addr, align_bytes));
+ return ira->codegen->invalid_instruction;
+ }
+
IrInstruction *result = ir_create_const(&ira->new_irb, target->scope, target->source_node, result_type);
copy_const_val(&result->value, val, false);
result->value.type = result_type;
diff --git a/test/compile_errors.zig b/test/compile_errors.zig
index 4ed65e449d..1b76c01564 100644
--- a/test/compile_errors.zig
+++ b/test/compile_errors.zig
@@ -1,6 +1,16 @@
const tests = @import("tests.zig");
pub fn addCases(cases: *tests.CompileErrorContext) void {
+ cases.add(
+ "bad @alignCast at comptime",
+ \\comptime {
+ \\ const ptr = @intToPtr(*i32, 0x1);
+ \\ const aligned = @alignCast(4, ptr);
+ \\}
+ ,
+ ".tmp_source.zig:3:35: error: pointer address 0x1 is not aligned to 4 bytes",
+ );
+
cases.add(
"@ptrToInt on *void",
\\export fn entry() bool {
From 05f1ea33d2d2f4ffa2bb6686a6a938d1b7983074 Mon Sep 17 00:00:00 2001
From: Andrew Kelley
Date: Mon, 9 Jul 2018 12:12:37 -0400
Subject: [PATCH 22/35] ZIG_DEBUG_COLOR=1 overrides tty detection for runtime
stack traces
---
doc/docgen.zig | 29 ++++++++++++++++-------------
std/debug/index.zig | 11 +++++++++--
std/os/index.zig | 14 +++++++++++---
3 files changed, 36 insertions(+), 18 deletions(-)
diff --git a/doc/docgen.zig b/doc/docgen.zig
index dfda54567f..e2da1fe6cc 100644
--- a/doc/docgen.zig
+++ b/doc/docgen.zig
@@ -689,7 +689,10 @@ fn termColor(allocator: *mem.Allocator, input: []const u8) ![]u8 {
fn genHtml(allocator: *mem.Allocator, tokenizer: *Tokenizer, toc: *Toc, out: var, zig_exe: []const u8) !void {
var code_progress_index: usize = 0;
- const builtin_code = try escapeHtml(allocator, try getBuiltinCode(allocator, zig_exe));
+ var env_map = try os.getEnvMap(allocator);
+ try env_map.set("ZIG_DEBUG_COLOR", "1");
+
+ const builtin_code = try escapeHtml(allocator, try getBuiltinCode(allocator, &env_map, zig_exe));
for (toc.nodes) |node| {
switch (node) {
@@ -778,12 +781,12 @@ fn genHtml(allocator: *mem.Allocator, tokenizer: *Tokenizer, toc: *Toc, out: var
try build_args.append("c");
try out.print(" --library c");
}
- _ = exec(allocator, build_args.toSliceConst()) catch return parseError(tokenizer, code.source_token, "example failed to compile");
+ _ = exec(allocator, &env_map, build_args.toSliceConst()) catch return parseError(tokenizer, code.source_token, "example failed to compile");
const run_args = [][]const u8{tmp_bin_file_name};
const result = if (expected_outcome == ExpectedOutcome.Fail) blk: {
- const result = try os.ChildProcess.exec(allocator, run_args, null, null, max_doc_file_size);
+ const result = try os.ChildProcess.exec(allocator, run_args, null, &env_map, max_doc_file_size);
switch (result.term) {
os.ChildProcess.Term.Exited => |exit_code| {
if (exit_code == 0) {
@@ -799,7 +802,7 @@ fn genHtml(allocator: *mem.Allocator, tokenizer: *Tokenizer, toc: *Toc, out: var
}
break :blk result;
} else blk: {
- break :blk exec(allocator, run_args) catch return parseError(tokenizer, code.source_token, "example crashed");
+ break :blk exec(allocator, &env_map, run_args) catch return parseError(tokenizer, code.source_token, "example crashed");
};
const escaped_stderr = try escapeHtml(allocator, result.stderr);
@@ -845,7 +848,7 @@ fn genHtml(allocator: *mem.Allocator, tokenizer: *Tokenizer, toc: *Toc, out: var
"msvc",
});
}
- const result = exec(allocator, test_args.toSliceConst()) catch return parseError(tokenizer, code.source_token, "test failed");
+ const result = exec(allocator, &env_map, test_args.toSliceConst()) catch return parseError(tokenizer, code.source_token, "test failed");
const escaped_stderr = try escapeHtml(allocator, result.stderr);
const escaped_stdout = try escapeHtml(allocator, result.stdout);
try out.print("\n{}{}\n", escaped_stderr, escaped_stdout);
@@ -877,7 +880,7 @@ fn genHtml(allocator: *mem.Allocator, tokenizer: *Tokenizer, toc: *Toc, out: var
try out.print(" --release-small");
},
}
- const result = try os.ChildProcess.exec(allocator, test_args.toSliceConst(), null, null, max_doc_file_size);
+ const result = try os.ChildProcess.exec(allocator, test_args.toSliceConst(), null, &env_map, max_doc_file_size);
switch (result.term) {
os.ChildProcess.Term.Exited => |exit_code| {
if (exit_code == 0) {
@@ -923,7 +926,7 @@ fn genHtml(allocator: *mem.Allocator, tokenizer: *Tokenizer, toc: *Toc, out: var
builtin.Mode.ReleaseSmall => try test_args.append("--release-small"),
}
- const result = try os.ChildProcess.exec(allocator, test_args.toSliceConst(), null, null, max_doc_file_size);
+ const result = try os.ChildProcess.exec(allocator, test_args.toSliceConst(), null, &env_map, max_doc_file_size);
switch (result.term) {
os.ChildProcess.Term.Exited => |exit_code| {
if (exit_code == 0) {
@@ -1000,7 +1003,7 @@ fn genHtml(allocator: *mem.Allocator, tokenizer: *Tokenizer, toc: *Toc, out: var
}
if (maybe_error_match) |error_match| {
- const result = try os.ChildProcess.exec(allocator, build_args.toSliceConst(), null, null, max_doc_file_size);
+ const result = try os.ChildProcess.exec(allocator, build_args.toSliceConst(), null, &env_map, max_doc_file_size);
switch (result.term) {
os.ChildProcess.Term.Exited => |exit_code| {
if (exit_code == 0) {
@@ -1032,7 +1035,7 @@ fn genHtml(allocator: *mem.Allocator, tokenizer: *Tokenizer, toc: *Toc, out: var
try out.print("\n");
}
} else {
- _ = exec(allocator, build_args.toSliceConst()) catch return parseError(tokenizer, code.source_token, "example failed to compile");
+ _ = exec(allocator, &env_map, build_args.toSliceConst()) catch return parseError(tokenizer, code.source_token, "example failed to compile");
}
if (!code.is_inline) {
try out.print("\n");
@@ -1045,8 +1048,8 @@ fn genHtml(allocator: *mem.Allocator, tokenizer: *Tokenizer, toc: *Toc, out: var
}
}
-fn exec(allocator: *mem.Allocator, args: []const []const u8) !os.ChildProcess.ExecResult {
- const result = try os.ChildProcess.exec(allocator, args, null, null, max_doc_file_size);
+fn exec(allocator: *mem.Allocator, env_map: *std.BufMap, args: []const []const u8) !os.ChildProcess.ExecResult {
+ const result = try os.ChildProcess.exec(allocator, args, null, env_map, max_doc_file_size);
switch (result.term) {
os.ChildProcess.Term.Exited => |exit_code| {
if (exit_code != 0) {
@@ -1070,8 +1073,8 @@ fn exec(allocator: *mem.Allocator, args: []const []const u8) !os.ChildProcess.Ex
return result;
}
-fn getBuiltinCode(allocator: *mem.Allocator, zig_exe: []const u8) ![]const u8 {
- const result = try exec(allocator, []const []const u8{
+fn getBuiltinCode(allocator: *mem.Allocator, env_map: *std.BufMap, zig_exe: []const u8) ![]const u8 {
+ const result = try exec(allocator, env_map, []const []const u8{
zig_exe,
"builtin",
});
diff --git a/std/debug/index.zig b/std/debug/index.zig
index 0e2a3a8d39..54a9af4b9e 100644
--- a/std/debug/index.zig
+++ b/std/debug/index.zig
@@ -10,6 +10,7 @@ const ArrayList = std.ArrayList;
const builtin = @import("builtin");
pub const FailingAllocator = @import("failing_allocator.zig").FailingAllocator;
+pub const failing_allocator = FailingAllocator.init(global_allocator, 0);
/// Tries to write to stderr, unbuffered, and ignores any error returned.
/// Does not append a newline.
@@ -44,6 +45,12 @@ pub fn getSelfDebugInfo() !*ElfStackTrace {
}
}
+fn wantTtyColor() bool {
+ var bytes: [128]u8 = undefined;
+ const allocator = &std.heap.FixedBufferAllocator.init(bytes[0..]).allocator;
+ return if (std.os.getEnvVarOwned(allocator, "ZIG_DEBUG_COLOR")) |_| true else |_| stderr_file.isTty();
+}
+
/// Tries to print the current stack trace to stderr, unbuffered, and ignores any error returned.
pub fn dumpCurrentStackTrace(start_addr: ?usize) void {
const stderr = getStderrStream() catch return;
@@ -51,7 +58,7 @@ pub fn dumpCurrentStackTrace(start_addr: ?usize) void {
stderr.print("Unable to dump stack trace: Unable to open debug info: {}\n", @errorName(err)) catch return;
return;
};
- writeCurrentStackTrace(stderr, getDebugInfoAllocator(), debug_info, stderr_file.isTty(), start_addr) catch |err| {
+ writeCurrentStackTrace(stderr, getDebugInfoAllocator(), debug_info, wantTtyColor(), start_addr) catch |err| {
stderr.print("Unable to dump stack trace: {}\n", @errorName(err)) catch return;
return;
};
@@ -64,7 +71,7 @@ pub fn dumpStackTrace(stack_trace: *const builtin.StackTrace) void {
stderr.print("Unable to dump stack trace: Unable to open debug info: {}\n", @errorName(err)) catch return;
return;
};
- writeStackTrace(stack_trace, stderr, getDebugInfoAllocator(), debug_info, stderr_file.isTty()) catch |err| {
+ writeStackTrace(stack_trace, stderr, getDebugInfoAllocator(), debug_info, wantTtyColor()) catch |err| {
stderr.print("Unable to dump stack trace: {}\n", @errorName(err)) catch return;
return;
};
diff --git a/std/os/index.zig b/std/os/index.zig
index 52b36c351c..0f9aea914d 100644
--- a/std/os/index.zig
+++ b/std/os/index.zig
@@ -544,8 +544,13 @@ pub fn getEnvPosix(key: []const u8) ?[]const u8 {
return null;
}
+pub const GetEnvVarOwnedError = error{
+ OutOfMemory,
+ EnvironmentVariableNotFound,
+};
+
/// Caller must free returned memory.
-pub fn getEnvVarOwned(allocator: *mem.Allocator, key: []const u8) ![]u8 {
+pub fn getEnvVarOwned(allocator: *mem.Allocator, key: []const u8) GetEnvVarOwnedError![]u8 {
if (is_windows) {
const key_with_null = try cstr.addNullByte(allocator, key);
defer allocator.free(key_with_null);
@@ -554,14 +559,17 @@ pub fn getEnvVarOwned(allocator: *mem.Allocator, key: []const u8) ![]u8 {
errdefer allocator.free(buf);
while (true) {
- const windows_buf_len = try math.cast(windows.DWORD, buf.len);
+ const windows_buf_len = math.cast(windows.DWORD, buf.len) catch return error.OutOfMemory;
const result = windows.GetEnvironmentVariableA(key_with_null.ptr, buf.ptr, windows_buf_len);
if (result == 0) {
const err = windows.GetLastError();
return switch (err) {
windows.ERROR.ENVVAR_NOT_FOUND => error.EnvironmentVariableNotFound,
- else => unexpectedErrorWindows(err),
+ else => {
+ _ = unexpectedErrorWindows(err);
+ return error.EnvironmentVariableNotFound;
+ },
};
}
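
For context, the whole docgen change reduces to building one environment map, forcing ZIG_DEBUG_COLOR, and handing that map to every child process it spawns. In isolation the pattern looks roughly like this (a minimal sketch reusing the same std.os calls the diff touches; runDocExample and the output cap are illustrative, not part of the patch):

    const std = @import("std");
    const os = std.os;

    fn runDocExample(allocator: *std.mem.Allocator, argv: []const []const u8) !os.ChildProcess.ExecResult {
        // Start from the current environment, then force colored stack traces
        // even though the child's stderr is a pipe rather than a TTY.
        var env_map = try os.getEnvMap(allocator);
        defer env_map.deinit();
        try env_map.set("ZIG_DEBUG_COLOR", "1");
        return os.ChildProcess.exec(allocator, argv, null, &env_map, 10 * 1024 * 1024);
    }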
From caa008505729f9511f6f0b070636013e9597b3f7 Mon Sep 17 00:00:00 2001
From: Andrew Kelley
Date: Mon, 9 Jul 2018 13:19:11 -0400
Subject: [PATCH 23/35] implement std.os.cpuCount for windows
---
std/event.zig | 3 +++
std/heap.zig | 4 ++--
std/os/index.zig | 5 +++++
std/os/windows/index.zig | 21 +++++++++++++++++++++
4 files changed, 31 insertions(+), 2 deletions(-)
diff --git a/std/event.zig b/std/event.zig
index 3fad81a78b..589ab4cb5f 100644
--- a/std/event.zig
+++ b/std/event.zig
@@ -142,6 +142,9 @@ pub const Loop = struct {
epoll_op: u32,
eventfd: i32,
},
+ builtin.Os.windows => struct {
+ base: ResumeNode,
+ },
else => @compileError("unsupported OS"),
};
diff --git a/std/heap.zig b/std/heap.zig
index 6d3fd05cdb..caf972e605 100644
--- a/std/heap.zig
+++ b/std/heap.zig
@@ -96,12 +96,12 @@ pub const DirectAllocator = struct {
},
Os.windows => {
const amt = n + alignment + @sizeOf(usize);
- const optional_heap_handle = @atomicLoad(?HeapHandle, ?self.heap_handle, builtin.AtomicOrder.SeqCst);
+ const optional_heap_handle = @atomicLoad(?HeapHandle, &self.heap_handle, builtin.AtomicOrder.SeqCst);
const heap_handle = optional_heap_handle orelse blk: {
const hh = os.windows.HeapCreate(os.windows.HEAP_NO_SERIALIZE, amt, 0) orelse return error.OutOfMemory;
const other_hh = @cmpxchgStrong(?HeapHandle, &self.heap_handle, null, hh, builtin.AtomicOrder.SeqCst, builtin.AtomicOrder.SeqCst) orelse break :blk hh;
_ = os.windows.HeapDestroy(hh);
- break :blk other_hh;
+ break :blk other_hh.?; // can't be null because of the cmpxchg
};
const ptr = os.windows.HeapAlloc(heap_handle, 0, amt) orelse return error.OutOfMemory;
const root_addr = @ptrToInt(ptr);
diff --git a/std/os/index.zig b/std/os/index.zig
index 021a29e3d5..94fdd9dc84 100644
--- a/std/os/index.zig
+++ b/std/os/index.zig
@@ -2806,6 +2806,11 @@ pub fn cpuCount(fallback_allocator: *mem.Allocator) CpuCountError!usize {
}
}
},
+ builtin.Os.windows => {
+ var system_info: windows.SYSTEM_INFO = undefined;
+ windows.GetSystemInfo(&system_info);
+ return @intCast(usize, system_info.dwNumberOfProcessors);
+ },
else => @compileError("unsupported OS"),
}
}
diff --git a/std/os/windows/index.zig b/std/os/windows/index.zig
index d631c6adbf..571ac97fac 100644
--- a/std/os/windows/index.zig
+++ b/std/os/windows/index.zig
@@ -107,6 +107,7 @@ pub extern "kernel32" stdcallcc fn GetFinalPathNameByHandleA(
pub extern "kernel32" stdcallcc fn GetProcessHeap() ?HANDLE;
+pub extern "kernel32" stdcallcc fn GetSystemInfo(lpSystemInfo: *SYSTEM_INFO) void;
pub extern "kernel32" stdcallcc fn GetSystemTimeAsFileTime(*FILETIME) void;
pub extern "kernel32" stdcallcc fn HeapCreate(flOptions: DWORD, dwInitialSize: SIZE_T, dwMaximumSize: SIZE_T) ?HANDLE;
@@ -204,6 +205,7 @@ pub const SIZE_T = usize;
pub const TCHAR = if (UNICODE) WCHAR else u8;
pub const UINT = c_uint;
pub const ULONG_PTR = usize;
+pub const DWORD_PTR = ULONG_PTR;
pub const UNICODE = false;
pub const WCHAR = u16;
pub const WORD = u16;
@@ -413,3 +415,22 @@ pub const FILETIME = extern struct {
dwLowDateTime: DWORD,
dwHighDateTime: DWORD,
};
+
+pub const SYSTEM_INFO = extern struct {
+ anon1: extern union {
+ dwOemId: DWORD,
+ anon2: extern struct {
+ wProcessorArchitecture: WORD,
+ wReserved: WORD,
+ },
+ },
+ dwPageSize: DWORD,
+ lpMinimumApplicationAddress: LPVOID,
+ lpMaximumApplicationAddress: LPVOID,
+ dwActiveProcessorMask: DWORD_PTR,
+ dwNumberOfProcessors: DWORD,
+ dwProcessorType: DWORD,
+ dwAllocationGranularity: DWORD,
+ wProcessorLevel: WORD,
+ wProcessorRevision: WORD,
+};
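
Call sites don't change; a thread pool still sizes itself with the portable wrapper, roughly like this (sketch; pickThreadCount is an illustrative name):

    const std = @import("std");

    fn pickThreadCount(allocator: *std.mem.Allocator) !usize {
        // On Windows this now calls GetSystemInfo and reads dwNumberOfProcessors;
        // the Linux and macOS code paths are unchanged.
        return try std.os.cpuCount(allocator);
    }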
From 9462852433a815496e0edf5d5b2e00726f5ea072 Mon Sep 17 00:00:00 2001
From: Andrew Kelley
Date: Mon, 9 Jul 2018 16:49:46 -0400
Subject: [PATCH 24/35] std.event.Loop multithreading for windows using IOCP
---
std/event.zig | 122 +++++++++++++++++++++++++++++++++++++--
std/heap.zig | 2 +-
std/os/index.zig | 25 ++++++--
std/os/windows/index.zig | 7 +++
std/os/windows/util.zig | 47 +++++++++++++++
5 files changed, 191 insertions(+), 12 deletions(-)
diff --git a/std/event.zig b/std/event.zig
index 589ab4cb5f..90d614d72e 100644
--- a/std/event.zig
+++ b/std/event.zig
@@ -4,6 +4,7 @@ const assert = std.debug.assert;
const event = this;
const mem = std.mem;
const posix = std.os.posix;
+const windows = std.os.windows;
const AtomicRmwOp = builtin.AtomicRmwOp;
const AtomicOrder = builtin.AtomicOrder;
@@ -113,10 +114,10 @@ pub const Loop = struct {
allocator: *mem.Allocator,
next_tick_queue: std.atomic.QueueMpsc(promise),
os_data: OsData,
+ final_resume_node: ResumeNode,
dispatch_lock: u8, // TODO make this a bool
pending_event_count: usize,
extra_threads: []*std.os.Thread,
- final_resume_node: ResumeNode,
// pre-allocated eventfds. all permanently active.
// this is how we send promises to be resumed on other threads.
@@ -144,6 +145,7 @@ pub const Loop = struct {
},
builtin.Os.windows => struct {
base: ResumeNode,
+ completion_key: usize,
},
else => @compileError("unsupported OS"),
};
@@ -181,12 +183,12 @@ pub const Loop = struct {
.next_tick_queue = std.atomic.QueueMpsc(promise).init(),
.dispatch_lock = 1, // start locked so threads go directly into epoll wait
.extra_threads = undefined,
+ .available_eventfd_resume_nodes = std.atomic.Stack(ResumeNode.EventFd).init(),
+ .eventfd_resume_nodes = undefined,
.final_resume_node = ResumeNode{
.id = ResumeNode.Id.Stop,
.handle = undefined,
},
- .available_eventfd_resume_nodes = std.atomic.Stack(ResumeNode.EventFd).init(),
- .eventfd_resume_nodes = undefined,
};
const extra_thread_count = thread_count - 1;
self.eventfd_resume_nodes = try self.allocator.alloc(
@@ -209,7 +211,8 @@ pub const Loop = struct {
}
const InitOsDataError = std.os.LinuxEpollCreateError || mem.Allocator.Error || std.os.LinuxEventFdError ||
- std.os.SpawnThreadError || std.os.LinuxEpollCtlError || std.os.BsdKEventError;
+ std.os.SpawnThreadError || std.os.LinuxEpollCtlError || std.os.BsdKEventError ||
+ std.os.WindowsCreateIoCompletionPortError;
const wakeup_bytes = []u8{0x1} ** 8;
@@ -335,6 +338,51 @@ pub const Loop = struct {
self.extra_threads[extra_thread_index] = try std.os.spawnThread(self, workerRun);
}
},
+ builtin.Os.windows => {
+ self.os_data.extra_thread_count = extra_thread_count;
+
+ self.os_data.io_port = try std.os.windowsCreateIoCompletionPort(
+ windows.INVALID_HANDLE_VALUE,
+ null,
+ undefined,
+ undefined,
+ );
+ errdefer std.os.close(self.os_data.io_port);
+
+ for (self.eventfd_resume_nodes) |*eventfd_node, i| {
+ eventfd_node.* = std.atomic.Stack(ResumeNode.EventFd).Node{
+ .data = ResumeNode.EventFd{
+ .base = ResumeNode{
+ .id = ResumeNode.Id.EventFd,
+ .handle = undefined,
+ },
+ // this one is for sending events
+ .completion_key = @ptrToInt(&eventfd_node.data.base),
+ },
+ .next = undefined,
+ };
+ self.available_eventfd_resume_nodes.push(eventfd_node);
+ }
+
+ var extra_thread_index: usize = 0;
+ errdefer {
+ var i: usize = 0;
+ while (i < extra_thread_index) : (i += 1) {
+ while (true) {
+ const overlapped = @intToPtr(?*windows.OVERLAPPED, 0x1);
+ std.os.windowsPostQueuedCompletionStatus(self.os_data.io_port, undefined, @ptrToInt(&self.final_resume_node), overlapped) catch continue;
+ break;
+ }
+ }
+ while (extra_thread_index != 0) {
+ extra_thread_index -= 1;
+ self.extra_threads[extra_thread_index].wait();
+ }
+ }
+ while (extra_thread_index < extra_thread_count) : (extra_thread_index += 1) {
+ self.extra_threads[extra_thread_index] = try std.os.spawnThread(self, workerRun);
+ }
+ },
else => {},
}
}
@@ -349,6 +397,10 @@ pub const Loop = struct {
},
builtin.Os.macosx => {
self.allocator.free(self.os_data.kevents);
+ std.os.close(self.os_data.kqfd);
+ },
+ builtin.Os.windows => {
+ std.os.close(self.os_data.io_port);
},
else => {},
}
@@ -434,7 +486,7 @@ pub const Loop = struct {
builtin.Os.macosx => {
const kevent_array = (*[1]posix.Kevent)(&eventfd_node.kevent);
const eventlist = ([*]posix.Kevent)(undefined)[0..0];
- _ = std.os.bsdKEvent(self.os_data.kqfd, kevent_array, eventlist, null) catch |_| {
+ _ = std.os.bsdKEvent(self.os_data.kqfd, kevent_array, eventlist, null) catch {
// fine, we didn't need it anyway
_ = @atomicRmw(u8, &self.dispatch_lock, AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst);
self.available_eventfd_resume_nodes.push(resume_stack_node);
@@ -446,7 +498,21 @@ pub const Loop = struct {
builtin.Os.linux => {
// the pending count is already accounted for
const epoll_events = posix.EPOLLONESHOT | std.os.linux.EPOLLIN | std.os.linux.EPOLLOUT | std.os.linux.EPOLLET;
- self.modFd(eventfd_node.eventfd, eventfd_node.epoll_op, epoll_events, &eventfd_node.base) catch |_| {
+ self.modFd(eventfd_node.eventfd, eventfd_node.epoll_op, epoll_events, &eventfd_node.base) catch {
+ // fine, we didn't need it anyway
+ _ = @atomicRmw(u8, &self.dispatch_lock, AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst);
+ self.available_eventfd_resume_nodes.push(resume_stack_node);
+ resume handle;
+ _ = @atomicRmw(usize, &self.pending_event_count, AtomicRmwOp.Sub, 1, AtomicOrder.SeqCst);
+ continue :start_over;
+ };
+ },
+ builtin.Os.windows => {
+ // this value is never dereferenced but we need it to be non-null so that
+ // the consumer code can decide whether to read the completion key.
+ // it has to do this for normal I/O, so we match that behavior here.
+ const overlapped = @intToPtr(?*windows.OVERLAPPED, 0x1);
+ std.os.windowsPostQueuedCompletionStatus(self.os_data.io_port, undefined, eventfd_node.completion_key, overlapped) catch {
// fine, we didn't need it anyway
_ = @atomicRmw(u8, &self.dispatch_lock, AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst);
self.available_eventfd_resume_nodes.push(resume_stack_node);
@@ -482,6 +548,17 @@ pub const Loop = struct {
_ = std.os.bsdKEvent(self.os_data.kqfd, final_kevent, eventlist, null) catch unreachable;
return;
},
+ builtin.Os.windows => {
+ var i: usize = 0;
+ while (i < self.os_data.extra_thread_count) : (i += 1) {
+ while (true) {
+ const overlapped = @intToPtr(?*windows.OVERLAPPED, 0x1);
+ std.os.windowsPostQueuedCompletionStatus(self.os_data.io_port, undefined, @ptrToInt(&self.final_resume_node), overlapped) catch continue;
+ break;
+ }
+ }
+ return;
+ },
else => @compileError("unsupported OS"),
}
}
@@ -536,6 +613,35 @@ pub const Loop = struct {
}
}
},
+ builtin.Os.windows => {
+ var completion_key: usize = undefined;
+ while (true) {
+ var nbytes: windows.DWORD = undefined;
+ var overlapped: ?*windows.OVERLAPPED = undefined;
+ switch (std.os.windowsGetQueuedCompletionStatus(self.os_data.io_port, &nbytes, &completion_key,
+ &overlapped, windows.INFINITE)) {
+ std.os.WindowsWaitResult.Aborted => return,
+ std.os.WindowsWaitResult.Normal => {},
+ }
+ if (overlapped != null) break;
+ }
+ const resume_node = @intToPtr(*ResumeNode, completion_key);
+ const handle = resume_node.handle;
+ const resume_node_id = resume_node.id;
+ switch (resume_node_id) {
+ ResumeNode.Id.Basic => {},
+ ResumeNode.Id.Stop => return,
+ ResumeNode.Id.EventFd => {
+ const event_fd_node = @fieldParentPtr(ResumeNode.EventFd, "base", resume_node);
+ const stack_node = @fieldParentPtr(std.atomic.Stack(ResumeNode.EventFd).Node, "data", event_fd_node);
+ self.available_eventfd_resume_nodes.push(stack_node);
+ },
+ }
+ resume handle;
+ if (resume_node_id == ResumeNode.Id.EventFd) {
+ _ = @atomicRmw(usize, &self.pending_event_count, AtomicRmwOp.Sub, 1, AtomicOrder.SeqCst);
+ }
+ },
else => @compileError("unsupported OS"),
}
}
@@ -548,6 +654,10 @@ pub const Loop = struct {
final_eventfd_event: std.os.linux.epoll_event,
},
builtin.Os.macosx => MacOsData,
+ builtin.Os.windows => struct {
+ io_port: windows.HANDLE,
+ extra_thread_count: usize,
+ },
else => struct {},
};
diff --git a/std/heap.zig b/std/heap.zig
index caf972e605..ef22c8d0c5 100644
--- a/std/heap.zig
+++ b/std/heap.zig
@@ -98,7 +98,7 @@ pub const DirectAllocator = struct {
const amt = n + alignment + @sizeOf(usize);
const optional_heap_handle = @atomicLoad(?HeapHandle, &self.heap_handle, builtin.AtomicOrder.SeqCst);
const heap_handle = optional_heap_handle orelse blk: {
- const hh = os.windows.HeapCreate(os.windows.HEAP_NO_SERIALIZE, amt, 0) orelse return error.OutOfMemory;
+ const hh = os.windows.HeapCreate(0, amt, 0) orelse return error.OutOfMemory;
const other_hh = @cmpxchgStrong(?HeapHandle, &self.heap_handle, null, hh, builtin.AtomicOrder.SeqCst, builtin.AtomicOrder.SeqCst) orelse break :blk hh;
_ = os.windows.HeapDestroy(hh);
break :blk other_hh.?; // can't be null because of the cmpxchg
diff --git a/std/os/index.zig b/std/os/index.zig
index 94fdd9dc84..896d6b3df8 100644
--- a/std/os/index.zig
+++ b/std/os/index.zig
@@ -61,6 +61,15 @@ pub const windowsLoadDll = windows_util.windowsLoadDll;
pub const windowsUnloadDll = windows_util.windowsUnloadDll;
pub const createWindowsEnvBlock = windows_util.createWindowsEnvBlock;
+pub const WindowsCreateIoCompletionPortError = windows_util.WindowsCreateIoCompletionPortError;
+pub const windowsCreateIoCompletionPort = windows_util.windowsCreateIoCompletionPort;
+
+pub const WindowsPostQueuedCompletionStatusError = windows_util.WindowsPostQueuedCompletionStatusError;
+pub const windowsPostQueuedCompletionStatus = windows_util.windowsPostQueuedCompletionStatus;
+
+pub const WindowsWaitResult = windows_util.WindowsWaitResult;
+pub const windowsGetQueuedCompletionStatus = windows_util.windowsGetQueuedCompletionStatus;
+
pub const WindowsWaitError = windows_util.WaitError;
pub const WindowsOpenError = windows_util.OpenError;
pub const WindowsWriteError = windows_util.WriteError;
@@ -2592,11 +2601,17 @@ pub fn spawnThread(context: var, comptime startFn: var) SpawnThreadError!*Thread
thread: Thread,
inner: Context,
};
- extern fn threadMain(arg: windows.LPVOID) windows.DWORD {
- if (@sizeOf(Context) == 0) {
- return startFn({});
- } else {
- return startFn(@ptrCast(*Context, @alignCast(@alignOf(Context), arg)).*);
+ extern fn threadMain(raw_arg: windows.LPVOID) windows.DWORD {
+ const arg = if (@sizeOf(Context) == 0) {} else @ptrCast(*Context, @alignCast(@alignOf(Context), raw_arg)).*;
+ switch (@typeId(@typeOf(startFn).ReturnType)) {
+ builtin.TypeId.Int => {
+ return startFn(arg);
+ },
+ builtin.TypeId.Void => {
+ startFn(arg);
+ return 0;
+ },
+ else => @compileError("expected return type of startFn to be 'u8', 'noreturn', 'void', or '!void'"),
}
}
};
diff --git a/std/os/windows/index.zig b/std/os/windows/index.zig
index 571ac97fac..f73b8ec261 100644
--- a/std/os/windows/index.zig
+++ b/std/os/windows/index.zig
@@ -59,6 +59,9 @@ pub extern "kernel32" stdcallcc fn CreateSymbolicLinkA(
dwFlags: DWORD,
) BOOLEAN;
+
+pub extern "kernel32" stdcallcc fn CreateIoCompletionPort(FileHandle: HANDLE, ExistingCompletionPort: ?HANDLE, CompletionKey: ULONG_PTR, NumberOfConcurrentThreads: DWORD) ?HANDLE;
+
pub extern "kernel32" stdcallcc fn CreateThread(lpThreadAttributes: ?LPSECURITY_ATTRIBUTES, dwStackSize: SIZE_T, lpStartAddress: LPTHREAD_START_ROUTINE, lpParameter: ?LPVOID, dwCreationFlags: DWORD, lpThreadId: ?LPDWORD) ?HANDLE;
pub extern "kernel32" stdcallcc fn DeleteFileA(lpFileName: LPCSTR) BOOL;
@@ -106,6 +109,7 @@ pub extern "kernel32" stdcallcc fn GetFinalPathNameByHandleA(
) DWORD;
pub extern "kernel32" stdcallcc fn GetProcessHeap() ?HANDLE;
+pub extern "kernel32" stdcallcc fn GetQueuedCompletionStatus(CompletionPort: HANDLE, lpNumberOfBytesTransferred: LPDWORD, lpCompletionKey: *ULONG_PTR, lpOverlapped: *?*OVERLAPPED, dwMilliseconds: DWORD) BOOL;
pub extern "kernel32" stdcallcc fn GetSystemInfo(lpSystemInfo: *SYSTEM_INFO) void;
pub extern "kernel32" stdcallcc fn GetSystemTimeAsFileTime(*FILETIME) void;
@@ -130,6 +134,9 @@ pub extern "kernel32" stdcallcc fn MoveFileExA(
dwFlags: DWORD,
) BOOL;
+
+pub extern "kernel32" stdcallcc fn PostQueuedCompletionStatus(CompletionPort: HANDLE, dwNumberOfBytesTransferred: DWORD, dwCompletionKey: ULONG_PTR, lpOverlapped: ?*OVERLAPPED) BOOL;
+
pub extern "kernel32" stdcallcc fn QueryPerformanceCounter(lpPerformanceCount: *LARGE_INTEGER) BOOL;
pub extern "kernel32" stdcallcc fn QueryPerformanceFrequency(lpFrequency: *LARGE_INTEGER) BOOL;
diff --git a/std/os/windows/util.zig b/std/os/windows/util.zig
index 45b205451d..b04e8efc4b 100644
--- a/std/os/windows/util.zig
+++ b/std/os/windows/util.zig
@@ -214,3 +214,50 @@ pub fn windowsFindNextFile(handle: windows.HANDLE, find_file_data: *windows.WIN3
}
return true;
}
+
+
+pub const WindowsCreateIoCompletionPortError = error {
+ Unexpected,
+};
+
+pub fn windowsCreateIoCompletionPort(file_handle: windows.HANDLE, existing_completion_port: ?windows.HANDLE, completion_key: usize, concurrent_thread_count: windows.DWORD) !windows.HANDLE {
+ const handle = windows.CreateIoCompletionPort(file_handle, existing_completion_port, completion_key, concurrent_thread_count) orelse {
+ const err = windows.GetLastError();
+ switch (err) {
+ else => return os.unexpectedErrorWindows(err),
+ }
+ };
+ return handle;
+}
+
+pub const WindowsPostQueuedCompletionStatusError = error {
+ Unexpected,
+};
+
+pub fn windowsPostQueuedCompletionStatus(completion_port: windows.HANDLE, bytes_transferred_count: windows.DWORD, completion_key: usize, lpOverlapped: ?*windows.OVERLAPPED) WindowsPostQueuedCompletionStatusError!void {
+ if (windows.PostQueuedCompletionStatus(completion_port, bytes_transferred_count, completion_key, lpOverlapped) == 0) {
+ const err = windows.GetLastError();
+ switch (err) {
+ else => return os.unexpectedErrorWindows(err),
+ }
+ }
+}
+
+pub const WindowsWaitResult = error {
+ Normal,
+ Aborted,
+};
+
+pub fn windowsGetQueuedCompletionStatus(completion_port: windows.HANDLE, bytes_transferred_count: *windows.DWORD, lpCompletionKey: *usize, lpOverlapped: *?*windows.OVERLAPPED, dwMilliseconds: windows.DWORD) WindowsWaitResult {
+ if (windows.GetQueuedCompletionStatus(completion_port, bytes_transferred_count, lpCompletionKey, lpOverlapped, dwMilliseconds) == windows.FALSE) {
+ if (std.debug.runtime_safety) {
+ const err = windows.GetLastError();
+ if (err != windows.ERROR.ABANDONED_WAIT_0) {
+ std.debug.warn("err: {}\n", err);
+ }
+ assert(err == windows.ERROR.ABANDONED_WAIT_0);
+ }
+ return WindowsWaitResult.Aborted;
+ }
+ return WindowsWaitResult.Normal;
+}
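
Stripped of the event-loop bookkeeping, the cross-thread wake-up added here is just a dummy completion packet posted to the I/O completion port, with worker threads blocking on the other side. Roughly (a sketch over the new std.os wrappers; wakeWorker/waitForWakeup are illustrative names and io_port stands in for Loop.os_data.io_port):

    const std = @import("std");
    const windows = std.os.windows;

    fn wakeWorker(io_port: windows.HANDLE, resume_node_addr: usize) void {
        while (true) {
            // The OVERLAPPED pointer is never dereferenced; it only has to be
            // non-null so the receiving side knows the completion key is meaningful.
            const overlapped = @intToPtr(?*windows.OVERLAPPED, 0x1);
            std.os.windowsPostQueuedCompletionStatus(io_port, undefined, resume_node_addr, overlapped) catch continue;
            break;
        }
    }

    fn waitForWakeup(io_port: windows.HANDLE) ?usize {
        var nbytes: windows.DWORD = undefined;
        var completion_key: usize = undefined;
        var overlapped: ?*windows.OVERLAPPED = undefined;
        switch (std.os.windowsGetQueuedCompletionStatus(io_port, &nbytes, &completion_key, &overlapped, windows.INFINITE)) {
            std.os.WindowsWaitResult.Aborted => return null,
            std.os.WindowsWaitResult.Normal => return completion_key,
        }
    }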
From 0ac1b83885c7f2a97a8ac25657afcb5c9b80afb4 Mon Sep 17 00:00:00 2001
From: Andrew Kelley
Date: Mon, 9 Jul 2018 17:13:31 -0400
Subject: [PATCH 25/35] fix non-portable format specifier
---
src/ir.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/ir.cpp b/src/ir.cpp
index dcd39ccfe5..505a32247e 100644
--- a/src/ir.cpp
+++ b/src/ir.cpp
@@ -19374,7 +19374,7 @@ static IrInstruction *ir_align_cast(IrAnalyze *ira, IrInstruction *target, uint3
val->data.x_ptr.data.hard_coded_addr.addr % align_bytes != 0)
{
ir_add_error(ira, target,
- buf_sprintf("pointer address 0x%lx is not aligned to %" PRIu32 " bytes",
+ buf_sprintf("pointer address 0x%" ZIG_PRI_x64 " is not aligned to %" PRIu32 " bytes",
val->data.x_ptr.data.hard_coded_addr.addr, align_bytes));
return ira->codegen->invalid_instruction;
}
From 1a1534ecb55d0273bd9cd62d415ac840eb73b2e5 Mon Sep 17 00:00:00 2001
From: Andrew Kelley
Date: Mon, 9 Jul 2018 17:16:06 -0400
Subject: [PATCH 26/35] fix regression on macos
---
std/event.zig | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/std/event.zig b/std/event.zig
index 90d614d72e..a72db05e78 100644
--- a/std/event.zig
+++ b/std/event.zig
@@ -664,7 +664,7 @@ pub const Loop = struct {
const MacOsData = struct {
kqfd: i32,
final_kevent: posix.Kevent,
- kevents: posix.Kevent,
+ kevents: []posix.Kevent,
};
};
From a2834d48b9480286549fd9882d67e874396eec79 Mon Sep 17 00:00:00 2001
From: wilsonk
Date: Mon, 9 Jul 2018 15:21:20 -0600
Subject: [PATCH 27/35] Update throughput_test.zig. (#1211)
---
std/crypto/throughput_test.zig | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/std/crypto/throughput_test.zig b/std/crypto/throughput_test.zig
index 0ad6845d1a..c21838e607 100644
--- a/std/crypto/throughput_test.zig
+++ b/std/crypto/throughput_test.zig
@@ -15,8 +15,8 @@ const BytesToHash = 1024 * MiB;
pub fn main() !void {
var stdout_file = try std.io.getStdOut();
- var stdout_out_stream = std.io.FileOutStream.init(*stdout_file);
- const stdout = *stdout_out_stream.stream;
+ var stdout_out_stream = std.io.FileOutStream.init(&stdout_file);
+ const stdout = &stdout_out_stream.stream;
var block: [HashFunction.block_size]u8 = undefined;
std.mem.set(u8, block[0..], 0);
@@ -31,8 +31,8 @@ pub fn main() !void {
}
const end = timer.read();
- const elapsed_s = f64(end - start) / time.ns_per_s;
- const throughput = u64(BytesToHash / elapsed_s);
+ const elapsed_s = @intToFloat(f64, end - start) / time.ns_per_s;
+ const throughput = @floatToInt(u64, BytesToHash / elapsed_s);
try stdout.print("{}: {} MiB/s\n", @typeName(HashFunction), throughput / (1 * MiB));
}
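
The fix is only about the cast syntax; the same conversion in isolation looks like this (a small sketch, the constants are arbitrary):

    const std = @import("std");

    test "explicit int/float conversions" {
        const elapsed_ns: u64 = 1500000000;
        const bytes_hashed: u64 = 1024 * 1024 * 1024;
        // Spell the conversions with the explicit builtins instead of the
        // old f64(x) / u64(x) call syntax used before this patch.
        const elapsed_s = @intToFloat(f64, elapsed_ns) / 1000000000.0;
        const throughput = @floatToInt(u64, @intToFloat(f64, bytes_hashed) / elapsed_s);
        std.debug.assert(throughput > 0);
    }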
From c89aac85c440ea4cbccf1abdbd6acf84a33077e3 Mon Sep 17 00:00:00 2001
From: Andrew Kelley
Date: Mon, 9 Jul 2018 21:21:59 -0400
Subject: [PATCH 28/35] better workaround for guaranteeing memory in coroutine
frame
See #1194
---
std/atomic/queue_mpsc.zig | 25 ++++++++++++++++++++++++
std/event.zig | 40 ++++++++++++++++++---------------------
2 files changed, 43 insertions(+), 22 deletions(-)
diff --git a/std/atomic/queue_mpsc.zig b/std/atomic/queue_mpsc.zig
index bc0a94258b..978e189453 100644
--- a/std/atomic/queue_mpsc.zig
+++ b/std/atomic/queue_mpsc.zig
@@ -60,6 +60,31 @@ pub fn QueueMpsc(comptime T: type) type {
}
return self.outbox.isEmpty();
}
+
+ /// For debugging only. No API guarantees about what this does.
+ pub fn dump(self: *Self) void {
+ {
+ var it = self.outbox.root;
+ while (it) |node| {
+ std.debug.warn("0x{x} -> ", @ptrToInt(node));
+ it = node.next;
+ }
+ }
+ const inbox_index = self.inbox_index;
+ const inboxes = []*std.atomic.Stack(T){
+ &self.inboxes[self.inbox_index],
+ &self.inboxes[1 - self.inbox_index],
+ };
+ for (inboxes) |inbox| {
+ var it = inbox.root;
+ while (it) |node| {
+ std.debug.warn("0x{x} -> ", @ptrToInt(node));
+ it = node.next;
+ }
+ }
+
+ std.debug.warn("null\n");
+ }
};
}
diff --git a/std/event.zig b/std/event.zig
index a72db05e78..de51f8c87e 100644
--- a/std/event.zig
+++ b/std/event.zig
@@ -439,15 +439,14 @@ pub const Loop = struct {
pub async fn waitFd(self: *Loop, fd: i32) !void {
defer self.removeFd(fd);
- var resume_node = ResumeNode{
- .id = ResumeNode.Id.Basic,
- .handle = undefined,
- };
suspend |p| {
- resume_node.handle = p;
+ // TODO explicitly put this memory in the coroutine frame #1194
+ var resume_node = ResumeNode{
+ .id = ResumeNode.Id.Basic,
+ .handle = p,
+ };
try self.addFd(fd, &resume_node);
}
- var a = &resume_node; // TODO better way to explicitly put memory in coro frame
}
/// Bring your own linked list node. This means it can't fail.
@@ -618,8 +617,7 @@ pub const Loop = struct {
while (true) {
var nbytes: windows.DWORD = undefined;
var overlapped: ?*windows.OVERLAPPED = undefined;
- switch (std.os.windowsGetQueuedCompletionStatus(self.os_data.io_port, &nbytes, &completion_key,
- &overlapped, windows.INFINITE)) {
+ switch (std.os.windowsGetQueuedCompletionStatus(self.os_data.io_port, &nbytes, &completion_key, &overlapped, windows.INFINITE)) {
std.os.WindowsWaitResult.Aborted => return,
std.os.WindowsWaitResult.Normal => {},
}
@@ -1062,10 +1060,13 @@ pub const Lock = struct {
}
pub async fn acquire(self: *Lock) Held {
- var my_tick_node: Loop.NextTickNode = undefined;
-
s: suspend |handle| {
- my_tick_node.data = handle;
+ // TODO explicitly put this memory in the coroutine frame #1194
+ var my_tick_node = Loop.NextTickNode{
+ .data = handle,
+ .next = undefined,
+ };
+
self.queue.put(&my_tick_node);
// At this point, we are in the queue, so we might have already been resumed and this coroutine
@@ -1107,10 +1108,6 @@ pub const Lock = struct {
}
}
- // TODO this workaround to force my_tick_node to be in the coroutine frame should
- // not be necessary
- var trash1 = &my_tick_node;
-
return Held{ .lock = self };
}
};
@@ -1176,6 +1173,10 @@ test "std.event.Lock" {
}
async fn testLock(loop: *Loop, lock: *Lock) void {
+ // TODO explicitly put next tick node memory in the coroutine frame #1194
+ suspend |p| {
+ resume p;
+ }
const handle1 = async lockRunner(lock) catch @panic("out of memory");
var tick_node1 = Loop.NextTickNode{
.next = undefined,
@@ -1200,12 +1201,6 @@ async fn testLock(loop: *Loop, lock: *Lock) void {
await handle1;
await handle2;
await handle3;
-
- // TODO this is to force tick node memory to be in the coro frame
- // there should be a way to make it explicit where the memory is
- var a = &tick_node1;
- var b = &tick_node2;
- var c = &tick_node3;
}
var shared_test_data = [1]i32{0} ** 10;
@@ -1216,7 +1211,8 @@ async fn lockRunner(lock: *Lock) void {
var i: usize = 0;
while (i < shared_test_data.len) : (i += 1) {
- const handle = await (async lock.acquire() catch @panic("out of memory"));
+ const lock_promise = async lock.acquire() catch @panic("out of memory");
+ const handle = await lock_promise;
defer handle.release();
shared_test_index = 0;
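
The recurring shape of the workaround is to declare the node inside the suspend block so its storage lands in the coroutine frame rather than on whichever thread's stack happens to be active; in isolation (a sketch against the same Loop API, queueSelf is an illustrative name):

    const std = @import("std");
    const Loop = std.event.Loop;

    async fn queueSelf(loop: *Loop) void {
        suspend |p| {
            // Declared inside the suspend block (see #1194) so the node lives
            // in the coroutine frame and stays valid until the loop resumes us.
            var my_tick_node = Loop.NextTickNode{
                .data = p,
                .next = undefined,
            };
            loop.onNextTick(&my_tick_node);
        }
    }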
From 10cc49db1ca1f9b3ac63277c0742e05f6412f3c6 Mon Sep 17 00:00:00 2001
From: Andrew Kelley
Date: Mon, 9 Jul 2018 21:42:05 -0400
Subject: [PATCH 29/35] define c macros before importing llvm h files
Seems to matter on Ubuntu 16.04.
closes #1196
---
src-self-hosted/c.zig | 2 ++
1 file changed, 2 insertions(+)
diff --git a/src-self-hosted/c.zig b/src-self-hosted/c.zig
index 08060fbe3a..3912462985 100644
--- a/src-self-hosted/c.zig
+++ b/src-self-hosted/c.zig
@@ -1,4 +1,6 @@
pub use @cImport({
+ @cDefine("__STDC_CONSTANT_MACROS", "");
+ @cDefine("__STDC_LIMIT_MACROS", "");
@cInclude("inttypes.h");
@cInclude("config.h");
@cInclude("zig_llvm.h");
From b6eb404831e44a92b4841459068f4fbe9c753541 Mon Sep 17 00:00:00 2001
From: Andrew Kelley
Date: Mon, 9 Jul 2018 22:22:44 -0400
Subject: [PATCH 30/35] organize std.event into directories
---
CMakeLists.txt | 5 +
std/event.zig | 1234 +----------------------------------------
std/event/channel.zig | 254 +++++++++
std/event/lock.zig | 204 +++++++
std/event/locked.zig | 42 ++
std/event/loop.zig | 577 +++++++++++++++++++
std/event/tcp.zig | 183 ++++++
7 files changed, 1277 insertions(+), 1222 deletions(-)
create mode 100644 std/event/channel.zig
create mode 100644 std/event/lock.zig
create mode 100644 std/event/locked.zig
create mode 100644 std/event/loop.zig
create mode 100644 std/event/tcp.zig
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9701de9e42..fdedcd5eec 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -458,6 +458,11 @@ set(ZIG_STD_FILES
"elf.zig"
"empty.zig"
"event.zig"
+ "event/channel.zig"
+ "event/lock.zig"
+ "event/locked.zig"
+ "event/loop.zig"
+ "event/tcp.zig"
"fmt/errol/enum3.zig"
"fmt/errol/index.zig"
"fmt/errol/lookup.zig"
diff --git a/std/event.zig b/std/event.zig
index de51f8c87e..7e9928b3d7 100644
--- a/std/event.zig
+++ b/std/event.zig
@@ -1,1223 +1,13 @@
-const std = @import("index.zig");
-const builtin = @import("builtin");
-const assert = std.debug.assert;
-const event = this;
-const mem = std.mem;
-const posix = std.os.posix;
-const windows = std.os.windows;
-const AtomicRmwOp = builtin.AtomicRmwOp;
-const AtomicOrder = builtin.AtomicOrder;
-
-pub const TcpServer = struct {
- handleRequestFn: async<*mem.Allocator> fn (*TcpServer, *const std.net.Address, *const std.os.File) void,
-
- loop: *Loop,
- sockfd: ?i32,
- accept_coro: ?promise,
- listen_address: std.net.Address,
-
- waiting_for_emfile_node: PromiseNode,
- listen_resume_node: event.Loop.ResumeNode,
-
- const PromiseNode = std.LinkedList(promise).Node;
-
- pub fn init(loop: *Loop) TcpServer {
- // TODO can't initialize handler coroutine here because we need well defined copy elision
- return TcpServer{
- .loop = loop,
- .sockfd = null,
- .accept_coro = null,
- .handleRequestFn = undefined,
- .waiting_for_emfile_node = undefined,
- .listen_address = undefined,
- .listen_resume_node = event.Loop.ResumeNode{
- .id = event.Loop.ResumeNode.Id.Basic,
- .handle = undefined,
- },
- };
- }
-
- pub fn listen(
- self: *TcpServer,
- address: *const std.net.Address,
- handleRequestFn: async<*mem.Allocator> fn (*TcpServer, *const std.net.Address, *const std.os.File) void,
- ) !void {
- self.handleRequestFn = handleRequestFn;
-
- const sockfd = try std.os.posixSocket(posix.AF_INET, posix.SOCK_STREAM | posix.SOCK_CLOEXEC | posix.SOCK_NONBLOCK, posix.PROTO_tcp);
- errdefer std.os.close(sockfd);
- self.sockfd = sockfd;
-
- try std.os.posixBind(sockfd, &address.os_addr);
- try std.os.posixListen(sockfd, posix.SOMAXCONN);
- self.listen_address = std.net.Address.initPosix(try std.os.posixGetSockName(sockfd));
-
- self.accept_coro = try async TcpServer.handler(self);
- errdefer cancel self.accept_coro.?;
-
- self.listen_resume_node.handle = self.accept_coro.?;
- try self.loop.addFd(sockfd, &self.listen_resume_node);
- errdefer self.loop.removeFd(sockfd);
- }
-
- /// Stop listening
- pub fn close(self: *TcpServer) void {
- self.loop.removeFd(self.sockfd.?);
- std.os.close(self.sockfd.?);
- }
-
- pub fn deinit(self: *TcpServer) void {
- if (self.accept_coro) |accept_coro| cancel accept_coro;
- if (self.sockfd) |sockfd| std.os.close(sockfd);
- }
-
- pub async fn handler(self: *TcpServer) void {
- while (true) {
- var accepted_addr: std.net.Address = undefined;
- if (std.os.posixAccept(self.sockfd.?, &accepted_addr.os_addr, posix.SOCK_NONBLOCK | posix.SOCK_CLOEXEC)) |accepted_fd| {
- var socket = std.os.File.openHandle(accepted_fd);
- _ = async self.handleRequestFn(self, accepted_addr, socket) catch |err| switch (err) {
- error.OutOfMemory => {
- socket.close();
- continue;
- },
- };
- } else |err| switch (err) {
- error.WouldBlock => {
- suspend; // we will get resumed by epoll_wait in the event loop
- continue;
- },
- error.ProcessFdQuotaExceeded => {
- errdefer std.os.emfile_promise_queue.remove(&self.waiting_for_emfile_node);
- suspend |p| {
- self.waiting_for_emfile_node = PromiseNode.init(p);
- std.os.emfile_promise_queue.append(&self.waiting_for_emfile_node);
- }
- continue;
- },
- error.ConnectionAborted, error.FileDescriptorClosed => continue,
-
- error.PageFault => unreachable,
- error.InvalidSyscall => unreachable,
- error.FileDescriptorNotASocket => unreachable,
- error.OperationNotSupported => unreachable,
-
- error.SystemFdQuotaExceeded, error.SystemResources, error.ProtocolFailure, error.BlockedByFirewall, error.Unexpected => {
- @panic("TODO handle this error");
- },
- }
- }
- }
-};
-
-pub const Loop = struct {
- allocator: *mem.Allocator,
- next_tick_queue: std.atomic.QueueMpsc(promise),
- os_data: OsData,
- final_resume_node: ResumeNode,
- dispatch_lock: u8, // TODO make this a bool
- pending_event_count: usize,
- extra_threads: []*std.os.Thread,
-
- // pre-allocated eventfds. all permanently active.
- // this is how we send promises to be resumed on other threads.
- available_eventfd_resume_nodes: std.atomic.Stack(ResumeNode.EventFd),
- eventfd_resume_nodes: []std.atomic.Stack(ResumeNode.EventFd).Node,
-
- pub const NextTickNode = std.atomic.QueueMpsc(promise).Node;
-
- pub const ResumeNode = struct {
- id: Id,
- handle: promise,
-
- pub const Id = enum {
- Basic,
- Stop,
- EventFd,
- };
-
- pub const EventFd = switch (builtin.os) {
- builtin.Os.macosx => MacOsEventFd,
- builtin.Os.linux => struct {
- base: ResumeNode,
- epoll_op: u32,
- eventfd: i32,
- },
- builtin.Os.windows => struct {
- base: ResumeNode,
- completion_key: usize,
- },
- else => @compileError("unsupported OS"),
- };
-
- const MacOsEventFd = struct {
- base: ResumeNode,
- kevent: posix.Kevent,
- };
- };
-
- /// After initialization, call run().
- /// TODO copy elision / named return values so that the threads referencing *Loop
- /// have the correct pointer value.
- fn initSingleThreaded(self: *Loop, allocator: *mem.Allocator) !void {
- return self.initInternal(allocator, 1);
- }
-
- /// The allocator must be thread-safe because we use it for multiplexing
- /// coroutines onto kernel threads.
- /// After initialization, call run().
- /// TODO copy elision / named return values so that the threads referencing *Loop
- /// have the correct pointer value.
- fn initMultiThreaded(self: *Loop, allocator: *mem.Allocator) !void {
- const core_count = try std.os.cpuCount(allocator);
- return self.initInternal(allocator, core_count);
- }
-
- /// Thread count is the total thread count. The thread pool size will be
- /// max(thread_count - 1, 0)
- fn initInternal(self: *Loop, allocator: *mem.Allocator, thread_count: usize) !void {
- self.* = Loop{
- .pending_event_count = 0,
- .allocator = allocator,
- .os_data = undefined,
- .next_tick_queue = std.atomic.QueueMpsc(promise).init(),
- .dispatch_lock = 1, // start locked so threads go directly into epoll wait
- .extra_threads = undefined,
- .available_eventfd_resume_nodes = std.atomic.Stack(ResumeNode.EventFd).init(),
- .eventfd_resume_nodes = undefined,
- .final_resume_node = ResumeNode{
- .id = ResumeNode.Id.Stop,
- .handle = undefined,
- },
- };
- const extra_thread_count = thread_count - 1;
- self.eventfd_resume_nodes = try self.allocator.alloc(
- std.atomic.Stack(ResumeNode.EventFd).Node,
- extra_thread_count,
- );
- errdefer self.allocator.free(self.eventfd_resume_nodes);
-
- self.extra_threads = try self.allocator.alloc(*std.os.Thread, extra_thread_count);
- errdefer self.allocator.free(self.extra_threads);
-
- try self.initOsData(extra_thread_count);
- errdefer self.deinitOsData();
- }
-
- /// must call stop before deinit
- pub fn deinit(self: *Loop) void {
- self.deinitOsData();
- self.allocator.free(self.extra_threads);
- }
-
- const InitOsDataError = std.os.LinuxEpollCreateError || mem.Allocator.Error || std.os.LinuxEventFdError ||
- std.os.SpawnThreadError || std.os.LinuxEpollCtlError || std.os.BsdKEventError ||
- std.os.WindowsCreateIoCompletionPortError;
-
- const wakeup_bytes = []u8{0x1} ** 8;
-
- fn initOsData(self: *Loop, extra_thread_count: usize) InitOsDataError!void {
- switch (builtin.os) {
- builtin.Os.linux => {
- errdefer {
- while (self.available_eventfd_resume_nodes.pop()) |node| std.os.close(node.data.eventfd);
- }
- for (self.eventfd_resume_nodes) |*eventfd_node| {
- eventfd_node.* = std.atomic.Stack(ResumeNode.EventFd).Node{
- .data = ResumeNode.EventFd{
- .base = ResumeNode{
- .id = ResumeNode.Id.EventFd,
- .handle = undefined,
- },
- .eventfd = try std.os.linuxEventFd(1, posix.EFD_CLOEXEC | posix.EFD_NONBLOCK),
- .epoll_op = posix.EPOLL_CTL_ADD,
- },
- .next = undefined,
- };
- self.available_eventfd_resume_nodes.push(eventfd_node);
- }
-
- self.os_data.epollfd = try std.os.linuxEpollCreate(posix.EPOLL_CLOEXEC);
- errdefer std.os.close(self.os_data.epollfd);
-
- self.os_data.final_eventfd = try std.os.linuxEventFd(0, posix.EFD_CLOEXEC | posix.EFD_NONBLOCK);
- errdefer std.os.close(self.os_data.final_eventfd);
-
- self.os_data.final_eventfd_event = posix.epoll_event{
- .events = posix.EPOLLIN,
- .data = posix.epoll_data{ .ptr = @ptrToInt(&self.final_resume_node) },
- };
- try std.os.linuxEpollCtl(
- self.os_data.epollfd,
- posix.EPOLL_CTL_ADD,
- self.os_data.final_eventfd,
- &self.os_data.final_eventfd_event,
- );
-
- var extra_thread_index: usize = 0;
- errdefer {
- // writing 8 bytes to an eventfd cannot fail
- std.os.posixWrite(self.os_data.final_eventfd, wakeup_bytes) catch unreachable;
- while (extra_thread_index != 0) {
- extra_thread_index -= 1;
- self.extra_threads[extra_thread_index].wait();
- }
- }
- while (extra_thread_index < extra_thread_count) : (extra_thread_index += 1) {
- self.extra_threads[extra_thread_index] = try std.os.spawnThread(self, workerRun);
- }
- },
- builtin.Os.macosx => {
- self.os_data.kqfd = try std.os.bsdKQueue();
- errdefer std.os.close(self.os_data.kqfd);
-
- self.os_data.kevents = try self.allocator.alloc(posix.Kevent, extra_thread_count);
- errdefer self.allocator.free(self.os_data.kevents);
-
- const eventlist = ([*]posix.Kevent)(undefined)[0..0];
-
- for (self.eventfd_resume_nodes) |*eventfd_node, i| {
- eventfd_node.* = std.atomic.Stack(ResumeNode.EventFd).Node{
- .data = ResumeNode.EventFd{
- .base = ResumeNode{
- .id = ResumeNode.Id.EventFd,
- .handle = undefined,
- },
- // this one is for sending events
- .kevent = posix.Kevent{
- .ident = i,
- .filter = posix.EVFILT_USER,
- .flags = posix.EV_CLEAR | posix.EV_ADD | posix.EV_DISABLE,
- .fflags = 0,
- .data = 0,
- .udata = @ptrToInt(&eventfd_node.data.base),
- },
- },
- .next = undefined,
- };
- self.available_eventfd_resume_nodes.push(eventfd_node);
- const kevent_array = (*[1]posix.Kevent)(&eventfd_node.data.kevent);
- _ = try std.os.bsdKEvent(self.os_data.kqfd, kevent_array, eventlist, null);
- eventfd_node.data.kevent.flags = posix.EV_CLEAR | posix.EV_ENABLE;
- eventfd_node.data.kevent.fflags = posix.NOTE_TRIGGER;
- // this one is for waiting for events
- self.os_data.kevents[i] = posix.Kevent{
- .ident = i,
- .filter = posix.EVFILT_USER,
- .flags = 0,
- .fflags = 0,
- .data = 0,
- .udata = @ptrToInt(&eventfd_node.data.base),
- };
- }
-
- // Pre-add so that we cannot get error.SystemResources
- // later when we try to activate it.
- self.os_data.final_kevent = posix.Kevent{
- .ident = extra_thread_count,
- .filter = posix.EVFILT_USER,
- .flags = posix.EV_ADD | posix.EV_DISABLE,
- .fflags = 0,
- .data = 0,
- .udata = @ptrToInt(&self.final_resume_node),
- };
- const kevent_array = (*[1]posix.Kevent)(&self.os_data.final_kevent);
- _ = try std.os.bsdKEvent(self.os_data.kqfd, kevent_array, eventlist, null);
- self.os_data.final_kevent.flags = posix.EV_ENABLE;
- self.os_data.final_kevent.fflags = posix.NOTE_TRIGGER;
-
- var extra_thread_index: usize = 0;
- errdefer {
- _ = std.os.bsdKEvent(self.os_data.kqfd, kevent_array, eventlist, null) catch unreachable;
- while (extra_thread_index != 0) {
- extra_thread_index -= 1;
- self.extra_threads[extra_thread_index].wait();
- }
- }
- while (extra_thread_index < extra_thread_count) : (extra_thread_index += 1) {
- self.extra_threads[extra_thread_index] = try std.os.spawnThread(self, workerRun);
- }
- },
- builtin.Os.windows => {
- self.os_data.extra_thread_count = extra_thread_count;
-
- self.os_data.io_port = try std.os.windowsCreateIoCompletionPort(
- windows.INVALID_HANDLE_VALUE,
- null,
- undefined,
- undefined,
- );
- errdefer std.os.close(self.os_data.io_port);
-
- for (self.eventfd_resume_nodes) |*eventfd_node, i| {
- eventfd_node.* = std.atomic.Stack(ResumeNode.EventFd).Node{
- .data = ResumeNode.EventFd{
- .base = ResumeNode{
- .id = ResumeNode.Id.EventFd,
- .handle = undefined,
- },
- // this one is for sending events
- .completion_key = @ptrToInt(&eventfd_node.data.base),
- },
- .next = undefined,
- };
- self.available_eventfd_resume_nodes.push(eventfd_node);
- }
-
- var extra_thread_index: usize = 0;
- errdefer {
- var i: usize = 0;
- while (i < extra_thread_index) : (i += 1) {
- while (true) {
- const overlapped = @intToPtr(?*windows.OVERLAPPED, 0x1);
- std.os.windowsPostQueuedCompletionStatus(self.os_data.io_port, undefined, @ptrToInt(&self.final_resume_node), overlapped) catch continue;
- break;
- }
- }
- while (extra_thread_index != 0) {
- extra_thread_index -= 1;
- self.extra_threads[extra_thread_index].wait();
- }
- }
- while (extra_thread_index < extra_thread_count) : (extra_thread_index += 1) {
- self.extra_threads[extra_thread_index] = try std.os.spawnThread(self, workerRun);
- }
- },
- else => {},
- }
- }
-
- fn deinitOsData(self: *Loop) void {
- switch (builtin.os) {
- builtin.Os.linux => {
- std.os.close(self.os_data.final_eventfd);
- while (self.available_eventfd_resume_nodes.pop()) |node| std.os.close(node.data.eventfd);
- std.os.close(self.os_data.epollfd);
- self.allocator.free(self.eventfd_resume_nodes);
- },
- builtin.Os.macosx => {
- self.allocator.free(self.os_data.kevents);
- std.os.close(self.os_data.kqfd);
- },
- builtin.Os.windows => {
- std.os.close(self.os_data.io_port);
- },
- else => {},
- }
- }
-
- /// resume_node must live longer than the promise that it holds a reference to.
- pub fn addFd(self: *Loop, fd: i32, resume_node: *ResumeNode) !void {
- _ = @atomicRmw(usize, &self.pending_event_count, AtomicRmwOp.Add, 1, AtomicOrder.SeqCst);
- errdefer {
- _ = @atomicRmw(usize, &self.pending_event_count, AtomicRmwOp.Sub, 1, AtomicOrder.SeqCst);
- }
- try self.modFd(
- fd,
- posix.EPOLL_CTL_ADD,
- std.os.linux.EPOLLIN | std.os.linux.EPOLLOUT | std.os.linux.EPOLLET,
- resume_node,
- );
- }
-
- pub fn modFd(self: *Loop, fd: i32, op: u32, events: u32, resume_node: *ResumeNode) !void {
- var ev = std.os.linux.epoll_event{
- .events = events,
- .data = std.os.linux.epoll_data{ .ptr = @ptrToInt(resume_node) },
- };
- try std.os.linuxEpollCtl(self.os_data.epollfd, op, fd, &ev);
- }
-
- pub fn removeFd(self: *Loop, fd: i32) void {
- self.removeFdNoCounter(fd);
- _ = @atomicRmw(usize, &self.pending_event_count, AtomicRmwOp.Sub, 1, AtomicOrder.SeqCst);
- }
-
- fn removeFdNoCounter(self: *Loop, fd: i32) void {
- std.os.linuxEpollCtl(self.os_data.epollfd, std.os.linux.EPOLL_CTL_DEL, fd, undefined) catch {};
- }
-
- pub async fn waitFd(self: *Loop, fd: i32) !void {
- defer self.removeFd(fd);
- suspend |p| {
- // TODO explicitly put this memory in the coroutine frame #1194
- var resume_node = ResumeNode{
- .id = ResumeNode.Id.Basic,
- .handle = p,
- };
- try self.addFd(fd, &resume_node);
- }
- }
-
- /// Bring your own linked list node. This means it can't fail.
- pub fn onNextTick(self: *Loop, node: *NextTickNode) void {
- _ = @atomicRmw(usize, &self.pending_event_count, AtomicRmwOp.Add, 1, AtomicOrder.SeqCst);
- self.next_tick_queue.put(node);
- }
-
- pub fn run(self: *Loop) void {
- _ = @atomicRmw(u8, &self.dispatch_lock, AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst);
- self.workerRun();
- for (self.extra_threads) |extra_thread| {
- extra_thread.wait();
- }
- }
-
- fn workerRun(self: *Loop) void {
- start_over: while (true) {
- if (@atomicRmw(u8, &self.dispatch_lock, AtomicRmwOp.Xchg, 1, AtomicOrder.SeqCst) == 0) {
- while (self.next_tick_queue.get()) |next_tick_node| {
- const handle = next_tick_node.data;
- if (self.next_tick_queue.isEmpty()) {
- // last node, just resume it
- _ = @atomicRmw(u8, &self.dispatch_lock, AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst);
- resume handle;
- _ = @atomicRmw(usize, &self.pending_event_count, AtomicRmwOp.Sub, 1, AtomicOrder.SeqCst);
- continue :start_over;
- }
-
- // non-last node, stick it in the epoll/kqueue set so that
- // other threads can get to it
- if (self.available_eventfd_resume_nodes.pop()) |resume_stack_node| {
- const eventfd_node = &resume_stack_node.data;
- eventfd_node.base.handle = handle;
- switch (builtin.os) {
- builtin.Os.macosx => {
- const kevent_array = (*[1]posix.Kevent)(&eventfd_node.kevent);
- const eventlist = ([*]posix.Kevent)(undefined)[0..0];
- _ = std.os.bsdKEvent(self.os_data.kqfd, kevent_array, eventlist, null) catch {
- // fine, we didn't need it anyway
- _ = @atomicRmw(u8, &self.dispatch_lock, AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst);
- self.available_eventfd_resume_nodes.push(resume_stack_node);
- resume handle;
- _ = @atomicRmw(usize, &self.pending_event_count, AtomicRmwOp.Sub, 1, AtomicOrder.SeqCst);
- continue :start_over;
- };
- },
- builtin.Os.linux => {
- // the pending count is already accounted for
- const epoll_events = posix.EPOLLONESHOT | std.os.linux.EPOLLIN | std.os.linux.EPOLLOUT | std.os.linux.EPOLLET;
- self.modFd(eventfd_node.eventfd, eventfd_node.epoll_op, epoll_events, &eventfd_node.base) catch {
- // fine, we didn't need it anyway
- _ = @atomicRmw(u8, &self.dispatch_lock, AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst);
- self.available_eventfd_resume_nodes.push(resume_stack_node);
- resume handle;
- _ = @atomicRmw(usize, &self.pending_event_count, AtomicRmwOp.Sub, 1, AtomicOrder.SeqCst);
- continue :start_over;
- };
- },
- builtin.Os.windows => {
- // this value is never dereferenced but we need it to be non-null so that
- // the consumer code can decide whether to read the completion key.
- // it has to do this for normal I/O, so we match that behavior here.
- const overlapped = @intToPtr(?*windows.OVERLAPPED, 0x1);
- std.os.windowsPostQueuedCompletionStatus(self.os_data.io_port, undefined, eventfd_node.completion_key, overlapped) catch {
- // fine, we didn't need it anyway
- _ = @atomicRmw(u8, &self.dispatch_lock, AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst);
- self.available_eventfd_resume_nodes.push(resume_stack_node);
- resume handle;
- _ = @atomicRmw(usize, &self.pending_event_count, AtomicRmwOp.Sub, 1, AtomicOrder.SeqCst);
- continue :start_over;
- };
- },
- else => @compileError("unsupported OS"),
- }
- } else {
- // threads are too busy, can't add another eventfd to wake one up
- _ = @atomicRmw(u8, &self.dispatch_lock, AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst);
- resume handle;
- _ = @atomicRmw(usize, &self.pending_event_count, AtomicRmwOp.Sub, 1, AtomicOrder.SeqCst);
- continue :start_over;
- }
- }
-
- const pending_event_count = @atomicLoad(usize, &self.pending_event_count, AtomicOrder.SeqCst);
- if (pending_event_count == 0) {
- // cause all the threads to stop
- switch (builtin.os) {
- builtin.Os.linux => {
- // writing 8 bytes to an eventfd cannot fail
- std.os.posixWrite(self.os_data.final_eventfd, wakeup_bytes) catch unreachable;
- return;
- },
- builtin.Os.macosx => {
- const final_kevent = (*[1]posix.Kevent)(&self.os_data.final_kevent);
- const eventlist = ([*]posix.Kevent)(undefined)[0..0];
- // cannot fail because we already added it and this just enables it
- _ = std.os.bsdKEvent(self.os_data.kqfd, final_kevent, eventlist, null) catch unreachable;
- return;
- },
- builtin.Os.windows => {
- var i: usize = 0;
- while (i < self.os_data.extra_thread_count) : (i += 1) {
- while (true) {
- const overlapped = @intToPtr(?*windows.OVERLAPPED, 0x1);
- std.os.windowsPostQueuedCompletionStatus(self.os_data.io_port, undefined, @ptrToInt(&self.final_resume_node), overlapped) catch continue;
- break;
- }
- }
- return;
- },
- else => @compileError("unsupported OS"),
- }
- }
-
- _ = @atomicRmw(u8, &self.dispatch_lock, AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst);
- }
-
- switch (builtin.os) {
- builtin.Os.linux => {
- // only process 1 event so we don't steal from other threads
- var events: [1]std.os.linux.epoll_event = undefined;
- const count = std.os.linuxEpollWait(self.os_data.epollfd, events[0..], -1);
- for (events[0..count]) |ev| {
- const resume_node = @intToPtr(*ResumeNode, ev.data.ptr);
- const handle = resume_node.handle;
- const resume_node_id = resume_node.id;
- switch (resume_node_id) {
- ResumeNode.Id.Basic => {},
- ResumeNode.Id.Stop => return,
- ResumeNode.Id.EventFd => {
- const event_fd_node = @fieldParentPtr(ResumeNode.EventFd, "base", resume_node);
- event_fd_node.epoll_op = posix.EPOLL_CTL_MOD;
- const stack_node = @fieldParentPtr(std.atomic.Stack(ResumeNode.EventFd).Node, "data", event_fd_node);
- self.available_eventfd_resume_nodes.push(stack_node);
- },
- }
- resume handle;
- if (resume_node_id == ResumeNode.Id.EventFd) {
- _ = @atomicRmw(usize, &self.pending_event_count, AtomicRmwOp.Sub, 1, AtomicOrder.SeqCst);
- }
- }
- },
- builtin.Os.macosx => {
- var eventlist: [1]posix.Kevent = undefined;
- const count = std.os.bsdKEvent(self.os_data.kqfd, self.os_data.kevents, eventlist[0..], null) catch unreachable;
- for (eventlist[0..count]) |ev| {
- const resume_node = @intToPtr(*ResumeNode, ev.udata);
- const handle = resume_node.handle;
- const resume_node_id = resume_node.id;
- switch (resume_node_id) {
- ResumeNode.Id.Basic => {},
- ResumeNode.Id.Stop => return,
- ResumeNode.Id.EventFd => {
- const event_fd_node = @fieldParentPtr(ResumeNode.EventFd, "base", resume_node);
- const stack_node = @fieldParentPtr(std.atomic.Stack(ResumeNode.EventFd).Node, "data", event_fd_node);
- self.available_eventfd_resume_nodes.push(stack_node);
- },
- }
- resume handle;
- if (resume_node_id == ResumeNode.Id.EventFd) {
- _ = @atomicRmw(usize, &self.pending_event_count, AtomicRmwOp.Sub, 1, AtomicOrder.SeqCst);
- }
- }
- },
- builtin.Os.windows => {
- var completion_key: usize = undefined;
- while (true) {
- var nbytes: windows.DWORD = undefined;
- var overlapped: ?*windows.OVERLAPPED = undefined;
- switch (std.os.windowsGetQueuedCompletionStatus(self.os_data.io_port, &nbytes, &completion_key, &overlapped, windows.INFINITE)) {
- std.os.WindowsWaitResult.Aborted => return,
- std.os.WindowsWaitResult.Normal => {},
- }
- if (overlapped != null) break;
- }
- const resume_node = @intToPtr(*ResumeNode, completion_key);
- const handle = resume_node.handle;
- const resume_node_id = resume_node.id;
- switch (resume_node_id) {
- ResumeNode.Id.Basic => {},
- ResumeNode.Id.Stop => return,
- ResumeNode.Id.EventFd => {
- const event_fd_node = @fieldParentPtr(ResumeNode.EventFd, "base", resume_node);
- const stack_node = @fieldParentPtr(std.atomic.Stack(ResumeNode.EventFd).Node, "data", event_fd_node);
- self.available_eventfd_resume_nodes.push(stack_node);
- },
- }
- resume handle;
- if (resume_node_id == ResumeNode.Id.EventFd) {
- _ = @atomicRmw(usize, &self.pending_event_count, AtomicRmwOp.Sub, 1, AtomicOrder.SeqCst);
- }
- },
- else => @compileError("unsupported OS"),
- }
- }
- }
-
- const OsData = switch (builtin.os) {
- builtin.Os.linux => struct {
- epollfd: i32,
- final_eventfd: i32,
- final_eventfd_event: std.os.linux.epoll_event,
- },
- builtin.Os.macosx => MacOsData,
- builtin.Os.windows => struct {
- io_port: windows.HANDLE,
- extra_thread_count: usize,
- },
- else => struct {},
- };
-
- const MacOsData = struct {
- kqfd: i32,
- final_kevent: posix.Kevent,
- kevents: []posix.Kevent,
- };
-};
-
-/// many producer, many consumer, thread-safe, lock-free, runtime configurable buffer size
-/// when buffer is empty, consumers suspend and are resumed by producers
-/// when buffer is full, producers suspend and are resumed by consumers
-pub fn Channel(comptime T: type) type {
- return struct {
- loop: *Loop,
-
- getters: std.atomic.QueueMpsc(GetNode),
- putters: std.atomic.QueueMpsc(PutNode),
- get_count: usize,
- put_count: usize,
- dispatch_lock: u8, // TODO make this a bool
- need_dispatch: u8, // TODO make this a bool
-
- // simple fixed size ring buffer
- buffer_nodes: []T,
- buffer_index: usize,
- buffer_len: usize,
-
- const SelfChannel = this;
- const GetNode = struct {
- ptr: *T,
- tick_node: *Loop.NextTickNode,
- };
- const PutNode = struct {
- data: T,
- tick_node: *Loop.NextTickNode,
- };
-
- /// call destroy when done
- pub fn create(loop: *Loop, capacity: usize) !*SelfChannel {
- const buffer_nodes = try loop.allocator.alloc(T, capacity);
- errdefer loop.allocator.free(buffer_nodes);
-
- const self = try loop.allocator.create(SelfChannel{
- .loop = loop,
- .buffer_len = 0,
- .buffer_nodes = buffer_nodes,
- .buffer_index = 0,
- .dispatch_lock = 0,
- .need_dispatch = 0,
- .getters = std.atomic.QueueMpsc(GetNode).init(),
- .putters = std.atomic.QueueMpsc(PutNode).init(),
- .get_count = 0,
- .put_count = 0,
- });
- errdefer loop.allocator.destroy(self);
-
- return self;
- }
-
- /// must be called when all calls to put and get have suspended and no more calls occur
- pub fn destroy(self: *SelfChannel) void {
- while (self.getters.get()) |get_node| {
- cancel get_node.data.tick_node.data;
- }
- while (self.putters.get()) |put_node| {
- cancel put_node.data.tick_node.data;
- }
- self.loop.allocator.free(self.buffer_nodes);
- self.loop.allocator.destroy(self);
- }
-
- /// puts a data item in the channel. The promise completes when the value has been added to the
- /// buffer, or in the case of a zero size buffer, when the item has been retrieved by a getter.
- pub async fn put(self: *SelfChannel, data: T) void {
- // TODO should be able to group memory allocation failure before first suspend point
- // so that the async invocation catches it
- var dispatch_tick_node_ptr: *Loop.NextTickNode = undefined;
- _ = async self.dispatch(&dispatch_tick_node_ptr) catch unreachable;
-
- suspend |handle| {
- var my_tick_node = Loop.NextTickNode{
- .next = undefined,
- .data = handle,
- };
- var queue_node = std.atomic.QueueMpsc(PutNode).Node{
- .data = PutNode{
- .tick_node = &my_tick_node,
- .data = data,
- },
- .next = undefined,
- };
- self.putters.put(&queue_node);
- _ = @atomicRmw(usize, &self.put_count, AtomicRmwOp.Add, 1, AtomicOrder.SeqCst);
-
- self.loop.onNextTick(dispatch_tick_node_ptr);
- }
- }
-
- /// await this function to get an item from the channel. If the buffer is empty, the promise will
- /// complete when the next item is put in the channel.
- pub async fn get(self: *SelfChannel) T {
- // TODO should be able to group memory allocation failure before first suspend point
- // so that the async invocation catches it
- var dispatch_tick_node_ptr: *Loop.NextTickNode = undefined;
- _ = async self.dispatch(&dispatch_tick_node_ptr) catch unreachable;
-
- // TODO integrate this function with named return values
- // so we can get rid of this extra result copy
- var result: T = undefined;
- suspend |handle| {
- var my_tick_node = Loop.NextTickNode{
- .next = undefined,
- .data = handle,
- };
- var queue_node = std.atomic.QueueMpsc(GetNode).Node{
- .data = GetNode{
- .ptr = &result,
- .tick_node = &my_tick_node,
- },
- .next = undefined,
- };
- self.getters.put(&queue_node);
- _ = @atomicRmw(usize, &self.get_count, AtomicRmwOp.Add, 1, AtomicOrder.SeqCst);
-
- self.loop.onNextTick(dispatch_tick_node_ptr);
- }
- return result;
- }
-
- async fn dispatch(self: *SelfChannel, tick_node_ptr: **Loop.NextTickNode) void {
- // resumed by onNextTick
- suspend |handle| {
- var tick_node = Loop.NextTickNode{
- .data = handle,
- .next = undefined,
- };
- tick_node_ptr.* = &tick_node;
- }
-
- // set the "need dispatch" flag
- _ = @atomicRmw(u8, &self.need_dispatch, AtomicRmwOp.Xchg, 1, AtomicOrder.SeqCst);
-
- lock: while (true) {
- // set the lock flag
- const prev_lock = @atomicRmw(u8, &self.dispatch_lock, AtomicRmwOp.Xchg, 1, AtomicOrder.SeqCst);
- if (prev_lock != 0) return;
-
- // clear the need_dispatch flag since we're about to do it
- _ = @atomicRmw(u8, &self.need_dispatch, AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst);
-
- while (true) {
- one_dispatch: {
- // later we correct these extra subtractions
- var get_count = @atomicRmw(usize, &self.get_count, AtomicRmwOp.Sub, 1, AtomicOrder.SeqCst);
- var put_count = @atomicRmw(usize, &self.put_count, AtomicRmwOp.Sub, 1, AtomicOrder.SeqCst);
-
- // transfer self.buffer to self.getters
- while (self.buffer_len != 0) {
- if (get_count == 0) break :one_dispatch;
-
- const get_node = &self.getters.get().?.data;
- get_node.ptr.* = self.buffer_nodes[self.buffer_index -% self.buffer_len];
- self.loop.onNextTick(get_node.tick_node);
- self.buffer_len -= 1;
-
- get_count = @atomicRmw(usize, &self.get_count, AtomicRmwOp.Sub, 1, AtomicOrder.SeqCst);
- }
-
- // direct transfer self.putters to self.getters
- while (get_count != 0 and put_count != 0) {
- const get_node = &self.getters.get().?.data;
- const put_node = &self.putters.get().?.data;
-
- get_node.ptr.* = put_node.data;
- self.loop.onNextTick(get_node.tick_node);
- self.loop.onNextTick(put_node.tick_node);
-
- get_count = @atomicRmw(usize, &self.get_count, AtomicRmwOp.Sub, 1, AtomicOrder.SeqCst);
- put_count = @atomicRmw(usize, &self.put_count, AtomicRmwOp.Sub, 1, AtomicOrder.SeqCst);
- }
-
- // transfer self.putters to self.buffer
- while (self.buffer_len != self.buffer_nodes.len and put_count != 0) {
- const put_node = &self.putters.get().?.data;
-
- self.buffer_nodes[self.buffer_index] = put_node.data;
- self.loop.onNextTick(put_node.tick_node);
- self.buffer_index +%= 1;
- self.buffer_len += 1;
-
- put_count = @atomicRmw(usize, &self.put_count, AtomicRmwOp.Sub, 1, AtomicOrder.SeqCst);
- }
- }
-
- // undo the extra subtractions
- _ = @atomicRmw(usize, &self.get_count, AtomicRmwOp.Add, 1, AtomicOrder.SeqCst);
- _ = @atomicRmw(usize, &self.put_count, AtomicRmwOp.Add, 1, AtomicOrder.SeqCst);
-
- // clear need-dispatch flag
- const need_dispatch = @atomicRmw(u8, &self.need_dispatch, AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst);
- if (need_dispatch != 0) continue;
-
- const my_lock = @atomicRmw(u8, &self.dispatch_lock, AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst);
- assert(my_lock != 0);
-
- // we have to check again now that we unlocked
- if (@atomicLoad(u8, &self.need_dispatch, AtomicOrder.SeqCst) != 0) continue :lock;
-
- return;
- }
- }
- }
- };
-}
-
-pub async fn connect(loop: *Loop, _address: *const std.net.Address) !std.os.File {
- var address = _address.*; // TODO https://github.com/ziglang/zig/issues/733
-
- const sockfd = try std.os.posixSocket(posix.AF_INET, posix.SOCK_STREAM | posix.SOCK_CLOEXEC | posix.SOCK_NONBLOCK, posix.PROTO_tcp);
- errdefer std.os.close(sockfd);
-
- try std.os.posixConnectAsync(sockfd, &address.os_addr);
- try await try async loop.waitFd(sockfd);
- try std.os.posixGetSockOptConnectError(sockfd);
-
- return std.os.File.openHandle(sockfd);
-}
-
-test "listen on a port, send bytes, receive bytes" {
- if (builtin.os != builtin.Os.linux) {
- // TODO build abstractions for other operating systems
- return;
- }
- const MyServer = struct {
- tcp_server: TcpServer,
-
- const Self = this;
- async<*mem.Allocator> fn handler(tcp_server: *TcpServer, _addr: *const std.net.Address, _socket: *const std.os.File) void {
- const self = @fieldParentPtr(Self, "tcp_server", tcp_server);
- var socket = _socket.*; // TODO https://github.com/ziglang/zig/issues/733
- defer socket.close();
- // TODO guarantee elision of this allocation
- const next_handler = async errorableHandler(self, _addr, socket) catch unreachable;
- (await next_handler) catch |err| {
- std.debug.panic("unable to handle connection: {}\n", err);
- };
- suspend |p| {
- cancel p;
- }
- }
- async fn errorableHandler(self: *Self, _addr: *const std.net.Address, _socket: *const std.os.File) !void {
- const addr = _addr.*; // TODO https://github.com/ziglang/zig/issues/733
- var socket = _socket.*; // TODO https://github.com/ziglang/zig/issues/733
-
- var adapter = std.io.FileOutStream.init(&socket);
- var stream = &adapter.stream;
- try stream.print("hello from server\n");
- }
- };
-
- const ip4addr = std.net.parseIp4("127.0.0.1") catch unreachable;
- const addr = std.net.Address.initIp4(ip4addr, 0);
-
- var loop: Loop = undefined;
- try loop.initSingleThreaded(std.debug.global_allocator);
- var server = MyServer{ .tcp_server = TcpServer.init(&loop) };
- defer server.tcp_server.deinit();
- try server.tcp_server.listen(addr, MyServer.handler);
-
- const p = try async doAsyncTest(&loop, server.tcp_server.listen_address, &server.tcp_server);
- defer cancel p;
- loop.run();
-}
-
-async fn doAsyncTest(loop: *Loop, address: *const std.net.Address, server: *TcpServer) void {
- errdefer @panic("test failure");
-
- var socket_file = try await try async event.connect(loop, address);
- defer socket_file.close();
-
- var buf: [512]u8 = undefined;
- const amt_read = try socket_file.read(buf[0..]);
- const msg = buf[0..amt_read];
- assert(mem.eql(u8, msg, "hello from server\n"));
- server.close();
-}
-
-test "std.event.Channel" {
- var da = std.heap.DirectAllocator.init();
- defer da.deinit();
-
- const allocator = &da.allocator;
-
- var loop: Loop = undefined;
- // TODO make a multi threaded test
- try loop.initSingleThreaded(allocator);
- defer loop.deinit();
-
- const channel = try Channel(i32).create(&loop, 0);
- defer channel.destroy();
-
- const handle = try async testChannelGetter(&loop, channel);
- defer cancel handle;
-
- const putter = try async testChannelPutter(channel);
- defer cancel putter;
-
- loop.run();
-}
-
-async fn testChannelGetter(loop: *Loop, channel: *Channel(i32)) void {
- errdefer @panic("test failed");
-
- const value1_promise = try async channel.get();
- const value1 = await value1_promise;
- assert(value1 == 1234);
-
- const value2_promise = try async channel.get();
- const value2 = await value2_promise;
- assert(value2 == 4567);
-}
-
-async fn testChannelPutter(channel: *Channel(i32)) void {
- await (async channel.put(1234) catch @panic("out of memory"));
- await (async channel.put(4567) catch @panic("out of memory"));
-}
-
-/// Thread-safe async/await lock.
-/// Does not make any syscalls - coroutines which are waiting for the lock are suspended, and
-/// are resumed when the lock is released, in order.
-pub const Lock = struct {
- loop: *Loop,
- shared_bit: u8, // TODO make this a bool
- queue: Queue,
- queue_empty_bit: u8, // TODO make this a bool
-
- const Queue = std.atomic.QueueMpsc(promise);
-
- pub const Held = struct {
- lock: *Lock,
-
- pub fn release(self: Held) void {
- // Resume the next item from the queue.
- if (self.lock.queue.get()) |node| {
- self.lock.loop.onNextTick(node);
- return;
- }
-
- // We need to release the lock.
- _ = @atomicRmw(u8, &self.lock.queue_empty_bit, AtomicRmwOp.Xchg, 1, AtomicOrder.SeqCst);
- _ = @atomicRmw(u8, &self.lock.shared_bit, AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst);
-
- // There might be a queue item. If we know the queue is empty, we can be done,
- // because the other actor will try to obtain the lock.
- // But if there's a queue item, we are the actor which must loop and attempt
- // to grab the lock again.
- if (@atomicLoad(u8, &self.lock.queue_empty_bit, AtomicOrder.SeqCst) == 1) {
- return;
- }
-
- while (true) {
- const old_bit = @atomicRmw(u8, &self.lock.shared_bit, AtomicRmwOp.Xchg, 1, AtomicOrder.SeqCst);
- if (old_bit != 0) {
- // We did not obtain the lock. Great, the queue is someone else's problem.
- return;
- }
-
- // Resume the next item from the queue.
- if (self.lock.queue.get()) |node| {
- self.lock.loop.onNextTick(node);
- return;
- }
-
- // Release the lock again.
- _ = @atomicRmw(u8, &self.lock.queue_empty_bit, AtomicRmwOp.Xchg, 1, AtomicOrder.SeqCst);
- _ = @atomicRmw(u8, &self.lock.shared_bit, AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst);
-
- // Find out if we can be done.
- if (@atomicLoad(u8, &self.lock.queue_empty_bit, AtomicOrder.SeqCst) == 1) {
- return;
- }
- }
- }
- };
-
- pub fn init(loop: *Loop) Lock {
- return Lock{
- .loop = loop,
- .shared_bit = 0,
- .queue = Queue.init(),
- .queue_empty_bit = 1,
- };
- }
-
- /// Must be called when not locked. Not thread safe.
- /// All calls to acquire() and release() must complete before calling deinit().
- pub fn deinit(self: *Lock) void {
- assert(self.shared_bit == 0);
- while (self.queue.get()) |node| cancel node.data;
- }
-
- pub async fn acquire(self: *Lock) Held {
- s: suspend |handle| {
- // TODO explicitly put this memory in the coroutine frame #1194
- var my_tick_node = Loop.NextTickNode{
- .data = handle,
- .next = undefined,
- };
-
- self.queue.put(&my_tick_node);
-
- // At this point, we are in the queue, so we might have already been resumed and this coroutine
- // frame might be destroyed. For the rest of the suspend block we cannot access the coroutine frame.
-
-            // We set this bit so that later we can rely on the fact that, if queue_empty_bit is 1, some actor
- // will attempt to grab the lock.
- _ = @atomicRmw(u8, &self.queue_empty_bit, AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst);
-
- while (true) {
- const old_bit = @atomicRmw(u8, &self.shared_bit, AtomicRmwOp.Xchg, 1, AtomicOrder.SeqCst);
- if (old_bit != 0) {
- // We did not obtain the lock. Trust that our queue entry will resume us, and allow
- // suspend to complete.
- break;
- }
- // We got the lock. However we might have already been resumed from the queue.
- if (self.queue.get()) |node| {
- // Whether this node is us or someone else, we tail resume it.
- resume node.data;
- break;
- } else {
- // We already got resumed, and there are none left in the queue, which means that
- // we aren't even supposed to hold the lock right now.
- _ = @atomicRmw(u8, &self.queue_empty_bit, AtomicRmwOp.Xchg, 1, AtomicOrder.SeqCst);
- _ = @atomicRmw(u8, &self.shared_bit, AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst);
-
- // There might be a queue item. If we know the queue is empty, we can be done,
- // because the other actor will try to obtain the lock.
- // But if there's a queue item, we are the actor which must loop and attempt
- // to grab the lock again.
- if (@atomicLoad(u8, &self.queue_empty_bit, AtomicOrder.SeqCst) == 1) {
- break;
- } else {
- continue;
- }
- }
- unreachable;
- }
- }
-
- return Held{ .lock = self };
- }
-};
-
-/// Thread-safe async/await lock that protects one piece of data.
-/// Does not make any syscalls - coroutines which are waiting for the lock are suspended, and
-/// are resumed when the lock is released, in order.
-pub fn Locked(comptime T: type) type {
- return struct {
- lock: Lock,
- private_data: T,
-
- const Self = this;
-
- pub const HeldLock = struct {
- value: *T,
- held: Lock.Held,
-
- pub fn release(self: HeldLock) void {
- self.held.release();
- }
- };
-
- pub fn init(loop: *Loop, data: T) Self {
- return Self{
- .lock = Lock.init(loop),
- .private_data = data,
- };
- }
-
- pub fn deinit(self: *Self) void {
- self.lock.deinit();
- }
-
- pub async fn acquire(self: *Self) HeldLock {
- return HeldLock{
- // TODO guaranteed allocation elision
- .held = await (async self.lock.acquire() catch unreachable),
- .value = &self.private_data,
- };
- }
- };
-}
-
-test "std.event.Lock" {
- var da = std.heap.DirectAllocator.init();
- defer da.deinit();
-
- const allocator = &da.allocator;
-
- var loop: Loop = undefined;
- try loop.initMultiThreaded(allocator);
- defer loop.deinit();
-
- var lock = Lock.init(&loop);
- defer lock.deinit();
-
- const handle = try async testLock(&loop, &lock);
- defer cancel handle;
- loop.run();
-
- assert(mem.eql(i32, shared_test_data, [1]i32{3 * @intCast(i32, shared_test_data.len)} ** shared_test_data.len));
-}
-
-async fn testLock(loop: *Loop, lock: *Lock) void {
- // TODO explicitly put next tick node memory in the coroutine frame #1194
- suspend |p| {
- resume p;
- }
- const handle1 = async lockRunner(lock) catch @panic("out of memory");
- var tick_node1 = Loop.NextTickNode{
- .next = undefined,
- .data = handle1,
- };
- loop.onNextTick(&tick_node1);
-
- const handle2 = async lockRunner(lock) catch @panic("out of memory");
- var tick_node2 = Loop.NextTickNode{
- .next = undefined,
- .data = handle2,
- };
- loop.onNextTick(&tick_node2);
-
- const handle3 = async lockRunner(lock) catch @panic("out of memory");
- var tick_node3 = Loop.NextTickNode{
- .next = undefined,
- .data = handle3,
- };
- loop.onNextTick(&tick_node3);
-
- await handle1;
- await handle2;
- await handle3;
-}
-
-var shared_test_data = [1]i32{0} ** 10;
-var shared_test_index: usize = 0;
-
-async fn lockRunner(lock: *Lock) void {
- suspend; // resumed by onNextTick
-
- var i: usize = 0;
- while (i < shared_test_data.len) : (i += 1) {
- const lock_promise = async lock.acquire() catch @panic("out of memory");
- const handle = await lock_promise;
- defer handle.release();
-
- shared_test_index = 0;
- while (shared_test_index < shared_test_data.len) : (shared_test_index += 1) {
- shared_test_data[shared_test_index] = shared_test_data[shared_test_index] + 1;
- }
- }
+pub const Locked = @import("event/locked.zig").Locked;
+pub const Loop = @import("event/loop.zig").Loop;
+pub const Lock = @import("event/lock.zig").Lock;
+pub const tcp = @import("event/tcp.zig");
+pub const Channel = @import("event/channel.zig").Channel;
+
+test "import event tests" {
+ _ = @import("event/locked.zig");
+ _ = @import("event/loop.zig");
+ _ = @import("event/lock.zig");
+ _ = @import("event/tcp.zig");
+ _ = @import("event/channel.zig");
}
diff --git a/std/event/channel.zig b/std/event/channel.zig
new file mode 100644
index 0000000000..4b3a7177a2
--- /dev/null
+++ b/std/event/channel.zig
@@ -0,0 +1,254 @@
+const std = @import("../index.zig");
+const builtin = @import("builtin");
+const assert = std.debug.assert;
+const AtomicRmwOp = builtin.AtomicRmwOp;
+const AtomicOrder = builtin.AtomicOrder;
+const Loop = std.event.Loop;
+
+/// many producer, many consumer, thread-safe, lock-free, runtime configurable buffer size
+/// when buffer is empty, consumers suspend and are resumed by producers
+/// when buffer is full, producers suspend and are resumed by consumers
+pub fn Channel(comptime T: type) type {
+ return struct {
+ loop: *Loop,
+
+ getters: std.atomic.QueueMpsc(GetNode),
+ putters: std.atomic.QueueMpsc(PutNode),
+ get_count: usize,
+ put_count: usize,
+ dispatch_lock: u8, // TODO make this a bool
+ need_dispatch: u8, // TODO make this a bool
+
+ // simple fixed size ring buffer
+ buffer_nodes: []T,
+ buffer_index: usize,
+ buffer_len: usize,
+
+ const SelfChannel = this;
+ const GetNode = struct {
+ ptr: *T,
+ tick_node: *Loop.NextTickNode,
+ };
+ const PutNode = struct {
+ data: T,
+ tick_node: *Loop.NextTickNode,
+ };
+
+ /// call destroy when done
+ pub fn create(loop: *Loop, capacity: usize) !*SelfChannel {
+ const buffer_nodes = try loop.allocator.alloc(T, capacity);
+ errdefer loop.allocator.free(buffer_nodes);
+
+ const self = try loop.allocator.create(SelfChannel{
+ .loop = loop,
+ .buffer_len = 0,
+ .buffer_nodes = buffer_nodes,
+ .buffer_index = 0,
+ .dispatch_lock = 0,
+ .need_dispatch = 0,
+ .getters = std.atomic.QueueMpsc(GetNode).init(),
+ .putters = std.atomic.QueueMpsc(PutNode).init(),
+ .get_count = 0,
+ .put_count = 0,
+ });
+ errdefer loop.allocator.destroy(self);
+
+ return self;
+ }
+
+ /// must be called when all calls to put and get have suspended and no more calls occur
+ pub fn destroy(self: *SelfChannel) void {
+ while (self.getters.get()) |get_node| {
+ cancel get_node.data.tick_node.data;
+ }
+ while (self.putters.get()) |put_node| {
+ cancel put_node.data.tick_node.data;
+ }
+ self.loop.allocator.free(self.buffer_nodes);
+ self.loop.allocator.destroy(self);
+ }
+
+ /// puts a data item in the channel. The promise completes when the value has been added to the
+ /// buffer, or in the case of a zero size buffer, when the item has been retrieved by a getter.
+ pub async fn put(self: *SelfChannel, data: T) void {
+ // TODO should be able to group memory allocation failure before first suspend point
+ // so that the async invocation catches it
+ var dispatch_tick_node_ptr: *Loop.NextTickNode = undefined;
+ _ = async self.dispatch(&dispatch_tick_node_ptr) catch unreachable;
+
+ suspend |handle| {
+ var my_tick_node = Loop.NextTickNode{
+ .next = undefined,
+ .data = handle,
+ };
+ var queue_node = std.atomic.QueueMpsc(PutNode).Node{
+ .data = PutNode{
+ .tick_node = &my_tick_node,
+ .data = data,
+ },
+ .next = undefined,
+ };
+ self.putters.put(&queue_node);
+ _ = @atomicRmw(usize, &self.put_count, AtomicRmwOp.Add, 1, AtomicOrder.SeqCst);
+
+ self.loop.onNextTick(dispatch_tick_node_ptr);
+ }
+ }
+
+ /// await this function to get an item from the channel. If the buffer is empty, the promise will
+ /// complete when the next item is put in the channel.
+ pub async fn get(self: *SelfChannel) T {
+ // TODO should be able to group memory allocation failure before first suspend point
+ // so that the async invocation catches it
+ var dispatch_tick_node_ptr: *Loop.NextTickNode = undefined;
+ _ = async self.dispatch(&dispatch_tick_node_ptr) catch unreachable;
+
+ // TODO integrate this function with named return values
+ // so we can get rid of this extra result copy
+ var result: T = undefined;
+ suspend |handle| {
+ var my_tick_node = Loop.NextTickNode{
+ .next = undefined,
+ .data = handle,
+ };
+ var queue_node = std.atomic.QueueMpsc(GetNode).Node{
+ .data = GetNode{
+ .ptr = &result,
+ .tick_node = &my_tick_node,
+ },
+ .next = undefined,
+ };
+ self.getters.put(&queue_node);
+ _ = @atomicRmw(usize, &self.get_count, AtomicRmwOp.Add, 1, AtomicOrder.SeqCst);
+
+ self.loop.onNextTick(dispatch_tick_node_ptr);
+ }
+ return result;
+ }
+
+ async fn dispatch(self: *SelfChannel, tick_node_ptr: **Loop.NextTickNode) void {
+ // resumed by onNextTick
+ suspend |handle| {
+ var tick_node = Loop.NextTickNode{
+ .data = handle,
+ .next = undefined,
+ };
+ tick_node_ptr.* = &tick_node;
+ }
+
+ // set the "need dispatch" flag
+ _ = @atomicRmw(u8, &self.need_dispatch, AtomicRmwOp.Xchg, 1, AtomicOrder.SeqCst);
+
+ lock: while (true) {
+ // set the lock flag
+ const prev_lock = @atomicRmw(u8, &self.dispatch_lock, AtomicRmwOp.Xchg, 1, AtomicOrder.SeqCst);
+ if (prev_lock != 0) return;
+
+ // clear the need_dispatch flag since we're about to do it
+ _ = @atomicRmw(u8, &self.need_dispatch, AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst);
+
+ while (true) {
+ one_dispatch: {
+ // later we correct these extra subtractions
+ var get_count = @atomicRmw(usize, &self.get_count, AtomicRmwOp.Sub, 1, AtomicOrder.SeqCst);
+ var put_count = @atomicRmw(usize, &self.put_count, AtomicRmwOp.Sub, 1, AtomicOrder.SeqCst);
+
+ // transfer self.buffer to self.getters
+ while (self.buffer_len != 0) {
+ if (get_count == 0) break :one_dispatch;
+
+ const get_node = &self.getters.get().?.data;
+ get_node.ptr.* = self.buffer_nodes[self.buffer_index -% self.buffer_len];
+ self.loop.onNextTick(get_node.tick_node);
+ self.buffer_len -= 1;
+
+ get_count = @atomicRmw(usize, &self.get_count, AtomicRmwOp.Sub, 1, AtomicOrder.SeqCst);
+ }
+
+ // direct transfer self.putters to self.getters
+ while (get_count != 0 and put_count != 0) {
+ const get_node = &self.getters.get().?.data;
+ const put_node = &self.putters.get().?.data;
+
+ get_node.ptr.* = put_node.data;
+ self.loop.onNextTick(get_node.tick_node);
+ self.loop.onNextTick(put_node.tick_node);
+
+ get_count = @atomicRmw(usize, &self.get_count, AtomicRmwOp.Sub, 1, AtomicOrder.SeqCst);
+ put_count = @atomicRmw(usize, &self.put_count, AtomicRmwOp.Sub, 1, AtomicOrder.SeqCst);
+ }
+
+ // transfer self.putters to self.buffer
+ while (self.buffer_len != self.buffer_nodes.len and put_count != 0) {
+ const put_node = &self.putters.get().?.data;
+
+ self.buffer_nodes[self.buffer_index] = put_node.data;
+ self.loop.onNextTick(put_node.tick_node);
+ self.buffer_index +%= 1;
+ self.buffer_len += 1;
+
+ put_count = @atomicRmw(usize, &self.put_count, AtomicRmwOp.Sub, 1, AtomicOrder.SeqCst);
+ }
+ }
+
+ // undo the extra subtractions
+ _ = @atomicRmw(usize, &self.get_count, AtomicRmwOp.Add, 1, AtomicOrder.SeqCst);
+ _ = @atomicRmw(usize, &self.put_count, AtomicRmwOp.Add, 1, AtomicOrder.SeqCst);
+
+ // clear need-dispatch flag
+ const need_dispatch = @atomicRmw(u8, &self.need_dispatch, AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst);
+ if (need_dispatch != 0) continue;
+
+ const my_lock = @atomicRmw(u8, &self.dispatch_lock, AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst);
+ assert(my_lock != 0);
+
+ // we have to check again now that we unlocked
+ if (@atomicLoad(u8, &self.need_dispatch, AtomicOrder.SeqCst) != 0) continue :lock;
+
+ return;
+ }
+ }
+ }
+ };
+}
+
+test "std.event.Channel" {
+ var da = std.heap.DirectAllocator.init();
+ defer da.deinit();
+
+ const allocator = &da.allocator;
+
+ var loop: Loop = undefined;
+ // TODO make a multi threaded test
+ try loop.initSingleThreaded(allocator);
+ defer loop.deinit();
+
+ const channel = try Channel(i32).create(&loop, 0);
+ defer channel.destroy();
+
+ const handle = try async testChannelGetter(&loop, channel);
+ defer cancel handle;
+
+ const putter = try async testChannelPutter(channel);
+ defer cancel putter;
+
+ loop.run();
+}
+
+async fn testChannelGetter(loop: *Loop, channel: *Channel(i32)) void {
+ errdefer @panic("test failed");
+
+ const value1_promise = try async channel.get();
+ const value1 = await value1_promise;
+ assert(value1 == 1234);
+
+ const value2_promise = try async channel.get();
+ const value2 = await value2_promise;
+ assert(value2 == 4567);
+}
+
+async fn testChannelPutter(channel: *Channel(i32)) void {
+ await (async channel.put(1234) catch @panic("out of memory"));
+ await (async channel.put(4567) catch @panic("out of memory"));
+}
+
diff --git a/std/event/lock.zig b/std/event/lock.zig
new file mode 100644
index 0000000000..2a8d5ada77
--- /dev/null
+++ b/std/event/lock.zig
@@ -0,0 +1,204 @@
+const std = @import("../index.zig");
+const builtin = @import("builtin");
+const assert = std.debug.assert;
+const mem = std.mem;
+const AtomicRmwOp = builtin.AtomicRmwOp;
+const AtomicOrder = builtin.AtomicOrder;
+const Loop = std.event.Loop;
+
+/// Thread-safe async/await lock.
+/// Does not make any syscalls - coroutines which are waiting for the lock are suspended, and
+/// are resumed when the lock is released, in order.
+pub const Lock = struct {
+ loop: *Loop,
+ shared_bit: u8, // TODO make this a bool
+ queue: Queue,
+ queue_empty_bit: u8, // TODO make this a bool
+
+ const Queue = std.atomic.QueueMpsc(promise);
+
+ pub const Held = struct {
+ lock: *Lock,
+
+ pub fn release(self: Held) void {
+ // Resume the next item from the queue.
+ if (self.lock.queue.get()) |node| {
+ self.lock.loop.onNextTick(node);
+ return;
+ }
+
+ // We need to release the lock.
+ _ = @atomicRmw(u8, &self.lock.queue_empty_bit, AtomicRmwOp.Xchg, 1, AtomicOrder.SeqCst);
+ _ = @atomicRmw(u8, &self.lock.shared_bit, AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst);
+
+ // There might be a queue item. If we know the queue is empty, we can be done,
+ // because the other actor will try to obtain the lock.
+ // But if there's a queue item, we are the actor which must loop and attempt
+ // to grab the lock again.
+ if (@atomicLoad(u8, &self.lock.queue_empty_bit, AtomicOrder.SeqCst) == 1) {
+ return;
+ }
+
+ while (true) {
+ const old_bit = @atomicRmw(u8, &self.lock.shared_bit, AtomicRmwOp.Xchg, 1, AtomicOrder.SeqCst);
+ if (old_bit != 0) {
+ // We did not obtain the lock. Great, the queue is someone else's problem.
+ return;
+ }
+
+ // Resume the next item from the queue.
+ if (self.lock.queue.get()) |node| {
+ self.lock.loop.onNextTick(node);
+ return;
+ }
+
+ // Release the lock again.
+ _ = @atomicRmw(u8, &self.lock.queue_empty_bit, AtomicRmwOp.Xchg, 1, AtomicOrder.SeqCst);
+ _ = @atomicRmw(u8, &self.lock.shared_bit, AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst);
+
+ // Find out if we can be done.
+ if (@atomicLoad(u8, &self.lock.queue_empty_bit, AtomicOrder.SeqCst) == 1) {
+ return;
+ }
+ }
+ }
+ };
+
+ pub fn init(loop: *Loop) Lock {
+ return Lock{
+ .loop = loop,
+ .shared_bit = 0,
+ .queue = Queue.init(),
+ .queue_empty_bit = 1,
+ };
+ }
+
+ /// Must be called when not locked. Not thread safe.
+ /// All calls to acquire() and release() must complete before calling deinit().
+ pub fn deinit(self: *Lock) void {
+ assert(self.shared_bit == 0);
+ while (self.queue.get()) |node| cancel node.data;
+ }
+
+ pub async fn acquire(self: *Lock) Held {
+ s: suspend |handle| {
+ // TODO explicitly put this memory in the coroutine frame #1194
+ var my_tick_node = Loop.NextTickNode{
+ .data = handle,
+ .next = undefined,
+ };
+
+ self.queue.put(&my_tick_node);
+
+ // At this point, we are in the queue, so we might have already been resumed and this coroutine
+ // frame might be destroyed. For the rest of the suspend block we cannot access the coroutine frame.
+
+            // We set this bit so that later we can rely on the fact that, if queue_empty_bit is 1, some actor
+ // will attempt to grab the lock.
+ _ = @atomicRmw(u8, &self.queue_empty_bit, AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst);
+
+ while (true) {
+ const old_bit = @atomicRmw(u8, &self.shared_bit, AtomicRmwOp.Xchg, 1, AtomicOrder.SeqCst);
+ if (old_bit != 0) {
+ // We did not obtain the lock. Trust that our queue entry will resume us, and allow
+ // suspend to complete.
+ break;
+ }
+ // We got the lock. However we might have already been resumed from the queue.
+ if (self.queue.get()) |node| {
+ // Whether this node is us or someone else, we tail resume it.
+ resume node.data;
+ break;
+ } else {
+ // We already got resumed, and there are none left in the queue, which means that
+ // we aren't even supposed to hold the lock right now.
+ _ = @atomicRmw(u8, &self.queue_empty_bit, AtomicRmwOp.Xchg, 1, AtomicOrder.SeqCst);
+ _ = @atomicRmw(u8, &self.shared_bit, AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst);
+
+ // There might be a queue item. If we know the queue is empty, we can be done,
+ // because the other actor will try to obtain the lock.
+ // But if there's a queue item, we are the actor which must loop and attempt
+ // to grab the lock again.
+ if (@atomicLoad(u8, &self.queue_empty_bit, AtomicOrder.SeqCst) == 1) {
+ break;
+ } else {
+ continue;
+ }
+ }
+ unreachable;
+ }
+ }
+
+ return Held{ .lock = self };
+ }
+};
+
+test "std.event.Lock" {
+ var da = std.heap.DirectAllocator.init();
+ defer da.deinit();
+
+ const allocator = &da.allocator;
+
+ var loop: Loop = undefined;
+ try loop.initMultiThreaded(allocator);
+ defer loop.deinit();
+
+ var lock = Lock.init(&loop);
+ defer lock.deinit();
+
+ const handle = try async testLock(&loop, &lock);
+ defer cancel handle;
+ loop.run();
+
+ assert(mem.eql(i32, shared_test_data, [1]i32{3 * @intCast(i32, shared_test_data.len)} ** shared_test_data.len));
+}
+
+async fn testLock(loop: *Loop, lock: *Lock) void {
+ // TODO explicitly put next tick node memory in the coroutine frame #1194
+ suspend |p| {
+ resume p;
+ }
+ const handle1 = async lockRunner(lock) catch @panic("out of memory");
+ var tick_node1 = Loop.NextTickNode{
+ .next = undefined,
+ .data = handle1,
+ };
+ loop.onNextTick(&tick_node1);
+
+ const handle2 = async lockRunner(lock) catch @panic("out of memory");
+ var tick_node2 = Loop.NextTickNode{
+ .next = undefined,
+ .data = handle2,
+ };
+ loop.onNextTick(&tick_node2);
+
+ const handle3 = async lockRunner(lock) catch @panic("out of memory");
+ var tick_node3 = Loop.NextTickNode{
+ .next = undefined,
+ .data = handle3,
+ };
+ loop.onNextTick(&tick_node3);
+
+ await handle1;
+ await handle2;
+ await handle3;
+}
+
+var shared_test_data = [1]i32{0} ** 10;
+var shared_test_index: usize = 0;
+
+async fn lockRunner(lock: *Lock) void {
+ suspend; // resumed by onNextTick
+
+ var i: usize = 0;
+ while (i < shared_test_data.len) : (i += 1) {
+ const lock_promise = async lock.acquire() catch @panic("out of memory");
+ const handle = await lock_promise;
+ defer handle.release();
+
+ shared_test_index = 0;
+ while (shared_test_index < shared_test_data.len) : (shared_test_index += 1) {
+ shared_test_data[shared_test_index] = shared_test_data[shared_test_index] + 1;
+ }
+ }
+}
diff --git a/std/event/locked.zig b/std/event/locked.zig
new file mode 100644
index 0000000000..41ab112aff
--- /dev/null
+++ b/std/event/locked.zig
@@ -0,0 +1,42 @@
+const std = @import("../index.zig");
+const Lock = std.event.Lock;
+
+/// Thread-safe async/await lock that protects one piece of data.
+/// Does not make any syscalls - coroutines which are waiting for the lock are suspended, and
+/// are resumed when the lock is released, in order.
+pub fn Locked(comptime T: type) type {
+ return struct {
+ lock: Lock,
+ private_data: T,
+
+ const Self = this;
+
+ pub const HeldLock = struct {
+ value: *T,
+ held: Lock.Held,
+
+ pub fn release(self: HeldLock) void {
+ self.held.release();
+ }
+ };
+
+ pub fn init(loop: *Loop, data: T) Self {
+ return Self{
+ .lock = Lock.init(loop),
+ .private_data = data,
+ };
+ }
+
+ pub fn deinit(self: *Self) void {
+ self.lock.deinit();
+ }
+
+ pub async fn acquire(self: *Self) HeldLock {
+ return HeldLock{
+ // TODO guaranteed allocation elision
+ .held = await (async self.lock.acquire() catch unreachable),
+ .value = &self.private_data,
+ };
+ }
+ };
+}
diff --git a/std/event/loop.zig b/std/event/loop.zig
new file mode 100644
index 0000000000..61c7f80cdc
--- /dev/null
+++ b/std/event/loop.zig
@@ -0,0 +1,577 @@
+const std = @import("../index.zig");
+const builtin = @import("builtin");
+const assert = std.debug.assert;
+const mem = std.mem;
+const posix = std.os.posix;
+const windows = std.os.windows;
+const AtomicRmwOp = builtin.AtomicRmwOp;
+const AtomicOrder = builtin.AtomicOrder;
+
+pub const Loop = struct {
+ allocator: *mem.Allocator,
+ next_tick_queue: std.atomic.QueueMpsc(promise),
+ os_data: OsData,
+ final_resume_node: ResumeNode,
+ dispatch_lock: u8, // TODO make this a bool
+ pending_event_count: usize,
+ extra_threads: []*std.os.Thread,
+
+ // pre-allocated eventfds. all permanently active.
+ // this is how we send promises to be resumed on other threads.
+ available_eventfd_resume_nodes: std.atomic.Stack(ResumeNode.EventFd),
+ eventfd_resume_nodes: []std.atomic.Stack(ResumeNode.EventFd).Node,
+
+ pub const NextTickNode = std.atomic.QueueMpsc(promise).Node;
+
+ pub const ResumeNode = struct {
+ id: Id,
+ handle: promise,
+
+ pub const Id = enum {
+ Basic,
+ Stop,
+ EventFd,
+ };
+
+ pub const EventFd = switch (builtin.os) {
+ builtin.Os.macosx => MacOsEventFd,
+ builtin.Os.linux => struct {
+ base: ResumeNode,
+ epoll_op: u32,
+ eventfd: i32,
+ },
+ builtin.Os.windows => struct {
+ base: ResumeNode,
+ completion_key: usize,
+ },
+ else => @compileError("unsupported OS"),
+ };
+
+ const MacOsEventFd = struct {
+ base: ResumeNode,
+ kevent: posix.Kevent,
+ };
+ };
+
+ /// After initialization, call run().
+ /// TODO copy elision / named return values so that the threads referencing *Loop
+ /// have the correct pointer value.
+ fn initSingleThreaded(self: *Loop, allocator: *mem.Allocator) !void {
+ return self.initInternal(allocator, 1);
+ }
+
+ /// The allocator must be thread-safe because we use it for multiplexing
+ /// coroutines onto kernel threads.
+ /// After initialization, call run().
+ /// TODO copy elision / named return values so that the threads referencing *Loop
+ /// have the correct pointer value.
+ fn initMultiThreaded(self: *Loop, allocator: *mem.Allocator) !void {
+ const core_count = try std.os.cpuCount(allocator);
+ return self.initInternal(allocator, core_count);
+ }
+
+ /// Thread count is the total thread count. The thread pool size will be
+ /// max(thread_count - 1, 0)
+ fn initInternal(self: *Loop, allocator: *mem.Allocator, thread_count: usize) !void {
+ self.* = Loop{
+ .pending_event_count = 0,
+ .allocator = allocator,
+ .os_data = undefined,
+ .next_tick_queue = std.atomic.QueueMpsc(promise).init(),
+ .dispatch_lock = 1, // start locked so threads go directly into epoll wait
+ .extra_threads = undefined,
+ .available_eventfd_resume_nodes = std.atomic.Stack(ResumeNode.EventFd).init(),
+ .eventfd_resume_nodes = undefined,
+ .final_resume_node = ResumeNode{
+ .id = ResumeNode.Id.Stop,
+ .handle = undefined,
+ },
+ };
+ const extra_thread_count = thread_count - 1;
+ self.eventfd_resume_nodes = try self.allocator.alloc(
+ std.atomic.Stack(ResumeNode.EventFd).Node,
+ extra_thread_count,
+ );
+ errdefer self.allocator.free(self.eventfd_resume_nodes);
+
+ self.extra_threads = try self.allocator.alloc(*std.os.Thread, extra_thread_count);
+ errdefer self.allocator.free(self.extra_threads);
+
+ try self.initOsData(extra_thread_count);
+ errdefer self.deinitOsData();
+ }
+
+ /// must call stop before deinit
+ pub fn deinit(self: *Loop) void {
+ self.deinitOsData();
+ self.allocator.free(self.extra_threads);
+ }
+
+ const InitOsDataError = std.os.LinuxEpollCreateError || mem.Allocator.Error || std.os.LinuxEventFdError ||
+ std.os.SpawnThreadError || std.os.LinuxEpollCtlError || std.os.BsdKEventError ||
+ std.os.WindowsCreateIoCompletionPortError;
+
+ const wakeup_bytes = []u8{0x1} ** 8;
+
+ fn initOsData(self: *Loop, extra_thread_count: usize) InitOsDataError!void {
+ switch (builtin.os) {
+ builtin.Os.linux => {
+ errdefer {
+ while (self.available_eventfd_resume_nodes.pop()) |node| std.os.close(node.data.eventfd);
+ }
+ for (self.eventfd_resume_nodes) |*eventfd_node| {
+ eventfd_node.* = std.atomic.Stack(ResumeNode.EventFd).Node{
+ .data = ResumeNode.EventFd{
+ .base = ResumeNode{
+ .id = ResumeNode.Id.EventFd,
+ .handle = undefined,
+ },
+ .eventfd = try std.os.linuxEventFd(1, posix.EFD_CLOEXEC | posix.EFD_NONBLOCK),
+ .epoll_op = posix.EPOLL_CTL_ADD,
+ },
+ .next = undefined,
+ };
+ self.available_eventfd_resume_nodes.push(eventfd_node);
+ }
+
+ self.os_data.epollfd = try std.os.linuxEpollCreate(posix.EPOLL_CLOEXEC);
+ errdefer std.os.close(self.os_data.epollfd);
+
+ self.os_data.final_eventfd = try std.os.linuxEventFd(0, posix.EFD_CLOEXEC | posix.EFD_NONBLOCK);
+ errdefer std.os.close(self.os_data.final_eventfd);
+
+ self.os_data.final_eventfd_event = posix.epoll_event{
+ .events = posix.EPOLLIN,
+ .data = posix.epoll_data{ .ptr = @ptrToInt(&self.final_resume_node) },
+ };
+ try std.os.linuxEpollCtl(
+ self.os_data.epollfd,
+ posix.EPOLL_CTL_ADD,
+ self.os_data.final_eventfd,
+ &self.os_data.final_eventfd_event,
+ );
+
+ var extra_thread_index: usize = 0;
+ errdefer {
+ // writing 8 bytes to an eventfd cannot fail
+ std.os.posixWrite(self.os_data.final_eventfd, wakeup_bytes) catch unreachable;
+ while (extra_thread_index != 0) {
+ extra_thread_index -= 1;
+ self.extra_threads[extra_thread_index].wait();
+ }
+ }
+ while (extra_thread_index < extra_thread_count) : (extra_thread_index += 1) {
+ self.extra_threads[extra_thread_index] = try std.os.spawnThread(self, workerRun);
+ }
+ },
+ builtin.Os.macosx => {
+ self.os_data.kqfd = try std.os.bsdKQueue();
+ errdefer std.os.close(self.os_data.kqfd);
+
+ self.os_data.kevents = try self.allocator.alloc(posix.Kevent, extra_thread_count);
+ errdefer self.allocator.free(self.os_data.kevents);
+
+ const eventlist = ([*]posix.Kevent)(undefined)[0..0];
+
+ for (self.eventfd_resume_nodes) |*eventfd_node, i| {
+ eventfd_node.* = std.atomic.Stack(ResumeNode.EventFd).Node{
+ .data = ResumeNode.EventFd{
+ .base = ResumeNode{
+ .id = ResumeNode.Id.EventFd,
+ .handle = undefined,
+ },
+ // this one is for sending events
+ .kevent = posix.Kevent{
+ .ident = i,
+ .filter = posix.EVFILT_USER,
+ .flags = posix.EV_CLEAR | posix.EV_ADD | posix.EV_DISABLE,
+ .fflags = 0,
+ .data = 0,
+ .udata = @ptrToInt(&eventfd_node.data.base),
+ },
+ },
+ .next = undefined,
+ };
+ self.available_eventfd_resume_nodes.push(eventfd_node);
+ const kevent_array = (*[1]posix.Kevent)(&eventfd_node.data.kevent);
+ _ = try std.os.bsdKEvent(self.os_data.kqfd, kevent_array, eventlist, null);
+ eventfd_node.data.kevent.flags = posix.EV_CLEAR | posix.EV_ENABLE;
+ eventfd_node.data.kevent.fflags = posix.NOTE_TRIGGER;
+ // this one is for waiting for events
+ self.os_data.kevents[i] = posix.Kevent{
+ .ident = i,
+ .filter = posix.EVFILT_USER,
+ .flags = 0,
+ .fflags = 0,
+ .data = 0,
+ .udata = @ptrToInt(&eventfd_node.data.base),
+ };
+ }
+
+ // Pre-add so that we cannot get error.SystemResources
+ // later when we try to activate it.
+ self.os_data.final_kevent = posix.Kevent{
+ .ident = extra_thread_count,
+ .filter = posix.EVFILT_USER,
+ .flags = posix.EV_ADD | posix.EV_DISABLE,
+ .fflags = 0,
+ .data = 0,
+ .udata = @ptrToInt(&self.final_resume_node),
+ };
+ const kevent_array = (*[1]posix.Kevent)(&self.os_data.final_kevent);
+ _ = try std.os.bsdKEvent(self.os_data.kqfd, kevent_array, eventlist, null);
+ self.os_data.final_kevent.flags = posix.EV_ENABLE;
+ self.os_data.final_kevent.fflags = posix.NOTE_TRIGGER;
+
+ var extra_thread_index: usize = 0;
+ errdefer {
+ _ = std.os.bsdKEvent(self.os_data.kqfd, kevent_array, eventlist, null) catch unreachable;
+ while (extra_thread_index != 0) {
+ extra_thread_index -= 1;
+ self.extra_threads[extra_thread_index].wait();
+ }
+ }
+ while (extra_thread_index < extra_thread_count) : (extra_thread_index += 1) {
+ self.extra_threads[extra_thread_index] = try std.os.spawnThread(self, workerRun);
+ }
+ },
+ builtin.Os.windows => {
+ self.os_data.extra_thread_count = extra_thread_count;
+
+ self.os_data.io_port = try std.os.windowsCreateIoCompletionPort(
+ windows.INVALID_HANDLE_VALUE,
+ null,
+ undefined,
+ undefined,
+ );
+ errdefer std.os.close(self.os_data.io_port);
+
+ for (self.eventfd_resume_nodes) |*eventfd_node, i| {
+ eventfd_node.* = std.atomic.Stack(ResumeNode.EventFd).Node{
+ .data = ResumeNode.EventFd{
+ .base = ResumeNode{
+ .id = ResumeNode.Id.EventFd,
+ .handle = undefined,
+ },
+ // this one is for sending events
+ .completion_key = @ptrToInt(&eventfd_node.data.base),
+ },
+ .next = undefined,
+ };
+ self.available_eventfd_resume_nodes.push(eventfd_node);
+ }
+
+ var extra_thread_index: usize = 0;
+ errdefer {
+ var i: usize = 0;
+ while (i < extra_thread_index) : (i += 1) {
+ while (true) {
+ const overlapped = @intToPtr(?*windows.OVERLAPPED, 0x1);
+ std.os.windowsPostQueuedCompletionStatus(self.os_data.io_port, undefined, @ptrToInt(&self.final_resume_node), overlapped) catch continue;
+ break;
+ }
+ }
+ while (extra_thread_index != 0) {
+ extra_thread_index -= 1;
+ self.extra_threads[extra_thread_index].wait();
+ }
+ }
+ while (extra_thread_index < extra_thread_count) : (extra_thread_index += 1) {
+ self.extra_threads[extra_thread_index] = try std.os.spawnThread(self, workerRun);
+ }
+ },
+ else => {},
+ }
+ }
+
+ fn deinitOsData(self: *Loop) void {
+ switch (builtin.os) {
+ builtin.Os.linux => {
+ std.os.close(self.os_data.final_eventfd);
+ while (self.available_eventfd_resume_nodes.pop()) |node| std.os.close(node.data.eventfd);
+ std.os.close(self.os_data.epollfd);
+ self.allocator.free(self.eventfd_resume_nodes);
+ },
+ builtin.Os.macosx => {
+ self.allocator.free(self.os_data.kevents);
+ std.os.close(self.os_data.kqfd);
+ },
+ builtin.Os.windows => {
+ std.os.close(self.os_data.io_port);
+ },
+ else => {},
+ }
+ }
+
+ /// resume_node must live longer than the promise that it holds a reference to.
+ pub fn addFd(self: *Loop, fd: i32, resume_node: *ResumeNode) !void {
+ _ = @atomicRmw(usize, &self.pending_event_count, AtomicRmwOp.Add, 1, AtomicOrder.SeqCst);
+ errdefer {
+ _ = @atomicRmw(usize, &self.pending_event_count, AtomicRmwOp.Sub, 1, AtomicOrder.SeqCst);
+ }
+ try self.modFd(
+ fd,
+ posix.EPOLL_CTL_ADD,
+ std.os.linux.EPOLLIN | std.os.linux.EPOLLOUT | std.os.linux.EPOLLET,
+ resume_node,
+ );
+ }
+
+ pub fn modFd(self: *Loop, fd: i32, op: u32, events: u32, resume_node: *ResumeNode) !void {
+ var ev = std.os.linux.epoll_event{
+ .events = events,
+ .data = std.os.linux.epoll_data{ .ptr = @ptrToInt(resume_node) },
+ };
+ try std.os.linuxEpollCtl(self.os_data.epollfd, op, fd, &ev);
+ }
+
+ pub fn removeFd(self: *Loop, fd: i32) void {
+ self.removeFdNoCounter(fd);
+ _ = @atomicRmw(usize, &self.pending_event_count, AtomicRmwOp.Sub, 1, AtomicOrder.SeqCst);
+ }
+
+ fn removeFdNoCounter(self: *Loop, fd: i32) void {
+ std.os.linuxEpollCtl(self.os_data.epollfd, std.os.linux.EPOLL_CTL_DEL, fd, undefined) catch {};
+ }
+
+ pub async fn waitFd(self: *Loop, fd: i32) !void {
+ defer self.removeFd(fd);
+ suspend |p| {
+ // TODO explicitly put this memory in the coroutine frame #1194
+ var resume_node = ResumeNode{
+ .id = ResumeNode.Id.Basic,
+ .handle = p,
+ };
+ try self.addFd(fd, &resume_node);
+ }
+ }
+
+ /// Bring your own linked list node. This means it can't fail.
+ pub fn onNextTick(self: *Loop, node: *NextTickNode) void {
+ _ = @atomicRmw(usize, &self.pending_event_count, AtomicRmwOp.Add, 1, AtomicOrder.SeqCst);
+ self.next_tick_queue.put(node);
+ }
+
+ pub fn run(self: *Loop) void {
+ _ = @atomicRmw(u8, &self.dispatch_lock, AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst);
+ self.workerRun();
+ for (self.extra_threads) |extra_thread| {
+ extra_thread.wait();
+ }
+ }
+
+ fn workerRun(self: *Loop) void {
+ start_over: while (true) {
+ if (@atomicRmw(u8, &self.dispatch_lock, AtomicRmwOp.Xchg, 1, AtomicOrder.SeqCst) == 0) {
+ while (self.next_tick_queue.get()) |next_tick_node| {
+ const handle = next_tick_node.data;
+ if (self.next_tick_queue.isEmpty()) {
+ // last node, just resume it
+ _ = @atomicRmw(u8, &self.dispatch_lock, AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst);
+ resume handle;
+ _ = @atomicRmw(usize, &self.pending_event_count, AtomicRmwOp.Sub, 1, AtomicOrder.SeqCst);
+ continue :start_over;
+ }
+
+ // non-last node, stick it in the epoll/kqueue set so that
+ // other threads can get to it
+ if (self.available_eventfd_resume_nodes.pop()) |resume_stack_node| {
+ const eventfd_node = &resume_stack_node.data;
+ eventfd_node.base.handle = handle;
+ switch (builtin.os) {
+ builtin.Os.macosx => {
+ const kevent_array = (*[1]posix.Kevent)(&eventfd_node.kevent);
+ const eventlist = ([*]posix.Kevent)(undefined)[0..0];
+ _ = std.os.bsdKEvent(self.os_data.kqfd, kevent_array, eventlist, null) catch {
+ // fine, we didn't need it anyway
+ _ = @atomicRmw(u8, &self.dispatch_lock, AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst);
+ self.available_eventfd_resume_nodes.push(resume_stack_node);
+ resume handle;
+ _ = @atomicRmw(usize, &self.pending_event_count, AtomicRmwOp.Sub, 1, AtomicOrder.SeqCst);
+ continue :start_over;
+ };
+ },
+ builtin.Os.linux => {
+ // the pending count is already accounted for
+ const epoll_events = posix.EPOLLONESHOT | std.os.linux.EPOLLIN | std.os.linux.EPOLLOUT | std.os.linux.EPOLLET;
+ self.modFd(eventfd_node.eventfd, eventfd_node.epoll_op, epoll_events, &eventfd_node.base) catch {
+ // fine, we didn't need it anyway
+ _ = @atomicRmw(u8, &self.dispatch_lock, AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst);
+ self.available_eventfd_resume_nodes.push(resume_stack_node);
+ resume handle;
+ _ = @atomicRmw(usize, &self.pending_event_count, AtomicRmwOp.Sub, 1, AtomicOrder.SeqCst);
+ continue :start_over;
+ };
+ },
+ builtin.Os.windows => {
+ // this value is never dereferenced but we need it to be non-null so that
+ // the consumer code can decide whether to read the completion key.
+ // it has to do this for normal I/O, so we match that behavior here.
+ const overlapped = @intToPtr(?*windows.OVERLAPPED, 0x1);
+ std.os.windowsPostQueuedCompletionStatus(self.os_data.io_port, undefined, eventfd_node.completion_key, overlapped) catch {
+ // fine, we didn't need it anyway
+ _ = @atomicRmw(u8, &self.dispatch_lock, AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst);
+ self.available_eventfd_resume_nodes.push(resume_stack_node);
+ resume handle;
+ _ = @atomicRmw(usize, &self.pending_event_count, AtomicRmwOp.Sub, 1, AtomicOrder.SeqCst);
+ continue :start_over;
+ };
+ },
+ else => @compileError("unsupported OS"),
+ }
+ } else {
+ // threads are too busy, can't add another eventfd to wake one up
+ _ = @atomicRmw(u8, &self.dispatch_lock, AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst);
+ resume handle;
+ _ = @atomicRmw(usize, &self.pending_event_count, AtomicRmwOp.Sub, 1, AtomicOrder.SeqCst);
+ continue :start_over;
+ }
+ }
+
+ const pending_event_count = @atomicLoad(usize, &self.pending_event_count, AtomicOrder.SeqCst);
+ if (pending_event_count == 0) {
+ // cause all the threads to stop
+ switch (builtin.os) {
+ builtin.Os.linux => {
+ // writing 8 bytes to an eventfd cannot fail
+ std.os.posixWrite(self.os_data.final_eventfd, wakeup_bytes) catch unreachable;
+ return;
+ },
+ builtin.Os.macosx => {
+ const final_kevent = (*[1]posix.Kevent)(&self.os_data.final_kevent);
+ const eventlist = ([*]posix.Kevent)(undefined)[0..0];
+ // cannot fail because we already added it and this just enables it
+ _ = std.os.bsdKEvent(self.os_data.kqfd, final_kevent, eventlist, null) catch unreachable;
+ return;
+ },
+ builtin.Os.windows => {
+ var i: usize = 0;
+ while (i < self.os_data.extra_thread_count) : (i += 1) {
+ while (true) {
+ const overlapped = @intToPtr(?*windows.OVERLAPPED, 0x1);
+ std.os.windowsPostQueuedCompletionStatus(self.os_data.io_port, undefined, @ptrToInt(&self.final_resume_node), overlapped) catch continue;
+ break;
+ }
+ }
+ return;
+ },
+ else => @compileError("unsupported OS"),
+ }
+ }
+
+ _ = @atomicRmw(u8, &self.dispatch_lock, AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst);
+ }
+
+ switch (builtin.os) {
+ builtin.Os.linux => {
+ // only process 1 event so we don't steal from other threads
+ var events: [1]std.os.linux.epoll_event = undefined;
+ const count = std.os.linuxEpollWait(self.os_data.epollfd, events[0..], -1);
+ for (events[0..count]) |ev| {
+ const resume_node = @intToPtr(*ResumeNode, ev.data.ptr);
+ const handle = resume_node.handle;
+ const resume_node_id = resume_node.id;
+ switch (resume_node_id) {
+ ResumeNode.Id.Basic => {},
+ ResumeNode.Id.Stop => return,
+ ResumeNode.Id.EventFd => {
+ const event_fd_node = @fieldParentPtr(ResumeNode.EventFd, "base", resume_node);
+ event_fd_node.epoll_op = posix.EPOLL_CTL_MOD;
+ const stack_node = @fieldParentPtr(std.atomic.Stack(ResumeNode.EventFd).Node, "data", event_fd_node);
+ self.available_eventfd_resume_nodes.push(stack_node);
+ },
+ }
+ resume handle;
+ if (resume_node_id == ResumeNode.Id.EventFd) {
+ _ = @atomicRmw(usize, &self.pending_event_count, AtomicRmwOp.Sub, 1, AtomicOrder.SeqCst);
+ }
+ }
+ },
+ builtin.Os.macosx => {
+ var eventlist: [1]posix.Kevent = undefined;
+ const count = std.os.bsdKEvent(self.os_data.kqfd, self.os_data.kevents, eventlist[0..], null) catch unreachable;
+ for (eventlist[0..count]) |ev| {
+ const resume_node = @intToPtr(*ResumeNode, ev.udata);
+ const handle = resume_node.handle;
+ const resume_node_id = resume_node.id;
+ switch (resume_node_id) {
+ ResumeNode.Id.Basic => {},
+ ResumeNode.Id.Stop => return,
+ ResumeNode.Id.EventFd => {
+ const event_fd_node = @fieldParentPtr(ResumeNode.EventFd, "base", resume_node);
+ const stack_node = @fieldParentPtr(std.atomic.Stack(ResumeNode.EventFd).Node, "data", event_fd_node);
+ self.available_eventfd_resume_nodes.push(stack_node);
+ },
+ }
+ resume handle;
+ if (resume_node_id == ResumeNode.Id.EventFd) {
+ _ = @atomicRmw(usize, &self.pending_event_count, AtomicRmwOp.Sub, 1, AtomicOrder.SeqCst);
+ }
+ }
+ },
+ builtin.Os.windows => {
+ var completion_key: usize = undefined;
+ while (true) {
+ var nbytes: windows.DWORD = undefined;
+ var overlapped: ?*windows.OVERLAPPED = undefined;
+ switch (std.os.windowsGetQueuedCompletionStatus(self.os_data.io_port, &nbytes, &completion_key, &overlapped, windows.INFINITE)) {
+ std.os.WindowsWaitResult.Aborted => return,
+ std.os.WindowsWaitResult.Normal => {},
+ }
+ if (overlapped != null) break;
+ }
+ const resume_node = @intToPtr(*ResumeNode, completion_key);
+ const handle = resume_node.handle;
+ const resume_node_id = resume_node.id;
+ switch (resume_node_id) {
+ ResumeNode.Id.Basic => {},
+ ResumeNode.Id.Stop => return,
+ ResumeNode.Id.EventFd => {
+ const event_fd_node = @fieldParentPtr(ResumeNode.EventFd, "base", resume_node);
+ const stack_node = @fieldParentPtr(std.atomic.Stack(ResumeNode.EventFd).Node, "data", event_fd_node);
+ self.available_eventfd_resume_nodes.push(stack_node);
+ },
+ }
+ resume handle;
+ if (resume_node_id == ResumeNode.Id.EventFd) {
+ _ = @atomicRmw(usize, &self.pending_event_count, AtomicRmwOp.Sub, 1, AtomicOrder.SeqCst);
+ }
+ },
+ else => @compileError("unsupported OS"),
+ }
+ }
+ }
+
+ const OsData = switch (builtin.os) {
+ builtin.Os.linux => struct {
+ epollfd: i32,
+ final_eventfd: i32,
+ final_eventfd_event: std.os.linux.epoll_event,
+ },
+ builtin.Os.macosx => MacOsData,
+ builtin.Os.windows => struct {
+ io_port: windows.HANDLE,
+ extra_thread_count: usize,
+ },
+ else => struct {},
+ };
+
+ const MacOsData = struct {
+ kqfd: i32,
+ final_kevent: posix.Kevent,
+ kevents: []posix.Kevent,
+ };
+};
+
+test "std.event.Loop - basic" {
+ //var da = std.heap.DirectAllocator.init();
+ //defer da.deinit();
+
+ //const allocator = &da.allocator;
+
+ //var loop: Loop = undefined;
+ //try loop.initMultiThreaded(allocator);
+ //defer loop.deinit();
+
+ //loop.run();
+}
diff --git a/std/event/tcp.zig b/std/event/tcp.zig
new file mode 100644
index 0000000000..5151ecf934
--- /dev/null
+++ b/std/event/tcp.zig
@@ -0,0 +1,183 @@
+const std = @import("../index.zig");
+const builtin = @import("builtin");
+const assert = std.debug.assert;
+const event = std.event;
+const mem = std.mem;
+const posix = std.os.posix;
+const windows = std.os.windows;
+const Loop = std.event.Loop;
+
+pub const Server = struct {
+ handleRequestFn: async<*mem.Allocator> fn (*Server, *const std.net.Address, *const std.os.File) void,
+
+ loop: *Loop,
+ sockfd: ?i32,
+ accept_coro: ?promise,
+ listen_address: std.net.Address,
+
+ waiting_for_emfile_node: PromiseNode,
+ listen_resume_node: event.Loop.ResumeNode,
+
+ const PromiseNode = std.LinkedList(promise).Node;
+
+ pub fn init(loop: *Loop) Server {
+ // TODO can't initialize handler coroutine here because we need well defined copy elision
+ return Server{
+ .loop = loop,
+ .sockfd = null,
+ .accept_coro = null,
+ .handleRequestFn = undefined,
+ .waiting_for_emfile_node = undefined,
+ .listen_address = undefined,
+ .listen_resume_node = event.Loop.ResumeNode{
+ .id = event.Loop.ResumeNode.Id.Basic,
+ .handle = undefined,
+ },
+ };
+ }
+
+ pub fn listen(
+ self: *Server,
+ address: *const std.net.Address,
+ handleRequestFn: async<*mem.Allocator> fn (*Server, *const std.net.Address, *const std.os.File) void,
+ ) !void {
+ self.handleRequestFn = handleRequestFn;
+
+ const sockfd = try std.os.posixSocket(posix.AF_INET, posix.SOCK_STREAM | posix.SOCK_CLOEXEC | posix.SOCK_NONBLOCK, posix.PROTO_tcp);
+ errdefer std.os.close(sockfd);
+ self.sockfd = sockfd;
+
+ try std.os.posixBind(sockfd, &address.os_addr);
+ try std.os.posixListen(sockfd, posix.SOMAXCONN);
+ self.listen_address = std.net.Address.initPosix(try std.os.posixGetSockName(sockfd));
+
+ self.accept_coro = try async Server.handler(self);
+ errdefer cancel self.accept_coro.?;
+
+ self.listen_resume_node.handle = self.accept_coro.?;
+ try self.loop.addFd(sockfd, &self.listen_resume_node);
+ errdefer self.loop.removeFd(sockfd);
+ }
+
+ /// Stop listening
+ pub fn close(self: *Server) void {
+ self.loop.removeFd(self.sockfd.?);
+ std.os.close(self.sockfd.?);
+ }
+
+ pub fn deinit(self: *Server) void {
+ if (self.accept_coro) |accept_coro| cancel accept_coro;
+ if (self.sockfd) |sockfd| std.os.close(sockfd);
+ }
+
+ pub async fn handler(self: *Server) void {
+ while (true) {
+ var accepted_addr: std.net.Address = undefined;
+ if (std.os.posixAccept(self.sockfd.?, &accepted_addr.os_addr, posix.SOCK_NONBLOCK | posix.SOCK_CLOEXEC)) |accepted_fd| {
+ var socket = std.os.File.openHandle(accepted_fd);
+ _ = async self.handleRequestFn(self, accepted_addr, socket) catch |err| switch (err) {
+ error.OutOfMemory => {
+ socket.close();
+ continue;
+ },
+ };
+ } else |err| switch (err) {
+ error.WouldBlock => {
+ suspend; // we will get resumed by epoll_wait in the event loop
+ continue;
+ },
+ error.ProcessFdQuotaExceeded => {
+ errdefer std.os.emfile_promise_queue.remove(&self.waiting_for_emfile_node);
+ suspend |p| {
+ self.waiting_for_emfile_node = PromiseNode.init(p);
+ std.os.emfile_promise_queue.append(&self.waiting_for_emfile_node);
+ }
+ continue;
+ },
+ error.ConnectionAborted, error.FileDescriptorClosed => continue,
+
+ error.PageFault => unreachable,
+ error.InvalidSyscall => unreachable,
+ error.FileDescriptorNotASocket => unreachable,
+ error.OperationNotSupported => unreachable,
+
+ error.SystemFdQuotaExceeded, error.SystemResources, error.ProtocolFailure, error.BlockedByFirewall, error.Unexpected => {
+ @panic("TODO handle this error");
+ },
+ }
+ }
+ }
+};
+
+pub async fn connect(loop: *Loop, _address: *const std.net.Address) !std.os.File {
+ var address = _address.*; // TODO https://github.com/ziglang/zig/issues/733
+
+ const sockfd = try std.os.posixSocket(posix.AF_INET, posix.SOCK_STREAM | posix.SOCK_CLOEXEC | posix.SOCK_NONBLOCK, posix.PROTO_tcp);
+ errdefer std.os.close(sockfd);
+
+ try std.os.posixConnectAsync(sockfd, &address.os_addr);
+ try await try async loop.waitFd(sockfd);
+ try std.os.posixGetSockOptConnectError(sockfd);
+
+ return std.os.File.openHandle(sockfd);
+}
+
+test "listen on a port, send bytes, receive bytes" {
+ if (builtin.os != builtin.Os.linux) {
+ // TODO build abstractions for other operating systems
+ return;
+ }
+ const MyServer = struct {
+ tcp_server: Server,
+
+ const Self = this;
+ async<*mem.Allocator> fn handler(tcp_server: *Server, _addr: *const std.net.Address, _socket: *const std.os.File) void {
+ const self = @fieldParentPtr(Self, "tcp_server", tcp_server);
+ var socket = _socket.*; // TODO https://github.com/ziglang/zig/issues/733
+ defer socket.close();
+ // TODO guarantee elision of this allocation
+ const next_handler = async errorableHandler(self, _addr, socket) catch unreachable;
+ (await next_handler) catch |err| {
+ std.debug.panic("unable to handle connection: {}\n", err);
+ };
+ suspend |p| {
+ cancel p;
+ }
+ }
+ async fn errorableHandler(self: *Self, _addr: *const std.net.Address, _socket: *const std.os.File) !void {
+ const addr = _addr.*; // TODO https://github.com/ziglang/zig/issues/733
+ var socket = _socket.*; // TODO https://github.com/ziglang/zig/issues/733
+
+ var adapter = std.io.FileOutStream.init(&socket);
+ var stream = &adapter.stream;
+ try stream.print("hello from server\n");
+ }
+ };
+
+ const ip4addr = std.net.parseIp4("127.0.0.1") catch unreachable;
+ const addr = std.net.Address.initIp4(ip4addr, 0);
+
+ var loop: Loop = undefined;
+ try loop.initSingleThreaded(std.debug.global_allocator);
+ var server = MyServer{ .tcp_server = Server.init(&loop) };
+ defer server.tcp_server.deinit();
+ try server.tcp_server.listen(addr, MyServer.handler);
+
+ const p = try async doAsyncTest(&loop, server.tcp_server.listen_address, &server.tcp_server);
+ defer cancel p;
+ loop.run();
+}
+
+async fn doAsyncTest(loop: *Loop, address: *const std.net.Address, server: *Server) void {
+ errdefer @panic("test failure");
+
+ var socket_file = try await try async connect(loop, address);
+ defer socket_file.close();
+
+ var buf: [512]u8 = undefined;
+ const amt_read = try socket_file.read(buf[0..]);
+ const msg = buf[0..amt_read];
+ assert(mem.eql(u8, msg, "hello from server\n"));
+ server.close();
+}
+
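The `try await try async connect(loop, address)` expression above packs two failure points into one line: the inner `try async` fails if allocating the coroutine frame fails, and the outer `try await` propagates whatever error `connect` itself returns. A minimal hedged sketch of the same pattern, reusing the `connect` function and file-level imports from this patch (the function name `fetchGreeting` is hypothetical and not part of the change):

async fn fetchGreeting(loop: *Loop, address: *const std.net.Address) !void {
    // inner `try async`: coroutine frame allocation can fail with error.OutOfMemory
    // outer `try await`: propagates errors returned by connect() itself
    var socket_file = try await try async connect(loop, address);
    defer socket_file.close();

    var buf: [512]u8 = undefined;
    const amt_read = try socket_file.read(buf[0..]);
    std.debug.warn("server said: {}\n", buf[0..amt_read]);
}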
From 1b82a9defce4daf2fea8e6290c7a8a4689878194 Mon Sep 17 00:00:00 2001
From: Andrew Kelley
Date: Mon, 9 Jul 2018 22:41:16 -0400
Subject: [PATCH 31/35] enable basic event loop test
---
std/event/loop.zig | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/std/event/loop.zig b/std/event/loop.zig
index 61c7f80cdc..613d4f48a4 100644
--- a/std/event/loop.zig
+++ b/std/event/loop.zig
@@ -564,14 +564,14 @@ pub const Loop = struct {
};
test "std.event.Loop - basic" {
- //var da = std.heap.DirectAllocator.init();
- //defer da.deinit();
+ var da = std.heap.DirectAllocator.init();
+ defer da.deinit();
- //const allocator = &da.allocator;
+ const allocator = &da.allocator;
- //var loop: Loop = undefined;
- //try loop.initMultiThreaded(allocator);
- //defer loop.deinit();
+ var loop: Loop = undefined;
+ try loop.initMultiThreaded(allocator);
+ defer loop.deinit();
- //loop.run();
+ loop.run();
}
From b5cfbfd84ee26d5174abf3368b75b9e4502898a5 Mon Sep 17 00:00:00 2001
From: Andrew Kelley
Date: Mon, 9 Jul 2018 23:41:28 -0400
Subject: [PATCH 32/35] fix regression from b6eb4048
---
std/event/locked.zig | 1 +
1 file changed, 1 insertion(+)
diff --git a/std/event/locked.zig b/std/event/locked.zig
index 41ab112aff..e7ad544d78 100644
--- a/std/event/locked.zig
+++ b/std/event/locked.zig
@@ -1,5 +1,6 @@
const std = @import("../index.zig");
const Lock = std.event.Lock;
+const Loop = std.event.Loop;
/// Thread-safe async/await lock that protects one piece of data.
/// Does not make any syscalls - coroutines which are waiting for the lock are suspended, and
From 28f9230b40ee7aa179705c39616aaf2a5f303b73 Mon Sep 17 00:00:00 2001
From: Andrew Kelley
Date: Tue, 10 Jul 2018 10:12:08 -0400
Subject: [PATCH 33/35] fix crash when calling comptime-known undefined
function ptr
closes #880
closes #1212
---
src/ir.cpp | 2 ++
test/compile_errors.zig | 13 +++++++++++++
2 files changed, 15 insertions(+)
diff --git a/src/ir.cpp b/src/ir.cpp
index 2dc6ddad2c..10ce3254fd 100644
--- a/src/ir.cpp
+++ b/src/ir.cpp
@@ -13271,6 +13271,8 @@ static TypeTableEntry *ir_analyze_instruction_call(IrAnalyze *ira, IrInstruction
return ir_finish_anal(ira, cast_instruction->value.type);
} else if (fn_ref->value.type->id == TypeTableEntryIdFn) {
FnTableEntry *fn_table_entry = ir_resolve_fn(ira, fn_ref);
+ if (fn_table_entry == nullptr)
+ return ira->codegen->builtin_types.entry_invalid;
return ir_analyze_fn_call(ira, call_instruction, fn_table_entry, fn_table_entry->type_entry,
fn_ref, nullptr, is_comptime, call_instruction->fn_inline);
} else if (fn_ref->value.type->id == TypeTableEntryIdBoundFn) {
diff --git a/test/compile_errors.zig b/test/compile_errors.zig
index 1b76c01564..a6db8d50b4 100644
--- a/test/compile_errors.zig
+++ b/test/compile_errors.zig
@@ -1,6 +1,19 @@
const tests = @import("tests.zig");
pub fn addCases(cases: *tests.CompileErrorContext) void {
+ cases.add(
+ "use of comptime-known undefined function value",
+ \\const Cmd = struct {
+ \\ exec: fn () void,
+ \\};
+ \\export fn entry() void {
+ \\ const command = Cmd{ .exec = undefined };
+ \\ command.exec();
+ \\}
+ ,
+ ".tmp_source.zig:6:12: error: use of undefined value",
+ );
+
cases.add(
"bad @alignCast at comptime",
\\comptime {
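For contrast with the new compile error above, a hedged sketch (not part of the patch) of the well-formed counterpart: once `exec` is assigned a real, comptime-known function, the same call resolves and compiles cleanly.

const Cmd = struct {
    exec: fn () void,
};

fn sayHi() void {}

export fn entry() void {
    // exec is comptime-known and defined, so the analyzer can resolve the call
    const command = Cmd{ .exec = sayHi };
    command.exec();
}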
From 696ef0bc03ccbe61dff5b09a257c2de7b227290a Mon Sep 17 00:00:00 2001
From: Andrew Kelley
Date: Tue, 10 Jul 2018 10:37:58 -0400
Subject: [PATCH 34/35] langref: docs for union safety
---
doc/langref.html.in | 88 ++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 84 insertions(+), 4 deletions(-)
diff --git a/doc/langref.html.in b/doc/langref.html.in
index 16e9023f26..c90c847f92 100644
--- a/doc/langref.html.in
+++ b/doc/langref.html.in
@@ -6665,6 +6665,8 @@ comptime {
{#code_end#}
At runtime:
{#code_begin|exe_err#}
+const std = @import("std");
+
const Set1 = error{
A,
B,
@@ -6674,10 +6676,11 @@ const Set2 = error{
C,
};
pub fn main() void {
- _ = foo(Set1.B);
+ foo(Set1.B);
}
-fn foo(set1: Set1) Set2 {
- return @errSetCast(Set2, set1);
+fn foo(set1: Set1) void {
+ const x = @errSetCast(Set2, set1);
+ std.debug.warn("value: {}\n", x);
}
{#code_end#}
{#header_close#}
@@ -6705,7 +6708,84 @@ fn foo(bytes: []u8) u32 {
{#code_end#}
{#header_close#}
{#header_open|Wrong Union Field Access#}
- TODO
+ At compile-time:
+ {#code_begin|test_err|accessing union field 'float' while field 'int' is set#}
+comptime {
+ var f = Foo{ .int = 42 };
+ f.float = 12.34;
+}
+
+const Foo = union {
+ float: f32,
+ int: u32,
+};
+ {#code_end#}
+ At runtime:
+ {#code_begin|exe_err#}
+const std = @import("std");
+
+const Foo = union {
+ float: f32,
+ int: u32,
+};
+
+pub fn main() void {
+ var f = Foo{ .int = 42 };
+ bar(&f);
+}
+
+fn bar(f: *Foo) void {
+ f.float = 12.34;
+ std.debug.warn("value: {}\n", f.float);
+}
+ {#code_end#}
+
+ This safety is not available for extern or packed unions.
+
+
+ To change the active field of a union, assign the entire union, like this:
+
+ {#code_begin|exe#}
+const std = @import("std");
+
+const Foo = union {
+ float: f32,
+ int: u32,
+};
+
+pub fn main() void {
+ var f = Foo{ .int = 42 };
+ bar(&f);
+}
+
+fn bar(f: *Foo) void {
+ f.* = Foo{ .float = 12.34 };
+ std.debug.warn("value: {}\n", f.float);
+}
+ {#code_end#}
+
+ To change the active field of a union when a meaningful value for the field is not known,
+ use {#link|undefined#}, like this:
+
+ {#code_begin|exe#}
+const std = @import("std");
+
+const Foo = union {
+ float: f32,
+ int: u32,
+};
+
+pub fn main() void {
+ var f = Foo{ .int = 42 };
+ f = Foo{ .float = undefined };
+ bar(&f);
+ std.debug.warn("value: {}\n", f.float);
+}
+
+fn bar(f: *Foo) void {
+ f.float = 12.34;
+}
+ {#code_end#}
{#header_close#}
{#header_open|Out of Bounds Float To Integer Cast#}
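The documentation above notes that this safety check is not available for extern or packed unions. A hedged sketch (not part of the patch) of why that matters: an extern union carries no hidden tag field, so reading a field other than the one most recently written is not checked, it simply reinterprets the stored bits.

const std = @import("std");

const Bits = extern union {
    float: f32,
    int: u32,
};

pub fn main() void {
    var b = Bits{ .int = 0x3f800000 };
    // No safety panic here: the u32 bit pattern is reinterpreted as an f32.
    std.debug.warn("as float: {}\n", b.float);
}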
From 0ce6934e2631eb3beca817d3bce12ecb13aafa13 Mon Sep 17 00:00:00 2001
From: Andrew Kelley
Date: Tue, 10 Jul 2018 11:44:47 -0400
Subject: [PATCH 35/35] allow var args calls to async functions
---
src/ir.cpp | 53 +++++++++++++++++++++++++++++-----------------
std/event/loop.zig | 52 +++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 85 insertions(+), 20 deletions(-)
diff --git a/src/ir.cpp b/src/ir.cpp
index 10ce3254fd..7f7436010e 100644
--- a/src/ir.cpp
+++ b/src/ir.cpp
@@ -12721,14 +12721,22 @@ static TypeTableEntry *ir_analyze_fn_call(IrAnalyze *ira, IrInstructionCall *cal
// for extern functions, the var args argument is not counted.
// for zig functions, it is.
size_t var_args_1_or_0;
- if (fn_type_id->cc == CallingConventionUnspecified) {
- var_args_1_or_0 = fn_type_id->is_var_args ? 1 : 0;
- } else {
+ if (fn_type_id->cc == CallingConventionC) {
var_args_1_or_0 = 0;
+ } else {
+ var_args_1_or_0 = fn_type_id->is_var_args ? 1 : 0;
}
size_t src_param_count = fn_type_id->param_count - var_args_1_or_0;
size_t call_param_count = call_instruction->arg_count + first_arg_1_or_0;
+ for (size_t i = 0; i < call_instruction->arg_count; i += 1) {
+ ConstExprValue *arg_tuple_value = &call_instruction->args[i]->other->value;
+ if (arg_tuple_value->type->id == TypeTableEntryIdArgTuple) {
+ call_param_count -= 1;
+ call_param_count += arg_tuple_value->data.x_arg_tuple.end_index -
+ arg_tuple_value->data.x_arg_tuple.start_index;
+ }
+ }
AstNode *source_node = call_instruction->base.source_node;
AstNode *fn_proto_node = fn_entry ? fn_entry->proto_node : nullptr;;
@@ -12909,11 +12917,6 @@ static TypeTableEntry *ir_analyze_fn_call(IrAnalyze *ira, IrInstructionCall *cal
buf_sprintf("calling a generic function requires compile-time known function value"));
return ira->codegen->builtin_types.entry_invalid;
}
- if (call_instruction->is_async && fn_type_id->is_var_args) {
- ir_add_error(ira, call_instruction->fn_ref,
- buf_sprintf("compiler bug: TODO: implement var args async functions. https://github.com/ziglang/zig/issues/557"));
- return ira->codegen->builtin_types.entry_invalid;
- }
// Count the arguments of the function type id we are creating
size_t new_fn_arg_count = first_arg_1_or_0;
@@ -12988,18 +12991,18 @@ static TypeTableEntry *ir_analyze_fn_call(IrAnalyze *ira, IrInstructionCall *cal
if (type_is_invalid(arg->value.type))
return ira->codegen->builtin_types.entry_invalid;
- AstNode *param_decl_node = fn_proto_node->data.fn_proto.params.at(next_proto_i);
- assert(param_decl_node->type == NodeTypeParamDecl);
- bool is_var_args = param_decl_node->data.param_decl.is_var_args;
- if (is_var_args && !found_first_var_arg) {
- first_var_arg = inst_fn_type_id.param_count;
- found_first_var_arg = true;
- }
-
if (arg->value.type->id == TypeTableEntryIdArgTuple) {
for (size_t arg_tuple_i = arg->value.data.x_arg_tuple.start_index;
arg_tuple_i < arg->value.data.x_arg_tuple.end_index; arg_tuple_i += 1)
{
+ AstNode *param_decl_node = fn_proto_node->data.fn_proto.params.at(next_proto_i);
+ assert(param_decl_node->type == NodeTypeParamDecl);
+ bool is_var_args = param_decl_node->data.param_decl.is_var_args;
+ if (is_var_args && !found_first_var_arg) {
+ first_var_arg = inst_fn_type_id.param_count;
+ found_first_var_arg = true;
+ }
+
VariableTableEntry *arg_var = get_fn_var_by_index(parent_fn_entry, arg_tuple_i);
if (arg_var == nullptr) {
ir_add_error(ira, arg,
@@ -13020,10 +13023,20 @@ static TypeTableEntry *ir_analyze_fn_call(IrAnalyze *ira, IrInstructionCall *cal
return ira->codegen->builtin_types.entry_invalid;
}
}
- } else if (!ir_analyze_fn_call_generic_arg(ira, fn_proto_node, arg, &impl_fn->child_scope,
- &next_proto_i, generic_id, &inst_fn_type_id, casted_args, impl_fn))
- {
- return ira->codegen->builtin_types.entry_invalid;
+ } else {
+ AstNode *param_decl_node = fn_proto_node->data.fn_proto.params.at(next_proto_i);
+ assert(param_decl_node->type == NodeTypeParamDecl);
+ bool is_var_args = param_decl_node->data.param_decl.is_var_args;
+ if (is_var_args && !found_first_var_arg) {
+ first_var_arg = inst_fn_type_id.param_count;
+ found_first_var_arg = true;
+ }
+
+ if (!ir_analyze_fn_call_generic_arg(ira, fn_proto_node, arg, &impl_fn->child_scope,
+ &next_proto_i, generic_id, &inst_fn_type_id, casted_args, impl_fn))
+ {
+ return ira->codegen->builtin_types.entry_invalid;
+ }
}
}
diff --git a/std/event/loop.zig b/std/event/loop.zig
index 613d4f48a4..646f15875f 100644
--- a/std/event/loop.zig
+++ b/std/event/loop.zig
@@ -360,6 +360,28 @@ pub const Loop = struct {
}
}
+ /// This is equivalent to an async call, except instead of beginning execution of the async function,
+ /// it immediately returns to the caller, and the async function is queued in the event loop. It still
+ /// returns a promise to be awaited.
+ pub fn call(self: *Loop, comptime func: var, args: ...) !(promise->@typeOf(func).ReturnType) {
+ const S = struct {
+ async fn asyncFunc(loop: *Loop, handle: *promise->@typeOf(func).ReturnType, args2: ...) @typeOf(func).ReturnType {
+ suspend |p| {
+ handle.* = p;
+ var my_tick_node = Loop.NextTickNode{
+ .next = undefined,
+ .data = p,
+ };
+ loop.onNextTick(&my_tick_node);
+ }
+ // TODO guaranteed allocation elision for await in same func as async
+ return await (async func(args2) catch unreachable);
+ }
+ };
+ var handle: promise->@typeOf(func).ReturnType = undefined;
+ return async S.asyncFunc(self, &handle, args);
+ }
+
fn workerRun(self: *Loop) void {
start_over: while (true) {
if (@atomicRmw(u8, &self.dispatch_lock, AtomicRmwOp.Xchg, 1, AtomicOrder.SeqCst) == 0) {
@@ -575,3 +597,33 @@ test "std.event.Loop - basic" {
loop.run();
}
+
+test "std.event.Loop - call" {
+ var da = std.heap.DirectAllocator.init();
+ defer da.deinit();
+
+ const allocator = &da.allocator;
+
+ var loop: Loop = undefined;
+ try loop.initMultiThreaded(allocator);
+ defer loop.deinit();
+
+ var did_it = false;
+ const handle = try loop.call(testEventLoop);
+ const handle2 = try loop.call(testEventLoop2, handle, &did_it);
+ defer cancel handle2;
+
+ loop.run();
+
+ assert(did_it);
+}
+
+async fn testEventLoop() i32 {
+ return 1234;
+}
+
+async fn testEventLoop2(h: promise->i32, did_it: *bool) void {
+ const value = await h;
+ assert(value == 1234);
+ did_it.* = true;
+}
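As a closing illustration, a hedged sketch of what lifting the var args restriction permits beyond the test above: an async call (here via `Loop.call`) that forwards several arguments at once. The ir.cpp change makes the parameter counting work by expanding each arg tuple when tallying call parameters, so the `S.asyncFunc(self, &handle, args)` call inside `call` accounts for every forwarded argument individually. The helper `addAndReport` is hypothetical, and the test assumes the same file-level imports as the rest of std/event/loop.zig:

async fn addAndReport(a: i32, b: i32, out: *i32) void {
    out.* = a + b;
}

test "std.event.Loop - call with several var args" {
    var da = std.heap.DirectAllocator.init();
    defer da.deinit();

    var loop: Loop = undefined;
    try loop.initMultiThreaded(&da.allocator);
    defer loop.deinit();

    var result: i32 = 0;
    // loop.call queues addAndReport on the event loop and forwards all three arguments
    const handle = try loop.call(addAndReport, i32(1), i32(2), &result);
    defer cancel handle;

    loop.run();
    assert(result == 3);
}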