diff --git a/lib/std/zig.zig b/lib/std/zig.zig
index 057b79f11c..b070fbdcd5 100644
--- a/lib/std/zig.zig
+++ b/lib/std/zig.zig
@@ -43,12 +43,19 @@ pub fn findLineColumn(source: []const u8, byte_offset: usize) struct { line: usi
     return .{ .line = line, .column = column };
 }
 
-pub fn lineDelta(source: []const u8, start: usize, end: usize) usize {
-    var line: usize = 0;
-    for (source[start..end]) |byte| switch (byte) {
-        '\n' => line += 1,
-        else => continue,
-    };
+pub fn lineDelta(source: []const u8, start: usize, end: usize) isize { // signed newline count from `start` to `end`
+    var line: isize = 0;
+    if (end >= start) { // forward scan: result is zero or positive
+        for (source[start..end]) |byte| switch (byte) {
+            '\n' => line += 1,
+            else => continue,
+        };
+    } else { // `end` precedes `start`: scan the bytes in between and count downwards
+        for (source[end..start]) |byte| switch (byte) {
+            '\n' => line -= 1,
+            else => continue,
+        };
+    }
     return line;
 }
 
diff --git a/src-self-hosted/Module.zig b/src-self-hosted/Module.zig
index e84cfe5c14..d6cc35b2b6 100644
--- a/src-self-hosted/Module.zig
+++ b/src-self-hosted/Module.zig
@@ -1484,6 +1484,9 @@ fn getAstTree(self: *Module, root_scope: *Scope.File) !*ast.Tree {
 }
 
 fn analyzeRootSrcFile(self: *Module, root_scope: *Scope.File) !void {
+    const tracy = trace(@src());
+    defer tracy.end();
+
     // We may be analyzing it for the first time, or this may be
     // an incremental update. This code handles both cases.
     const tree = try self.getAstTree(root_scope);
diff --git a/src-self-hosted/codegen.zig b/src-self-hosted/codegen.zig
index d45226b4cb..06a76b8f3c 100644
--- a/src-self-hosted/codegen.zig
+++ b/src-self-hosted/codegen.zig
@@ -225,6 +225,8 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
         prev_di_src: usize,
         /// Relative to the beginning of `code`.
         prev_di_pc: usize,
+        /// The is_stmt register value, used to avoid redundant LNS_negate_stmt ops.
+        prev_di_is_stmt: bool,
         /// Used to find newlines and count line deltas.
         source: []const u8,
         /// Byte offset within the source file of the ending curly.
@@ -420,6 +422,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                 .stack_align = undefined,
                 .prev_di_pc = 0,
                 .prev_di_src = lbrace_src,
+                .prev_di_is_stmt = true,
                 .rbrace_src = rbrace_src,
                 .source = tree.source,
             };
@@ -523,7 +526,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                 },
             }
             // Drop them off at the rbrace.
-            try self.dbgAdvancePCAndLine(self.rbrace_src);
+            try self.dbgAdvancePCAndLine(self.rbrace_src, true);
         }
 
         fn genBody(self: *Self, body: ir.Body) InnerError!void {
@@ -542,15 +545,15 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
 
         fn dbgSetPrologueEnd(self: *Self) InnerError!void {
             try self.dbg_line.append(DW.LNS_set_prologue_end);
-            try self.dbgAdvancePCAndLine(self.prev_di_src);
+            try self.dbgAdvancePCAndLine(self.prev_di_src, true); // reuses the previous source offset; is_stmt = true
         }
 
         fn dbgSetEpilogueBegin(self: *Self) InnerError!void {
             try self.dbg_line.append(DW.LNS_set_epilogue_begin);
-            try self.dbgAdvancePCAndLine(self.prev_di_src);
+            try self.dbgAdvancePCAndLine(self.prev_di_src, true); // reuses the previous source offset; is_stmt = true
         }
 
-        fn dbgAdvancePCAndLine(self: *Self, src: usize) InnerError!void {
+        fn dbgAdvancePCAndLine(self: *Self, src: usize, is_stmt: bool) InnerError!void {
             // TODO Look into improving the performance here by adding a token-index-to-line
             // lookup table, and changing ir.Inst from storing byte offset to token. Currently
             // this involves scanning over the source code for newlines
@@ -562,12 +565,16 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
             // TODO Look into using the DWARF special opcodes to compress this data. It lets you emit
             // single-byte opcodes that add different numbers to both the PC and the line number
             // at the same time.
-            try self.dbg_line.ensureCapacity(self.dbg_line.items.len + 11);
+            try self.dbg_line.ensureCapacity(self.dbg_line.items.len + 12);
+            if (self.prev_di_is_stmt != is_stmt) {
+                self.dbg_line.appendAssumeCapacity(DW.LNS_negate_stmt);
+                self.prev_di_is_stmt = is_stmt;
+            }
             self.dbg_line.appendAssumeCapacity(DW.LNS_advance_pc);
             leb128.writeULEB128(self.dbg_line.writer(), delta_pc) catch unreachable;
             if (delta_line != 0) {
                 self.dbg_line.appendAssumeCapacity(DW.LNS_advance_line);
-                leb128.writeULEB128(self.dbg_line.writer(), delta_line) catch unreachable;
+                leb128.writeILEB128(self.dbg_line.writer(), delta_line) catch unreachable;
             }
             self.dbg_line.appendAssumeCapacity(DW.LNS_copy);
         }
@@ -1172,7 +1179,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
         }
 
         fn genDbgStmt(self: *Self, inst: *ir.Inst.NoOp) !MCValue {
-            try self.dbgAdvancePCAndLine(inst.base.src);
+            try self.dbgAdvancePCAndLine(inst.base.src, true); // advance to this instruction's source offset; is_stmt = true
             return MCValue.none;
         }
 
diff --git a/src-self-hosted/link.zig b/src-self-hosted/link.zig
index 353baf72d1..bd572019c0 100644
--- a/src-self-hosted/link.zig
+++ b/src-self-hosted/link.zig
@@ -1263,7 +1263,7 @@ pub const File = struct {
                     @panic("TODO: handle .debug_line header exceeding its padding");
                 }
                 const jmp_amt = dbg_line_prg_off - di_buf.items.len;
-                try self.pwriteWithNops(di_buf.items, jmp_amt, debug_line_sect.sh_offset);
+                try self.pwriteWithNops(0, di_buf.items, jmp_amt, debug_line_sect.sh_offset);
                 self.debug_line_header_dirty = false;
             }
 
@@ -1958,12 +1958,13 @@ pub const File = struct {
                     self.shdr_table_dirty = true; // TODO look into making only the one section dirty
                     self.debug_line_header_dirty = true;
                 }
-                const padding_size: u32 = if (src_fn.next) |next| next.off - (src_fn.off + src_fn.len) else 0;
+                const prev_padding_size: u32 = if (src_fn.prev) |prev| src_fn.off - (prev.off + prev.len) else 0;
+                const next_padding_size: u32 = if (src_fn.next) |next| next.off - (src_fn.off + src_fn.len) else 0;
 
                 // We only have support for one compilation unit so far, so the offsets are directly
                 // from the .debug_line section.
                 const file_pos = debug_line_sect.sh_offset + src_fn.off;
-                try self.pwriteWithNops(dbg_line_buffer.items, padding_size, file_pos);
+                try self.pwriteWithNops(prev_padding_size, dbg_line_buffer.items, next_padding_size, file_pos);
             }
 
             // Since we updated the vaddr and the size, each corresponding export symbol also needs to be updated.
@@ -2282,44 +2283,82 @@ pub const File = struct {
 
         }
 
-        /// Writes to the file a buffer, followed by the specified number of bytes of NOPs.
-        /// Asserts `padding_size >= 2` and less than 126,976 bytes (if this limit is ever
-        /// reached, this function can be improved to make more than one pwritev call).
-        fn pwriteWithNops(self: *Elf, buf: []const u8, padding_size: usize, offset: usize) !void {
+        /// Writes `buf` to the file at `offset`, preceded by `prev_padding_size` and followed by
+        /// `next_padding_size` bytes of NOPs. Each padding size must be 0 or at least 2 (a size of
+        /// 1 underflows below), and `buf` plus every NOP run must fit in `vecs`; neither is checked
+        /// here. If the `vecs` limit is ever reached, make more than one pwritev call or increase
+        /// the length of `vecs`.
+        fn pwriteWithNops(
+            self: *Elf,
+            prev_padding_size: usize,
+            buf: []const u8,
+            next_padding_size: usize,
+            offset: usize,
+        ) !void {
             const page_of_nops = [1]u8{DW.LNS_negate_stmt} ** 4096;
             const three_byte_nop = [3]u8{DW.LNS_advance_pc, 0b1000_0000, 0};
             var vecs: [32]std.os.iovec_const = undefined;
             var vec_index: usize = 0;
+            {
+                var padding_left = prev_padding_size;
+                if (padding_left % 2 != 0) { // odd size: one 3-byte NOP makes the remainder even
+                    vecs[vec_index] = .{
+                        .iov_base = &three_byte_nop,
+                        .iov_len = three_byte_nop.len,
+                    };
+                    vec_index += 1;
+                    padding_left -= three_byte_nop.len;
+                }
+                while (padding_left > page_of_nops.len) { // whole pages of 1-byte NOPs
+                    vecs[vec_index] = .{
+                        .iov_base = &page_of_nops,
+                        .iov_len = page_of_nops.len,
+                    };
+                    vec_index += 1;
+                    padding_left -= page_of_nops.len;
+                }
+                if (padding_left > 0) { // remaining bytes, at most one page
+                    vecs[vec_index] = .{
+                        .iov_base = &page_of_nops,
+                        .iov_len = padding_left,
+                    };
+                    vec_index += 1;
+                }
+            }
+
             vecs[vec_index] = .{
                 .iov_base = buf.ptr,
                 .iov_len = buf.len,
             };
             vec_index += 1;
-            var padding_left = padding_size;
-            if (padding_left % 2 != 0) {
-                vecs[vec_index] = .{
-                    .iov_base = &three_byte_nop,
-                    .iov_len = three_byte_nop.len,
-                };
-                vec_index += 1;
-                padding_left -= three_byte_nop.len;
+
+            {
+                var padding_left = next_padding_size; // same three-step layout as the leading padding
+                if (padding_left % 2 != 0) {
+                    vecs[vec_index] = .{
+                        .iov_base = &three_byte_nop,
+                        .iov_len = three_byte_nop.len,
+                    };
+                    vec_index += 1;
+                    padding_left -= three_byte_nop.len;
+                }
+                while (padding_left > page_of_nops.len) {
+                    vecs[vec_index] = .{
+                        .iov_base = &page_of_nops,
+                        .iov_len = page_of_nops.len,
+                    };
+                    vec_index += 1;
+                    padding_left -= page_of_nops.len;
+                }
+                if (padding_left > 0) {
+                    vecs[vec_index] = .{
+                        .iov_base = &page_of_nops,
+                        .iov_len = padding_left,
+                    };
+                    vec_index += 1;
+                }
             }
-            while (padding_left > page_of_nops.len) {
-                vecs[vec_index] = .{
-                    .iov_base = &page_of_nops,
-                    .iov_len = page_of_nops.len,
-                };
-                vec_index += 1;
-                padding_left -= page_of_nops.len;
-            }
-            if (padding_left > 0) {
-                vecs[vec_index] = .{
-                    .iov_base = &page_of_nops,
-                    .iov_len = padding_left,
-                };
-                vec_index += 1;
-            }
-            try self.file.?.pwritevAll(vecs[0..vec_index], offset);
+            try self.file.?.pwritevAll(vecs[0..vec_index], offset - prev_padding_size); // start early so `buf` lands at `offset`
         }
 
     };