fix: use col_offset for CJK word wrap boundary detection

kskang · kskang · commit 02572b345ddd · 2026-01-12T08:52:23.000+09:00
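Add a col_offset field to the WrapBreak struct to track the display column
of each wrap break. CJK characters have a display width of 2, so char_offset
(character count) differs from col_offset (display column) as soon as a line
contains wide characters. This fix ensures word wrap boundaries are
calculated using display columns rather than character counts. The fallback
drawing paths in buffer.zig likewise query utf8.getWidthAt for the glyph
width instead of assuming a width of 1.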
diff --git a/packages/core/src/zig/buffer.zig b/packages/core/src/zig/buffer.zig
@@ -865,7 +865,7 @@ pub const OptimizedBuffer = struct {
             } else {
                 if (byte_offset >= text.len) break;
                 grapheme_bytes = text[byte_offset .. byte_offset + 1];
-                g_width = 1;
+                g_width = @intCast(utf8.getWidthAt(text, byte_offset, tab_width, self.width_method));
                 byte_offset += 1;
             }
 
@@ -1195,7 +1195,7 @@ pub const OptimizedBuffer = struct {
                         const cp_len = std.unicode.utf8ByteSequenceLength(chunk_bytes[byte_offset]) catch 1;
                         const next_byte_offset = @min(byte_offset + cp_len, chunk_bytes.len);
                         grapheme_bytes = chunk_bytes[byte_offset..next_byte_offset];
-                        g_width = 1; // Assuming width 1 for non-special characters (ASCII mostly)
+                        g_width = @intCast(utf8.getWidthAt(chunk_bytes, byte_offset, text_buffer.tab_width, text_buffer.width_method));
                         byte_offset = next_byte_offset;
                     }
 
diff --git a/packages/core/src/zig/edit-buffer.zig b/packages/core/src/zig/edit-buffer.zig
@@ -677,7 +677,7 @@ pub const EditBuffer = struct {
                     const local_cursor_col = if (cursor.col > cols_before) cursor.col - cols_before else 0;
 
                     for (wrap_offsets) |wrap_break| {
-                        const break_col = @as(u32, wrap_break.char_offset);
+                        const break_col = @as(u32, wrap_break.col_offset);
                         // If we've passed the cursor chunk, any break is valid
                         // If we're in the cursor chunk, break must be after cursor position
                         if (passed_cursor or break_col > local_cursor_col) {
@@ -728,7 +728,7 @@ pub const EditBuffer = struct {
                 };
 
                 for (wrap_offsets) |wrap_break| {
-                    const break_col = cols_before + @as(u32, wrap_break.char_offset) + 1;
+                    const break_col = cols_before + @as(u32, wrap_break.col_offset) + 1;
                     if (break_col < cursor.col) {
                         last_boundary = break_col;
                     }
diff --git a/packages/core/src/zig/tests/text-buffer-drawing_test.zig b/packages/core/src/zig/tests/text-buffer-drawing_test.zig
@@ -2766,6 +2766,32 @@ test "drawTextBuffer - word wrap CJK text preserves UTF-8 boundaries" {
     }
 }
 
+test "drawTextBuffer - word wrap CJK text with space boundary" {
+    const pool = gp.initGlobalPool(std.testing.allocator);
+    defer gp.deinitGlobalPool();
+
+    var tb = try TextBuffer.init(std.testing.allocator, pool, .wcwidth);
+    defer tb.deinit();
+
+    var view = try TextBufferView.init(std.testing.allocator, tb);
+    defer view.deinit();
+
+    // Text: "한글 English 中文 日本語"
+    // Layout: "한글" (4 cols) + " " (1 col) + "English" (7 cols) + " " (1 col) + "中文" (4 cols) + " " (1 col) + "日本語" (6 cols) = 24 cols
+    try tb.setText("한글 English 中文 日本語");
+
+    view.setWrapMode(.word);
+    view.setWrapWidth(14);
+    view.updateVirtualLines();
+
+    const vlines = view.getVirtualLines();
+
+    try std.testing.expectEqual(@as(usize, 2), vlines.len);
+    if (vlines.len >= 2) {
+        try std.testing.expectEqual(@as(u32, 11), vlines[1].width);
+    }
+}
+
 test "drawTextBuffer - wcwidth mode does not render ZWJ or VS16 as characters" {
     const pool = gp.initGlobalPool(std.testing.allocator);
     defer gp.deinitGlobalPool();
diff --git a/packages/core/src/zig/text-buffer-view.zig b/packages/core/src/zig/text-buffer-view.zig
@@ -258,7 +258,7 @@ pub const UnifiedTextBufferView = struct {
         var first_boundary: ?u32 = null;
 
         for (wrap_offsets) |wrap_break| {
-            const offset = @as(u32, wrap_break.char_offset);
+            const offset = @as(u32, wrap_break.col_offset);
             if (offset < char_offset_in_chunk) continue;
 
             const local_offset = offset - char_offset_in_chunk;
@@ -937,7 +937,7 @@ pub const UnifiedTextBufferView = struct {
                             var saved_wrap_idx = wrap_idx;
                             while (wrap_idx < wrap_offsets.len) : (wrap_idx += 1) {
                                 const wrap_break = wrap_offsets[wrap_idx];
-                                const offset = @as(u32, wrap_break.char_offset);
+                                const offset = @as(u32, wrap_break.col_offset);
                                 if (offset < char_offset) continue;
                                 const width_to_boundary = offset - char_offset + 1;
                                 if (width_to_boundary > remaining_on_line or width_to_boundary > remaining_in_chunk) break;
diff --git a/packages/core/src/zig/utf8.zig b/packages/core/src/zig/utf8.zig
@@ -102,6 +102,7 @@ pub const TabStopResult = struct {
 pub const WrapBreak = struct {
     byte_offset: u32,
     char_offset: u32,
+    col_offset: u32,
 };
 
 pub const WrapBreakResult = struct {
@@ -190,6 +191,7 @@ pub fn findWrapBreaks(text: []const u8, result: *WrapBreakResult, width_method:
 
     var pos: usize = 0;
     var char_offset: u32 = 0;
+    var col_offset: u32 = 0;
     var prev_cp: ?u21 = null; // Track previous codepoint for grapheme detection
     var break_state: uucode.grapheme.BreakState = .default;
 
@@ -242,12 +244,14 @@ pub fn findWrapBreaks(text: []const u8, result: *WrapBreakResult, width_method:
                 try result.breaks.append(result.allocator, .{
                     .byte_offset = @intCast(pos + bit_pos),
                     .char_offset = char_offset + @as(u32, @intCast(bit_pos)),
+                    .col_offset = col_offset + @as(u32, @intCast(bit_pos)),
                 });
                 bitmask &= bitmask - 1;
             }
 
             pos += vector_len;
             char_offset += vector_len;
+            col_offset += vector_len;
             prev_cp = text[pos - 1]; // Last ASCII char
             continue;
         }
@@ -270,11 +274,13 @@ pub fn findWrapBreaks(text: []const u8, result: *WrapBreakResult, width_method:
                     try result.breaks.append(result.allocator, .{
                         .byte_offset = @intCast(pos + i),
                         .char_offset = char_offset,
+                        .col_offset = col_offset,
                     });
                 }
                 i += 1;
                 if (is_break) {
                     char_offset += 1;
+                    col_offset += 1;
                 }
                 prev_cp = curr_cp;
             } else {
@@ -292,11 +298,13 @@ pub fn findWrapBreaks(text: []const u8, result: *WrapBreakResult, width_method:
                     try result.breaks.append(result.allocator, .{
                         .byte_offset = @intCast(pos + i),
                         .char_offset = char_offset,
+                        .col_offset = col_offset,
                     });
                 }
                 i += dec.len;
                 if (is_break) {
                     char_offset += 1;
+                    col_offset += eastAsianWidth(dec.cp);
                 }
                 prev_cp = dec.cp;
             }
@@ -319,11 +327,13 @@ pub fn findWrapBreaks(text: []const u8, result: *WrapBreakResult, width_method:
                 try result.breaks.append(result.allocator, .{
                     .byte_offset = @intCast(i),
                     .char_offset = char_offset,
+                    .col_offset = col_offset,
                 });
             }
             i += 1;
             if (is_break) {
                 char_offset += 1;
+                col_offset += 1;
             }
             prev_cp = curr_cp;
         } else {
@@ -339,11 +349,13 @@ pub fn findWrapBreaks(text: []const u8, result: *WrapBreakResult, width_method:
                 try result.breaks.append(result.allocator, .{
                     .byte_offset = @intCast(i),
                     .char_offset = char_offset,
+                    .col_offset = col_offset,
                 });
             }
             i += dec.len;
             if (is_break) {
                 char_offset += 1;
+                col_offset += eastAsianWidth(dec.cp);
             }
             prev_cp = dec.cp;
         }