Skip to content

Commit 02572b3

Browse files
committed
fix: use col_offset for CJK word wrap boundary detection
Add a col_offset field to the WrapBreak struct to track the display column of each wrap break. CJK characters have a display width of 2, so char_offset (a character count) differs from col_offset (a display column) as soon as a wide character precedes the break. This fix ensures that word-wrap boundaries are calculated using display columns rather than character counts.
1 parent 1ef2fae commit 02572b3

5 files changed

Lines changed: 44 additions & 6 deletions

File tree

packages/core/src/zig/buffer.zig

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -865,7 +865,7 @@ pub const OptimizedBuffer = struct {
865865
} else {
866866
if (byte_offset >= text.len) break;
867867
grapheme_bytes = text[byte_offset .. byte_offset + 1];
868-
g_width = 1;
868+
g_width = @intCast(utf8.getWidthAt(text, byte_offset, tab_width, self.width_method));
869869
byte_offset += 1;
870870
}
871871

@@ -1195,7 +1195,7 @@ pub const OptimizedBuffer = struct {
11951195
const cp_len = std.unicode.utf8ByteSequenceLength(chunk_bytes[byte_offset]) catch 1;
11961196
const next_byte_offset = @min(byte_offset + cp_len, chunk_bytes.len);
11971197
grapheme_bytes = chunk_bytes[byte_offset..next_byte_offset];
1198-
g_width = 1; // Assuming width 1 for non-special characters (ASCII mostly)
1198+
g_width = @intCast(utf8.getWidthAt(chunk_bytes, byte_offset, text_buffer.tab_width, text_buffer.width_method));
11991199
byte_offset = next_byte_offset;
12001200
}
12011201

packages/core/src/zig/edit-buffer.zig

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -677,7 +677,7 @@ pub const EditBuffer = struct {
677677
const local_cursor_col = if (cursor.col > cols_before) cursor.col - cols_before else 0;
678678

679679
for (wrap_offsets) |wrap_break| {
680-
const break_col = @as(u32, wrap_break.char_offset);
680+
const break_col = @as(u32, wrap_break.col_offset);
681681
// If we've passed the cursor chunk, any break is valid
682682
// If we're in the cursor chunk, break must be after cursor position
683683
if (passed_cursor or break_col > local_cursor_col) {
@@ -728,7 +728,7 @@ pub const EditBuffer = struct {
728728
};
729729

730730
for (wrap_offsets) |wrap_break| {
731-
const break_col = cols_before + @as(u32, wrap_break.char_offset) + 1;
731+
const break_col = cols_before + @as(u32, wrap_break.col_offset) + 1;
732732
if (break_col < cursor.col) {
733733
last_boundary = break_col;
734734
}

packages/core/src/zig/tests/text-buffer-drawing_test.zig

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2766,6 +2766,32 @@ test "drawTextBuffer - word wrap CJK text preserves UTF-8 boundaries" {
27662766
}
27672767
}
27682768

2769+
test "drawTextBuffer - word wrap CJK text with space boundary" {
2770+
const pool = gp.initGlobalPool(std.testing.allocator);
2771+
defer gp.deinitGlobalPool();
2772+
2773+
var tb = try TextBuffer.init(std.testing.allocator, pool, .wcwidth);
2774+
defer tb.deinit();
2775+
2776+
var view = try TextBufferView.init(std.testing.allocator, tb);
2777+
defer view.deinit();
2778+
2779+
// Text: "한글 English 中文 日本語"
2780+
// Layout: "한글" (4 cols) + " " (1 col) + "English" (7 cols) + " " (1 col) + "中文" (4 cols) + " " (1 col) + "日本語" (6 cols) = 24 cols
2781+
try tb.setText("한글 English 中文 日本語");
2782+
2783+
view.setWrapMode(.word);
2784+
view.setWrapWidth(14);
2785+
view.updateVirtualLines();
2786+
2787+
const vlines = view.getVirtualLines();
2788+
2789+
try std.testing.expectEqual(@as(usize, 2), vlines.len);
2790+
if (vlines.len >= 2) {
2791+
try std.testing.expectEqual(@as(u32, 11), vlines[1].width);
2792+
}
2793+
}
2794+
27692795
test "drawTextBuffer - wcwidth mode does not render ZWJ or VS16 as characters" {
27702796
const pool = gp.initGlobalPool(std.testing.allocator);
27712797
defer gp.deinitGlobalPool();

packages/core/src/zig/text-buffer-view.zig

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -258,7 +258,7 @@ pub const UnifiedTextBufferView = struct {
258258
var first_boundary: ?u32 = null;
259259

260260
for (wrap_offsets) |wrap_break| {
261-
const offset = @as(u32, wrap_break.char_offset);
261+
const offset = @as(u32, wrap_break.col_offset);
262262
if (offset < char_offset_in_chunk) continue;
263263

264264
const local_offset = offset - char_offset_in_chunk;
@@ -937,7 +937,7 @@ pub const UnifiedTextBufferView = struct {
937937
var saved_wrap_idx = wrap_idx;
938938
while (wrap_idx < wrap_offsets.len) : (wrap_idx += 1) {
939939
const wrap_break = wrap_offsets[wrap_idx];
940-
const offset = @as(u32, wrap_break.char_offset);
940+
const offset = @as(u32, wrap_break.col_offset);
941941
if (offset < char_offset) continue;
942942
const width_to_boundary = offset - char_offset + 1;
943943
if (width_to_boundary > remaining_on_line or width_to_boundary > remaining_in_chunk) break;

packages/core/src/zig/utf8.zig

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ pub const TabStopResult = struct {
102102
pub const WrapBreak = struct {
103103
byte_offset: u32,
104104
char_offset: u32,
105+
col_offset: u32,
105106
};
106107

107108
pub const WrapBreakResult = struct {
@@ -190,6 +191,7 @@ pub fn findWrapBreaks(text: []const u8, result: *WrapBreakResult, width_method:
190191

191192
var pos: usize = 0;
192193
var char_offset: u32 = 0;
194+
var col_offset: u32 = 0;
193195
var prev_cp: ?u21 = null; // Track previous codepoint for grapheme detection
194196
var break_state: uucode.grapheme.BreakState = .default;
195197

@@ -242,12 +244,14 @@ pub fn findWrapBreaks(text: []const u8, result: *WrapBreakResult, width_method:
242244
try result.breaks.append(result.allocator, .{
243245
.byte_offset = @intCast(pos + bit_pos),
244246
.char_offset = char_offset + @as(u32, @intCast(bit_pos)),
247+
.col_offset = col_offset + @as(u32, @intCast(bit_pos)),
245248
});
246249
bitmask &= bitmask - 1;
247250
}
248251

249252
pos += vector_len;
250253
char_offset += vector_len;
254+
col_offset += vector_len;
251255
prev_cp = text[pos - 1]; // Last ASCII char
252256
continue;
253257
}
@@ -270,11 +274,13 @@ pub fn findWrapBreaks(text: []const u8, result: *WrapBreakResult, width_method:
270274
try result.breaks.append(result.allocator, .{
271275
.byte_offset = @intCast(pos + i),
272276
.char_offset = char_offset,
277+
.col_offset = col_offset,
273278
});
274279
}
275280
i += 1;
276281
if (is_break) {
277282
char_offset += 1;
283+
col_offset += 1;
278284
}
279285
prev_cp = curr_cp;
280286
} else {
@@ -292,11 +298,13 @@ pub fn findWrapBreaks(text: []const u8, result: *WrapBreakResult, width_method:
292298
try result.breaks.append(result.allocator, .{
293299
.byte_offset = @intCast(pos + i),
294300
.char_offset = char_offset,
301+
.col_offset = col_offset,
295302
});
296303
}
297304
i += dec.len;
298305
if (is_break) {
299306
char_offset += 1;
307+
col_offset += eastAsianWidth(dec.cp);
300308
}
301309
prev_cp = dec.cp;
302310
}
@@ -319,11 +327,13 @@ pub fn findWrapBreaks(text: []const u8, result: *WrapBreakResult, width_method:
319327
try result.breaks.append(result.allocator, .{
320328
.byte_offset = @intCast(i),
321329
.char_offset = char_offset,
330+
.col_offset = col_offset,
322331
});
323332
}
324333
i += 1;
325334
if (is_break) {
326335
char_offset += 1;
336+
col_offset += 1;
327337
}
328338
prev_cp = curr_cp;
329339
} else {
@@ -339,11 +349,13 @@ pub fn findWrapBreaks(text: []const u8, result: *WrapBreakResult, width_method:
339349
try result.breaks.append(result.allocator, .{
340350
.byte_offset = @intCast(i),
341351
.char_offset = char_offset,
352+
.col_offset = col_offset,
342353
});
343354
}
344355
i += dec.len;
345356
if (is_break) {
346357
char_offset += 1;
358+
col_offset += eastAsianWidth(dec.cp);
347359
}
348360
prev_cp = dec.cp;
349361
}

0 commit comments

Comments
 (0)