@@ -455,17 +455,32 @@ defmodule JsonRemedy.Layer1.ContentCleaning do
455455 nil ->
456456 { input , [ ] }
457457
458- { start_pos , end_pos , comment_text } ->
458+ { start_pos , end_pos , _comment_text } ->
459459 if not comment_inside_string? ( input , start_pos ) do
460- before = String . slice ( input , 0 , start_pos )
461- after_comment = String . slice ( input , end_pos + 2 , String . length ( input ) )
460+ before =
461+ if start_pos > 0 do
462+ binary_part ( input , 0 , start_pos )
463+ else
464+ ""
465+ end
466+
467+ comment_length = end_pos - start_pos + 2
468+ after_start = end_pos + 2
469+
470+ after_comment =
471+ if after_start >= byte_size ( input ) do
472+ ""
473+ else
474+ binary_part ( input , after_start , byte_size ( input ) - after_start )
475+ end
476+
462477 result = before <> after_comment
463478
464479 repair = % {
465480 layer: :content_cleaning ,
466481 action: "removed block comment" ,
467482 position: start_pos ,
468- original: comment_text ,
483+ original: binary_part ( input , start_pos , comment_length ) ,
469484 replacement: ""
470485 }
471486
@@ -531,18 +546,20 @@ defmodule JsonRemedy.Layer1.ContentCleaning do
531546 defp find_matching_block_comment_end ( _input , _pos , 0 ) , do: nil
532547
# Finds the first occurrence of `substring` in `string` at or after the byte
# offset `start_offset`.
#
# Returns the match position as a byte offset from the beginning of `string`,
# or `nil` when `start_offset` is at/past the end of `string` or no match
# exists in the searched region.
#
# Everything here is measured in bytes, never graphemes, so the result can be
# fed straight into `binary_part/3` even when `string` contains multi-byte
# UTF-8 characters.
#
# NOTE(review): `:binary.match/3` rejects an empty pattern, so `substring` is
# expected to be non-empty - confirm against callers.
defp find_substring_position(string, substring, start_offset) do
  remaining = byte_size(string) - start_offset

  if remaining <= 0 do
    nil
  else
    # `scope:` restricts the search to the tail of `string` without building
    # an intermediate slice; reported positions stay relative to the start of
    # `string`, so no offset arithmetic is needed on the result.
    case :binary.match(string, substring, scope: {start_offset, remaining}) do
      {match_start, _length} -> match_start
      :nomatch -> nil
    end
  end
end
548565
@@ -643,11 +660,17 @@ defmodule JsonRemedy.Layer1.ContentCleaning do
643660 end
644661
# Extracts the delimiter-balanced segment of `input` that begins at byte
# offset `start_pos`.
#
# Returns the segment (from `start_pos` through the end of the closing
# delimiter) as a binary, or `nil` when `start_pos` is at/past the end of
# `input` or `find_balanced_end/3` reports no balanced end.
#
# All slicing is done with byte offsets (`byte_size/1`, `binary_part/3`) so it
# stays consistent with the position returned by `find_balanced_end/3`.
# NOTE(review): assumes `find_balanced_end/3` returns the byte offset at which
# the closing delimiter starts within the given binary - confirm against its
# entry clause.
defp extract_balanced_content(input, start_pos, open_char, close_char) do
  total_size = byte_size(input)

  if start_pos >= total_size do
    nil
  else
    substring = binary_part(input, start_pos, total_size - start_pos)

    case find_balanced_end(substring, open_char, close_char) do
      nil ->
        nil

      end_pos ->
        # Include the whole closing delimiter, whatever its byte width.
        binary_part(substring, 0, end_pos + byte_size(close_char))
    end
  end
end
653676
@@ -667,22 +690,25 @@ defmodule JsonRemedy.Layer1.ContentCleaning do
667690
# Opening delimiter outside a string: go one nesting level deeper.
# `pos` is a byte offset, so it advances by the delimiter's byte width (the
# guard guarantees the matched character equals `open`).
defp find_balanced_end(<<char::utf8, rest::binary>>, open, close, pos, balance, false)
     when <<char::utf8>> == open do
  find_balanced_end(rest, open, close, pos + byte_size(open), balance + 1, false)
end

# Closing delimiter outside a string: leave one nesting level. When the
# balance reaches zero, `pos` (the byte offset where this closing delimiter
# starts) is returned; otherwise scanning continues after the delimiter.
defp find_balanced_end(<<char::utf8, rest::binary>>, open, close, pos, balance, false)
     when <<char::utf8>> == close do
  new_balance = balance - 1

  if new_balance == 0 do
    pos
  else
    find_balanced_end(rest, open, close, pos + byte_size(close), new_balance, false)
  end
end

# Any other valid UTF-8 character: skip it, advancing `pos` by the number of
# bytes the character occupies so the offset stays byte-accurate.
defp find_balanced_end(<<char::utf8, rest::binary>>, open, close, pos, balance, in_string) do
  char_size = byte_size(<<char::utf8>>)
  find_balanced_end(rest, open, close, pos + char_size, balance, in_string)
end

# A byte that does not start a valid UTF-8 sequence: skip that single byte so
# malformed input keeps being scanned instead of matching no clause.
# NOTE(review): assumes the end-of-input and string-handling clauses are
# defined above these clauses - confirm when applying.
defp find_balanced_end(<<_byte, rest::binary>>, open, close, pos, balance, in_string) do
  find_balanced_end(rest, open, close, pos + 1, balance, in_string)
end
687713
688714 # Check if a string is valid JSON (not just starts with valid char)
@@ -715,12 +741,13 @@ defmodule JsonRemedy.Layer1.ContentCleaning do
715741 # Find where the JSON structure starts
716742 json_start =
717743 case String . split ( input , open_char , parts: 2 ) do
718- [ prefix , _ ] -> String . length ( prefix )
744+ [ prefix , _ ] -> byte_size ( prefix )
719745 _ -> 0
720746 end
721747
722- # Extract from the JSON start to find the balanced end
723- substring_from_json = String . slice ( input , json_start , String . length ( input ) )
748+ total_size = byte_size ( input )
749+ substring_size = total_size - json_start
750+ substring_from_json = binary_part ( input , json_start , substring_size )
724751
725752 case find_balanced_end ( substring_from_json , open_char , close_char ) do
726753 nil ->
@@ -732,7 +759,15 @@ defmodule JsonRemedy.Layer1.ContentCleaning do
732759 json_end = json_start + end_pos + 1
733760
734761 # Check if there's non-whitespace content after JSON ends
735- after_json = String . slice ( input , json_end , String . length ( input ) )
762+ after_size = max ( total_size - json_end , 0 )
763+
764+ after_json =
765+ if after_size > 0 do
766+ binary_part ( input , json_end , after_size )
767+ else
768+ ""
769+ end
770+
736771 after_json_trimmed = String . trim ( after_json )
737772
738773 cond do
@@ -747,7 +782,7 @@ defmodule JsonRemedy.Layer1.ContentCleaning do
747782
748783 true ->
749784 # Extract only the JSON portion, remove wrapper text
750- json_content = String . slice ( input , 0 , json_end )
785+ json_content = binary_part ( input , 0 , json_end )
751786
752787 repair = % {
753788 layer: :content_cleaning ,
0 commit comments