Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make invalid Unicode data raise when encoding through Oj::Rails::Encoder #912

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 34 additions & 14 deletions ext/oj/dump.c
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,18 @@ inline static long rails_xss_friendly_size(const uint8_t *str, size_t len) {
}

inline static size_t rails_friendly_size(const uint8_t *str, size_t len) {
return calculate_string_size(str, len, rails_friendly_chars);
long size = 0;
size_t i = len;
uint8_t hi = 0;

for (; 0 < i; str++, i--) {
size += rails_friendly_chars[*str];
hi |= *str & 0x80;
}
if (0 == hi) {
return size - len * (size_t)'0';
}
return -(size - len * (size_t)'0');
}

const char *oj_nan_str(VALUE obj, int opt, int mode, bool plus, int *lenp) {
Expand Down Expand Up @@ -750,8 +761,9 @@ void oj_dump_raw_json(VALUE obj, int depth, Out out) {
void oj_dump_cstr(const char *str, size_t cnt, bool is_sym, bool escape1, Out out) {
size_t size;
char *cmap;
const char *orig = str;
bool has_hi = false;
const char *orig = str;
bool has_hi = false;
bool do_unicode_validation = false;

switch (out->opts->escape_mode) {
case NLEsc:
Expand All @@ -772,8 +784,9 @@ void oj_dump_cstr(const char *str, size_t cnt, bool is_sym, bool escape1, Out ou
size = xss_friendly_size((uint8_t *)str, cnt);
break;
case JXEsc:
cmap = hixss_friendly_chars;
size = hixss_friendly_size((uint8_t *)str, cnt);
cmap = hixss_friendly_chars;
size = hixss_friendly_size((uint8_t *)str, cnt);
do_unicode_validation = true;
break;
case RailsXEsc: {
long sz;
Expand All @@ -786,12 +799,22 @@ void oj_dump_cstr(const char *str, size_t cnt, bool is_sym, bool escape1, Out ou
} else {
size = (size_t)sz;
}
do_unicode_validation = true;
break;
}
case RailsEsc:
case RailsEsc: {
long sz;
cmap = rails_friendly_chars;
size = rails_friendly_size((uint8_t *)str, cnt);
sz = rails_friendly_size((uint8_t *)str, cnt);
if (sz < 0) {
has_hi = true;
size = (size_t)-sz;
} else {
size = (size_t)sz;
}
ohler55 marked this conversation as resolved.
Show resolved Hide resolved
do_unicode_validation = true;
break;
}
case JSONEsc:
default: cmap = hibit_friendly_chars; size = hibit_friendly_size((uint8_t *)str, cnt);
}
Expand Down Expand Up @@ -822,7 +845,7 @@ void oj_dump_cstr(const char *str, size_t cnt, bool is_sym, bool escape1, Out ou
for (; str < end; str++) {
switch (cmap[(uint8_t)*str]) {
case '1':
if ((JXEsc == out->opts->escape_mode || RailsXEsc == out->opts->escape_mode) && check_start <= str) {
if (do_unicode_validation && check_start <= str) {
if (0 != (0x80 & (uint8_t)*str)) {
if (0xC0 == (0xC0 & (uint8_t)*str)) {
check_start = check_unicode(str, end, orig);
Expand All @@ -846,8 +869,7 @@ void oj_dump_cstr(const char *str, size_t cnt, bool is_sym, bool escape1, Out ou
}
break;
case '3': // Unicode
if (0xe2 == (uint8_t)*str && (JXEsc == out->opts->escape_mode || RailsXEsc == out->opts->escape_mode) &&
2 <= end - str) {
if (0xe2 == (uint8_t)*str && do_unicode_validation && 2 <= end - str) {
if (0x80 == (uint8_t)str[1] && (0xa8 == (uint8_t)str[2] || 0xa9 == (uint8_t)str[2])) {
str = dump_unicode(str, end, out, orig);
} else {
Expand All @@ -866,8 +888,7 @@ void oj_dump_cstr(const char *str, size_t cnt, bool is_sym, bool escape1, Out ou
APPEND_CHARS(out->cur, "\\u00", 4);
dump_hex((uint8_t)*str, out);
} else {
if (0xe2 == (uint8_t)*str &&
(JXEsc == out->opts->escape_mode || RailsXEsc == out->opts->escape_mode) && 2 <= end - str) {
if (0xe2 == (uint8_t)*str && do_unicode_validation && 2 <= end - str) {
if (0x80 == (uint8_t)str[1] && (0xa8 == (uint8_t)str[2] || 0xa9 == (uint8_t)str[2])) {
str = dump_unicode(str, end, out, orig);
} else {
Expand All @@ -884,8 +905,7 @@ void oj_dump_cstr(const char *str, size_t cnt, bool is_sym, bool escape1, Out ou
}
*out->cur++ = '"';
}
if ((JXEsc == out->opts->escape_mode || RailsXEsc == out->opts->escape_mode) && 0 < str - orig &&
0 != (0x80 & *(str - 1))) {
if (do_unicode_validation && 0 < str - orig && 0 != (0x80 & *(str - 1))) {
uint8_t c = (uint8_t) * (str - 1);
int i;
int scnt = (int)(str - orig);
Expand Down
91 changes: 63 additions & 28 deletions test/activesupport6/encoding_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -74,42 +74,77 @@ def test_hash_keys_encoding
ActiveSupport.escape_html_entities_in_json = false
end

def test_utf8_string_encoded_properly
# The original test seems to expect that
# ActiveSupport.escape_html_entities_in_json reverts to true even after
# being set to false. I haven't been able to figure that out so the value is
# set to true, the default, before running the test. This might be wrong but
# for now it will have to do.
ActiveSupport.escape_html_entities_in_json = true
result = ActiveSupport::JSON.encode("€2.99")
assert_equal '"€2.99"', result
assert_equal(Encoding::UTF_8, result.encoding)

result = ActiveSupport::JSON.encode("✎☺")
assert_equal '"✎☺"', result
assert_equal(Encoding::UTF_8, result.encoding)
ohler55 marked this conversation as resolved.
Show resolved Hide resolved
def test_hash_keys_encoding_without_escaping
assert_equal "{\"<>\":\"<>\"}", ActiveSupport::JSON.encode("<>" => "<>")
end

def test_non_utf8_string_transcodes
s = "二".encode("Shift_JIS")
result = ActiveSupport::JSON.encode(s)
assert_equal '"二"', result
assert_equal Encoding::UTF_8, result.encoding
module UnicodeTests
def test_utf8_string_encoded_properly
result = ActiveSupport::JSON.encode("€2.99")
assert_equal '"€2.99"', result
assert_equal(Encoding::UTF_8, result.encoding)

result = ActiveSupport::JSON.encode("✎☺")
assert_equal '"✎☺"', result
assert_equal(Encoding::UTF_8, result.encoding)
end

def test_non_utf8_string_transcodes
s = "二".encode("Shift_JIS")
result = ActiveSupport::JSON.encode(s)
assert_equal '"二"', result
assert_equal Encoding::UTF_8, result.encoding
end

def test_wide_utf8_chars
w = "𠜎"
result = ActiveSupport::JSON.encode(w)
assert_equal '"𠜎"', result
end

def test_wide_utf8_roundtrip
hash = { string: "𐒑" }
json = ActiveSupport::JSON.encode(hash)
decoded_hash = ActiveSupport::JSON.decode(json)
assert_equal "𐒑", decoded_hash["string"]
end

def test_invalid_encoding_raises
s = "\xAE\xFF\x9F"
refute s.valid_encoding?

# n.b. this raises EncodingError, because we didn't call Oj.mimic_JSON in the test setup; but,
# if you do that (even indirectly through Oj.optimize_rails), then this raises a
# JSON::GeneratorError instead of an EncodingError.
assert_raises(EncodingError) do
ActiveSupport::JSON.encode([s])
end
end
end

def test_wide_utf8_chars
w = "𠜎"
result = ActiveSupport::JSON.encode(w)
assert_equal '"𠜎"', result
module UnicodeTestsWithEscapingOn
def setup
ActiveSupport.escape_html_entities_in_json = true
end

def teardown
ActiveSupport.escape_html_entities_in_json = false
end

include UnicodeTests
end

def test_wide_utf8_roundtrip
hash = { string: "𐒑" }
json = ActiveSupport::JSON.encode(hash)
decoded_hash = ActiveSupport::JSON.decode(json)
assert_equal "𐒑", decoded_hash["string"]
module UnicodeTestsWithEscapingOff
def setup
ActiveSupport.escape_html_entities_in_json = false
end

include UnicodeTests
end

include UnicodeTestsWithEscapingOn
include UnicodeTestsWithEscapingOff

def test_hash_key_identifiers_are_always_quoted
values = { 0 => 0, 1 => 1, :_ => :_, "$" => "$", "a" => "a", :A => :A, :A0 => :A0, "A0B" => "A0B" }
assert_equal %w( "$" "A" "A0" "A0B" "_" "a" "0" "1" ).sort, object_keys(ActiveSupport::JSON.encode(values))
Expand Down
94 changes: 72 additions & 22 deletions test/activesupport7/encoding_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,18 @@
require_relative "time_zone_test_helpers"
require_relative "encoding_test_cases"

require 'oj'
# Sets the ActiveSupport encoder to be Oj and also wraps the setting of globals.
Oj::Rails.set_encoder()
Oj::Rails.optimize()

class TestJSONEncoding < ActiveSupport::TestCase
include TimeZoneTestHelpers

def test_is_actually_oj
assert_equal Oj::Rails::Encoder, ActiveSupport.json_encoder
end

def sorted_json(json)
if json.start_with?("{") && json.end_with?("}")
"{" + json[1..-2].split(",").sort.join(",") + "}"
Expand Down Expand Up @@ -61,36 +70,77 @@ def test_hash_keys_encoding
ActiveSupport.escape_html_entities_in_json = false
end

def test_utf8_string_encoded_properly
result = ActiveSupport::JSON.encode("€2.99")
assert_equal '"€2.99"', result
assert_equal(Encoding::UTF_8, result.encoding)

result = ActiveSupport::JSON.encode("✎☺")
assert_equal '"✎☺"', result
assert_equal(Encoding::UTF_8, result.encoding)
def test_hash_keys_encoding_without_escaping
assert_equal "{\"<>\":\"<>\"}", ActiveSupport::JSON.encode("<>" => "<>")
end

def test_non_utf8_string_transcodes
s = "二".encode("Shift_JIS")
result = ActiveSupport::JSON.encode(s)
assert_equal '"二"', result
assert_equal Encoding::UTF_8, result.encoding
module UnicodeTests
def test_utf8_string_encoded_properly
result = ActiveSupport::JSON.encode("€2.99")
assert_equal '"€2.99"', result
assert_equal(Encoding::UTF_8, result.encoding)

result = ActiveSupport::JSON.encode("✎☺")
assert_equal '"✎☺"', result
assert_equal(Encoding::UTF_8, result.encoding)
end

def test_non_utf8_string_transcodes
s = "二".encode("Shift_JIS")
result = ActiveSupport::JSON.encode(s)
assert_equal '"二"', result
assert_equal Encoding::UTF_8, result.encoding
end

def test_wide_utf8_chars
w = "𠜎"
result = ActiveSupport::JSON.encode(w)
assert_equal '"𠜎"', result
end

def test_wide_utf8_roundtrip
hash = { string: "𐒑" }
json = ActiveSupport::JSON.encode(hash)
decoded_hash = ActiveSupport::JSON.decode(json)
assert_equal "𐒑", decoded_hash["string"]
end

def test_invalid_encoding_raises
s = "\xAE\xFF\x9F"
refute s.valid_encoding?

# n.b. this raises EncodingError, because we didn't call Oj.mimic_JSON in the test setup; but,
# if you do that (even indirectly through Oj.optimize_rails), then this raises a
# JSON::GeneratorError instead of an EncodingError.
assert_raises(EncodingError) do
ActiveSupport::JSON.encode([s])
end
end
end

def test_wide_utf8_chars
w = "𠜎"
result = ActiveSupport::JSON.encode(w)
assert_equal '"𠜎"', result
module UnicodeTestsWithEscapingOn
def setup
ActiveSupport.escape_html_entities_in_json = true
end

def teardown
ActiveSupport.escape_html_entities_in_json = false
end

include UnicodeTests
end

def test_wide_utf8_roundtrip
hash = { string: "𐒑" }
json = ActiveSupport::JSON.encode(hash)
decoded_hash = ActiveSupport::JSON.decode(json)
assert_equal "𐒑", decoded_hash["string"]
module UnicodeTestsWithEscapingOff
def setup
ActiveSupport.escape_html_entities_in_json = false
end

include UnicodeTests
end

include UnicodeTestsWithEscapingOn
include UnicodeTestsWithEscapingOff

def test_hash_key_identifiers_are_always_quoted
values = { 0 => 0, 1 => 1, :_ => :_, "$" => "$", "a" => "a", :A => :A, :A0 => :A0, "A0B" => "A0B" }
assert_equal %w( "$" "A" "A0" "A0B" "_" "a" "0" "1" ).sort, object_keys(ActiveSupport::JSON.encode(values))
Expand Down
Loading