From 50ff112f68c99246754528dfcbd2472b18b3abb3 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sat, 11 Feb 2023 21:05:51 +0100 Subject: [PATCH] Implement GH-10526: array_unique: add way to compare items with an identity check (===) Implements GH-10526. This adds a SORT_STRICT flag to use in sorting and in array_unique, although it is most useful in the latter case. If the types are equal we will use the identity check first before using zend_compare to avoid possibly inconsistent results (e.g. null and false are equal according to zend_compare). If the types are equal, but the values are not identical we can rely on zend_compare to perform the comparison without introducing inconsistencies. If the types are not equal, we will sort in a way that groups the values of the same type together, and within those groups the values are sorted. We determine the order of the types based on the alphabetical order (as exposed to userland!), which is consistent with the inequality comparisons of `zend_compare` (e.g. false < object < true etc.) Note that double comes before long because the name exposed to userland is float which comes alphabetically before long. --- ext/standard/array.c | 85 +++ ext/standard/basic_functions.stub.php | 5 + ext/standard/basic_functions_arginfo.h | 3 +- ext/standard/php_array.h | 1 + .../tests/array/sort_strict_flag.phpt | 537 ++++++++++++++++++ 5 files changed, 630 insertions(+), 1 deletion(-) create mode 100644 ext/standard/tests/array/sort_strict_flag.phpt diff --git a/ext/standard/array.c b/ext/standard/array.c index 88d2335c0afb9..5671d1f806e01 100644 --- a/ext/standard/array.c +++ b/ext/standard/array.c @@ -244,6 +244,25 @@ static int php_array_key_compare_string_natural_general(Bucket *f, Bucket *s, in } /* }}} */ +static zend_always_inline int php_array_key_compare_strict_unstable_i(Bucket *f, Bucket *s) /* {{{ */ +{ + if (f->key == NULL) { + if (s->key == NULL) { + return (zend_long) f->h > (zend_long) s->h ? 1 : -1; + } else { + /* f's keytype is int, s's keytype is string, and int < string */ + return -1; + } + } else { + if (s->key == NULL) { + /* f's keytype is string, s's keytype is int, and string > int */ + return 1; + } else { + return zendi_smart_strcmp(f->key, s->key); + } + } +} + static int php_array_key_compare_string_natural_case(Bucket *a, Bucket *b) /* {{{ */ { RETURN_STABLE_SORT(a, b, php_array_key_compare_string_natural_general(a, b, 1)); @@ -312,6 +331,45 @@ static zend_always_inline int php_array_data_compare_string_unstable_i(Bucket *f } /* }}} */ +static zend_always_inline int php_array_data_compare_strict_unstable_i(Bucket *f, Bucket *s) /* {{{ */ +{ + /* We must first check for identity before using a type-juggly zend_compare. + * Otherwise there would be cases which are inconsistent; e.g. `null == false` + * but `null !== false` and we want the latter behaviour. */ + if (fast_is_identical_function(&f->val, &s->val)) { + return 0; + } + + uint32_t type1 = Z_TYPE(f->val); + uint32_t type2 = Z_TYPE(s->val); + + if (type1 == type2) { + return zend_compare(&f->val, &s->val); + } else { + /* If the types are not equal, we will sort in a way that groups the values of the same + * type together, and within those groups the values are sorted. + * We determine the order of the types based on the alphabetical order (as exposed to userland!), + * which is consistent with the inequality comparisons of `zend_compare` (e.g. false < object < true etc.) + * Order: array, false, double (float), long (int), null, object, reference, resource, string, true. + * Note that double comes before long because the name exposed to userland is float which comes alphabetically + * before long. */ + static const signed char type_to_order_mapping[] = { + [IS_ARRAY] = 0, + [IS_FALSE] = 1, + [IS_DOUBLE] = 2, + [IS_LONG] = 3, + [IS_NULL] = 4, + [IS_OBJECT] = 5, + [IS_REFERENCE] = 6, + [IS_RESOURCE] = 7, + [IS_STRING] = 8, + [IS_TRUE] = 9, + }; + + return ZEND_NORMALIZE_BOOL(type_to_order_mapping[type1] - type_to_order_mapping[type2]); + } +} + static int php_array_natural_general_compare(Bucket *f, Bucket *s, int fold_case) /* {{{ */ { zend_string *tmp_str1, *tmp_str2; @@ -349,11 +407,13 @@ DEFINE_SORT_VARIANTS(key_compare_numeric); DEFINE_SORT_VARIANTS(key_compare_string_case); DEFINE_SORT_VARIANTS(key_compare_string); DEFINE_SORT_VARIANTS(key_compare_string_locale); +DEFINE_SORT_VARIANTS(key_compare_strict); DEFINE_SORT_VARIANTS(data_compare); DEFINE_SORT_VARIANTS(data_compare_numeric); DEFINE_SORT_VARIANTS(data_compare_string_case); DEFINE_SORT_VARIANTS(data_compare_string); DEFINE_SORT_VARIANTS(data_compare_string_locale); +DEFINE_SORT_VARIANTS(data_compare_strict); DEFINE_SORT_VARIANTS(natural_compare); DEFINE_SORT_VARIANTS(natural_case_compare); @@ -408,6 +468,14 @@ static bucket_compare_func_t php_get_key_compare_func(zend_long sort_type, int r } break; + case PHP_SORT_STRICT: + if (reverse) { + return php_array_reverse_key_compare_strict; + } else { + return php_array_key_compare_strict; + } + break; + case PHP_SORT_REGULAR: default: if (reverse) { @@ -472,6 +540,14 @@ static bucket_compare_func_t php_get_data_compare_func(zend_long sort_type, int } break; + case PHP_SORT_STRICT: + if (reverse) { + return php_array_reverse_data_compare_strict; + } else { + return php_array_data_compare_strict; + } + break; + case PHP_SORT_REGULAR: default: if (reverse) { @@ -536,6 +612,14 @@ static bucket_compare_func_t php_get_data_compare_func_unstable(zend_long sort_t } break; + case PHP_SORT_STRICT: + if (reverse) { + return php_array_reverse_data_compare_strict_unstable; + } else { + return php_array_data_compare_strict_unstable; + } + break; + case PHP_SORT_REGULAR: default: if (reverse) { @@ -5653,6 +5737,7 @@ PHP_FUNCTION(array_multisort) case PHP_SORT_STRING: case PHP_SORT_NATURAL: case PHP_SORT_LOCALE_STRING: + case PHP_SORT_STRICT: /* flag allowed here */ if (parse_state[MULTISORT_TYPE] == 1) { /* Save the flag and make sure then next arg is not the current flag. */ diff --git a/ext/standard/basic_functions.stub.php b/ext/standard/basic_functions.stub.php index ab56a8c0e8fbb..e199e5a76ed55 100755 --- a/ext/standard/basic_functions.stub.php +++ b/ext/standard/basic_functions.stub.php @@ -81,6 +81,11 @@ * @cvalue PHP_SORT_NATURAL */ const SORT_NATURAL = UNKNOWN; +/** + * @var int + * @cvalue PHP_SORT_STRICT + */ +const SORT_STRICT = UNKNOWN; /** * @var int * @cvalue PHP_SORT_FLAG_CASE diff --git a/ext/standard/basic_functions_arginfo.h b/ext/standard/basic_functions_arginfo.h index 077f9876df7c2..b50fde89def15 100644 --- a/ext/standard/basic_functions_arginfo.h +++ b/ext/standard/basic_functions_arginfo.h @@ -1,5 +1,5 @@ /* This is a generated file, edit the .stub.php file instead. - * Stub hash: 39d455982dfdea9d0b9b646bc207b05f7108d1b2 */ + * Stub hash: 3ca9818a79249e52dd0cc0940fdad55febbe223c */ ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_set_time_limit, 0, 1, _IS_BOOL, 0) ZEND_ARG_TYPE_INFO(0, seconds, IS_LONG, 0) @@ -3498,6 +3498,7 @@ static void register_basic_functions_symbols(int module_number) REGISTER_LONG_CONSTANT("SORT_STRING", PHP_SORT_STRING, CONST_PERSISTENT); REGISTER_LONG_CONSTANT("SORT_LOCALE_STRING", PHP_SORT_LOCALE_STRING, CONST_PERSISTENT); REGISTER_LONG_CONSTANT("SORT_NATURAL", PHP_SORT_NATURAL, CONST_PERSISTENT); + REGISTER_LONG_CONSTANT("SORT_STRICT", PHP_SORT_STRICT, CONST_PERSISTENT); REGISTER_LONG_CONSTANT("SORT_FLAG_CASE", PHP_SORT_FLAG_CASE, CONST_PERSISTENT); REGISTER_LONG_CONSTANT("CASE_LOWER", PHP_CASE_LOWER, CONST_PERSISTENT); REGISTER_LONG_CONSTANT("CASE_UPPER", PHP_CASE_UPPER, CONST_PERSISTENT); diff --git a/ext/standard/php_array.h b/ext/standard/php_array.h index 5d42a22f42040..ef2449859aefc 100644 --- a/ext/standard/php_array.h +++ b/ext/standard/php_array.h @@ -54,6 +54,7 @@ PHPAPI bool php_array_pick_keys(const php_random_algo *algo, php_random_status * #define PHP_SORT_ASC 4 #define PHP_SORT_LOCALE_STRING 5 #define PHP_SORT_NATURAL 6 +#define PHP_SORT_STRICT 7 #define PHP_SORT_FLAG_CASE 8 #define PHP_COUNT_NORMAL 0 diff --git a/ext/standard/tests/array/sort_strict_flag.phpt b/ext/standard/tests/array/sort_strict_flag.phpt new file mode 100644 index 0000000000000..8ee25516ef6d1 --- /dev/null +++ b/ext/standard/tests/array/sort_strict_flag.phpt @@ -0,0 +1,537 @@ +--TEST-- +array_unique() and sorting functions: SORT_STRICT flag +--FILE-- + "b", 2 => "a", 3 => "c"]); + +test_keys(["b" => 0, "c" => 1, "a" => 2]); + +test_keys(["b" => 0, 0 => 10, -5 => 11, "c" => 1, "a" => 2, -99 => 1]); + +?> +--EXPECT-- +=== TEST COMPARE VALUES === +array(5) { + [0]=> + int(1) + [1]=> + string(1) "1" + [2]=> + int(2) + [4]=> + string(1) "2" + [5]=> + float(2) +} +bool(true) +array(6) { + [0]=> + float(2) + [1]=> + int(1) + [2]=> + int(1) + [3]=> + int(2) + [4]=> + string(1) "1" + [5]=> + string(1) "2" +} +bool(true) +array(6) { + [0]=> + string(1) "2" + [1]=> + string(1) "1" + [2]=> + int(2) + [3]=> + int(1) + [4]=> + int(1) + [5]=> + float(2) +} +bool(true) +bool(true) +array(6) { + [0]=> + NULL + [1]=> + string(11) "hello world" + [2]=> + string(4) "test" + [3]=> + bool(true) + [4]=> + bool(false) + [5]=> + string(3) "abc" +} +bool(true) +array(7) { + [0]=> + bool(false) + [1]=> + NULL + [2]=> + NULL + [3]=> + string(3) "abc" + [4]=> + string(11) "hello world" + [5]=> + string(4) "test" + [6]=> + bool(true) +} +bool(true) +array(7) { + [0]=> + bool(true) + [1]=> + string(4) "test" + [2]=> + string(11) "hello world" + [3]=> + string(3) "abc" + [4]=> + NULL + [5]=> + NULL + [6]=> + bool(false) +} +bool(true) +bool(true) +array(4) { + [0]=> + &int(1) + [1]=> + int(1) + [2]=> + int(0) + [3]=> + int(2) +} +bool(true) +array(5) { + [0]=> + int(0) + [1]=> + int(1) + [2]=> + int(1) + [3]=> + int(2) + [4]=> + &int(1) +} +bool(true) +array(5) { + [0]=> + &int(1) + [1]=> + int(2) + [2]=> + int(1) + [3]=> + int(1) + [4]=> + int(0) +} +bool(true) +bool(true) +array(12) { + [0]=> + bool(true) + [1]=> + string(1) "a" + [2]=> + resource(5) of type (stream) + [3]=> + &int(1) + [5]=> + object(stdClass)#1 (0) { + } + [6]=> + NULL + [8]=> + float(1.5) + [10]=> + int(0) + [11]=> + int(1) + [12]=> + float(1) + [13]=> + bool(false) + [14]=> + array(0) { + } +} +bool(true) +array(19) { + [0]=> + array(0) { + } + [1]=> + bool(false) + [2]=> + bool(false) + [3]=> + float(1) + [4]=> + float(1.5) + [5]=> + float(1.5) + [6]=> + int(0) + [7]=> + int(1) + [8]=> + NULL + [9]=> + NULL + [10]=> + object(stdClass)#1 (0) { + } + [11]=> + object(stdClass)#1 (0) { + } + [12]=> + &int(1) + [13]=> + &int(1) + [14]=> + resource(5) of type (stream) + [15]=> + resource(5) of type (stream) + [16]=> + string(1) "a" + [17]=> + bool(true) + [18]=> + bool(true) +} +bool(true) +array(19) { + [0]=> + bool(true) + [1]=> + bool(true) + [2]=> + string(1) "a" + [3]=> + resource(5) of type (stream) + [4]=> + resource(5) of type (stream) + [5]=> + &int(1) + [6]=> + &int(1) + [7]=> + object(stdClass)#1 (0) { + } + [8]=> + object(stdClass)#1 (0) { + } + [9]=> + NULL + [10]=> + NULL + [11]=> + int(1) + [12]=> + int(0) + [13]=> + float(1.5) + [14]=> + float(1.5) + [15]=> + float(1) + [16]=> + bool(false) + [17]=> + bool(false) + [18]=> + array(0) { + } +} +bool(true) +bool(true) +array(16) { + [0]=> + array(2) { + [0]=> + int(1) + [1]=> + int(2) + } + [1]=> + bool(true) + [2]=> + bool(false) + [3]=> + string(1) "b" + [4]=> + string(1) "a" + [5]=> + &int(2) + [7]=> + &int(1) + [8]=> + object(stdClass)#2 (0) { + } + [9]=> + resource(5) of type (stream) + [11]=> + NULL + [12]=> + int(1) + [14]=> + int(-5) + [15]=> + float(-5) + [16]=> + float(5) + [17]=> + float(1) + [19]=> + array(0) { + } +} +bool(true) +array(22) { + [0]=> + array(0) { + } + [1]=> + array(2) { + [0]=> + int(1) + [1]=> + int(2) + } + [2]=> + array(2) { + [0]=> + int(1) + [1]=> + int(2) + } + [3]=> + bool(false) + [4]=> + bool(false) + [5]=> + bool(false) + [6]=> + float(-5) + [7]=> + float(1) + [8]=> + float(5) + [9]=> + int(-5) + [10]=> + int(1) + [11]=> + NULL + [12]=> + NULL + [13]=> + object(stdClass)#2 (0) { + } + [14]=> + object(stdClass)#3 (0) { + } + [15]=> + &int(1) + [16]=> + &int(2) + [17]=> + resource(5) of type (stream) + [18]=> + string(1) "a" + [19]=> + string(1) "a" + [20]=> + string(1) "b" + [21]=> + bool(true) +} +bool(true) +array(22) { + [0]=> + bool(true) + [1]=> + string(1) "b" + [2]=> + string(1) "a" + [3]=> + string(1) "a" + [4]=> + resource(5) of type (stream) + [5]=> + &int(2) + [6]=> + &int(1) + [7]=> + object(stdClass)#2 (0) { + } + [8]=> + object(stdClass)#3 (0) { + } + [9]=> + NULL + [10]=> + NULL + [11]=> + int(1) + [12]=> + int(-5) + [13]=> + float(5) + [14]=> + float(1) + [15]=> + float(-5) + [16]=> + bool(false) + [17]=> + bool(false) + [18]=> + bool(false) + [19]=> + array(2) { + [0]=> + int(1) + [1]=> + int(2) + } + [20]=> + array(2) { + [0]=> + int(1) + [1]=> + int(2) + } + [21]=> + array(0) { + } +} +bool(true) +bool(true) +=== TEST COMPARE KEYS === +bool(true) +array(3) { + [2]=> + string(1) "a" + [3]=> + string(1) "c" + [5]=> + string(1) "b" +} +bool(true) +array(3) { + [2]=> + string(1) "c" + [1]=> + string(1) "a" + [0]=> + string(1) "b" +} +bool(true) +array(3) { + ["a"]=> + int(2) + ["b"]=> + int(0) + ["c"]=> + int(1) +} +bool(true) +array(3) { + ["c"]=> + int(1) + ["b"]=> + int(0) + ["a"]=> + int(2) +} +bool(true) +array(6) { + [-99]=> + int(1) + [-5]=> + int(11) + [0]=> + int(10) + ["a"]=> + int(2) + ["b"]=> + int(0) + ["c"]=> + int(1) +} +bool(true) +array(6) { + ["c"]=> + int(1) + ["b"]=> + int(0) + ["a"]=> + int(2) + [2]=> + int(1) + [1]=> + int(11) + [0]=> + int(10) +}