@@ -370,90 +370,41 @@ static inline unsigned int get_next_char(
370370static enum entity_charset determine_charset (char * charset_hint )
371371{
372372 size_t i ;
373- enum entity_charset charset = cs_utf_8 ;
374- size_t len = 0 ;
375373 const zend_encoding * zenc ;
376374
377- /* Default is now UTF-8 */
378- if (charset_hint == NULL )
379- return cs_utf_8 ;
375+ if (charset_hint && * charset_hint ) {
376+ /* Explicitly passed charset */
377+ goto det_charset ;
378+ }
380379
381- if ((len = strlen (charset_hint )) != 0 ) {
380+ charset_hint = get_default_charset ();
381+ if (charset_hint && * charset_hint ) {
382+ /* default_charset or internal_encoding */
382383 goto det_charset ;
383384 }
384385
385386 zenc = zend_multibyte_get_internal_encoding ();
386387 if (zenc != NULL ) {
388+ /* mbstring.internal_encoding or mb_internal_encoding() */
389+ // TODO: We *shouldn't* be taking this into account anymore.
387390 charset_hint = (char * )zend_multibyte_get_encoding_name (zenc );
388- if (charset_hint != NULL && (len = strlen (charset_hint )) != 0 ) {
389- if (len == sizeof ("auto" )- 1 && !memcmp ("auto" , charset_hint , sizeof ("auto" )- 1 )) {
390- charset_hint = NULL ;
391- len = 0 ;
392- } else {
393- goto det_charset ;
394- }
395- }
396- }
397-
398- charset_hint = SG (default_charset );
399- if (charset_hint != NULL && (len = strlen (charset_hint )) != 0 ) {
400- goto det_charset ;
401- }
402-
403- /* try to detect the charset for the locale */
404- #if HAVE_NL_LANGINFO && defined(CODESET )
405- charset_hint = nl_langinfo (CODESET );
406- if (charset_hint != NULL && (len = strlen (charset_hint )) != 0 ) {
407- goto det_charset ;
408- }
409- #endif
410-
411- /* try to figure out the charset from the locale */
412- {
413- char * localename ;
414- char * dot , * at ;
415-
416- /* lang[_territory][.codeset][@modifier] */
417- localename = setlocale (LC_CTYPE , NULL );
418-
419- dot = strchr (localename , '.' );
420- if (dot ) {
421- dot ++ ;
422- /* locale specifies a codeset */
423- at = strchr (dot , '@' );
424- if (at )
425- len = at - dot ;
426- else
427- len = strlen (dot );
428- charset_hint = dot ;
429- } else {
430- /* no explicit name; see if the name itself
431- * is the charset */
432- charset_hint = localename ;
433- len = strlen (charset_hint );
434- }
435391 }
436392
437393det_charset :
438-
439394 if (charset_hint ) {
440- int found = 0 ;
441-
395+ size_t len = strlen (charset_hint );
442396 /* now walk the charset map and look for the codeset */
443397 for (i = 0 ; i < sizeof (charset_map )/sizeof (charset_map [0 ]); i ++ ) {
444398 if (len == charset_map [i ].codeset_len &&
445399 zend_binary_strcasecmp (charset_hint , len , charset_map [i ].codeset , len ) == 0 ) {
446- charset = charset_map [i ].charset ;
447- found = 1 ;
448- break ;
400+ return charset_map [i ].charset ;
449401 }
450402 }
451- if (!found ) {
452- php_error_docref (NULL , E_WARNING , "Charset `%s' not supported, assuming utf-8" ,
453- charset_hint );
454- }
403+
404+ php_error_docref (NULL , E_WARNING , "Charset `%s' not supported, assuming utf-8" ,
405+ charset_hint );
455406 }
456- return charset ;
407+ return cs_utf_8 ;
457408}
458409/* }}} */
459410
@@ -1384,7 +1335,6 @@ PHPAPI zend_string *php_escape_html_entities_ex(unsigned char *old, size_t oldle
13841335static void php_html_entities (INTERNAL_FUNCTION_PARAMETERS , int all )
13851336{
13861337 zend_string * str , * hint_charset = NULL ;
1387- char * default_charset ;
13881338 zend_long flags = ENT_COMPAT ;
13891339 zend_string * replaced ;
13901340 zend_bool double_encode = 1 ;
@@ -1397,10 +1347,9 @@ static void php_html_entities(INTERNAL_FUNCTION_PARAMETERS, int all)
13971347 Z_PARAM_BOOL (double_encode );
13981348 ZEND_PARSE_PARAMETERS_END ();
13991349
1400- if (!hint_charset ) {
1401- default_charset = get_default_charset ();
1402- }
1403- replaced = php_escape_html_entities_ex ((unsigned char * )ZSTR_VAL (str ), ZSTR_LEN (str ), all , (int ) flags , (hint_charset ? ZSTR_VAL (hint_charset ) : default_charset ), double_encode );
1350+ replaced = php_escape_html_entities_ex (
1351+ (unsigned char * )ZSTR_VAL (str ), ZSTR_LEN (str ), all , (int ) flags ,
1352+ hint_charset ? ZSTR_VAL (hint_charset ) : NULL , double_encode );
14041353 RETVAL_STR (replaced );
14051354}
14061355/* }}} */
@@ -1462,7 +1411,6 @@ PHP_FUNCTION(htmlspecialchars_decode)
14621411PHP_FUNCTION (html_entity_decode )
14631412{
14641413 zend_string * str , * hint_charset = NULL ;
1465- char * default_charset ;
14661414 zend_long quote_style = ENT_COMPAT ;
14671415 zend_string * replaced ;
14681416
@@ -1473,10 +1421,8 @@ PHP_FUNCTION(html_entity_decode)
14731421 Z_PARAM_STR (hint_charset )
14741422 ZEND_PARSE_PARAMETERS_END ();
14751423
1476- if (!hint_charset ) {
1477- default_charset = get_default_charset ();
1478- }
1479- replaced = php_unescape_html_entities (str , 1 /*all*/ , (int )quote_style , (hint_charset ? ZSTR_VAL (hint_charset ) : default_charset ));
1424+ replaced = php_unescape_html_entities (
1425+ str , 1 /*all*/ , (int )quote_style , hint_charset ? ZSTR_VAL (hint_charset ) : NULL );
14801426
14811427 if (replaced ) {
14821428 RETURN_STR (replaced );
0 commit comments