diff --git a/utf8.c b/utf8.c
index dd00149421b3..1bdfd740521b 100644
--- a/utf8.c
+++ b/utf8.c
@@ -4759,8 +4759,18 @@ See also L</sv_uni_display>.
 =for apidoc Amnh||UNI_DISPLAY_ISPRINT
 =for apidoc Amnh||UNI_DISPLAY_QQ
 =for apidoc Amnh||UNI_DISPLAY_REGEX
+
+=for apidoc Cmn||UNI_DISPLAY_TR_
+
+This is an extra flag for L</pv_uni_display> which is for internal use
+only.  It displays an operand of the tr/// operation.  These operands have a
+peculiar, deliberate UTF-8 malformation which this flag enables the proper
+handling of.  It turns on ISPRINT and BACKSLASH as well.
+
 =cut
+
 */
+
 char *
 Perl_pv_uni_display(pTHX_ SV *dsv, const U8 *spv, STRLEN len, STRLEN pvlim,
                     UV flags)
@@ -4782,6 +4792,18 @@ Perl_pv_uni_display(pTHX_ SV *dsv, const U8 *spv, STRLEN len, STRLEN pvlim,
             break;
         }
 
+        /* The minus is unambiguously the range indicator within a UTF-8 tr///
+         * operand.  UNI_DISPLAY_TR_ is a multi-bit mask that also contains
+         * the ISPRINT and BACKSLASH bits, so every one of its bits must be
+         * set; a bare '&' test would also fire for callers that passed only
+         * UNI_DISPLAY_ISPRINT, UNI_DISPLAY_BACKSLASH, or UNI_DISPLAY_QQ */
+        if (UNLIKELY(   (flags & UNI_DISPLAY_TR_) == UNI_DISPLAY_TR_
+                     && *s == ILLEGAL_UTF8_BYTE)) {
+            sv_catpvs(dsv, "-");
+            next_len = 1;
+            continue;
+        }
+
         (void) utf8_to_uv(s, e, &u, &next_len);
         assert(next_len > 0);
 
diff --git a/utf8.h b/utf8.h
index f37fa33159e1..162d8c054e4d 100644
--- a/utf8.h
+++ b/utf8.h
@@ -1339,7 +1339,10 @@ point's representation.
 #define UNI_DISPLAY_BACKSLASH   0x0002
 #define UNI_DISPLAY_BACKSPACE   0x0004  /* Allow \b when also
                                            UNI_DISPLAY_BACKSLASH */
-#define UNI_DISPLAY_QQ          (UNI_DISPLAY_ISPRINT \
+#define UNI_DISPLAY_TR_         (                       0x0008 \
+                                |UNI_DISPLAY_ISPRINT \
+                                |UNI_DISPLAY_BACKSLASH)
+#define UNI_DISPLAY_QQ          (UNI_DISPLAY_ISPRINT \
                                 |UNI_DISPLAY_BACKSLASH \
                                 |UNI_DISPLAY_BACKSPACE)
 