diff --git a/embed.fnc b/embed.fnc index 97c8511c525f..2fa3906a6c30 100644 --- a/embed.fnc +++ b/embed.fnc @@ -1157,6 +1157,7 @@ AOdp |SV * |eval_pv |NN const char *p \ |I32 croak_on_error AOdp |SSize_t|eval_sv |NN SV *sv \ |I32 flags +EMTpx |Size_t |expected_size |UV size ATdmp |bool |extended_utf8_to_uv \ |NN const U8 * const s \ |NN const U8 * const e \ diff --git a/perl.h b/perl.h index f1cd818221b9..e3493b573c3e 100644 --- a/perl.h +++ b/perl.h @@ -1686,6 +1686,23 @@ Use L to declare variables of the maximum usable size on this platform. # define PERL_STRLEN_EXPAND_SHIFT 2 #endif +/* "expected_size" is a functional stub for building on in the future. + * The intention is to pass it a number of bytes, prior to using that + * number in a call to something like malloc() or realloc(), and a best- + * guess at the size to be allocated will be returned. ("Best-guess" + * may vary by platform and malloc implementation.) + * This will be useful to (1) grow strings (or anything else) in a way + * that results in SvLEN more accurately reflecting the usable space + * that has been allocated, and (2) we don't try to shrink an + * allocation that won't actually shrink in practice. + * Right now, this macro just rounds up a given number to the nearest + * multiple of PTRSIZE, for a minimum of PERL_STRLEN_NEW_MIN. This is + * not entirely useless, just not terribly accurate. + */ +#define expected_size(n) ( ((n) > PERL_STRLEN_NEW_MIN) \ + ? (((n) + PTRSIZE - 1) & ~(PTRSIZE - 1)) \ + : PERL_STRLEN_NEW_MIN ) + /* This use of offsetof() requires /Zc:offsetof- for VS2017 (and presumably * onwards) when building Socket.xs, but we can just use a different definition * for STRUCT_OFFSET instead. */ diff --git a/pod/perldelta.pod b/pod/perldelta.pod index 280ac72b07b5..f0903eaed2d3 100644 --- a/pod/perldelta.pod +++ b/pod/perldelta.pod @@ -350,6 +350,26 @@ well. XXX +=item * + +C has been added as an experimental stub macro. The +intention is to build on this such that it can be passed a size in bytes, +then returns the interpreter's best informed guess of what actual usable +allocation size would be returned by the malloc implementation in use. + +This would help with sizing allocations such that SvLEN is more accurate +and not trying to shrink string buffers to save size when the intended +saving is unrealistic. + +=item * + +C has been revised to include a byte for COW, so that +the resulting string could be COWed in the future. + +It also now uses C, compared against the current +buffer size, and does not try to do a reallocation if the requested +memory saving is unrealistic. + =back =head1 Selected Bug Fixes diff --git a/proto.h b/proto.h index da3fd83efa7b..a198c2a34720 100644 --- a/proto.h +++ b/proto.h @@ -1101,6 +1101,11 @@ Perl_eval_sv(pTHX_ SV *sv, I32 flags); #define PERL_ARGS_ASSERT_EVAL_SV \ assert(sv) +PERL_CALLCONV Size_t +Perl_expected_size(UV size) + __attribute__visibility__("hidden"); +#define PERL_ARGS_ASSERT_EXPECTED_SIZE + /* PERL_CALLCONV bool Perl_extended_utf8_to_uv(const U8 * const s, const U8 * const e, UV *cp_p, Size_t *advance_p); */ diff --git a/sv.h b/sv.h index b92b35ea0a10..6374370f7b46 100644 --- a/sv.h +++ b/sv.h @@ -1590,9 +1590,22 @@ L> before calling this. =cut */ -#define SvPV_shrink_to_cur(sv) STMT_START { \ - const STRLEN _lEnGtH = SvCUR(sv) + 1; \ - SvPV_renew(sv, _lEnGtH); \ +/* Notes: Ensure the buffer is big enough to be COWed in the future, so + + 1 for the trailing null byte + 1 for the COW count. + * The `expected_size` call will, at worst, ensure that the buffer size + * is no smaller than the expected minimim allocation and that the given + * size is rounded up to the closest PTRSIZE boundary. Depending on + * per-malloc implementation, it might return the exact size that would + * be allocated for the specified _lEnGtH. If the return value from + * `expected_size` is not smaller than the current buffer allocation, + * there is no point in calling SvPV_renew. +*/ + +#define SvPV_shrink_to_cur(sv) STMT_START { \ + const STRLEN _lEnGtH = SvCUR(sv) + 2; \ + const STRLEN _eXpEcT = expected_size(_lEnGtH); \ + if (SvLEN(sv) > _eXpEcT) \ + SvPV_renew(sv, _eXpEcT); \ } STMT_END /* diff --git a/toke.c b/toke.c index 5759c7890d32..671c4af209a2 100644 --- a/toke.c +++ b/toke.c @@ -4422,9 +4422,7 @@ S_scan_const(pTHX_ char *start) } /* shrink the sv if we allocated more than we used */ - if (SvCUR(sv) + 5 < SvLEN(sv)) { - SvPV_shrink_to_cur(sv); - } + SvPV_shrink_to_cur(sv); /* return the substring (via pl_yylval) only if we parsed anything */ if (s > start) { @@ -11348,9 +11346,7 @@ S_scan_heredoc(pTHX_ char *s) SvREFCNT_dec_NN(newstr); } - if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) { - SvPV_shrink_to_cur(tmpstr); - } + SvPV_shrink_to_cur(tmpstr); if (!IN_BYTES) { if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr))) @@ -11877,10 +11873,7 @@ Perl_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int PL_parser->herelines = herelines; /* if we allocated too much space, give some back */ - if (SvCUR(sv) + 5 < SvLEN(sv)) { - SvLEN_set(sv, SvCUR(sv) + 1); - SvPV_shrink_to_cur(sv); - } + SvPV_shrink_to_cur(sv); /* decide whether this is the first or second quoted string we've read for this op