Skip to content

Commit 0920c05

Browse files
committed
MSVC compat fixes for the intrinsics-enabled, 4/8-bytes-at-a-time pp_reverse
1 parent 67d79fe commit 0920c05

File tree

2 files changed

+116
-73
lines changed

2 files changed

+116
-73
lines changed

perl.h

Lines changed: 14 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1452,18 +1452,26 @@ Use C<L</UINTMAX_C>> to get the largest type available on the platform.
14521452
=cut
14531453
*/
14541454
#ifndef UINT16_C
1455-
# if INTSIZE >= 2
1456-
# define UINT16_C(x) ((U16_TYPE)x##U)
1455+
# ifdef _MSC_VER
1456+
# define UINT16_C(x) ((U16TYPE)x##ui16)
14571457
# else
1458-
# define UINT16_C(x) ((U16_TYPE)x##UL)
1458+
# if INTSIZE >= 2
1459+
# define UINT16_C(x) ((U16TYPE)x##U)
1460+
# else
1461+
# define UINT16_C(x) ((U16TYPE)x##UL)
1462+
# endif
14591463
# endif
14601464
#endif
14611465

14621466
#ifndef UINT32_C
1463-
# if INTSIZE >= 4
1464-
# define UINT32_C(x) ((U32_TYPE)x##U)
1467+
# ifdef _MSC_VER
1468+
# define UINT32_C(x) ((U32TYPE)x##ui32)
14651469
# else
1466-
# define UINT32_C(x) ((U32_TYPE)x##UL)
1470+
# if INTSIZE >= 4
1471+
# define UINT32_C(x) ((U32TYPE)x##U)
1472+
# else
1473+
# define UINT32_C(x) ((U32TYPE)x##UL)
1474+
# endif
14671475
# endif
14681476
#endif
14691477

pp.c

Lines changed: 102 additions & 67 deletions
Original file line number | Diff line number | Diff line change
@@ -6529,6 +6529,20 @@ PP(pp_unshift)
65296529
return NORMAL;
65306530
}
65316531

6532+
#ifdef _MSC_VER
6533+
# pragma intrinsic(_byteswap_ushort, _byteswap_ulong, _byteswap_uint64)
6534+
# define S_bswap16(_x) _byteswap_ushort(_x)
6535+
# define S_bswap32(_x) _byteswap_ulong(_x)
6536+
# define S_bswap64(_x) _byteswap_uint64(_x)
6537+
PERL_STATIC_FORCE_INLINE void *
6538+
S_memcpy(void *dest, const void *src,size_t count);
6539+
#else
6540+
# define S_bswap16(_x) _swab_16_(_x)
6541+
# define S_bswap32(_x) _swab_32_(_x)
6542+
# define S_bswap64(_x) _swab_64_(_x)
6543+
# define S_memcpy(_d,_s,_n) memcpy((_d),(_s),(_n))
6544+
#endif
6545+
65326546
PP_wrapped(pp_reverse, 0, 1)
65336547
{
65346548
dSP; dMARK;
@@ -6554,15 +6568,17 @@ PP_wrapped(pp_reverse, 0, 1)
65546568
SV *begin, *end;
65556569

65566570
if (can_preserve) {
6557-
if (!av_exists(av, i)) {
6558-
if (av_exists(av, j)) {
6571+
bool exi = av_exists(av, i);
6572+
bool exj = av_exists(av, j);
6573+
if (!exi) {
6574+
if (exj) {
65596575
SV *sv = av_delete(av, j, 0);
65606576
begin = *av_fetch(av, i, TRUE);
65616577
sv_setsv_mg(begin, sv);
65626578
}
65636579
continue;
65646580
}
6565-
else if (!av_exists(av, j)) {
6581+
else if (!exj) {
65666582
SV *sv = av_delete(av, i, 0);
65676583
end = *av_fetch(av, j, TRUE);
65686584
sv_setsv_mg(end, sv);
@@ -6643,18 +6659,19 @@ PP_wrapped(pp_reverse, 0, 1)
66436659
* in a single pass, rather than 2-3 passes. */
66446660

66456661
const char * src = SvPV_const(src_sv, len);
6662+
U8* dd;
66466663

66476664
/* Prepare the TARG. */
6648-
if (SvTYPE(TARG) < SVt_PV) {
6665+
if (SvTHINKFIRST(TARG))
6666+
SV_CHECK_THINKFIRST_COW_DROP(TARG); /* Drops any buffer or RV */
6667+
if (SvTYPE(TARG) < SVt_PV)
66496668
SvUPGRADE(TARG, SvTYPE(src_sv)); /* No buffer allocation here */
6650-
} else if(SvTHINKFIRST(TARG)) {
6651-
SV_CHECK_THINKFIRST_COW_DROP(TARG); /* Drops any buffer */
6652-
}
6653-
SvSETMAGIC(TARG);
6654-
SvGROW(TARG, len + 1);
6669+
else /* can't have SMG if < PVMG, SvROK/SvAMAGIC doesn't apply */
6670+
SvSETMAGIC(TARG);
6671+
dd = (U8*)SvGROW(TARG, len + 1);
66556672
SvCUR_set(TARG, len);
66566673
SvPOK_only(TARG);
6657-
*SvEND(TARG) = '\0';
6674+
dd[len] = '\0';
66586675
if (SvTAINTED(src_sv))
66596676
SvTAINT(TARG);
66606677

@@ -6663,9 +6680,9 @@ PP_wrapped(pp_reverse, 0, 1)
66636680
SvUTF8_on(TARG);
66646681

66656682
const U8* s = (const U8*)src;
6666-
U8* dd = (U8*)(SvPVX(TARG) + len);
66676683
const U8* send = (const U8*)(s + len);
66686684
int bytes = 0;
6685+
dd = dd + len;
66696686
while (s < send) {
66706687
bytes = UTF8SKIP(s);
66716688
if (bytes == 1) {
@@ -6680,57 +6697,57 @@ PP_wrapped(pp_reverse, 0, 1)
66806697
} else {
66816698
STRLEN i = 0;
66826699
STRLEN j = len;
6683-
uint32_t u32_1, u32_2;
6684-
uint16_t u16_1, u16_2;
6685-
char * outp= SvPVX(TARG);
6700+
U32 u32_1, u32_2;
6701+
U16 u16_1, u16_2;
6702+
char * outp = dd;
66866703
/* Take a chunk of bytes from the front and from the
66876704
* back, reverse the bytes in each and swap the
66886705
* chunks over. This should have generally good
66896706
* performance but also is likely to be optimised
66906707
* into bswap instructions by the compiler.
66916708
*/
66926709
#ifdef HAS_QUAD
6693-
uint64_t u64_1, u64_2;
6710+
U64 u64_1, u64_2;
66946711
while (j - i >= 16) {
6695-
memcpy(&u64_1, src + j - 8, 8);
6696-
memcpy(&u64_2, src + i, 8);
6697-
u64_1 = _swab_64_(u64_1);
6698-
u64_2 = _swab_64_(u64_2);
6699-
memcpy(outp + j - 8, &u64_2, 8);
6700-
memcpy(outp + i, &u64_1, 8);
6712+
S_memcpy(&u64_1, src + j - 8, 8);
6713+
S_memcpy(&u64_2, src + i, 8);
6714+
u64_1 = S_bswap64(u64_1);
6715+
u64_2 = S_bswap64(u64_2);
6716+
S_memcpy(outp + j - 8, &u64_2, 8);
6717+
S_memcpy(outp + i, &u64_1, 8);
67016718
i += 8;
67026719
j -= 8;
67036720
}
67046721

67056722
if (j - i >= 8) {
6706-
memcpy(&u32_1, src + j - 4, 4);
6707-
memcpy(&u32_2, src + i, 4);
6708-
u32_1 = _swab_32_(u32_1);
6709-
u32_2 = _swab_32_(u32_2);
6710-
memcpy(outp + j - 4, &u32_2, 4);
6711-
memcpy(outp + i, &u32_1, 4);
6723+
S_memcpy(&u32_1, src + j - 4, 4);
6724+
S_memcpy(&u32_2, src + i, 4);
6725+
u32_1 = S_bswap32(u32_1);
6726+
u32_2 = S_bswap32(u32_2);
6727+
S_memcpy(outp + j - 4, &u32_2, 4);
6728+
S_memcpy(outp + i, &u32_1, 4);
67126729
i += 4;
67136730
j -= 4;
67146731
}
67156732
#else
67166733
while (j - i >= 8) {
6717-
memcpy(&u32_1, src + j - 4, 4);
6718-
memcpy(&u32_2, src + i, 4);
6719-
u32_1 = _swab_32_(u32_1);
6720-
u32_2 = _swab_32_(u32_2);
6721-
memcpy(outp + j - 4, &u32_2, 4);
6722-
memcpy(outp + i, &u32_1, 4);
6734+
S_memcpy(&u32_1, src + j - 4, 4);
6735+
S_memcpy(&u32_2, src + i, 4);
6736+
u32_1 = S_bswap32(u32_1);
6737+
u32_2 = S_bswap32(u32_2);
6738+
S_memcpy(outp + j - 4, &u32_2, 4);
6739+
S_memcpy(outp + i, &u32_1, 4);
67236740
i += 4;
67246741
j -= 4;
67256742
}
67266743
#endif
67276744
if (j - i >= 4) {
6728-
memcpy(&u16_1, src + j - 2, 2);
6729-
memcpy(&u16_2, src + i, 2);
6730-
u16_1 = _swab_16_(u16_1);
6731-
u16_2 = _swab_16_(u16_2);
6732-
memcpy(outp + j - 2, &u16_2, 2);
6733-
memcpy(outp + i, &u16_1, 2);
6745+
S_memcpy(&u16_1, src + j - 2, 2);
6746+
S_memcpy(&u16_2, src + i, 2);
6747+
u16_1 = S_bswap16(u16_1);
6748+
u16_2 = S_bswap16(u16_2);
6749+
S_memcpy(outp + j - 2, &u16_2, 2);
6750+
S_memcpy(outp + i, &u16_1, 2);
67346751
i += 2;
67356752
j -= 2;
67366753
}
@@ -6755,7 +6772,8 @@ PP_wrapped(pp_reverse, 0, 1)
67556772
/* The traditional way, operate on the current byte buffer */
67566773
if (DO_UTF8(TARG)) { /* first reverse each character */
67576774
char *down;
6758-
U8* s = (U8*)SvPVX(TARG);
6775+
assert(SvPVX(TARG) == up);
6776+
U8* s = (U8*)up;
67596777
const U8* send = (U8*)(s + len);
67606778
while (s < send) {
67616779
if (UTF8_IS_INVARIANT(*s)) {
@@ -6780,51 +6798,51 @@ PP_wrapped(pp_reverse, 0, 1)
67806798
}
67816799
STRLEN i = 0;
67826800
STRLEN j = len;
6783-
uint32_t u32_1, u32_2;
6784-
uint16_t u16_1, u16_2;
6801+
U32 u32_1, u32_2;
6802+
U16 u16_1, u16_2;
67856803
/* Reverse the buffer in place, in chunks where possible */
67866804
#ifdef HAS_QUAD
6787-
uint64_t u64_1, u64_2;
6805+
U64 u64_1, u64_2;
67886806
while (j - i >= 16) {
6789-
memcpy(&u64_1, up + j - 8, 8);
6790-
memcpy(&u64_2, up + i, 8);
6791-
u64_1 = _swab_64_(u64_1);
6792-
u64_2 = _swab_64_(u64_2);
6793-
memcpy(up + j - 8, &u64_2, 8);
6794-
memcpy(up + i, &u64_1, 8);
6807+
S_memcpy(&u64_1, up + j - 8, 8);
6808+
S_memcpy(&u64_2, up + i, 8);
6809+
u64_1 = S_bswap64(u64_1);
6810+
u64_2 = S_bswap64(u64_2);
6811+
S_memcpy(up + j - 8, &u64_2, 8);
6812+
S_memcpy(up + i, &u64_1, 8);
67956813
i += 8;
67966814
j -= 8;
67976815
}
67986816

67996817
if (j - i >= 8) {
6800-
memcpy(&u32_1, up + j - 4, 4);
6801-
memcpy(&u32_2, up + i, 4);
6802-
u32_1 = _swab_32_(u32_1);
6803-
u32_2 = _swab_32_(u32_2);
6804-
memcpy(up + j - 4, &u32_2, 4);
6805-
memcpy(up + i, &u32_1, 4);
6818+
S_memcpy(&u32_1, up + j - 4, 4);
6819+
S_memcpy(&u32_2, up + i, 4);
6820+
u32_1 = S_bswap32(u32_1);
6821+
u32_2 = S_bswap32(u32_2);
6822+
S_memcpy(up + j - 4, &u32_2, 4);
6823+
S_memcpy(up + i, &u32_1, 4);
68066824
i += 4;
68076825
j -= 4;
68086826
}
68096827
#else
68106828
while (j - i >= 8) {
6811-
memcpy(&u32_1, up + j - 4, 4);
6812-
memcpy(&u32_2, up + i, 4);
6813-
u32_1 = _swab_32_(u32_1);
6814-
u32_2 = _swab_32_(u32_2);
6815-
memcpy(up + j - 4, &u32_2, 4);
6816-
memcpy(up + i, &u32_1, 4);
6829+
S_memcpy(&u32_1, up + j - 4, 4);
6830+
S_memcpy(&u32_2, up + i, 4);
6831+
u32_1 = S_bswap32(u32_1);
6832+
u32_2 = S_bswap32(u32_2);
6833+
S_memcpy(up + j - 4, &u32_2, 4);
6834+
S_memcpy(up + i, &u32_1, 4);
68176835
i += 4;
68186836
j -= 4;
68196837
}
68206838
#endif
68216839
if (j - i >= 4) {
6822-
memcpy(&u16_1, up + j - 2, 2);
6823-
memcpy(&u16_2, up + i, 2);
6824-
u16_1 = _swab_16_(u16_1);
6825-
u16_2 = _swab_16_(u16_2);
6826-
memcpy(up + j - 2, &u16_2, 2);
6827-
memcpy(up + i, &u16_1, 2);
6840+
S_memcpy(&u16_1, up + j - 2, 2);
6841+
S_memcpy(&u16_2, up + i, 2);
6842+
u16_1 = S_bswap16(u16_1);
6843+
u16_2 = S_bswap16(u16_2);
6844+
S_memcpy(up + j - 2, &u16_2, 2);
6845+
S_memcpy(up + i, &u16_1, 2);
68286846
i += 2;
68296847
j -= 2;
68306848
}
@@ -6843,6 +6861,11 @@ PP_wrapped(pp_reverse, 0, 1)
68436861
RETURN;
68446862
}
68456863

6864+
#undef S_memcpy
6865+
#undef S_bswap16
6866+
#undef S_bswap32
6867+
#undef S_bswap64
6868+
68466869
PP_wrapped(pp_split,
68476870
( (PL_op->op_private & OPpSPLIT_ASSIGN)
68486871
&& (PL_op->op_flags & OPf_STACKED))
@@ -8179,6 +8202,18 @@ PP(pp_is_tainted)
81798202
return NORMAL;
81808203
}
81818204

8205+
#ifdef _MSC_VER
8206+
/* this pragma cannot be pushed/popped to restore whatever the cl.exe command line selected, so it is only enabled here, at the end of the file */
8207+
# pragma intrinsic(memcpy)
8208+
8209+
void *
8210+
S_memcpy(void *dest, const void *src, size_t count)
8211+
{
8212+
return memcpy(dest, src, count);
8213+
}
8214+
8215+
#endif
8216+
81828217
/*
81838218
* ex: set ts=8 sts=4 sw=4 et:
81848219
*/

0 commit comments

Comments
 (0)