diff --git a/CMakeLists.txt b/CMakeLists.txt index 3ad1962274..d5b51477db 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,27 +17,35 @@ include(cmake/macros.cmake) message(STATUS "CMAKE_GENERATOR=${CMAKE_GENERATOR}") # Build options -option(VERBOSE_BUILD "Verbose Build" OFF) -option(FEATURE_FFI "Enable FFI" ON) -option(FEATURE_THREADED_FFI "Enable Threaded (running in another thread) FFI" ON) -option(FEATURE_MESSAGE_COUNT "Enable the option to count messages, only valid for StackVM" OFF) -option(FEATURE_NETWORK "Enable network and sockets" ON) -option(FEATURE_LIB_SDL2 "Build SDL2 support" ON) -option(FEATURE_LIB_CAIRO "Build Cairo support" ON) -option(FEATURE_LIB_FREETYPE2 "Build freetype2 support" ON) -option(FEATURE_LIB_GIT2 "Build LibGit2 support" ON) -option(FEATURE_LIB_PTHREADW32 "Windows only, link to win32 version of pthread" OFF) -option(GENERATE_VMMAKER "If it generates the VMMaker image" ON) -option(GENERATE_SOURCES "If it generates the C sources" ON) -option(ALWAYS_INTERACTIVE "Be interactive by default" OFF) -option(BUILD_BUNDLE "Builds a bundle with all dependencies" ON) -option(FEATURE_COMPILE_GNUISATION "Use gcc gnu extensions to compile the VM" ON) -option(PHARO_DEPENDENCIES_PREFER_DOWNLOAD_BINARIES "Prefer downloading dependencies" OFF) -option(FEATURE_COMPILE_INLINE_MEMORY_ACCESSORS "Use inline memory accessors instead of macros" ON) -option(PHARO_VM_IN_WORKER_THREAD "Have support for pharo running in a different thread that the main one" ON) -option(DEPENDENCIES_FORCE_BUILD "Force build libraries" OFF) -option(BUILD_WITH_GRAPHVIZ "Generate dependency graphs" ON) -option(VERSION_UPDATE_FROM_GIT "Extract version information from git tags. Default to true. Follow vX.Y.Z-suffix" TRUE) +option(VERBOSE_BUILD "Verbose Build" OFF) +option(FEATURE_FFI "Enable FFI" ON) +option(FEATURE_THREADED_FFI "Enable Threaded (running in another thread) FFI" ON) +option(FEATURE_MESSAGE_COUNT "Enable the option to count messages, only valid for StackVM" OFF) +option(FEATURE_NETWORK "Enable network and sockets" ON) +option(FEATURE_JIT_SIMD "Use SIMD support in JIT compilation when available" ON) +option(FEATURE_LIB_SDL2 "Build SDL2 support" ON) +option(FEATURE_LIB_CAIRO "Build Cairo support" ON) +option(FEATURE_LIB_FREETYPE2 "Build freetype2 support" ON) +option(FEATURE_LIB_GIT2 "Build LibGit2 support" ON) +option(FEATURE_LIB_PTHREADW32 "Windows only, link to win32 version of pthread" OFF) +option(GENERATE_VMMAKER "If it generates the VMMaker image" ON) +option(GENERATE_SOURCES "If it generates the C sources" ON) +option(ALWAYS_INTERACTIVE "Be interactive by default" OFF) +option(BUILD_BUNDLE "Builds a bundle with all dependencies" ON) +option(FEATURE_COMPILE_GNUISATION + "Use gcc gnu extensions to compile the VM" ON) +option(FEATURE_COMPILE_INLINE_MEMORY_ACCESSORS + "Use inline memory accessors instead of macros" ON) +option(PHARO_DEPENDENCIES_PREFER_DOWNLOAD_BINARIES + "Prefer downloading dependencies" OFF) +option(PHARO_VM_IN_WORKER_THREAD + "Run the VM in a thread different that the main" ON) +option(DEPENDENCIES_FORCE_BUILD + "Force build libraries" OFF) +option(BUILD_WITH_GRAPHVIZ + "Generate dependency graphs" ON) +option(VERSION_UPDATE_FROM_GIT + "Extract version information from git tags. Default to true. Follow vX.Y.Z-suffix" ON) set(APPNAME "Pharo" CACHE STRING "VM Application name") @@ -248,10 +256,6 @@ else() set(GENERATED_SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR} CACHE STRING "Source directory where to find the generated source. Default value is CMAKE_CURRENT_BINARY_DIR") endif() -if (${FEATURE_COMPILE_INLINE_MEMORY_ACCESSORS}) - add_compile_definitions(USE_INLINE_MEMORY_ACCESSORS=1) -endif() - set(PLUGIN_GENERATED_FILES ${GENERATED_SOURCE_DIR}/generated/plugins/src/FilePlugin/FilePlugin.c) @@ -470,6 +474,14 @@ set(VM_SOURCES add_executable(${VM_EXECUTABLE_NAME} ${VM_FRONTEND_APPLICATION_TYPE} ${VM_FRONTEND_SOURCES}) addLibraryWithRPATH(${VM_LIBRARY_NAME} ${VM_SOURCES}) +if (${FEATURE_COMPILE_INLINE_MEMORY_ACCESSORS}) + target_compile_definitions(${VM_LIBRARY_NAME} PRIVATE USE_INLINE_MEMORY_ACCESSORS=1) +endif() + +if(${FEATURE_JIT_SIMD}) + target_compile_definitions(${VM_LIBRARY_NAME} PRIVATE FEATURE_JIT_SIMD=1) +endif() + # # LibFFI # diff --git a/smalltalksrc/Melchor/VMBasicConstants.class.st b/smalltalksrc/Melchor/VMBasicConstants.class.st index 8ff11c859e..ac549ee350 100644 --- a/smalltalksrc/Melchor/VMBasicConstants.class.st +++ b/smalltalksrc/Melchor/VMBasicConstants.class.st @@ -99,6 +99,7 @@ VMBasicConstants class >> namesDefinedAtCompileTime [ will be emitted within #if defined(ANameDefinedAtCompileTime)...#endif." ^#( VMBIGENDIAN IMMUTABILITY + FEATURE_JIT_SIMD STACKVM COGVM SPURVM PharoVM "Pharo vs Squeak" CheckRememberedInTrampoline "IMMUTABILITY" diff --git a/smalltalksrc/VMMaker/CogARMv8Compiler.class.st b/smalltalksrc/VMMaker/CogARMv8Compiler.class.st index 755eb70b68..2207f75e8c 100644 --- a/smalltalksrc/VMMaker/CogARMv8Compiler.class.st +++ b/smalltalksrc/VMMaker/CogARMv8Compiler.class.st @@ -3910,15 +3910,19 @@ CogARMv8Compiler >> fcmpFType: ftype leftRegister: firstRegister rightRegister: { #category : 'concretize' } CogARMv8Compiler >> fillFrom: startMemoryAddr until: endMemoryAddr with: fillReg usingVr: vectorRegister [ - | fillLoop | - cogit DupS: 64 R: fillReg Vr: vectorRegister. - "St1 copies data in 128-bit chunks. This may exceed the size of an object (that only needs to be a multiple of 64 bits). - This overflow is not an issue, however. The reason for that is that objects are allocated sequentially in the Eden space - using a bumpAllocator, which means that the overflowing bits won't overwrite another object. This is true even when reaching - the end of the Eden space, as after it we reserve additional headroom." - fillLoop := cogit St1S: 64 Vr: vectorRegister R: startMemoryAddr Mw: 16. - cogit CmpR: startMemoryAddr R: endMemoryAddr. - cogit JumpAbove: fillLoop. + + self cppIf: #FEATURE_JIT_SIMD + ifTrue: [ | fillLoop | + cogit DupS: 64 R: fillReg Vr: vectorRegister. + "St1 copies data in 128-bit chunks. This may exceed the size of an object (that only needs to be a multiple of 64 bits). + This overflow is not an issue, however. The reason for that is that objects are allocated sequentially in the Eden space + using a bumpAllocator, which means that the overflowing bits won't overwrite another object. This is true even when reaching + the end of the Eden space, as after it we reserve additional headroom." + fillLoop := cogit St1S: 64 Vr: vectorRegister R: startMemoryAddr Mw: 16. + cogit CmpR: startMemoryAddr R: endMemoryAddr. + cogit JumpAbove: fillLoop. + ] ifFalse: [ + super fillFrom: startMemoryAddr until: endMemoryAddr with: fillReg usingVr: vectorRegister ]. ^0 "Necessary to keep Slang happy" ] diff --git a/smalltalksrc/VMMaker/CogX64Compiler.class.st b/smalltalksrc/VMMaker/CogX64Compiler.class.st index e90cda4299..9a5a2e55b1 100644 --- a/smalltalksrc/VMMaker/CogX64Compiler.class.st +++ b/smalltalksrc/VMMaker/CogX64Compiler.class.st @@ -433,6 +433,13 @@ CogX64Compiler >> canZeroExtend [ ^true ] +{ #category : 'testing' } +CogX64Compiler >> checkIs32bit: offset [ + + (offset between: -2147483648 and: 2147483647) ifFalse: [ + self error: 'Cannot jump to distances larger than 32 bits' ] +] + { #category : 'accessing' } CogX64Compiler >> cmpC32RTempByteSize [ ^5 @@ -3596,21 +3603,24 @@ CogX64Compiler >> dispatchConcretizeProcessorSpecific [ { #category : 'concretize' } CogX64Compiler >> fillFrom: startMemoryAddr until: endMemoryAddr with: fillReg usingVr: vectorRegister [ - | fillLoop | - - cogit MoveR: fillReg Mw: 0 r: startMemoryAddr. - cogit AddCq: 8 R: startMemoryAddr. - - cogit AndCq: 16rFFFFFFFFFFFFFFF0 R: startMemoryAddr. - cogit DupS: 64 R: fillReg Vr: vectorRegister. - "St1 copies data in 128-bit chunks. This may exceed the size of an object (that only needs to be a multiple of 64 bits). - This overflow is not an issue, however. The reason for that is that objects are allocated sequentially in the Eden space - using a bumpAllocator, which means that the overflowing bits won't overwrite another object. This is true even when reaching - the end of the Eden space, as after it we reserve additional headroom." - fillLoop := cogit AlignedSt1S: 64 Vr: vectorRegister R: startMemoryAddr Mw: 16. - cogit CmpR: startMemoryAddr R: endMemoryAddr. - cogit JumpAbove: fillLoop. + self cppIf: #FEATURE_JIT_SIMD + ifTrue: [ | fillLoop | + cogit MoveR: fillReg Mw: 0 r: startMemoryAddr. + cogit AddCq: 8 R: startMemoryAddr. + + cogit AndCq: 16rFFFFFFFFFFFFFFF0 R: startMemoryAddr. + + cogit DupS: 64 R: fillReg Vr: vectorRegister. + "St1 copies data in 128-bit chunks. This may exceed the size of an object (that only needs to be a multiple of 64 bits). + This overflow is not an issue, however. The reason for that is that objects are allocated sequentially in the Eden space + using a bumpAllocator, which means that the overflowing bits won't overwrite another object. This is true even when reaching + the end of the Eden space, as after it we reserve additional headroom." + fillLoop := cogit AlignedSt1S: 64 Vr: vectorRegister R: startMemoryAddr Mw: 16. + cogit CmpR: startMemoryAddr R: endMemoryAddr. + cogit JumpAbove: fillLoop. + ] ifFalse: [ + super fillFrom: startMemoryAddr until: endMemoryAddr with: fillReg usingVr: vectorRegister ]. ^0 "Necessary to keep Slang happy" ] @@ -4276,13 +4286,6 @@ CogX64Compiler >> is32BitSignedImmediate: a64BitUnsignedOperand [ inSmalltalk: [((a64BitUnsignedOperand >> 32) signedIntFromLong + 1 bitXor: 1) = (a64BitUnsignedOperand >> 31 bitAnd: 1)] ] -{ #category : 'testing' } -CogX64Compiler >> checkIs32bit: offset [ - - (offset between: -2147483648 and: 2147483647) ifFalse: [ - self error: 'Cannot jump to distances larger than 32 bits' ] -] - { #category : 'testing' } CogX64Compiler >> isAddressRelativeToVarBase: varAddress [ "Support for addressing variables off the dedicated VarBaseReg. Allow for 1Mb of variables. diff --git a/smalltalksrc/VMMaker/Cogit.class.st b/smalltalksrc/VMMaker/Cogit.class.st index 7f0e74b5fd..4756c0dfb2 100644 --- a/smalltalksrc/VMMaker/Cogit.class.st +++ b/smalltalksrc/VMMaker/Cogit.class.st @@ -1588,6 +1588,8 @@ Cogit >> AddcR: reg1 R: reg2 [ { #category : 'abstract instructions' } Cogit >> AlignedSt1S: aSize Vr: aVectorRegister R: aMemoryRegister Mw: anOffset [ + + ^ self gen: AlignedSt1VrRMw operand: aSize @@ -1877,6 +1879,7 @@ Cogit >> DivRs: dpreg1 Rs: dpreg2 [ { #category : 'abstract instructions' } Cogit >> DupS: aSize R: aGeneralRegister Vr: aVectorRegister [ + ^ self gen: DupRVr operand: aSize @@ -2771,6 +2774,8 @@ Cogit >> SqrtRs: dpreg [ { #category : 'abstract instructions' } Cogit >> St1S: aSize Vr: aVectorRegister R: aMemoryRegister Mw: anOffset [ + + ^ self gen: St1VrRMw operand: aSize diff --git a/smalltalksrc/VMMaker/StackToRegisterMappingCogit.class.st b/smalltalksrc/VMMaker/StackToRegisterMappingCogit.class.st index cedcb373b9..c64e63becf 100644 --- a/smalltalksrc/VMMaker/StackToRegisterMappingCogit.class.st +++ b/smalltalksrc/VMMaker/StackToRegisterMappingCogit.class.st @@ -765,6 +765,7 @@ StackToRegisterMappingCogit >> allocateRegNotConflictingWith: regMask [ StackToRegisterMappingCogit >> allocateVectorRegForStackEntryAt: index notConflictingWith: regMask [ "If the stack entry is already in a register not conflicting with regMask, answers it, else allocate a new register not conflicting with reg mask" + | stackEntry mask | @@ -792,6 +793,8 @@ StackToRegisterMappingCogit >> allocateVectorRegForStackEntryAt: index notConfli { #category : 'simulation stack' } StackToRegisterMappingCogit >> allocateVectorRegNotConflictingWith: regMask [ + + | reg | "if there's a free register, use it" reg := backEnd availableVectorRegisterOrNoneFor: (self liveVectorRegisters bitOr: regMask). @@ -1368,6 +1371,7 @@ StackToRegisterMappingCogit >> freeAnyRegNotConflictingWith: regMask [ StackToRegisterMappingCogit >> genAddFloat32Vector [ | array1Reg array2Reg sumReg | + objectMemory wordSize = 4 ifTrue: [ ^ EncounteredUnknownBytecode ]. array2Reg := self allocateVectorRegForStackEntryAt: 0 notConflictingWith: 0. @@ -1387,6 +1391,7 @@ StackToRegisterMappingCogit >> genAddFloat32Vector [ StackToRegisterMappingCogit >> genAddFloat64Vector [ | array1Reg array2Reg sumReg | + objectMemory wordSize = 4 ifTrue: [ ^ EncounteredUnknownBytecode ]. array2Reg := self allocateVectorRegForStackEntryAt: 0 notConflictingWith: 0. @@ -1938,52 +1943,25 @@ StackToRegisterMappingCogit >> genLoadTemp: objectIndex in: destReg [ { #category : 'mapped inline primitive generators' } StackToRegisterMappingCogit >> genMappedInlinePrimitive: primIndex [ - "SistaV1: 236 11101100 iiiiiiii callMappedInlinedPrimitive" - - "Number of arguments: - 0-49 nullary - 50-99 unary - 100-149 binary - 150-199 trinary - 200-255 variable" - - "Specification: - 50 EnsureEnoughWords - literal which is a Smi => ret value is receiver - 150 immCheckPointerAt:put: - pointer object (Fixed sized or not) and not a context, Smi, Anything => arg2 (1-based, optimised if arg1 is a constant) - 151 immCheckStoreCheckPointerAt:put: - pointer object (Fixed sized or not) and not a context, Smi, Anything => arg2 (1-based, optimised if arg1 is a constant) - 152 immCheckMaybeContextPointerAt:put: - pointer object (Fixed sized or not), Smi, Anything => arg2 (1-based, optimised if arg1 is a constant) - 153 immCheckMaybeContextStoreCheckPointerAt:put: - pointer object (Fixed sized or not), Smi, Anything => arg2 (1-based, optimised if arg1 is a constant) - 154 immCheckByteAt:put: - byte object, Smi, 8 bits unsigned Smi => arg2 (1-based, optimised if arg1 is a constant) - 155 immCheckShortAt:put: - short object, Smi, 16 bits unsigned Smi => arg2 (1-based, optimised if arg1 is a constant) - 156 immCheckWordAt:put: - word object, Smi, 32 bits unsigned Smi => arg2 (1-based, optimised if arg1 is a constant) - 157 immCheckDoubleWordAt:put: - double word object, Smi, 64 bits unsigned Smi or LargePositiveInteger => arg2 (1-based, optimised if arg1 is a constant) - 250 directCall - method to call on top of stack => (variable number of parameters)" - - | result | - result := primIndex - caseOf: { - ([ 0 ] -> [ self genAddFloat64Vector ]). - ([ 1 ] -> [ self genPushFloat64ArrayToRegister ]). - ([ 2 ] -> [ self genStoreFloat64RegisterIntoArray ]). - ([ 3 ] -> [ self genAddFloat32Vector ]). - ([ 4 ] -> [ self genPushFloat32ArrayToRegister ]). - ([ 5 ] -> [ self genStoreFloat32RegisterIntoArray ]). - ([ 6 ] -> [ self genSubFloat64Vector ]) } - otherwise: [ EncounteredUnknownBytecode ]. - "These primitives may end up calling a message send if preconditions are not met. Thus, the + self + cppIf: #FEATURE_JIT_SIMD + ifTrue: [ + | result | + result := primIndex + caseOf: { + ([ 0 ] -> [ self genAddFloat64Vector ]). + ([ 1 ] -> [ self genPushFloat64ArrayToRegister ]). + ([ 2 ] -> [ self genStoreFloat64RegisterIntoArray ]). + ([ 3 ] -> [ self genAddFloat32Vector ]). + ([ 4 ] -> [ self genPushFloat32ArrayToRegister ]). + ([ 5 ] -> [ self genStoreFloat32RegisterIntoArray ]). + ([ 6 ] -> [ self genSubFloat64Vector ]) } + otherwise: [ EncounteredUnknownBytecode ]. + "These primitives may end up calling a message send if preconditions are not met. Thus, the bytecode needs to be annotated with `isMapped`, and each primitive must be annotated" - self annotateBytecode: self Label. - ^ result + self annotateBytecode: self Label. + ^ result ] + ifFalse: [ self error: 'VM built without SIMD support' ] ] { #category : 'bytecode generator support' } @@ -2163,6 +2141,7 @@ StackToRegisterMappingCogit >> genPushFloat32ArrayToRegister [ | arrayReg indexReg vectorReg | + objectMemory wordSize = 4 ifTrue: [ ^ EncounteredUnknownBytecode ]. arrayReg := self allocateRegForStackEntryAt: 0 notConflictingWith: 0. @@ -2193,6 +2172,7 @@ StackToRegisterMappingCogit >> genPushFloat64ArrayToRegister [ | arrayReg indexReg vectorReg | + objectMemory wordSize = 4 ifTrue: [ ^ EncounteredUnknownBytecode ]. arrayReg := self allocateRegForStackEntryAt: 0 notConflictingWith: 0. @@ -2792,6 +2772,7 @@ StackToRegisterMappingCogit >> genStaticallyResolvedSpecialSelectorComparison [ StackToRegisterMappingCogit >> genStoreFloat32RegisterIntoArray [ | arrayReg indexReg vectorReg | + objectMemory wordSize = 4 ifTrue: [ ^ EncounteredUnknownBytecode ]. arrayReg := self allocateRegForStackEntryAt: 0 notConflictingWith: 0. @@ -2821,6 +2802,7 @@ StackToRegisterMappingCogit >> genStoreFloat32RegisterIntoArray [ StackToRegisterMappingCogit >> genStoreFloat64RegisterIntoArray [ | arrayReg indexReg vectorReg | + objectMemory wordSize = 4 ifTrue: [ ^ EncounteredUnknownBytecode ]. arrayReg := self allocateRegForStackEntryAt: 0 notConflictingWith: 0. @@ -3010,6 +2992,7 @@ StackToRegisterMappingCogit >> genStorePop: popBoolean TemporaryVariable: tempIn StackToRegisterMappingCogit >> genSubFloat64Vector [ | array1Reg array2Reg subReg | + objectMemory wordSize = 4 ifTrue: [ ^ EncounteredUnknownBytecode ]. array1Reg := self allocateVectorRegForStackEntryAt: 0 notConflictingWith: 0.