Skip to content

Add FEATURE_JIT_SIMD option #966

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: pharo-12
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 37 additions & 25 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,27 +17,35 @@ include(cmake/macros.cmake)
message(STATUS "CMAKE_GENERATOR=${CMAKE_GENERATOR}")

# Build options
option(VERBOSE_BUILD "Verbose Build" OFF)
option(FEATURE_FFI "Enable FFI" ON)
option(FEATURE_THREADED_FFI "Enable Threaded (running in another thread) FFI" ON)
option(FEATURE_MESSAGE_COUNT "Enable the option to count messages, only valid for StackVM" OFF)
option(FEATURE_NETWORK "Enable network and sockets" ON)
option(FEATURE_LIB_SDL2 "Build SDL2 support" ON)
option(FEATURE_LIB_CAIRO "Build Cairo support" ON)
option(FEATURE_LIB_FREETYPE2 "Build freetype2 support" ON)
option(FEATURE_LIB_GIT2 "Build LibGit2 support" ON)
option(FEATURE_LIB_PTHREADW32 "Windows only, link to win32 version of pthread" OFF)
option(GENERATE_VMMAKER "If it generates the VMMaker image" ON)
option(GENERATE_SOURCES "If it generates the C sources" ON)
option(ALWAYS_INTERACTIVE "Be interactive by default" OFF)
option(BUILD_BUNDLE "Builds a bundle with all dependencies" ON)
option(FEATURE_COMPILE_GNUISATION "Use gcc gnu extensions to compile the VM" ON)
option(PHARO_DEPENDENCIES_PREFER_DOWNLOAD_BINARIES "Prefer downloading dependencies" OFF)
option(FEATURE_COMPILE_INLINE_MEMORY_ACCESSORS "Use inline memory accessors instead of macros" ON)
option(PHARO_VM_IN_WORKER_THREAD "Have support for pharo running in a different thread that the main one" ON)
option(DEPENDENCIES_FORCE_BUILD "Force build libraries" OFF)
option(BUILD_WITH_GRAPHVIZ "Generate dependency graphs" ON)
option(VERSION_UPDATE_FROM_GIT "Extract version information from git tags. Default to true. Follow vX.Y.Z-suffix" TRUE)
option(VERBOSE_BUILD "Verbose Build" OFF)
option(FEATURE_FFI "Enable FFI" ON)
option(FEATURE_THREADED_FFI "Enable Threaded (running in another thread) FFI" ON)
option(FEATURE_MESSAGE_COUNT "Enable the option to count messages, only valid for StackVM" OFF)
option(FEATURE_NETWORK "Enable network and sockets" ON)
option(FEATURE_JIT_SIMD "Use SIMD support in JIT compilation when available" ON)
option(FEATURE_LIB_SDL2 "Build SDL2 support" ON)
option(FEATURE_LIB_CAIRO "Build Cairo support" ON)
option(FEATURE_LIB_FREETYPE2 "Build freetype2 support" ON)
option(FEATURE_LIB_GIT2 "Build LibGit2 support" ON)
option(FEATURE_LIB_PTHREADW32 "Windows only, link to win32 version of pthread" OFF)
option(GENERATE_VMMAKER "If it generates the VMMaker image" ON)
option(GENERATE_SOURCES "If it generates the C sources" ON)
option(ALWAYS_INTERACTIVE "Be interactive by default" OFF)
option(BUILD_BUNDLE "Builds a bundle with all dependencies" ON)
option(FEATURE_COMPILE_GNUISATION
"Use gcc gnu extensions to compile the VM" ON)
option(FEATURE_COMPILE_INLINE_MEMORY_ACCESSORS
"Use inline memory accessors instead of macros" ON)
option(PHARO_DEPENDENCIES_PREFER_DOWNLOAD_BINARIES
"Prefer downloading dependencies" OFF)
option(PHARO_VM_IN_WORKER_THREAD
"Run the VM in a thread different that the main" ON)
option(DEPENDENCIES_FORCE_BUILD
"Force build libraries" OFF)
option(BUILD_WITH_GRAPHVIZ
"Generate dependency graphs" ON)
option(VERSION_UPDATE_FROM_GIT
"Extract version information from git tags. Default to true. Follow vX.Y.Z-suffix" ON)
Comment on lines +20 to +48
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I formatted a bit this code, the actually important line is line 25 adding

FEATURE_JIT_SIMD

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Did you formatted or unformatted the code 😆



set(APPNAME "Pharo" CACHE STRING "VM Application name")
Expand Down Expand Up @@ -248,10 +256,6 @@ else()
set(GENERATED_SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR} CACHE STRING "Source directory where to find the generated source. Default value is CMAKE_CURRENT_BINARY_DIR")
endif()

if (${FEATURE_COMPILE_INLINE_MEMORY_ACCESSORS})
add_compile_definitions(USE_INLINE_MEMORY_ACCESSORS=1)
endif()

Comment on lines -251 to -254
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I cleaned a bit these compile time definitions to make them local to the VM library compilation and not global options to the entire project.

set(PLUGIN_GENERATED_FILES
${GENERATED_SOURCE_DIR}/generated/plugins/src/FilePlugin/FilePlugin.c)

Expand Down Expand Up @@ -470,6 +474,14 @@ set(VM_SOURCES
add_executable(${VM_EXECUTABLE_NAME} ${VM_FRONTEND_APPLICATION_TYPE} ${VM_FRONTEND_SOURCES})
addLibraryWithRPATH(${VM_LIBRARY_NAME} ${VM_SOURCES})

if (${FEATURE_COMPILE_INLINE_MEMORY_ACCESSORS})
target_compile_definitions(${VM_LIBRARY_NAME} PRIVATE USE_INLINE_MEMORY_ACCESSORS=1)
endif()

if(${FEATURE_JIT_SIMD})
target_compile_definitions(${VM_LIBRARY_NAME} PRIVATE FEATURE_JIT_SIMD=1)
endif()

#
# LibFFI
#
Expand Down
1 change: 1 addition & 0 deletions smalltalksrc/Melchor/VMBasicConstants.class.st
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ VMBasicConstants class >> namesDefinedAtCompileTime [
will be emitted within #if defined(ANameDefinedAtCompileTime)...#endif."
^#( VMBIGENDIAN
IMMUTABILITY
FEATURE_JIT_SIMD
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be nice to move this VMBasicConstants class >> namesDefinedAtCompileTime somewhere else, it looks like a strange dependency.

STACKVM COGVM SPURVM
PharoVM "Pharo vs Squeak"
CheckRememberedInTrampoline "IMMUTABILITY"
Expand Down
22 changes: 13 additions & 9 deletions smalltalksrc/VMMaker/CogARMv8Compiler.class.st
Original file line number Diff line number Diff line change
Expand Up @@ -3910,15 +3910,19 @@ CogARMv8Compiler >> fcmpFType: ftype leftRegister: firstRegister rightRegister:
{ #category : 'concretize' }
CogARMv8Compiler >> fillFrom: startMemoryAddr until: endMemoryAddr with: fillReg usingVr: vectorRegister [
<inline: true>
| fillLoop |
cogit DupS: 64 R: fillReg Vr: vectorRegister.
"St1 copies data in 128-bit chunks. This may exceed the size of an object (that only needs to be a multiple of 64 bits).
This overflow is not an issue, however. The reason for that is that objects are allocated sequentially in the Eden space
using a bumpAllocator, which means that the overflowing bits won't overwrite another object. This is true even when reaching
the end of the Eden space, as after it we reserve additional headroom."
fillLoop := cogit St1S: 64 Vr: vectorRegister R: startMemoryAddr Mw: 16.
cogit CmpR: startMemoryAddr R: endMemoryAddr.
cogit JumpAbove: fillLoop.

self cppIf: #FEATURE_JIT_SIMD
ifTrue: [ | fillLoop |
cogit DupS: 64 R: fillReg Vr: vectorRegister.
"St1 copies data in 128-bit chunks. This may exceed the size of an object (that only needs to be a multiple of 64 bits).
This overflow is not an issue, however. The reason for that is that objects are allocated sequentially in the Eden space
using a bumpAllocator, which means that the overflowing bits won't overwrite another object. This is true even when reaching
the end of the Eden space, as after it we reserve additional headroom."
fillLoop := cogit St1S: 64 Vr: vectorRegister R: startMemoryAddr Mw: 16.
cogit CmpR: startMemoryAddr R: endMemoryAddr.
cogit JumpAbove: fillLoop.
] ifFalse: [
super fillFrom: startMemoryAddr until: endMemoryAddr with: fillReg usingVr: vectorRegister ].

^0 "Necessary to keep Slang happy"
]
Expand Down
45 changes: 24 additions & 21 deletions smalltalksrc/VMMaker/CogX64Compiler.class.st
Original file line number Diff line number Diff line change
Expand Up @@ -433,6 +433,13 @@ CogX64Compiler >> canZeroExtend [
^true
]

{ #category : 'testing' }
CogX64Compiler >> checkIs32bit: offset [

(offset between: -2147483648 and: 2147483647) ifFalse: [
self error: 'Cannot jump to distances larger than 32 bits' ]
]

{ #category : 'accessing' }
CogX64Compiler >> cmpC32RTempByteSize [
^5
Expand Down Expand Up @@ -3596,21 +3603,24 @@ CogX64Compiler >> dispatchConcretizeProcessorSpecific [
{ #category : 'concretize' }
CogX64Compiler >> fillFrom: startMemoryAddr until: endMemoryAddr with: fillReg usingVr: vectorRegister [
<inline: true>
| fillLoop |

cogit MoveR: fillReg Mw: 0 r: startMemoryAddr.
cogit AddCq: 8 R: startMemoryAddr.

cogit AndCq: 16rFFFFFFFFFFFFFFF0 R: startMemoryAddr.

cogit DupS: 64 R: fillReg Vr: vectorRegister.
"St1 copies data in 128-bit chunks. This may exceed the size of an object (that only needs to be a multiple of 64 bits).
This overflow is not an issue, however. The reason for that is that objects are allocated sequentially in the Eden space
using a bumpAllocator, which means that the overflowing bits won't overwrite another object. This is true even when reaching
the end of the Eden space, as after it we reserve additional headroom."
fillLoop := cogit AlignedSt1S: 64 Vr: vectorRegister R: startMemoryAddr Mw: 16.
cogit CmpR: startMemoryAddr R: endMemoryAddr.
cogit JumpAbove: fillLoop.
self cppIf: #FEATURE_JIT_SIMD
ifTrue: [ | fillLoop |
cogit MoveR: fillReg Mw: 0 r: startMemoryAddr.
cogit AddCq: 8 R: startMemoryAddr.

cogit AndCq: 16rFFFFFFFFFFFFFFF0 R: startMemoryAddr.

cogit DupS: 64 R: fillReg Vr: vectorRegister.
"St1 copies data in 128-bit chunks. This may exceed the size of an object (that only needs to be a multiple of 64 bits).
This overflow is not an issue, however. The reason for that is that objects are allocated sequentially in the Eden space
using a bumpAllocator, which means that the overflowing bits won't overwrite another object. This is true even when reaching
the end of the Eden space, as after it we reserve additional headroom."
fillLoop := cogit AlignedSt1S: 64 Vr: vectorRegister R: startMemoryAddr Mw: 16.
cogit CmpR: startMemoryAddr R: endMemoryAddr.
cogit JumpAbove: fillLoop.
] ifFalse: [
super fillFrom: startMemoryAddr until: endMemoryAddr with: fillReg usingVr: vectorRegister ].

^0 "Necessary to keep Slang happy"
]
Expand Down Expand Up @@ -4276,13 +4286,6 @@ CogX64Compiler >> is32BitSignedImmediate: a64BitUnsignedOperand [
inSmalltalk: [((a64BitUnsignedOperand >> 32) signedIntFromLong + 1 bitXor: 1) = (a64BitUnsignedOperand >> 31 bitAnd: 1)]
]

{ #category : 'testing' }
CogX64Compiler >> checkIs32bit: offset [

(offset between: -2147483648 and: 2147483647) ifFalse: [
self error: 'Cannot jump to distances larger than 32 bits' ]
]

Comment on lines -4279 to -4285
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This method has just been reordered by tonel...

{ #category : 'testing' }
CogX64Compiler >> isAddressRelativeToVarBase: varAddress [
"Support for addressing variables off the dedicated VarBaseReg. Allow for 1Mb of variables.
Expand Down
5 changes: 5 additions & 0 deletions smalltalksrc/VMMaker/Cogit.class.st
Original file line number Diff line number Diff line change
Expand Up @@ -1588,6 +1588,8 @@ Cogit >> AddcR: reg1 R: reg2 [

{ #category : 'abstract instructions' }
Cogit >> AlignedSt1S: aSize Vr: aVectorRegister R: aMemoryRegister Mw: anOffset [

<option: #FEATURE_JIT_SIMD>
^ self
gen: AlignedSt1VrRMw
operand: aSize
Expand Down Expand Up @@ -1877,6 +1879,7 @@ Cogit >> DivRs: dpreg1 Rs: dpreg2 [
{ #category : 'abstract instructions' }
Cogit >> DupS: aSize R: aGeneralRegister Vr: aVectorRegister [

<option: #FEATURE_JIT_SIMD>
^ self
gen: DupRVr
operand: aSize
Expand Down Expand Up @@ -2771,6 +2774,8 @@ Cogit >> SqrtRs: dpreg [

{ #category : 'abstract instructions' }
Cogit >> St1S: aSize Vr: aVectorRegister R: aMemoryRegister Mw: anOffset [

<option: #FEATURE_JIT_SIMD>
^ self
gen: St1VrRMw
operand: aSize
Expand Down
73 changes: 28 additions & 45 deletions smalltalksrc/VMMaker/StackToRegisterMappingCogit.class.st
Original file line number Diff line number Diff line change
Expand Up @@ -765,6 +765,7 @@ StackToRegisterMappingCogit >> allocateRegNotConflictingWith: regMask [
StackToRegisterMappingCogit >> allocateVectorRegForStackEntryAt: index notConflictingWith: regMask [
"If the stack entry is already in a register not conflicting with regMask, answers it,
else allocate a new register not conflicting with reg mask"
<option: #FEATURE_JIT_SIMD>
<var: #stackEntry type: #'CogSimStackEntry *'>
<returnTypeC: #'signed char'>
| stackEntry mask |
Expand Down Expand Up @@ -792,6 +793,8 @@ StackToRegisterMappingCogit >> allocateVectorRegForStackEntryAt: index notConfli

{ #category : 'simulation stack' }
StackToRegisterMappingCogit >> allocateVectorRegNotConflictingWith: regMask [

<option: #FEATURE_JIT_SIMD>
| reg |
"if there's a free register, use it"
reg := backEnd availableVectorRegisterOrNoneFor: (self liveVectorRegisters bitOr: regMask).
Expand Down Expand Up @@ -1368,6 +1371,7 @@ StackToRegisterMappingCogit >> freeAnyRegNotConflictingWith: regMask [
StackToRegisterMappingCogit >> genAddFloat32Vector [
| array1Reg array2Reg sumReg |

<option: #FEATURE_JIT_SIMD>
objectMemory wordSize = 4 ifTrue: [ ^ EncounteredUnknownBytecode ].

array2Reg := self allocateVectorRegForStackEntryAt: 0 notConflictingWith: 0.
Expand All @@ -1387,6 +1391,7 @@ StackToRegisterMappingCogit >> genAddFloat32Vector [
StackToRegisterMappingCogit >> genAddFloat64Vector [
| array1Reg array2Reg sumReg |

<option: #FEATURE_JIT_SIMD>
objectMemory wordSize = 4 ifTrue: [ ^ EncounteredUnknownBytecode ].

array2Reg := self allocateVectorRegForStackEntryAt: 0 notConflictingWith: 0.
Expand Down Expand Up @@ -1938,52 +1943,25 @@ StackToRegisterMappingCogit >> genLoadTemp: objectIndex in: destReg [
{ #category : 'mapped inline primitive generators' }
StackToRegisterMappingCogit >> genMappedInlinePrimitive: primIndex [

"SistaV1: 236 11101100 iiiiiiii callMappedInlinedPrimitive"

"Number of arguments:
0-49 nullary
50-99 unary
100-149 binary
150-199 trinary
200-255 variable"

"Specification:
50 EnsureEnoughWords
literal which is a Smi => ret value is receiver
150 immCheckPointerAt:put:
pointer object (Fixed sized or not) and not a context, Smi, Anything => arg2 (1-based, optimised if arg1 is a constant)
151 immCheckStoreCheckPointerAt:put:
pointer object (Fixed sized or not) and not a context, Smi, Anything => arg2 (1-based, optimised if arg1 is a constant)
152 immCheckMaybeContextPointerAt:put:
pointer object (Fixed sized or not), Smi, Anything => arg2 (1-based, optimised if arg1 is a constant)
153 immCheckMaybeContextStoreCheckPointerAt:put:
pointer object (Fixed sized or not), Smi, Anything => arg2 (1-based, optimised if arg1 is a constant)
154 immCheckByteAt:put:
byte object, Smi, 8 bits unsigned Smi => arg2 (1-based, optimised if arg1 is a constant)
155 immCheckShortAt:put:
short object, Smi, 16 bits unsigned Smi => arg2 (1-based, optimised if arg1 is a constant)
156 immCheckWordAt:put:
word object, Smi, 32 bits unsigned Smi => arg2 (1-based, optimised if arg1 is a constant)
157 immCheckDoubleWordAt:put:
double word object, Smi, 64 bits unsigned Smi or LargePositiveInteger => arg2 (1-based, optimised if arg1 is a constant)
250 directCall
method to call on top of stack => (variable number of parameters)"

| result |
result := primIndex
caseOf: {
([ 0 ] -> [ self genAddFloat64Vector ]).
([ 1 ] -> [ self genPushFloat64ArrayToRegister ]).
([ 2 ] -> [ self genStoreFloat64RegisterIntoArray ]).
([ 3 ] -> [ self genAddFloat32Vector ]).
([ 4 ] -> [ self genPushFloat32ArrayToRegister ]).
([ 5 ] -> [ self genStoreFloat32RegisterIntoArray ]).
([ 6 ] -> [ self genSubFloat64Vector ]) }
otherwise: [ EncounteredUnknownBytecode ].
"These primitives may end up calling a message send if preconditions are not met. Thus, the
self
cppIf: #FEATURE_JIT_SIMD
ifTrue: [
| result |
result := primIndex
caseOf: {
([ 0 ] -> [ self genAddFloat64Vector ]).
([ 1 ] -> [ self genPushFloat64ArrayToRegister ]).
([ 2 ] -> [ self genStoreFloat64RegisterIntoArray ]).
([ 3 ] -> [ self genAddFloat32Vector ]).
([ 4 ] -> [ self genPushFloat32ArrayToRegister ]).
([ 5 ] -> [ self genStoreFloat32RegisterIntoArray ]).
([ 6 ] -> [ self genSubFloat64Vector ]) }
otherwise: [ EncounteredUnknownBytecode ].
"These primitives may end up calling a message send if preconditions are not met. Thus, the
bytecode needs to be annotated with `isMapped`, and each primitive must be annotated"
self annotateBytecode: self Label.
^ result
self annotateBytecode: self Label.
^ result ]
ifFalse: [ self error: 'VM built without SIMD support' ]
]

{ #category : 'bytecode generator support' }
Expand Down Expand Up @@ -2163,6 +2141,7 @@ StackToRegisterMappingCogit >> genPushFloat32ArrayToRegister [

| arrayReg indexReg vectorReg |

<option: #FEATURE_JIT_SIMD>
objectMemory wordSize = 4 ifTrue: [ ^ EncounteredUnknownBytecode ].

arrayReg := self allocateRegForStackEntryAt: 0 notConflictingWith: 0.
Expand Down Expand Up @@ -2193,6 +2172,7 @@ StackToRegisterMappingCogit >> genPushFloat64ArrayToRegister [

| arrayReg indexReg vectorReg |

<option: #FEATURE_JIT_SIMD>
objectMemory wordSize = 4 ifTrue: [ ^ EncounteredUnknownBytecode ].

arrayReg := self allocateRegForStackEntryAt: 0 notConflictingWith: 0.
Expand Down Expand Up @@ -2792,6 +2772,7 @@ StackToRegisterMappingCogit >> genStaticallyResolvedSpecialSelectorComparison [
StackToRegisterMappingCogit >> genStoreFloat32RegisterIntoArray [
| arrayReg indexReg vectorReg |

<option: #FEATURE_JIT_SIMD>
objectMemory wordSize = 4 ifTrue: [ ^ EncounteredUnknownBytecode ].

arrayReg := self allocateRegForStackEntryAt: 0 notConflictingWith: 0.
Expand Down Expand Up @@ -2821,6 +2802,7 @@ StackToRegisterMappingCogit >> genStoreFloat32RegisterIntoArray [
StackToRegisterMappingCogit >> genStoreFloat64RegisterIntoArray [
| arrayReg indexReg vectorReg |

<option: #FEATURE_JIT_SIMD>
objectMemory wordSize = 4 ifTrue: [ ^ EncounteredUnknownBytecode ].

arrayReg := self allocateRegForStackEntryAt: 0 notConflictingWith: 0.
Expand Down Expand Up @@ -3010,6 +2992,7 @@ StackToRegisterMappingCogit >> genStorePop: popBoolean TemporaryVariable: tempIn
StackToRegisterMappingCogit >> genSubFloat64Vector [
| array1Reg array2Reg subReg |

<option: #FEATURE_JIT_SIMD>
objectMemory wordSize = 4 ifTrue: [ ^ EncounteredUnknownBytecode ].

array1Reg := self allocateVectorRegForStackEntryAt: 0 notConflictingWith: 0.
Expand Down