diff --git a/Extends.md b/Extends.md deleted file mode 100644 index 5c7140b..0000000 --- a/Extends.md +++ /dev/null @@ -1,20 +0,0 @@ -# Struct Inheritance for WGSL - -(TBD) - -It’s convenient to be able to express structures as combinations of other structures as in Java/TypeScript. - -wgsl-linker currently supports an `extends` syntax for struct inheritance (but `extends` is only lightly -used upstream). - -Combining structures by simple composition is possible in standard wgsl. -by referencing through a member field. But: - -* Using fields of the referenced struct requires an additional field dereference "." in the syntax - for each level of indirection. -* Some abstractions may be hard to express w/o inheritance (since we also don't have interfaces in wgsl). - -Issues: - -* TBD demonstrate an example or drop the feature. -* Also, would generics for structs be an alternate way to parameterize composition? diff --git a/GLOSSARY.md b/GLOSSARY.md index f3fc6c4..0b7b6cb 100644 --- a/GLOSSARY.md +++ b/GLOSSARY.md @@ -1,15 +1,19 @@ # Glossary of WGSL Importing terms - WESL: The extended WGSL language, and is pronounced like "weasel". Stands for WGSL Extended Shading Language -- Importable item +- Global Items: - Structs - Functions - Type aliases - [Const declarations, override declarations](https://www.w3.org/TR/WGSL/#value-decls) - [Var declarations](https://www.w3.org/TR/WGSL/#var-decls) -- Module: A single WESL file -- Root Module: A WESL module from which compilation starts. A single project can have many root modules. -- Module Path: Hierarchical address of a module file or partial path, akin to a filesystem path + - Modules +- WESL File: A single WESL file +- Main File: A WESL file from which compilation starts. A single project can have many main files. 
+- WESL File Path: Hierarchical or relative address of a file, akin to a filesystem path with additional restrictions +- Namespace: Either the collection of symbols contained within the global scope, or the contents of a module +- Module: A construct appearing within a WESL file that groups global items together. +- Module Path: Hierarchical or relative address of a module. Similar in concept to accessing nested namespaces or modules in other languages. - Side effects: WGSL code that can affect other modules when imported - Things that are specified when [creating a WGSL pipeline](https://developer.mozilla.org/en-US/docs/Web/API/GPUDevice/createRenderPipeline#fragment_object_structure) - Shader entry-points diff --git a/GenericModules.md b/GenericModules.md new file mode 100644 index 0000000..09579b2 --- /dev/null +++ b/GenericModules.md @@ -0,0 +1,432 @@ +# Generic Modules for WGSL + +# Summary + +We propose adding a mechanism to allow generic programming using WESL modules as an extension to the language. + +## Assumptions + +Assumes that [`load`](./Imports.md), [Modules](./Modules.md) and [Module Interfaces](./ModulesInterfaces.md) have already been implemented. + +# Motivation + +Generic programming is useful for WESL, particularly for libraries. + +With generic modules, operations like `Reduce` or `PrefixSum` wouldn't need to be manually rewritten for each combination of element type and binary operation. + +Larger projects like GPU-accelerated particle systems could also benefit from generic modules. This is because the +encapsulation of types, functions and other globals in modules provides a means of structuring libraries in a way that is +readily user-extensible, reusable, and maintainable. + + +# Guide-level explanation + +Generic modules and signatures are declared using the `mod` keyword and, like generic types and built-in functions, use angle brackets (`<>`) to denote the generic parameters. Parameters are permitted to be other modules. 
+ +An instantiation of a generic module can be declared inline when used, added to the current namespace using [`include`](./Include.md) (if implemented) or aliased to give the module a concrete name. + +In addition to generic modules, this proposal also requires that [module signatures](./ModulesInterfaces.md) support generic parameters. Module signatures may be used to constrain the type of a generic argument. +Generic module signatures in generic type constraints may additionally use `_` as a "hole" in arguments to indicate that the user doesn't care about the type of a particular generic parameter. + +Below is an annotated example of how generic modules may be used in practice. This has been translated from the +[StoneBerry WebGPU Repository](https://github.com/stoneberry-webgpu/) into the proposed WESL format. + +```rescript +// Module signature that simply exposes the single type T. Could perhaps later be sugared to elide the module in follow-up +// work +mod sig Type { + type T; +} + +// Module signature that exposes a single constant `value`. Could perhaps later be sugared to elide the module in follow-up work +mod sig Const { + const value: Type::T; +} + +// Abstract representation of a binary operation. +mod sig BinaryOp { + // This is a common pattern to allow transfer of type information from generic input to output module + type LoadElem : LoadElem::T; + type OpElem : OpElem::T; + fn identityOp() -> OpElem; + fn loadOp(a: LoadElem::T) -> OpElem; + fn binaryOp(a: OpElem, b: OpElem) -> OpElem; +} + +// In future, modules representing numbers, vectors, matrices and other built in types +// would be part of the standard library. 
But lets define some common operations for now +mod sig Number { + type T; + + fn add(a: T, b: T) -> T; + fn identity() -> T; +} + +mod Sum { + struct T { + sum: N::T; + } +} + + +mod SumBinaryOp -> BinaryOp, Sum> { + alias OpElem = Sum::T; + alias LoadElem = Sum::T; + + fn identityOp() -> OpElem { + return OpElem(); + } + + fn loadOp(a: LoadElem) -> OpElem { + return OpElem(a.sum); + } + + fn binaryOp(a: OpElem, b: OpElem) -> OpElem { + return OpElem(N::add(a.sum, b.sum)); + } +} + +mod F32 { + alias T = f32; + + fn add(a: T, b: T) -> T { + return a + b; + } + + fn identity() -> T { + return 0.0; + } +} + +mod U32 { + alias T = u32; +} + +// Here we don't care about the exact generic mod values +// passed to BinaryOp as we can extract the underlying types from the module +// members +mod ReduceWorkgroup, WorkSize: Const, Threads: Const> { + var work: array; + fn reduceWorkgroup(localId: u32) { + let workDex = localId << 1u; + for (var step = 1u; step < Threads::value; step <<= 1u) { + workgroupBarrier(); + if localId % step == 0u { + work[workDex] = Op::binaryOp(work[workDex], work[workDex + step]); + } + } + } +} + +// Same here +mod ReduceBuffer, BlockArea: Const, WorkSize: Const, Threads: Const> { + // Including brings the module members into the namespace + include ReduceWorkgroup; + + alias Input = Op::LoadElem::T; + alias Output = Op::OpElem::T; + + struct Uniforms { + sourceOffset: u32, // offset in Input elements to start reading in the source + resultOffset: u32, // offset in Output elements to start writing in the results + } + + @group(0) @binding(0) var u: Uniforms; + @group(0) @binding(1) var src: array; + @group(0) @binding(2) var out: array; + @group(0) @binding(11) var debug: array; // buffer to hold debug values + + override workgroupThreads = 4u; + + var work: array; + + // reduce a buffer of values to a single value, returned as the last element of the out array + // + // each dispatch does two reductions: + // . 
each invocation reduces from a src buffer to the workgroup buffer + // . one invocation per workgroup reduces from the workgroup buffer to the out buffer + // the driver issues multiple dispatches until the output is 1 element long + // (subsequent passes uses the output of the previous pass as the src) + // the same output buffer can be used as input and output in subsequent passes + // . start and end indices in the uniforms indicate input and output positions in the buffer + // + @compute + @workgroup_size(workgroupThreads, 1, 1) + fn main( + @builtin(global_invocation_id) grid: vec3, // coords in the global compute grid + @builtin(local_invocation_index) localIndex: u32, // index inside the this workgroup + @builtin(num_workgroups) numWorkgroups: vec3, // number of workgroups in this dispatch + @builtin(workgroup_id) workgroupId: vec3 // workgroup id in the dispatch + ) { + reduceBufferToWork(grid.xy, localIndex); + let outDex = workgroupId.x + u.resultOffset; + reduceWorkgroup(localIndex); + if localIndex == 0u { + out[outDex] = work[0]; + } + } + + fn reduceBufferToWork(grid: vec2, localId: u32) { + var values = fetchSrcBuffer(grid.x); + var v = reduceSrcBlock(values); + work[localId] = v; + } + + fn fetchSrcBuffer(gridX: u32) -> array { + let start = u.sourceOffset + (gridX * BlockArea::value); + let end = arrayLength(&src); + var a = array(); + for (var i = 0u; i < BlockArea::value; i = i + 1u) { + var idx = i + start; + if idx < end { + a[i] = Op::loadOp(src[idx]); + } else { + a[i] = Op::identityOp(); + } + } + return a; + } + + fn reduceSrcBlock(a: array) -> Output { + var v = a[0]; + for (var i = 1u; i < BlockArea::value; i = i + 1u) { + v = Op::binaryOp(v, a[i]); + } + return v; + } +} +// To actually realize a concrete ReduceBuffer module, we need concrete const values: + +mod BlockArea -> Const { + const value: u32 = 4u; +} + +mod WorkSize -> Const { + const value: u32 = 18u; +} + +mod Threads -> Const { + const value: u32 = 10u; +} + +// Putting 
everything together and into the global namespace +include ReduceBuffer, BlockArea, WorkSize, Threads>; +``` + + +# Reference-level explanation + +Generic modules and signatures are parsed as follows, with spaces and comments allowed between tokens: + +```bnf +module_decl: + attribute * 'mod' ident _disambiguate_template module_template_param_list ('->' module_type_specifier_set)? "{" module_member_decl * "}" ";"? +; + +module_template_param_list : + _template_args_start module_template_param_comma_list _template_args_end +; + +module_template_param_comma_list : + module_template_param ( ',' module_template_param ) * ',' ? +; + +module_type_specifier_set : + module_type_specifier ('+' module_type_specifier) * +; + +module_template_param : + attribute * ident ':' module_type_specifier_set +; + +module_sig_decl : + attribute * 'mod' 'sig' ident _disambiguate_template module_template_param_list '{' global_sig * '}' ';'? +; + +module_type_specifier : + (module_path '::')? ident _disambiguate_template template_list ? +; + +module_path : + module_path_part ('::' module_path_part)* +; + +module_path_part : + ident _disambiguate_template template_list ? + +nested_mod_sig : + attribute * 'mod' ident ':' (module_type_specifier ('+' module_type_specifier) *) ';' +; +``` + +Where `ident`, `_disambiguate_template`, `template_list`, `_template_args_start` and `_template_args_end` are defined in the WGSL grammar. + +## Linking WESL Files + +Further linking complexity is introduced with this proposal. This is because generic modules need an additional specialization pass prior to linking. + +A basic specialization pass could be implemented according to the following handwavy logic (though includes make this actually quite a bit subtler, see below for further discussion): + +- for each generic module: + - for each unique set of generic parameters: + 0. a new concrete module should be produced and given a unique name + 0. 
for each usage: + - the generic reference should be rewritten to refer to the concrete module + +### Includes + +The `include` feature of course breaks this relatively simple scheme because of variables. + +Each time a module with variables is included (regardless of whether it is generic or not), it needs to copy +the variables into the including namespace, along with at minimum all the dependencies of these variables. + +A naïve approach would be rather to avoid specialization with includes and instead just blindly copy symbols. +This would work, though would lead to larger amounts of output shader code. + +### Optimizations + +Generics have a lot of room for optimization. + +For example implementations may want to find functions within a module which do not in their usage graph reference any variables or generic parameters; these functions would not need to be specialized. + + +Another potential optimization approach would be producing groups of module members which only depend upon subsets of the declared generic parameters. For each of these sets of module members, specialization would only need to occur for each unique combination of generic arguments in the subset. + +## Type Checking + +Type checking of generics brings its own challenges. The main additions are: module constraints in generic arguments (which might themselves be generic signatures), and the addition of generic "holes" which may be present in module type constraints. + +From a type checking perspective, checking these constraints is similar to checking modules against their signatures in [Module Interfaces](./ModulesInterfaces.md); however, the holes introduce "don't care" semantics. These "don't cares" are necessary +for brevity but also introduce an indeterminate state to module signatures, relaxing the generic parameters within a generic module signature to their most general type. + + +# Rationale and alternatives + +- Why is this design the best in the space of possible designs? 
+ - Well conceived, consistent abstraction inspired by OCaml + - Doesn't introduce additional runtime costs to WESL, and module contents remain valid WGSL + - Builds upon modules, meaning that there are no additional language concepts to learn beyond understanding modules + - More flexible than just generic functions + - Good base for adding syntactic sugar +- What other designs have been considered and what is the rationale for not choosing them? + - See below +- What is the impact of not doing this? + - Harder to build abstractions in WESL + - Users resorting once again to the escape hatch of basic string templating + + +## Alternatives + +### Generic functions +- Less powerful than generic modules +- Harder to build larger, more complicated abstractions +- Potentially simpler to understand for users +- Could be quite easily added later as a syntactic transform using generic modules + +### String templating/Substitution +- Hard to have reasonable language server behaviour +- Simple to understand +- Extremely flexible & powerful +- Simple to implement +- Hard to document + +### Abstract Modules +_Modules which are partially implemented and require concrete implementation to realize the module_ + +- Less explicit and less expressive +- Harder to compose behaviour +- Simpler to typecheck +- Likely simpler to implement +- Could be emulated using `include` and module signatures + +## Drawbacks + +- Generic modules could be quite complex to implement as soon as you step beyond a naïve approach. +- Unfamiliar programming model +- Without additional syntactic sugar, can sometimes be verbose +- Some form of standard library may become necessary to make using built-in types and functions feasible using this approach + +# Future work + +## Generic Functions +The implementation of generic functions could depend on generic modules. It would simply be syntactic sugar. 
+A generic function could in implementations create a hidden generic module with the same template/generic signature. + +Usages of the generic function would perform a rewrite of the symbol name to refer to the function inside this module +This way a lot of implementation effort could be saved. + +For example we could rewrite this generic function +`fn foo(a: vec4, b: vec4) -> N {}` as: +```rescript +mod foo { + fn impl(a: vec4, b: vec4) -> N::T {} +} +``` + +Usage of this function could then be rewritten from `foo(vec4(1.0), vec4(2.0))` to `foo::impl(vec4(1.0), vec4(2.0))`. + +Generic functions and modules could take in both modules and concrete functions as arguments, along with constant expressions. Module signatures for generic functions could take the following general pattern: + +```rescript +mod sig Fn2 { + fn invoke(arg1: Arg1::T, arg2: Arg2) -> Returns::T; +} + +mod sig UnitFn2 { + fn invoke(arg1: Arg1::T, arg2: Arg2); +} +``` + +## Constants + +Similarly, the implementation of generic constant parameters could depend on generic modules. It again would simply be syntactic sugar for a generic module. + +For example we could instantiate the generic module `Foo` seen below, as `Foo<10u>` by rewriting `10u` as a module containing a constant value: + +```rescript +mod sig Const { + const value: Type::T; +} + +mod Foo> { + +} +``` + +## Types + +Finally, the implementation of generic type parameters could depend on generic modules. It once _again_ would simply be syntactic sugar for a generic module. 
+ +We could do this by transforming types provided as an argument to a generic module into an implementation of the `Type` +signature: + +```rescript +mod sig Type { + type T; +} +``` + +Then the struct `Foo` provided as a generic argument could be rewritten as + +```rescript +mod FooType { + alias T = Foo; +} +``` + +## Anonymous Module Declarations +To prevent a proliferation of single-use modules, an additional feature could be added, which is the ability to declare anonymous modules as part of generic arguments. For example, this would allow one to write something like this: + +```rescript +mod GenericExample { + // Uses settings here +} + +alias ConcreteGenericExample = GenericExample<{ + const frac = 0.4f; + const height = 3f; +}>; +``` diff --git a/Generics.md b/Generics.md deleted file mode 100644 index 8644986..0000000 --- a/Generics.md +++ /dev/null @@ -1,164 +0,0 @@ -# Simple Generics for WGSL - -(TBD) - -Generic programming is useful for wgsl, particularly for libraries. -With generics, wgsl functions like `reduce` or `prefixSum` -don’t need to be manually rewritten for each combination of element type and binary operation. -I’m hoping we can find a fairly minimal design for generics -that is easy for programmers to learn and supportable with modest effort in wesl tools. - -To ease implementation effort, I imagine we’ll want to avoid type inference -or type constraints on generics. But without type inference, -specifying generic types at every call site to a generic function gets verbose and tedious. To avoid that verbosity, let’s allow generic variables on import statements (glslify and wgsl-linker did this too). - -I thought we might start by allowing generics only on functions. -We’ll want a design that’s extensible to more features (e.g. generic structs) -of course, -but we can start with a minimal implementation and add features as they prove necessary. 
- -## Summary - -* angle bracket syntax for generic variable declaration, and generic value specification. -* declare generic variables on function declarations, e.g.: `fn foo(arg: E) -> E { let e:E = arg; return e; }` -* within an fn with a generic declaration, - generic variables names can be used in place of a wgsl type in both the fn declaration and fn body, - or in a function call expression inside the fn body. - The generic variable text will be replaced by the generic value text during linking. - So if `E` is `f32` the linked wgsl for foo would be: `fn foo(arg: f32) { let e:f32 = arg; return e; }`. -* Note that a linker will generate multiple copies of fn foo() in wgsl, - one for each unique set of generic arguments. So each fn will have a unique name. -* generic variable values are supplied on import statements or call statements. - - * import with a generic: - ``` - import util/foo as foo32; - main() { foo32(1.0); } - ``` - * or, call with a generic: - ``` - foo(1.0); - ``` - -* generic values supplied with imports are single world tokens (typically wgsl type names or function names), - or generic variables declared on that function. - -## Examples - - -Simple Example: - -* ``` - ./util.wgsl: - - @export fn workgroupMin(elems: array) -> E { } // E is a generic parameter - ``` - -* ``` - ./main.wgsl: - - import ./util/workgroupMin as workMin; // substitutes f32 for E - - fn main() { - workMin(a1); // no generic variables required at the call site - workMin(a2); - } - ``` - -Here’s a more complicated case. reduce is parameterized by an element type (e.g. u32) and a binary operation, e.g. 
max() - -* ``` - ./util.wgsl: - - export - fn reduce(elems: array) -> E { - return BinOp(elems[0], elems[1]); - } - -* ``` - ./main.wgsl: - - import ./ops/binOpMax as binOpMax; - import ./util/reduce as maxF32; - - fn main() { - maxF32(a1); - } - ``` - -Note that you can import a generic function w/o providing parameters: - -* ``` - ./util.wgsl: - - import binOpMax from ./ops; // no generic variable specified yet - - export fn reduceMax(elems: array) -> E { - return binOpMax(elems[0], elems[1]); // generic value applied at call site - } - ``` - -Re-exporting generics is allowed (presuming we allow re-exporting in general, see [Visibility](./Visiblity.md)): - -* ``` - ./lib.wgsl: - export reduce from util/reduce.wgsl; // re-export at package root level - - ./util/reduce.wgsl: - export fn reduce(elems: array) -> E { } - ``` - -## Questions and possible extensions - -* Do angle brackets conflict or comport with wgsl templates? -* Can you export a generic function after variable substitution too? or only the generic version -* Allow generic values to be pulled from runtime parameters? - wgsl-linker currently recognizes an `ext.` prefix to get variable values from the runtime caller. - e.g. ext.workgroupSize would patch in runtime variables at link time. - Hopefully we can address that with runtime #define, we’ll see. -* Currently there are no type constraints available for generic variable declarations.. - Simply substituting parameters and letting dawn or naga typecheck at runtime seems ok for now. - A future type checker could check annotation uses are valid by substituting generic parameters - and type checking the expanded wgsl. - And of course a future version of wgsl or wesl generics could add explicit type constraints on generic variables. - -* Generics on structs too? 
- - * ``` - ./util.wgsl: - export struct Point { position: vec2, color vec3f } - - ./main.wgsl - import Point as UPoint from ./util - - fn main() { - let p = UPoint(vec2u(0, 0), vec3f(.5, .5, .5)); - } - ``` - -* The reduce example makes me think whether we could call a generic function recursively, - and what would that do. - In theory, the following would unroll to N nested function calls. - The wgsl compilers may be good at flattening this. - -* ``` - fn accumulate(acc: E, elems: array) -> E { - if N >= 2 { - return accumulate(accumulateBinOp(E, elems[N-1]), elems); - else { - return acc; - } - } - - fn op_add(e1: E, e2: E) { - return e1 + e2; - } - - fn array_sum(elems: array) -> E { - return accumulate, N>(0, elems); - } - ``` - - Array_sum has a nested generic! This is cool. - - Also, some SFINAE I guess: because 0 is AbstractInt, it can subtitute E with u32 or i32, BUT not f32 afaik, because 0 is not AbstractFloat. This is somewhat disappointing. diff --git a/Imports.md b/Imports.md index c6aaed5..aba6318 100644 --- a/Imports.md +++ b/Imports.md @@ -4,12 +4,6 @@ We propose adding an importing mechanism to the WGSL shading language as an extension. -## TBD - -* Require braces for wgsl items? e.g. `import ./lighting/{ pbr };` - * If we require braces for wgsl items, could do `import ./foo/bar` instead of `import ./foo/bar/*`? -* Add example for recursive imports - # Motivation When writing bigger WGSL shaders, one tends to **split the code into reusable pieces**. Examples are re-using a set of lighting calculation functions and sharing a struct between two compute shaders and. @@ -20,278 +14,126 @@ We also should account for **importing shader from libraries**. Ideally, users c Finally, we want **multiple tools** which can compile WGSL-with-imports down to raw WGSL. Using WGSL-with-imports both in Rust projects and in web projects should be possible. 
-## Guide-level explanation +# Guide-level explanation -The `import` statement extension is designed to appear somewhat familiar to TypeScript syntax, -but with a Rust like recursive grammar to make importing -multiple importable items less verbose. -The syntax is also heavily inspired by [Gleam](https://gleam.run/). +The `load` statement extension is designed to appear somewhat familiar to Ruby `require` syntax, and specifies that a given file should be loaded by the linker and all of its symbols be made available to the global context. -By placing an import at the very top of a file, one can either import an entire module, or only specific importable items, such as functions, structs or types. +A given file is loaded only once and it is also legal for files to cyclically refer to one another. -``` -// Importing a single item using a relative path -import ./lighting/pbr; +One important restriction of this proposal is that it is illegal for symbols to have the same name, unless as per the [Extends](./Extends.md) proposal a symbol is declared virtual. -// Importing multiple items -import my/geom/sphere/{ draw, default_radius as foobar }; +All loaded entrypoints in the global scope are guaranteed to be available to the WebGPU API. However this guarantee does not extend to bindings as these may be removed by dead code elimination if not used in any entry point. -// Imports a whole module. Use it with `bevy_ui.name` -import bevy_ui/*; -``` +An example of how a shader may use loads is as follows: -These can then be used anywhere in the source code. +In a file called `utils.wesl`: -``` -fn main() { - bevy_ui.quad(vec2f(0.0, 1.0), 1.0); - let a = draw(3); +```wgsl +@binding(0) @group(0) var frame : u32; + +fn current_frame_plus_two() -> f32 { + return frame + 2.0; } ``` -Both `bevy_ui` and `my` are packages in the current project. Language servers and related tools can look in a `wgsl.toml` file to find the location of the packages. 
This lets libraries be published to package managers, and users can import them with a simple syntax. - -The first part is the file path, which is assumed to have a either `.wgsl` file extension, -or the extension we use for extended wgsl syntax (possibly `.wesl`). +In a file called `entrypoint.wesl` in the same directory as `utils.wesl`: -Recursive import definitions are also supported, which leads to short and succinct import statements. +```wgsl +load ./utils; +@fragment +fn frag_main() -> @location(0) vec4f { + return vec4(1, sin(current_frame_plus_two()), 0, 1); +} ``` -import bevy_pbr/{ - forward_io/VertexOutput, - pbr_types/{PbrInput, pbr_input_new}, - pbr_bindings/* -}; -fn main() { +Note that all names in the above example are preserved in linker output. -} -``` # Reference-level explanation -Imports must appear as the first items in a WGSL file. They import "importable items" (see [GLOSSARY.md](./GLOSSARY.md)). +A load statement is parsed as follows, with spaces and comments allowed between tokens: -An import statement is parsed as follows, with spaces and comments allowed between tokens: +```bnf +load_decl: +| 'load' load_relative? load_path ';' +; -``` -main: -| 'import' import_relative? import_path ';' - -import_relative: +load_relative: | ('.' | '..') '/' ('..' '/')* +| '/' +; -import_path: -| ident '/' (import_path | import_collection | item_import | star_import) - -star_import: -| '*' ('as' ident)? - -item_import: -| ident ('as' ident)? - -import_collection: -| '{' (import_path | item_import) (',' (import_path | item_import))* ','? '}' +load_path: +| ident ('/' ident)* +; ``` Where `ident` is defined in the WGSL grammar. -An import consists of - -- A path part, which either is a relative path ('.' and '..'), or points at a known package (ident). +A load consists of +- A path part, which either is a relative path ('.', '..' or '/'), or points at a known package (ident). - Nested path segments are joined. 
- Everything before the final slash is part of the path. - The final part is the file name. -- Items to import. - -A star import behaves like a star import in Typescript, except that the name can be inferred from the file name. **Notably**, it does *not* import all the items individually. Instead, it groups them all under a single name! - -An item import imports a single item. The item can be renamed with the `as` keyword. +Where `ident` is defined in the WGSL grammar. The WGSL grammer is additionally extended as follows: -An import collection imports multiple items, and allows for nested imports. - -## Examples - -To compare it to the more widely known Typescript syntax, here are some examples. - - - - - - - - - - - - - - - - - - - - - - -
WGSLTypescript
- -``` -import ../geom/sphere/{draw, default_radius as foobar}; -``` - - - -```ts -import { draw, default_radius as foobar } from '../geom/sphere.wgsl'; +```bnf +extend global_decl : + | load_decl +; ``` -
- -``` -import bevy_ui/*; -``` +## Items to import. - - -```ts -import * as bevy_ui from 'bevy_ui.wgsl'; -``` - -
- -``` -import bevy_pbr/{ - forward_io/VertexOutput, - pbr_types/{PbrInput, pbr_input_new}, - pbr_bindings/* as pbr_b -}; -``` - - - -```ts -import { VertexOutput } from 'bevy_pbr/forward_io.wgsl'; -import { PbrInput, pbr_input_new } from 'bevy_pbr/pbr_types.wgsl'; -import * as pbr_b from 'bevy_pbr/pbr_bindings.wgsl'; -``` +This proposal brings all items from the requested file recursively into scope. +This means that if file a loads `abc.wesl` which uses `def.wesl`, then the symbols from +both `abc.wesl` _and_ `def.wesl` will be in scope. Note that both `wgsl` and `wesl` files may be +loaded, so it is a link time error to have both a wgsl file and a wesl file in the same directory with +the same name. -
+If two loaded files have any clashing global symbols, then implementations of this specification are expected to produce an error. -## Parsing module.importable_item in the source code +Entrypoints in the global scope that are loaded either directly or indirectly via the main file are considered used. -For tools that are parsing WGSL, -here's how to extend the -[WGSL grammar](https://www.w3.org/TR/WGSL/#grammar-recursive-descent) -to parse imports: +Typical linker implementations would likely want to analyse usages from these entrypoints to eliminate unused globals and bindings. -About extending the grammar: -This one gets an additional meaning. A module property can also be a component. -component_or_swizzle_specifier: -| '.' member_ident component_or_swizzle_specifier ? - -This needs to be extended to support ident.ident -for_init: -| ident.ident func_call_statement.post.ident - -for_update: -| ident.ident func_call_statement.post.ident - -global_decl: -| attribute _ 'fn' ident ... '->' attribute _ ident.ident -| 'alias' ident '=' ident.ident template_elaborated_ident.post.ident ';' - -primary_expression: -| ident.ident template_elaborated_ident.post.ident -| ident.ident template_elaborated_ident.post.ident argument_expression_list +## Examples -statement: -| ident.ident template_elaborated_ident.post.ident argument_expression_list ';' +## Related Specifications -type_specifier: -| ident.ident ... +This proposal on its own is impractical for developing a robust ecosystem due to lack of namespacing. The [Modules](./Modules.md) proposal is likely necessary to make this import proposal workable. 
## Behaviour-changing items -These items cannot be imported, but they affect module behavior: +These items cannot be imported, but they affect behavior: - [Extensions](https://www.w3.org/TR/WGSL/#extensions) - [Global diagnostic filters](https://www.w3.org/TR/WGSL/#global-diagnostic-directive) -These are always set at the very top in the main module, and affect all imported modules. They come before the imports. - -When, during parsing of imported modules, we encounter an extension or a global diagnostic filter, we check if the main module enables it, or sets the filter. -If yes, everything is fine. If not, we throw an error. - -(In naga-oil entrypoints are lowered to normal functions. -Not clear we should preserve that, it's against the spec, -but noting current behaviour that is being used in the wild.) - -## Preserved items - -These items are preserved when importing a module. They are not imported, but will land in the final module with a mangled name. - -- [Entry points](https://www.w3.org/TR/WGSL/#entry-points) -- [Pipeline overridable constants](https://www.w3.org/TR/WGSL/#override-decls) - -## Identifier Resolution - -The steps of identifier resolution are as follows: - -1. Parse the import statements. Resolve the paths to the concrete file paths. -2. Bring the imported items into scope. All other items that the imported items depend on are also imported, _but not user-accessible in the current scope_. - For example when importing `Foo` from `struct Foo { x: Bar; }`, we would import `Bar` as well. If the user types `Bar` in the source code, then that is an error. -3. Parse the WGSL. 
- -For example, given two modules - -``` -// lighting.wgsl -struct Light { - color: vec3; -} -struct LightSources { - lights: array; -} -``` - -``` -// main.wgsl -import lighting::{ LightSources }; - -// LightSources can be used -fn clear_lights(lights: LightSources) -> LightSources { - for (var i = 0; i < lights.lights.length(); i = i + 1) { - // We can access everything inside of LightSources - let light = lights.lights[i]; +These are always set at the very top in the main file, and affect all loaded files. They come before the imports. - // However, the user cannot use the type "Light" - // let light: Light <== error, Light is not imported +When, during parsing of loaded files, we encounter an extension or a global diagnostic filter, we check if the main file enables it, or sets the filter. - light.color = vec3(0.0, 0.0, 0.0); - } - return lights; -} -``` - -## Producing the final module +If yes, everything is fine. If not, we throw an error. -The final module is produced by taking each imported module (in topological order, with the main module last), resolving and mangling all the globals, and joining the resulting pieces of code together. +## Producing the final file -All entry points and pipeline overridable constants from the imported modules are also mangled and land in the output. +The final file is produced by taking each loaded file in topological order, with the main file last, and ensuring that each file is only taken once. The results are then concatenated together. Dead code elimination is allowed, but not required. -# Drawbacks +## Drawbacks Are there reasons as to why we should we not do this? - This introduces yet another importing syntax that developers have to learn, instead of using a standard syntax. -- To implement the name mangling, one has to parse WGSL code! This is not trivial, and requires a partial WGSL parser. -- Paths in import statements must consist of valid WGSL identifiers, which can be limiting. 
This limitation could be lifted by allowing arbitrary strings in import paths, but would make the implementation more complex. +- Paths in load statements must consist of valid WGSL identifiers, which can be limiting. This limitation could be lifted by allowing arbitrary strings in import paths, but would make the implementation more complex. +- The design of this proposal requires that developers of reusable shader libraries shoulder the responsibility to prevent naming collisions. This would be ameliorated by the [Modules](./Modules.md) proposal. +- This essentially makes _all loads_ wildcard imports. This could make language servers and usage analysis harder to implement though one would hope the uniformity of this proposal would help somewhat in reducing this complexity. + # Rationale and alternatives @@ -308,9 +150,13 @@ The usual alternative is that one library, like shaderc, becomes very popular an An open process lets us find a better solution. +# Simplicity + +This model is simple to implement and understand, even for users not familiar with languages like typescript or rust. It also avoids the name mangling problem in the WebGPU visible API. + ## Preprocessor `#include ` -One alternative, which is common in the GLSL and C worlds, is an including mechanism which simply copy-pastes existing code. A major upside is that this is very simple to implement. +One close alternative, which is common in the GLSL and C worlds, is an including mechanism which simply copy-pastes existing code. A major upside is that this is very simple to implement. One drawback is that importing the same shader multiple times, which can also happen indirectly, does not work without other preprocessor features. @@ -327,33 +173,37 @@ One drawback is that importing the same shader multiple times, which can also ha would not work, since anything defined in `math.wgsl` would be imported twice. In C-land, this is solved by using _include guards_. 
-Another drawback is that using the same name twice is impossible. In C-land, this leads to pseudo-namespaces, where major libraries will prefix all of their functions with a few symbols. An example of this is the Vulkan API `vkBeginCommandBuffer` or `vkCmdDraw`. +Another drawback is that using the same name twice is impossible. In C-land, this leads to pseudo-namespaces, where major libraries will prefix all of their functions with a few symbols. An example of this is the Vulkan API `vkBeginCommandBuffer` or `vkCmdDraw`. The [Modules](./Modules.md) proposal would avoid this pitfall. A future drawback is that "privacy" or "visibility" becomes very difficult to implement. Everything that is imported is automatically public and easily accessible. + In C-land, the workaround is using header files. In other languages, such as Python, the convention ends up being "anything prefixed with an underscore `_` is private". +Visibility in our proposal in contrast would be via the [Module Interfaces](./ModulesInterfaces.md) proposal and +would be optional, allowing regular wgsl files to be imported without modification. + ## Typescript-like imports -TODO: Main reason is just that they're more verbose +TODO: Main reason is just that they're more verbose. Also is more complicated than this proposal ## Rust-like imports -TODO: Needs something akin to a `mod` statement, otherwise ambiguity. +TODO: Needs something akin to a `mod` statement, otherwise ambiguity. Also is more complicated than this proposal ## Putting exports in comments This would have the advantage of letting some existing WGSL tools ignore the new syntax. For example, a WGSL formatter would not need to know about imports, and could just format the code as usual. +This could be considered in conjunction with or in addition to this proposal. 
+ ## Using an alternative shader language There are multiple higher level shading languages, such as [slang](https://github.com/shader-slang/slang) or [Rust-GPU](https://github.com/EmbarkStudios/rust-gpu) which support imports. They also support more features that WGSL currently does not offer. For complex projects, this can very much pay off. The downside is using additional tooling, and dealing with an additional translation layer. -An additonal translation layer could lock shader authors out of certain WGSL features. +An additional translation layer could lock shader authors out of certain WGSL features. -Also, higher level GPU languages are typically processed at build time, -which precludes using language features to adapt to runtime conditions -like GPU characteristics or user settings. +Also, higher level GPU languages are typically processed at build time, which precludes using language features to adapt to runtime conditions like GPU characteristics or user settings. ## Composing shader code as strings at runtime @@ -399,10 +249,6 @@ Test cases will be available on # Future possibilities -## Namespaces - -We hope that namespaces will be added to WGSL itself. Then, the importing mechanism can be extended to fully support namespaces, for example by treating each file as introducing its own namespace. - ## Documentation comments This proposal works nicely with documentation comments in WGSL. This would allow library authors to document their shaders, which would be very useful for consumers. @@ -415,7 +261,3 @@ We encourage tooling authors to also implement source maps when implementing imp How a preprocessor would interact with this proposal is an open question for a future proposal. See [Conditional Compilation](./ConditionalCompilation.md). - -## Scoped imports - -Allow imports that are only active within one function? 
diff --git a/Include.md b/Include.md new file mode 100644 index 0000000..98e8707 --- /dev/null +++ b/Include.md @@ -0,0 +1,144 @@ +# Modules + +## Summary + +We propose adding a module extension mechanism to the WESL shading language. + +## Assumptions + +Assumes that [`load`](./Imports.md), [Modules](./Modules.md) and [Module Interfaces](./ModulesInterfaces.md) have already been implemented. The last one however isn't essential to this proposal which could be tweaked to function without it. + +# Motivation + +WESL currently provides a way to organize and encapsulate types, functions and state. However it does *not* +provide a way to specialize behaviour, or in other words does not provide a mechanism for inheritance. + +Specialization is important for a number of cases, an important and frequent one of which is for abstracting the implementation and configuration details of Physically Based Rendering (PBR), reducing the boilerplate and number of edge cases a technical artist has to consider. + +At its core, PBR is a mathematical parametrisation of the interaction of an infinitesimally small fragment of a physical material with light. For example many implementations of PBR allow one to specify the physical qualities of a surface such as the albedo, normal, how rough or smooth a surface is and whether it is metallic or not. These qualities are used in a complex integral that is used to drive the appearance of a surface. However most workflows do not require +technical artists to know the details of these integrals, and instead allow them to author materials by simply varying +the PBR values across the surface. + +An example of how we would want such an abstraction to look in a game engine such as [Bevy](https://bevyengine.org/) +would be as follows: + +```reasonml +// Including standard material adds all the symbols +// to the global module including the vertex and fragment entry points.
+include BevyPbr::StandardMaterial; + +// Tells the linker to resolve calls to `resolve_pbr_inputs` +// to this function instead of the base. This is constrained to the +// current namespace; in this case the namespace is the global scope. +patch fn resolve_pbr_inputs(pbr_inputs: &PbrInputs) { + // Calls the base method, populating the pbr inputs with the default values + BevyPbr::StandardMaterial::resolve_pbr_inputs(pbr_inputs); + + // Insert code to procedurally generate a carbon fibre coating +} +``` + +This is quite a concise representation and a big improvement in usability over what is available in Bevy today. + +# Guide-level explanation + +This proposal introduces two mechanisms which work in concert to specialize behaviour. These are `include` and `patch`. + +One could practically think of `include` as copying the contents of a given module into the current namespace. This serves dual purposes. Firstly it behaves as a means of not having to refer to a symbol within a deeply nested module by its fully qualified name, reducing verbosity. Secondly it allows one to inherit the behaviour of another module (commonly also called a mixin). It is possible to use `include` multiple times in a single namespace, provided no names in the scope clash. Namespaces which use `includes` do not have privileged access to the symbols within a module, so the same visibility rules apply. + +This is just one half of the story however, as the module being included is not aware of the namespace doing the including. Therefore we also need a way of specializing behaviour in functions within the target namespace to bridge the gap between the two. This is where `patch` comes in. `patch` is a keyword that may be applied to a function declaration. It instructs the linker to replace usages of the specified function within the namespace with the newly declared function. The patched function may still be invoked using its fully qualified name. 
Naturally `patch` comes with the restriction that function signatures have to match. + +A possible extension to this specification would be to only allow `virtual` functions to be patched. + +# Reference-level explanation + +`include` is parsed as follows, with spaces and comments allowed between tokens: + +```bnf +include_decl : + 'include' module_path ';' +; + +extend global_sig : + | include_decl +; +``` + +Where `module_path` is defined in the existing WESL grammar. + +The WGSL grammar is additionally extended as follows: + +```bnf +function_decl : + attribute * 'patch' ? function_header compound_statement +; + +extend global_decl : +| include_decl +; +``` + +## Linking WESL Files + +Linking a WESL file that supports both `include` and `patch` naturally introduces additional complexity when linking. Broadly speaking there are two approaches that could be taken to support this feature. + +The first, more naïve approach, is to copy all the symbols from the module being included and simply replace the patched functions within the AST in the pass before +canonicalization. This may be more than somewhat inefficient however as there would likely be many duplicates included in the output. This means you'd be relying heavily on dead code elimination, the wgsl compiler, and the underlying drivers to reduce register pressure. However this approach should be easy to ensure correctness with. + +The second is more forensic, requiring that a usage graph be created for all the patched functions and variables in the module being included. The functions and variables present within this graph would need to be copied into the including namespace, while functions not within the graph would resolve to the original module. + +What increases the complexity of both approaches is the support for module nesting. Inline modules are able to access the symbols of the outer namespaces.
Both usage graph and symbol duplication approaches would therefore need to include these submodules in all calculations. An additional source of complexity is that an included module may itself use `include`. To reduce complexity, cycles should be strictly forbidden in compliant implementations. + +For the second approach, dead code elimination would additionally need to take include usages into account. + +## Type Checking + +Module type checking would need to be modified as follows: +- additional symbols found within the included files would need to be considered in conjunction with the rest of the type checking rules. + +## Drawbacks + +Are there reasons as to why we should not do this? + +- This extension would introduce additional complexity in linking and type checking +- Allowing multiple includes suffers from some of the problems present in multiple inheritance. + - Forbidding clashing symbols mitigates some of these concerns + - Module system in conjunction with this proposal would allow users to work around this +- Introduces additional function calls at extension points vs the templating approach. + - Whether this is a serious drawback is TBD + - Likely would require a concerted effort to benchmark performance cost + - Future work could inline some of this code +- Does not address struct includes which may be a requirement for some use cases + - Future proposals could extend this and behavioural semantics and implementations could learn from this proposal + +## Rationale and alternatives + +- Why is this design the best in the space of possible designs? + - Solves common challenge with PBR rendering + - Allows for creation of smaller, more focused modules.
+ - This allows for better reuse and behavioural composition + - Solves some use cases that would otherwise require generic modules + - Not all however + - Simpler to understand than generic modules + - Patches are scoped to a namespace + - Better for language servers than monkey patching + - Allows entrypoints to be injected into the global namespace + - Allows users to import symbols within a module - reducing verbosity without requiring an additional mechanism +- What other designs have been considered and what is the rationale for not choosing them? + - See below +- What is the impact of not doing this? + - Little extensibility in WESL specification without it + - No means of including libraries into the global namespace preventing libraries from defining entry points + - Would need an additional mechanism to make individual symbols available to the current namespace + + +### String Templating +Allows for arbitrary injection and construction of code at the specified points meaning that it is more expressive in some ways than the proposal above. It's fast. Cannot be easily statically analysed. Poor experience when considering language servers + +### Single Inheritance +Con is it places an additional limitation on architecture. Pro is it avoids some issues seen in multiple inheritance. +Con is it would prevent usage of `include` as a means of introducing symbols into the current scope. + +## Future Work + - Extension of structs + - Structured inclusion of code fragments into method bodies? diff --git a/Modules.md b/Modules.md new file mode 100644 index 0000000..f2dd197 --- /dev/null +++ b/Modules.md @@ -0,0 +1,160 @@ +# Modules + +## Summary + +We propose adding a module system mechanism to the WGSL shading language as an extension. + +## Assumptions + +Assumes that [`load`](./Imports.md) has already been implemented. + +# Motivation + +Currently all symbols in wgsl share a single global namespace with a prohibition against symbols with the same name.
Many existing implementations of imports (not unlike [our proposal](./Imports.md)) simply add to this global scope. For relatively self contained projects this is not a problem, however when considering a broader ecosystem of packages containing reusable shaders, collisions become much more likely. + +Additionally WGSL provides very little way to encapsulate and organise code. This proposal would pave the way for both +[Module Interfaces](./ModulesInterfaces.md) and [Generic Modules](./GenericModules.md). The former would allow a graphics programmer to optionally control the visibility and typecheck the symbols within a module, while the latter would allow for more reusable code and the ability to build powerful abstractions. + +Finally [Include](./Include.md), another extension of modules, gives a way to compose behaviour in a manner not too dissimilar to inheritance, which is important for use cases such as writing extended shaders based on Standard PBR workflows in game engines such as [Bevy](https://bevyengine.org/). + +# Guide-level explanation + +A module is declared using the `mod` keyword. A module contains a set of declarations such as structs, functions, aliases, variables, and constants. A module may also contain another module inside it. Modules however are not allowed to contain load statements as the semantics of this behaviour would be confusing. Declarations within a module are accessed using the module name and the `::` operator. + +There are two ways to declare a module - as an alias for another module, or as an inline module. 
Here is an example of both forms, along with a module usage example: + +```rescript +// An inline module: +mod Math { + // Another inline module: + mod FloatMath { + const DEG_TO_RAD: f32 = 0.0174533; + + fn quat_from_euler(xyzRad: vec3) -> vec4 { + // Implementation here + } + } + + // An alias for a module + alias Float = FloatMath; +} + +// Usage +Math::Float::quat_from_euler(vec4(90.0 * Math::Float::DEG_TO_RAD)) +``` + +Libraries are automatically wrapped in a root module based on their name declared in the WESL manifest. This is to further +avoid namespace pollution. This has the downside that it is not possible to use entrypoints declared within a third party shader library unless the [`include`](./Include.md) feature is available and used to add symbols from within a library into the global namespace. + +# Reference-level explanation + +Module statements are parsed as follows, with spaces and comments allowed between tokens: + +```bnf +module_decl_main: + | module_decl +; + +module_member_decl : + ';' +| global_variable_decl ';' +| global_value_decl ';' +| type_alias_decl ';' +| struct_decl +| function_decl +| const_assert_statement ';' +| module_decl_main +; + +module_decl: + attribute * 'mod' ident "{" module_member_decl * "}" ";"? +; + +module_path : + ident ('::' ident)* +; + +``` + +Where `ident` is defined in the WGSL grammar. The WGSL grammar is additionally extended as follows: + +```bnf +extend global_decl : + | module_decl_main +; + +template_elaborated_ident : + (module_path '::')? ident _disambiguate_template template_list ? +``` + +## Behaviour-changing items + +These items cannot be imported, but they affect behavior: + +- [Extensions](https://www.w3.org/TR/WGSL/#extensions) +- [Global diagnostic filters](https://www.w3.org/TR/WGSL/#global-diagnostic-directive) + +These are always set at the very top in the main module, and affect all loaded files. They come before the imports.
+ +When, during parsing of loaded files and modules, we encounter usage of a module that has an extension or a global diagnostic filter within it, we check if the main module enables it, or sets the filter. + +If yes, everything is fine. If not, we throw an error. + +## Bundling WESL Files + +As modules are imported into the global namespace, a naive bundling algorithm would be: +- Remove all `load` statements +- Concatenate files together according to the linking logic described in [Imports](./Imports.md) + +## Linking WESL Files + +Linking WESL files with modules is a little bit more complex than the base [Imports](./Imports.md) linking algorithm. + +To do so the following steps must be taken: +- Bundling needs to occur prior to linking. +- A map of modules to their implementation should be made. A canonical module name should be picked using the logic described in the Name Mangling section below. +- Module usages should be replaced with the canonical module name. +- Next symbols within the module need to be mangled using the canonical module name using the [mangling algorithm](./NameMangling.md) and should be added to the global namespace. +- Usage of symbols within a module should then be replaced with the mangled representation using the same algorithm. +- Finally a plain wgsl module is produced + +### Name Mangling +Symbols within a module are mangled based on the module path, using the scheme described in [Name Mangling](./NameMangling.md).
+ +As modules can be aliased, the mangling scheme chooses the module path using the following algorithm: + - Picks the path with the fewest number of parts in it + - If this is a tie, picks the path string with the smallest number of unicode characters in it + - Finally if that is also a tie, uses a region invariant string comparison function, and chooses the path that is alphabetically first (inclusive of the `::` operators) + +This scheme means that if a given symbol has been included into the global scope, it is not mangled at all. It has the absolute minimum number of parts in the path! + +An example of how to apply the algorithm described above, would be the name used in mangling the `quat_from_euler` function. The name chosen to be mangled would be `Math::Float::quat_from_euler`, rather than `Math::FloatMath::quat_from_euler`. This is because while they have the same number of parts in their paths, `Math::Float::quat_from_euler` has fewer characters. + +## Drawbacks + +Are there reasons as to why we should not do this? + +- This introduces a major new syntactical element which may later conflict with the emerging WGSL standard. +- Requires that users understand the module syntax which may be difficult for object oriented programmers +- Can be quite verbose though module names can be shortened using module aliases (or `include` if adopted) +- Requires that the [`include`](./Include.md) proposal be adopted if third party modules are to be allowed to define entrypoints and bindings. + +# Rationale and alternatives + +- Why is this design the best in the space of possible designs? +- What other designs have been considered and what is the rationale for not choosing them? +- What is the impact of not doing this? + +## Alternatives + +### Extending Structs + +We could namespace symbols within the context of a struct.
However as so many shaders deal with primitive types and don't adopt an OO approach, it may be unwise to tie these two concerns together. + +### Files as modules + +We could make every file also be a module. This has some downsides, in that entrypoints either have to be mangled or exposed in some way to the WebGPU API which may leak details of mangling to the user. Additionally, mixing the concept of files and modules may introduce confusion and complexity if we later added a module or trait like construct. + +## What is the impact of not doing this? + +A number of interesting language features (described in the motivation section) depend on modules. While all of them may not be adopted, modules seem to be a fruitful abstraction for future work in adding necessary features to the language. diff --git a/ModulesInterfaces.md b/ModulesInterfaces.md new file mode 100644 index 0000000..7a8e0f6 --- /dev/null +++ b/ModulesInterfaces.md @@ -0,0 +1,186 @@ +# Modules + +## Summary + +We propose adding a module signature mechanism to the WGSL shading language as an extension. + +## Assumptions + +Assumes that [`load`](./Imports.md) and [Modules](./Modules.md) has already been implemented. + +# Motivation + +Encapsulation and hiding is an important part of being a responsible library maintainer while still allowing for +the evolution and refactoring of implementation details. Currently WESL does not have a mechanism for visibility and so +assumes everything is public by default. + +Additionally we have seen examples of WGSL code that share the same interface but different implementations. One example +that we've seen was for a reduction algorithm that ran on the GPU. This algorithm needed to combine two values but the exact method for combining the values depended on the end user's requirements. Module signatures would allow these operations and even user defined ones to be type checked. 
The ability to check module implementations against their signatures would also prove very beneficial to type constraints in [generic modules](./GenericModules.md) and [generic functions](./GenericFunctions.md). + +# Guide-level explanation + +A module signature is declared using the `mod sig` keyword. A module signature contains a set of declarations of elements such as functions, variables, modules, and constants without specifying their implementation. A module can be said to "return" a set of signatures, which signals to type checkers that only the symbols defined in the signatures should be exposed, and that all the symbols need to be present for typechecking to pass. Like modules, module signatures can also be aliased. + +Below is an example of a module signature and its usage: + +```rescript +// A module signature: + +mod sig MathImpl { + const DEG_TO_RAD: f32; + type Quat; // An opaque type. Its exact representation is hidden from users + fn quat_from_euler(xyzRad: vec3) -> Quat; +} + +// Another module signature +mod sig Pi { + const PI: f32; +} + +// A third module signature +mod sig MainMath { + // Here Float conforms to both MathImpl and Pi + mod Float : MathImpl + Pi; +} + +mod sig F32 { + type T: f32; +} + +alias MainMathSig = MainMath; + +// This can be type checked to determine +// whether Math (and consequently Float) conform to the provided signature +mod Math -> MainMathSig { + mod FloatMath { + alias T = f32; + const DEG_TO_RAD: f32 = 0.0174533; + const PI: f32 = 3.142; + alias Quat = vec4; + + fn quat_from_euler(xyzRad: vec3) -> Quat { + // Implementation here + } + + fn private_function() -> f32 { + // Implementation here + } + } + // An alias for a module that is exposed with the following signatures: + alias Float : MathImpl + Pi + F32 = FloatMath; +} + +// Usage +Math::Float::quat_from_euler(vec4(90.0 * Math::Float::DEG_TO_RAD)) + +// Not allowed as the FloatMath module isn't in the interface +// Math::FloatMath::quat_from_euler(vec4(90.0 *
Math::Float::DEG_TO_RAD)) + +// Not allowed as private_function isn't in the interface +// Math::Float::private_function() +``` + +# Reference-level explanation + +Module signatures are parsed as follows, with spaces and comments allowed between tokens: + +```bnf + +module_sig_decl : + attribute * 'mod' 'sig' ident '{' global_sig * '}' ';'? +; + +global_sig : + | function_sig + | nested_mod_sig + | global_variable_sig + | global_value_sig + | associated_type_sig +; + +function_sig : + attribute * function_header ';' + +nested_mod_sig : + attribute * 'mod' ident ':' (type_specifier ('+' type_specifier) *) ';' +; + +global_variable_sig : + attribute * variable_decl ? ';' +; + +global_value_sig : + 'const' ident ( ':' type_specifier ) ';' +| attribute * 'override' ident ( ':' type_specifier ) ';' +; + +extend module_member_decl : + | module_sig_decl +; + +module_decl: + attribute * 'mod' ident ('->' type_specifier ('+' type_specifier) * )? '{' module_member_decl * '}' ';'? +; + +associated_type_sig : + attribute * 'type' optionally_typed_ident ';' +; +``` + +Where `ident`, `attribute`, `type_specifier`, `function_header` and `optionally_typed_ident` are defined in the WGSL grammar. The WGSL grammar is additionally extended as follows: + +```bnf +extend global_decl : + | module_sig_decl +; + +type_alias_decl : + 'alias' ident (':' type_specifier ('+' type_specifier) *) ? '=' type_specifier +; +``` + +## Linking WESL Files + +Linking WESL files with module signatures is very similar to the [Modules](./Modules.md) linking algorithm. There is just +one addition: + +- Module signatures should be removed from output code + +## Type Checking + +In general, type checking in WESL is to be performed by testing for structural equality.
+ +This means that given a module signature and a module, the following should hold after the canonicalisation step: + +- all types declared in the module signatures should be present either as an alias or a struct declaration and in addition, if specified, evaluate to the same type specified in the type constraint. (This is useful for cases where it is necessary to expose the type in the module signature). +- all submodules declared in the signatures should be present and be structurally a superset of the specified submodule type +- all functions should be present and have the same type and number of arguments as well as the same return type. +- all constants, overrides and vars should be present and have the same types. +- each of these respective elements should have identical attribute values present. +- additional symbols in the module are ignored by the typechecker and should be treated as private after assignment or declaration. + +## Drawbacks + +Are there reasons as to why we should not do this? + +- This introduces additional complexity to the specification. +- Requires that users understand additional syntax +- While not strictly required, a typechecker is practically required in order to validate signatures. + +# Rationale and alternatives + +- Why is this design the best in the space of possible designs? +- What other designs have been considered and what is the rationale for not choosing them? +- What is the impact of not doing this? + +## Alternatives + +### Visibility modifiers + +We could introduce explicit visibility modifiers rather than interfaces to mark a particular symbol as public/private. +In one sense this may be simpler, however it would require us to modify parsing of quite a large surface area. +Visibility would also not be a feature that could be later extended to offer more powerful abstractions. + +### No concept of visibility + +We could continue without visibility or module signatures.
The downside is relying on duck typing if/when generics are implemented could result in a worse experience, and type inference would be a more complicated approach. \ No newline at end of file diff --git a/VirtualFunctions.md b/VirtualFunctions.md deleted file mode 100644 index ad17b0b..0000000 --- a/VirtualFunctions.md +++ /dev/null @@ -1,6 +0,0 @@ -# Virtual Functions - -(TBD) - -The Bevy project makes use of virtual functions to provide -a rich set of extension points in the rendering pipeline. \ No newline at end of file diff --git a/Visibility.md b/Visibility.md deleted file mode 100644 index 20c6505..0000000 --- a/Visibility.md +++ /dev/null @@ -1,115 +0,0 @@ -# Visibility Control - -(TBD) - -This section will describe wgsl enhancements to control which WGSL elements are visible to importers. - -* how to re-export elements so that they're visible with a different path or name? -* controlling host visible names like entry points and overrides? -* Should export allow `as` renaming? -* Why not export struct Foo? - * Many current wgsl parsers (including wgpu's naga) would - choke on the unknown attribute as is and feels like having - two export forms is a bit inconsistent. -* Consider changing to public by default for imports within the package only. - * Matches semantics when importing from .wgsl code - * no annotation effort for tiny projects, everything public is fine. - * (Private by default gives a gentle push to programmers to consider their api every time - they add a public annotation.) - * (And the path of laziness leads to undersharing, - which is safer from a maintenance point of view.) - * (less consistent with package visibility.) - * (unexpected if programmers are accustomed to e.g. JavaScript imports.) - - -## Export - -One natural extension is to add explicit exports. -For one, this would allow library authors to hide certain functions or structs from the outside world. 
-It would also enable re-exports, where a library re-exports a function from another library. - -There are two variations of exports, which could be combined like in Typescript - -### Exporting a list of items - -A standalone export statement simply specifies which globals are exported. -Then, imports can be checked against the list of exports. This is very easy to parse and implement. - -``` -export { foo, bar }; -``` - -And when one wants to export everything defined in the current file, they can use the `*` syntax. - -``` -export *; -``` - -To re-export an item, one can use the same syntax. - -``` -import my/lighting/{ pbr }; - -export { pbr }; -``` - -### Exporting as an attribute - -Exports can also be added as an attribute to the item itself. - -``` -@export -struct Foo { - x: f32; -} - -@export -fn foo() {} -``` - -This is more user friendly, but also more complex to parse. It requires a partial parsing of the WGSL file to find the exports and their names. -A future export specification would include the minimal WGSL syntax that is necessary to implement this. - -## Translating Source File Paths to Import Module Paths - -Tools that look at source code will refer to a `package_root` in `wgsl.toml` that defines -the common prefix of `.wesl` and `.wgsl` files. - -Source directories and files under the `package_root` and map directly to module paths -under the package name as expected. -e.g. - -* Source file `C:\Users\lee\myProj\wgpu\foo.wesl` contains `export fn bar() {}` -* `wgpu` is the `project_root` in `wgsl.toml`. -* The project as published as package `fooz`. -* Other projects can write `import fooz/foo/bar;` to use `bar()`. - -### lib.wgsl - -If there is a file named `lib.wgsl` in the `package_root` directory, -any public exports in `lib.wgsl` are visible at the root of the module. -e.g. 
if a package ‘pkg’ has a source file `pkg_root/lib.wgsl` -that contains `export fn fun()`, -a module file in another package can import that function with `import pkg/fun`. - -## Libraries and Internal Modules Need Privacy - -We want library publishers to decide carefully what to expose as -part of their public api, so they can upgrade private parts of the library safely. -Smooth upgrades are valuable to the library ecosystem. - -Authors of significant internal modules will similarly want -to make a public vs private distinction to reduce maintenance effort as -the internal module evolves. - -### Private by Default - -We propose that importable elements like functions and structs -be private by default, (i.e. inaccessible from import statements). -The programmer needs to add an annotation to make them public, (i.e. available to import). -The programmer can decide whether the element should be public within the package only -or also public to importers from other packages. Perhaps `@export` and `@export(public)`. - -Importable elements from (unenhanced) .wgsl code may be imported from .wesl functions -in the same package. Elements in .wgsl code are not public from other packages -(.wesl code may reexport .wgsl element for package publishing). diff --git a/syntax.bnf b/syntax.bnf new file mode 100644 index 0000000..50e56da --- /dev/null +++ b/syntax.bnf @@ -0,0 +1,719 @@ +// This file is not directly compatible with existing BNF parsers. Instead, it +// uses a dialect for a concise reading experience, such as pattern literals +// for tokens and generalizing RegEx operations that reduce the need for empty +// alternatives. For details on interpreting this dialect, see 1.2. Syntax +// Notation in WebGPU Shading Language (WGSL) Specification. 
+ +translation_unit : + global_directive * global_decl * +; + +global_directive : + diagnostic_directive +| enable_directive +| requires_directive +; + +global_decl : + ';' +| global_variable_decl ';' +| global_value_decl ';' +| type_alias_decl ';' +| struct_decl +| function_decl +| const_assert_statement ';' +# BEGIN WESL CHANGE +| load_decl ';' +| module_decl +| module_sig_decl +| include_decl +# END WESL CHANGE +; + +bool_literal : + 'true' +| 'false' +; + +int_literal : + decimal_int_literal +| hex_int_literal +; + +decimal_int_literal : + /0[iu]?/ +| /[1-9][0-9]*[iu]?/ +; + +hex_int_literal : + /0[xX][0-9a-fA-F]+[iu]?/ +; + +float_literal : + decimal_float_literal +| hex_float_literal +; + +decimal_float_literal : + /0[fh]/ +| /[1-9][0-9]*[fh]/ +| /[0-9]*\.[0-9]+([eE][+-]?[0-9]+)?[fh]?/ +| /[0-9]+\.[0-9]*([eE][+-]?[0-9]+)?[fh]?/ +| /[0-9]+[eE][+-]?[0-9]+[fh]?/ +; + +hex_float_literal : + /0[xX][0-9a-fA-F]*\.[0-9a-fA-F]+([pP][+-]?[0-9]+[fh]?)?/ +| /0[xX][0-9a-fA-F]+\.[0-9a-fA-F]*([pP][+-]?[0-9]+[fh]?)?/ +| /0[xX][0-9a-fA-F]+[pP][+-]?[0-9]+[fh]?/ +; + +diagnostic_directive : + 'diagnostic' diagnostic_control ';' +; + +literal : + int_literal +| float_literal +| bool_literal +; + +ident : + ident_pattern_token _disambiguate_template +; + +member_ident : + ident_pattern_token +; + +diagnostic_name_token : + ident_pattern_token +; + +diagnostic_rule_name : + diagnostic_name_token +| diagnostic_name_token '.' diagnostic_name_token +; + +template_list : + _template_args_start template_arg_comma_list _template_args_end +; + +template_arg_comma_list : + template_arg_expression ( ',' template_arg_expression ) * ',' ? +; + +template_arg_expression : + expression +; + +align_attr : + '@' 'align' '(' expression ',' ? ')' +; + +binding_attr : + '@' 'binding' '(' expression ',' ? ')' +; + +blend_src_attr : + '@' 'blend_src' '(' expression ',' ? ')' +; + +builtin_attr : + '@' 'builtin' '(' builtin_value_name ',' ? 
')' +; + +builtin_value_name : + ident_pattern_token +; + +const_attr : + '@' 'const' +; + +diagnostic_attr : + '@' 'diagnostic' diagnostic_control +; + +group_attr : + '@' 'group' '(' expression ',' ? ')' +; + +id_attr : + '@' 'id' '(' expression ',' ? ')' +; + +interpolate_attr : + '@' 'interpolate' '(' interpolate_type_name ',' ? ')' +| '@' 'interpolate' '(' interpolate_type_name ',' interpolate_sampling_name ',' ? ')' +; + +interpolate_type_name : + ident_pattern_token +; + +interpolate_sampling_name : + ident_pattern_token +; + +invariant_attr : + '@' 'invariant' +; + +location_attr : + '@' 'location' '(' expression ',' ? ')' +; + +must_use_attr : + '@' 'must_use' +; + +size_attr : + '@' 'size' '(' expression ',' ? ')' +; + +workgroup_size_attr : + '@' 'workgroup_size' '(' expression ',' ? ')' +| '@' 'workgroup_size' '(' expression ',' expression ',' ? ')' +| '@' 'workgroup_size' '(' expression ',' expression ',' expression ',' ? ')' +; + +vertex_attr : + '@' 'vertex' +; + +fragment_attr : + '@' 'fragment' +; + +compute_attr : + '@' 'compute' +; + +attribute : + '@' ident_pattern_token argument_expression_list ? +| align_attr +| binding_attr +| blend_src_attr +| builtin_attr +| const_attr +| diagnostic_attr +| group_attr +| id_attr +| interpolate_attr +| invariant_attr +| location_attr +| must_use_attr +| size_attr +| workgroup_size_attr +| vertex_attr +| fragment_attr +| compute_attr +; + +diagnostic_control : + '(' severity_control_name ',' diagnostic_rule_name ',' ? ')' +; + +struct_decl : + 'struct' ident struct_body_decl +; + +struct_body_decl : + '{' struct_member ( ',' struct_member ) * ',' ? '}' +; + +struct_member : + attribute * member_ident ':' type_specifier +; + +type_alias_decl : + 'alias' ident (':' type_specifier ('+' type_specifier) *) ? '=' type_specifier +; + +type_specifier : + template_elaborated_ident +; + +# BEGIN WESL CHANGE +template_elaborated_ident : + (module_path '::')? ident _disambiguate_template template_list ? 
+; +# END WESL CHANGE + +variable_or_value_statement : + variable_decl +| variable_decl '=' expression +| 'let' optionally_typed_ident '=' expression +| 'const' optionally_typed_ident '=' expression +; + +variable_decl : + 'var' _disambiguate_template template_list ? optionally_typed_ident +; + +optionally_typed_ident : + ident ( ':' type_specifier ) ? +; + +global_variable_decl : + attribute * variable_decl ( '=' expression ) ? +; + +# BEGIN WESL CHANGE +const_value_decl : + 'const' optionally_typed_ident '=' expression +; +global_value_decl : + const_value_decl +| attribute * 'override' optionally_typed_ident ( '=' expression ) ? +; + +primary_expression : + template_elaborated_ident +| call_expression +| literal +| paren_expression +; + +call_expression : + call_phrase +; + +call_phrase : + template_elaborated_ident argument_expression_list +; + +paren_expression : + '(' expression ')' +; + +argument_expression_list : + '(' expression_comma_list ? ')' +; + +expression_comma_list : + expression ( ',' expression ) * ',' ? +; + +component_or_swizzle_specifier : + '[' expression ']' component_or_swizzle_specifier ? +| '.' member_ident component_or_swizzle_specifier ? +| '.' swizzle_name component_or_swizzle_specifier ? +; + +unary_expression : + singular_expression +| '-' unary_expression +| '!' unary_expression +| '~' unary_expression +| '*' unary_expression +| '&' unary_expression +; + +singular_expression : + primary_expression component_or_swizzle_specifier ? +; + +lhs_expression : + core_lhs_expression component_or_swizzle_specifier ? 
+| '*' lhs_expression +| '&' lhs_expression +; + +core_lhs_expression : + ident _disambiguate_template +| '(' lhs_expression ')' +; + +multiplicative_expression : + unary_expression +| multiplicative_expression multiplicative_operator unary_expression +; + +multiplicative_operator : + '*' +| '/' +| '%' +; + +additive_expression : + multiplicative_expression +| additive_expression additive_operator multiplicative_expression +; + +additive_operator : + '+' +| '-' +; + +shift_expression : + additive_expression +| unary_expression _shift_left unary_expression +| unary_expression _shift_right unary_expression +; + +relational_expression : + shift_expression +| shift_expression _less_than shift_expression +| shift_expression _greater_than shift_expression +| shift_expression _less_than_equal shift_expression +| shift_expression _greater_than_equal shift_expression +| shift_expression '==' shift_expression +| shift_expression '!=' shift_expression +; + +short_circuit_and_expression : + relational_expression +| short_circuit_and_expression '&&' relational_expression +; + +short_circuit_or_expression : + relational_expression +| short_circuit_or_expression '||' relational_expression +; + +binary_or_expression : + unary_expression +| binary_or_expression '|' unary_expression +; + +binary_and_expression : + unary_expression +| binary_and_expression '&' unary_expression +; + +binary_xor_expression : + unary_expression +| binary_xor_expression '^' unary_expression +; + +bitwise_expression : + binary_and_expression '&' unary_expression +| binary_or_expression '|' unary_expression +| binary_xor_expression '^' unary_expression +; + +expression : + relational_expression +| short_circuit_or_expression '||' relational_expression +| short_circuit_and_expression '&&' relational_expression +| bitwise_expression +; + +compound_statement : + attribute * '{' statement * '}' +; + +assignment_statement : + lhs_expression ( '=' | compound_assignment_operator ) expression +| '_' '=' expression 
+; + +compound_assignment_operator : + '+=' +| '-=' +| '*=' +| '/=' +| '%=' +| '&=' +| '|=' +| '^=' +| _shift_right_assign +| _shift_left_assign +; + +increment_statement : + lhs_expression '++' +; + +decrement_statement : + lhs_expression '--' +; + +if_statement : + attribute * if_clause else_if_clause * else_clause ? +; + +if_clause : + 'if' expression compound_statement +; + +else_if_clause : + 'else' 'if' expression compound_statement +; + +else_clause : + 'else' compound_statement +; + +switch_statement : + attribute * 'switch' expression switch_body +; + +switch_body : + attribute * '{' switch_clause + '}' +; + +switch_clause : + case_clause +| default_alone_clause +; + +case_clause : + 'case' case_selectors ':' ? compound_statement +; + +default_alone_clause : + 'default' ':' ? compound_statement +; + +case_selectors : + case_selector ( ',' case_selector ) * ',' ? +; + +case_selector : + 'default' +| expression +; + +loop_statement : + attribute * 'loop' attribute * '{' statement * continuing_statement ? '}' +; + +for_statement : + attribute * 'for' '(' for_header ')' compound_statement +; + +for_header : + for_init ? ';' expression ? ';' for_update ? +; + +for_init : + variable_or_value_statement +| variable_updating_statement +| func_call_statement +; + +for_update : + variable_updating_statement +| func_call_statement +; + +while_statement : + attribute * 'while' expression compound_statement +; + +break_statement : + 'break' +; + +break_if_statement : + 'break' 'if' expression ';' +; + +continue_statement : + 'continue' +; + +continuing_statement : + 'continuing' continuing_compound_statement +; + +continuing_compound_statement : + attribute * '{' statement * break_if_statement ? '}' +; + +return_statement : + 'return' expression ? 
+; + +func_call_statement : + call_phrase +; + +const_assert_statement : + 'const_assert' expression +; + +statement : + ';' +| return_statement ';' +| if_statement +| switch_statement +| loop_statement +| for_statement +| while_statement +| func_call_statement ';' +| variable_or_value_statement ';' +| break_statement ';' +| continue_statement ';' +| 'discard' ';' +| variable_updating_statement ';' +| compound_statement +| const_assert_statement ';' +; + +variable_updating_statement : + assignment_statement +| increment_statement +| decrement_statement +; + +function_decl : + attribute * 'patch' ? function_header compound_statement +; + +function_header : + 'fn' ident '(' param_list ? ')' ( '->' attribute * template_elaborated_ident ) ? +; + +param_list : + param ( ',' param ) * ',' ? +; + +param : + attribute * ident ':' type_specifier +; + +enable_directive : + 'enable' enable_extension_list ';' +; + +enable_extension_list : + enable_extension_name ( ',' enable_extension_name ) * ',' ? +; + +requires_directive : + 'requires' software_extension_list ';' +; + +software_extension_list : + software_extension_name ( ',' software_extension_name ) * ',' ? +; + +enable_extension_name : + ident_pattern_token +; + +software_extension_name : + ident_pattern_token +; + +ident_pattern_token : + /([_\p{XID_Start}][\p{XID_Continue}]+)|([\p{XID_Start}])/u +; + +severity_control_name : + ident_pattern_token +; + +swizzle_name : + /[rgba]/ +| /[rgba][rgba]/ +| /[rgba][rgba][rgba]/ +| /[rgba][rgba][rgba][rgba]/ +| /[xyzw]/ +| /[xyzw][xyzw]/ +| /[xyzw][xyzw][xyzw]/ +| /[xyzw][xyzw][xyzw][xyzw]/ +; + +# BEGIN WESL +load_decl: +| attribute * 'load' load_relative? load_path +; + +load_relative: +| ('.' | '..') '/' ('..' 
'/')* +| '/' +; + +load_path: +| ident ('/' ident)* +; + +include_decl : + 'include' module_path ';' +; + +module_member_decl : + ';' +| global_variable_decl ';' +| const_value_decl ';' +| type_alias_decl ';' +| struct_decl +| function_decl +| const_assert_statement ';' +| module_decl +| module_sig_decl +; + +module_sig_decl : + attribute * 'mod' 'sig' ident _disambiguate_template module_template_param_list + '{' global_sig * '}' + ';'? +; + +global_sig : + | function_sig + | nested_mod_sig + | global_variable_sig + | global_const_value_sig + | associated_type_sig + | include_decl +; + +function_sig : + attribute * function_header ';' ; + +nested_mod_sig : + attribute * 'mod' ident ':' module_type_specifier_set ';' +; + +global_variable_sig : + attribute * variable_decl ? ';' +; + +global_const_value_sig : + 'const' ident ( ':' type_specifier ) ';' +; + +associated_type_sig : + attribute * 'type' optionally_typed_ident ';' +; + +module_decl: + attribute * 'mod' ident _disambiguate_template module_template_param_list ('->' module_type_specifier_set)? + '{' module_member_decl * '}' + ';'? +; + +module_type_specifier_set : + module_type_specifier ('+' module_type_specifier) * +; + +module_template_param_list : + _template_args_start module_template_param_comma_list _template_args_end +; + +module_template_param_comma_list : + module_template_param ( ',' module_template_param ) * ',' ? +; + +module_template_param : + attribute * ident ':' module_type_specifier_set +; + +module_type_specifier : + (module_path '::')? ident _disambiguate_template template_list ? +; + +module_path : + module_path_part ('::' module_path_part)* +; + +module_path_part : + ident _disambiguate_template template_list ? +; +# END WESL \ No newline at end of file