Skip to content

Commit

Permalink
Refactor binary parsing (#63)
Browse files Browse the repository at this point in the history
* Refactor binary parsing

* Update pykwasm/src/pykwasm/kdist/wasm-semantics/binary-parsing/tags.md

Co-authored-by: Stephen Skeirik <[email protected]>

---------

Co-authored-by: Stephen Skeirik <[email protected]>
  • Loading branch information
virgil-serbanuta and sskeirik authored Jan 16, 2025
1 parent 3715249 commit 41cdcf1
Show file tree
Hide file tree
Showing 13 changed files with 1,142 additions and 988 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@ module BINARY-PARSER-TEST
imports BINARY-PARSER-TEST-SYNTAX
imports BINARY-PARSER
imports BINARY-PARSER-SYNTAX
imports BINARY-PARSER-MODULE-TEST-SYNTAX
imports BINARY-PARSER-SECTION-SYNTAX
syntax KItem ::= parseBinary(Bytes)
Expand Down
1,017 changes: 29 additions & 988 deletions pykwasm/src/pykwasm/kdist/wasm-semantics/binary-parser.md

Large diffs are not rendered by default.

16 changes: 16 additions & 0 deletions pykwasm/src/pykwasm/kdist/wasm-semantics/binary-parsing/base.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
```k
module BINARY-PARSER-BASE-SYNTAX
imports BYTES
imports INT
imports LIST
imports STRING
syntax BytesWithIndex ::= bwi(Bytes, Int)
syntax ParseError ::= parseError(String, List)
syntax BytesWithIndexOrError ::= BytesWithIndex | ParseError
endmodule
```
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
Skipping over a constant prefix in the input.

```k
module BINARY-PARSER-CONSTANT-SYNTAX
imports BINARY-PARSER-BASE-SYNTAX
syntax BytesWithIndexOrError ::= parseConstant(BytesWithIndexOrError, Bytes) [function, total]
endmodule
module BINARY-PARSER-CONSTANT [private]
imports BINARY-PARSER-CONSTANT-SYNTAX
imports BOOL
imports K-EQUAL
rule parseConstant(bwi(Buffer:Bytes, Index:Int), Constant:Bytes)
=> bwi(Buffer, Index +Int lengthBytes(Constant))
requires Index +Int lengthBytes(Constant) <=Int lengthBytes(Buffer)
andBool substrBytes(Buffer, Index, Index +Int lengthBytes(Constant)) ==K Constant
rule parseConstant(bwi(Buffer:Bytes, Index:Int), Constant:Bytes)
=> parseError("parseConstant", ListItem(lengthBytes(Buffer)) ListItem(Index) ListItem(Constant) ListItem(Buffer))
[owise]
rule parseConstant(E:ParseError, _:Bytes) => E
endmodule
```
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
```k
module BINARY-PARSER-DEFN-SYNTAX
imports BINARY-PARSER-BASE-SYNTAX
imports WASM-COMMON-SYNTAX
syntax DefnResult ::= defnResult(Defn, BytesWithIndex) | ParseError
endmodule
```
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
Parsing a [function type](https://webassembly.github.io/spec/core/binary/types.html#binary-functype).

```k
module BINARY-PARSER-FUNCTYPE-SYNTAX
imports BINARY-PARSER-BASE-SYNTAX
imports BINARY-PARSER-DEFN-SYNTAX
syntax DefnResult ::= parseFuncType(BytesWithIndex) [function, total]
endmodule
module BINARY-PARSER-FUNCTYPE [private]
imports BINARY-PARSER-CONSTANT-SYNTAX
imports BINARY-PARSER-FUNCTYPE-SYNTAX
imports BINARY-PARSER-RESULTTYPE-SYNTAX
imports BINARY-PARSER-TAGS
imports WASM
syntax DefnResult ::= #parseFuncType(BytesWithIndexOrError) [function, total]
| #parseFuncType1(ResultTypeResult) [function, total]
| #parseFuncType2(VecType, ResultTypeResult) [function, total]
rule parseFuncType(BWI:BytesWithIndex) => #parseFuncType(parseConstant(BWI, TYPE_FUN))
rule #parseFuncType(BWI:BytesWithIndex) => #parseFuncType1(parseResultType(BWI))
rule #parseFuncType(E:ParseError) => E
rule #parseFuncType1(resultTypeResult(V:VecType, BWI:BytesWithIndex))
=> #parseFuncType2(V, parseResultType(BWI))
rule #parseFuncType1(E:ParseError) => E
rule #parseFuncType2(V1:VecType, resultTypeResult(V2:VecType, BWI:BytesWithIndex))
=> defnResult(#type(V1 -> V2, ), BWI:BytesWithIndex)
rule #parseFuncType2(_:VecType, E:ParseError) => E
endmodule
```
57 changes: 57 additions & 0 deletions pykwasm/src/pykwasm/kdist/wasm-semantics/binary-parsing/int.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
See the [LEB128 encoding](https://en.wikipedia.org/wiki/LEB128) for more details.

```k
module BINARY-PARSER-INT-SYNTAX
imports BINARY-PARSER-BASE-SYNTAX
syntax IntResult ::= intResult(value:Int, remainder:BytesWithIndex) | ParseError
syntax IntResult ::= parseLeb128UInt(BytesWithIndex) [function, total]
// TODO: Rename to IntVec
syntax IntList ::= List{Int, ":"}
syntax IntListResult ::= intListResult(IntList, BytesWithIndex) | ParseError
endmodule
module BINARY-PARSER-INT [private]
imports BINARY-PARSER-INT-SYNTAX
imports BOOL
syntax Bool ::= bit8IsSet(Int) [function, total]
rule bit8IsSet(I:Int) => I &Int 128 =/=Int 0
syntax Int ::= clearBit8(Int) [function, total]
rule clearBit8(I:Int) => I ^Int (I &Int 128)
syntax IntListResult ::= parseLeb128IntChunks(BytesWithIndex) [function, total]
| #parseLeb128IntChunks(Int, IntListResult) [function, total]
rule parseLeb128IntChunks(bwi(Buffer:Bytes, I:Int))
=> parseError("parseLeb128IntChunks", ListItem(lengthBytes(Buffer)) ListItem(I) ListItem(Buffer))
requires I <Int 0 orBool lengthBytes(Buffer) <=Int I
rule parseLeb128IntChunks(bwi(Buffer:Bytes, I:Int))
=> #parseLeb128IntChunks(clearBit8(Buffer[I]), parseLeb128IntChunks(bwi(Buffer, I +Int 1)))
requires 0 <=Int I andBool I <Int lengthBytes(Buffer)
andBool bit8IsSet(Buffer[I])
rule parseLeb128IntChunks(bwi(Buffer:Bytes, I:Int))
=> intListResult(Buffer[I] : .IntList, bwi(Buffer, I +Int 1))
requires 0 <=Int I andBool I <Int lengthBytes(Buffer)
andBool notBool bit8IsSet(Buffer[I])
rule #parseLeb128IntChunks(Value:Int, intListResult(L:IntList, BWI:BytesWithIndex))
=> intListResult(Value : L, BWI)
rule #parseLeb128IntChunks(_Value:Int, E:ParseError) => E
syntax IntResult ::= #parseLeb128UInt(IntListResult) [function, total]
rule parseLeb128UInt(BWI:BytesWithIndex) => #parseLeb128UInt(parseLeb128IntChunks(BWI))
rule #parseLeb128UInt(intListResult(L:IntList, BWI:BytesWithIndex))
=> intResult(buildLeb128UInt(L), BWI)
rule #parseLeb128UInt(E:ParseError) => E
syntax Int ::= buildLeb128UInt(IntList) [function, total]
rule buildLeb128UInt(.IntList) => 0
rule buildLeb128UInt(Value:Int : L:IntList) => Value +Int 128 *Int buildLeb128UInt(L)
endmodule
```
74 changes: 74 additions & 0 deletions pykwasm/src/pykwasm/kdist/wasm-semantics/binary-parsing/module.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
[Module](https://webassembly.github.io/spec/core/binary/modules.html) parsing

```k
module BINARY-PARSER-MODULE-SYNTAX
imports BINARY-PARSER-BASE-SYNTAX
imports WASM
syntax ModuleResult ::= moduleResult(ModuleDecl, BytesWithIndex) | ParseError
syntax ModuleResult ::= parseModule(BytesWithIndex) [function, total]
endmodule
module BINARY-PARSER-MODULE-TEST-SYNTAX
imports BOOL-SYNTAX
imports WASM
syntax ModuleDecl ::= addDefnToModule(Bool, Defn, ModuleDecl) [function, total]
endmodule
module BINARY-PARSER-MODULE [private]
imports BINARY-PARSER-CONSTANT-SYNTAX
imports BINARY-PARSER-MODULE-SYNTAX
imports BINARY-PARSER-MODULE-TEST-SYNTAX
imports BINARY-PARSER-SECTION-SYNTAX
imports BINARY-PARSER-TAGS
rule parseModule(BWI:BytesWithIndex)
=> parseModuleSections(reverseSections(splitSections(parseConstant(parseConstant(BWI, MAGIC), VERSION))))
syntax ModuleResult ::= parseModuleSections(UnparsedSectionsResult) [function, total]
| #parseModuleSections(ParsedSectionsResult, BytesWithIndex) [function, total]
rule parseModuleSections(unparsedSectionsResult(S:UnparsedSections, BWI:BytesWithIndex))
=> #parseModuleSections(parseSections(S), BWI)
rule parseModuleSections(E:ParseError) => E
rule #parseModuleSections(S:Sections, BWI:BytesWithIndex)
=> moduleResult
( addSectionsToModule
( S // Do not reverse, as required by addDefnToModule.
, #module
(... types: .Defns
, funcs: .Defns
, tables: .Defns
, mems: .Defns
, globals: .Defns
, elem: .Defns
, data: .Defns
, start: .Defns
, importDefns: .Defns
, exports: .Defns
, metadata: #meta(... id: , funcIds: .Map, filename: .String)
)
), BWI)
rule #parseModuleSections(E:ParseError, _:BytesWithIndex) => E
syntax ModuleDecl ::= addSectionsToModule(Sections, ModuleDecl) [function, total]
rule addSectionsToModule(.Sections, M:ModuleDecl) => M
rule addSectionsToModule(S:Section : Ss:Sections, M:ModuleDecl)
=> addSectionsToModule(Ss, addSectionToModule(S, M))
syntax ModuleDecl ::= addSectionToModule(Section, ModuleDecl) [function, total]
rule addSectionToModule(customSection(_:Bytes), M:ModuleDecl) => M
rule addSectionToModule(defnsSection(D:Defns), M:ModuleDecl) => addDefnsToModule(D, M)
syntax ModuleDecl ::= addDefnsToModule(Defns, ModuleDecl) [function, total]
rule addDefnsToModule(.Defns, M:ModuleDecl) => M
rule addDefnsToModule(D:Defn Ds:Defns, M:ModuleDecl)
=> addDefnsToModule(Ds, addDefnToModule(false, D, M))
rule addDefnToModule(true, _, M) => M
// The following add the defn at the top of the existing defns (e.g. T Ts), so
// addDefnToModule should be called with Defns in reverse order
// (the last defn should be processed in the first call).
rule addDefnToModule(false => true, T:TypeDefn, #module(... types: Ts => T Ts))
endmodule
```
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
Parsing a [result type](https://webassembly.github.io/spec/core/binary/types.html#binary-resulttype).

```k
module BINARY-PARSER-RESULTTYPE-SYNTAX
imports BINARY-PARSER-BASE-SYNTAX
imports WASM-DATA-COMMON
syntax ResultTypeResult ::= resultTypeResult(VecType, BytesWithIndex) | ParseError
syntax ResultTypeResult ::= parseResultType(BytesWithIndex) [function, total]
endmodule
module BINARY-PARSER-RESULTTYPE [private]
imports BINARY-PARSER-INT-SYNTAX
imports BINARY-PARSER-RESULTTYPE-SYNTAX
imports BINARY-PARSER-VALTYPE-SYNTAX
syntax ResultTypeResult ::= #parseResultType(IntResult) [function, total]
| #parseResultType1(ValTypesResult) [function, total]
rule parseResultType(BWI:BytesWithIndex) => #parseResultType(parseLeb128UInt(BWI))
rule #parseResultType(intResult(Count:Int, BWI:BytesWithIndex))
=> #parseResultType1(parseValTypes(Count, .ValTypes, BWI))
rule #parseResultType(E:ParseError) => E
rule #parseResultType1(valTypesResult(V:ValTypes, BWI:BytesWithIndex))
=> resultTypeResult([V], BWI)
rule #parseResultType1(E:ParseError) => E
endmodule
```
106 changes: 106 additions & 0 deletions pykwasm/src/pykwasm/kdist/wasm-semantics/binary-parsing/section.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
Splitting a module into [sections](https://webassembly.github.io/spec/core/binary/modules.html#sections).

```k
module BINARY-PARSER-SECTION-SYNTAX
imports BINARY-PARSER-BASE-SYNTAX
imports WASM-COMMON-SYNTAX
syntax Section ::= customSection(Bytes)
| defnsSection(reverseDefns:Defns)
syntax SectionResult ::= sectionResult(Section, BytesWithIndex) | ParseError
syntax SectionResult ::= parseSection(UnparsedSection) [function, total]
syntax Sections ::= List{Section, ":"}
syntax ParsedSectionsResult ::= Sections | ParseError
syntax ParsedSectionsResult ::= parseSections(UnparsedSections) [function, total]
syntax UnparsedSection ::= unparsedSection(sectionId:Int, sectionData:Bytes)
syntax UnparsedSections ::= List{UnparsedSection, ":"}
syntax UnparsedSectionsResult ::= unparsedSectionsResult(UnparsedSections, BytesWithIndex) | ParseError
syntax UnparsedSectionsResult ::= splitSections(BytesWithIndexOrError) [function, total]
syntax UnparsedSectionsResult ::= reverseSections(UnparsedSectionsResult) [function, total]
endmodule
module BINARY-PARSER-SECTION [private]
imports BOOL
imports BINARY-PARSER-INT-SYNTAX
imports BINARY-PARSER-SECTION-SYNTAX
imports BINARY-PARSER-TAGS
imports BINARY-PARSER-TYPE-SECTION-SYNTAX
imports K-EQUAL-SYNTAX
rule parseSection(unparsedSection(CUSTOM_SEC, Data:Bytes))
=> sectionResult(customSection(Data), bwi(Data, lengthBytes(Data)))
rule parseSection(unparsedSection(TYPE_SEC, Data:Bytes))
=> parseTypeSection(bwi(Data, 0))
// rule parseSection(A) => parseError("parseSection", ListItem(A))
// [owise]
syntax ParsedSectionsResult ::= #parseSections(SectionResult, ParsedSectionsResult) [function, total]
rule parseSections(.UnparsedSections) => .Sections
rule parseSections(S:UnparsedSection : Ss:UnparsedSections)
=> #parseSections(parseSection(S), parseSections(Ss))
rule #parseSections(sectionResult(S:Section, bwi(B:Bytes, I:Int)), Ss:Sections)
=> S : Ss
requires I ==K lengthBytes(B)
rule #parseSections(E:ParseError, _) => E
rule #parseSections(_, E:ParseError) => E
[owise]
syntax UnparsedSectionResult ::= unparsedSectionResult(UnparsedSection, BytesWithIndex) | ParseError
syntax UnparsedSectionResult ::= splitSection(BytesWithIndex) [function, total]
syntax UnparsedSectionResult ::= #splitSection(sectionId:Int, IntResult) [function, total]
rule splitSection(bwi(Buffer:Bytes, Index:Int))
=> #splitSection(Buffer[Index], parseLeb128UInt(bwi(Buffer, Index +Int 1)))
requires Index <Int lengthBytes(Buffer)
rule #splitSection(SectionId, intResult(SectionLength:Int, bwi(Buffer:Bytes, Index:Int)))
=> unparsedSectionResult
( unparsedSection(SectionId, substrBytes(Buffer, Index, Index +Int SectionLength))
, bwi(Buffer, Index +Int SectionLength)
)
requires 0 <=Int Index andBool Index +Int SectionLength <=Int lengthBytes(Buffer)
rule #splitSection(SectionId:Int, intResult(SectionLength:Int, bwi(Buffer:Bytes, Index:Int)))
=> parseError("splitSection", ListItem(SectionId) ListItem(SectionLength) ListItem(Index) ListItem(lengthBytes(Buffer)) ListItem(Buffer))
[owise]
rule #splitSection(_SectionId:Int, E:ParseError) => E
syntax UnparsedSections ::= reverse(UnparsedSections) [function, total]
| #reverse(UnparsedSections, UnparsedSections) [function, total]
rule reverse(S:UnparsedSections) => #reverse(S, .UnparsedSections)
rule #reverse(.UnparsedSections, S:UnparsedSections) => S
rule #reverse(S:UnparsedSection : Ss1:UnparsedSections, Ss2:UnparsedSections) => #reverse(Ss1, S: Ss2)
rule reverseSections(unparsedSectionsResult(S:UnparsedSections, BWI:BytesWithIndex))
=> unparsedSectionsResult(reverse(S), BWI)
rule reverseSections(E:ParseError) => E
syntax UnparsedSectionsResult ::= #splitSections(UnparsedSectionResult) [function, total]
| concatOrError(UnparsedSection, UnparsedSectionsResult) [function, total]
rule splitSections(bwi(B:Bytes, Index:Int))
=> unparsedSectionsResult(.UnparsedSections, bwi(B:Bytes, Index:Int))
requires Index ==K lengthBytes(B)
rule splitSections(BWI:BytesWithIndex)
=> #splitSections(splitSection(BWI))
[owise]
rule splitSections(E:ParseError) => E
rule #splitSections(unparsedSectionResult(S:UnparsedSection, BWI:BytesWithIndex))
=> concatOrError(S, splitSections(BWI))
rule #splitSections(E:ParseError) => E
rule concatOrError(S:UnparsedSection, unparsedSectionsResult(Ss:UnparsedSections, BWI:BytesWithIndex))
=> unparsedSectionsResult(S : Ss, BWI)
rule concatOrError(_:UnparsedSection, E:ParseError) => E
endmodule
```
Loading

0 comments on commit 41cdcf1

Please sign in to comment.