Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor binary parsing #63

Merged
merged 2 commits into from
Jan 16, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@ module BINARY-PARSER-TEST
imports BINARY-PARSER-TEST-SYNTAX

imports BINARY-PARSER
imports BINARY-PARSER-SYNTAX
imports BINARY-PARSER-MODULE-TEST-SYNTAX
imports BINARY-PARSER-SECTION-SYNTAX

syntax KItem ::= parseBinary(Bytes)

Expand Down
1,017 changes: 29 additions & 988 deletions pykwasm/src/pykwasm/kdist/wasm-semantics/binary-parser.md

Large diffs are not rendered by default.

16 changes: 16 additions & 0 deletions pykwasm/src/pykwasm/kdist/wasm-semantics/binary-parsing/base.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
```k

module BINARY-PARSER-BASE-SYNTAX
imports BYTES
imports INT
imports LIST
imports STRING

syntax BytesWithIndex ::= bwi(Bytes, Int)
syntax ParseError ::= parseError(String, List)

syntax BytesWithIndexOrError ::= BytesWithIndex | ParseError

endmodule

```
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
Skipping over a constant prefix in the input.

```k
module BINARY-PARSER-CONSTANT-SYNTAX
imports BINARY-PARSER-BASE-SYNTAX

syntax BytesWithIndexOrError ::= parseConstant(BytesWithIndexOrError, Bytes) [function, total]
endmodule

module BINARY-PARSER-CONSTANT [private]
imports BINARY-PARSER-CONSTANT-SYNTAX
imports BOOL
imports K-EQUAL

rule parseConstant(bwi(Buffer:Bytes, Index:Int), Constant:Bytes)
=> bwi(Buffer, Index +Int lengthBytes(Constant))
requires Index +Int lengthBytes(Constant) <=Int lengthBytes(Buffer)
andBool substrBytes(Buffer, Index, Index +Int lengthBytes(Constant)) ==K Constant
rule parseConstant(bwi(Buffer:Bytes, Index:Int), Constant:Bytes)
=> parseError("parseConstant", ListItem(lengthBytes(Buffer)) ListItem(Index) ListItem(Constant) ListItem(Buffer))
[owise]
rule parseConstant(E:ParseError, _:Bytes) => E

endmodule
```
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
```k
module BINARY-PARSER-DEFN-SYNTAX
imports BINARY-PARSER-BASE-SYNTAX
imports WASM-COMMON-SYNTAX

syntax DefnResult ::= defnResult(Defn, BytesWithIndex) | ParseError
endmodule
```
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
Parsing a [function type](https://webassembly.github.io/spec/core/binary/types.html#binary-functype).

```k
module BINARY-PARSER-FUNCTYPE-SYNTAX
imports BINARY-PARSER-BASE-SYNTAX
imports BINARY-PARSER-DEFN-SYNTAX

syntax DefnResult ::= parseFuncType(BytesWithIndex) [function, total]
endmodule

module BINARY-PARSER-FUNCTYPE [private]
imports BINARY-PARSER-CONSTANT-SYNTAX
imports BINARY-PARSER-FUNCTYPE-SYNTAX
imports BINARY-PARSER-RESULTTYPE-SYNTAX
imports BINARY-PARSER-TAGS
imports WASM

syntax DefnResult ::= #parseFuncType(BytesWithIndexOrError) [function, total]
| #parseFuncType1(ResultTypeResult) [function, total]
| #parseFuncType2(VecType, ResultTypeResult) [function, total]

rule parseFuncType(BWI:BytesWithIndex) => #parseFuncType(parseConstant(BWI, TYPE_FUN))
rule #parseFuncType(BWI:BytesWithIndex) => #parseFuncType1(parseResultType(BWI))
rule #parseFuncType(E:ParseError) => E
rule #parseFuncType1(resultTypeResult(V:VecType, BWI:BytesWithIndex))
=> #parseFuncType2(V, parseResultType(BWI))
rule #parseFuncType1(E:ParseError) => E
rule #parseFuncType2(V1:VecType, resultTypeResult(V2:VecType, BWI:BytesWithIndex))
=> defnResult(#type(V1 -> V2, ), BWI:BytesWithIndex)
rule #parseFuncType2(_:VecType, E:ParseError) => E

endmodule
```
57 changes: 57 additions & 0 deletions pykwasm/src/pykwasm/kdist/wasm-semantics/binary-parsing/int.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
See the [LEB128 encoding](https://en.wikipedia.org/wiki/LEB128) for more details.

```k
module BINARY-PARSER-INT-SYNTAX
imports BINARY-PARSER-BASE-SYNTAX

syntax IntResult ::= intResult(value:Int, remainder:BytesWithIndex) | ParseError
syntax IntResult ::= parseLeb128UInt(BytesWithIndex) [function, total]

// TODO: Rename to IntVec
syntax IntList ::= List{Int, ":"}
syntax IntListResult ::= intListResult(IntList, BytesWithIndex) | ParseError
endmodule

module BINARY-PARSER-INT [private]
imports BINARY-PARSER-INT-SYNTAX
imports BOOL

syntax Bool ::= bit8IsSet(Int) [function, total]
rule bit8IsSet(I:Int) => I &Int 128 =/=Int 0

syntax Int ::= clearBit8(Int) [function, total]
rule clearBit8(I:Int) => I ^Int (I &Int 128)

syntax IntListResult ::= parseLeb128IntChunks(BytesWithIndex) [function, total]
| #parseLeb128IntChunks(Int, IntListResult) [function, total]

rule parseLeb128IntChunks(bwi(Buffer:Bytes, I:Int))
=> parseError("parseLeb128IntChunks", ListItem(lengthBytes(Buffer)) ListItem(I) ListItem(Buffer))
requires I <Int 0 orBool lengthBytes(Buffer) <=Int I
rule parseLeb128IntChunks(bwi(Buffer:Bytes, I:Int))
=> #parseLeb128IntChunks(clearBit8(Buffer[I]), parseLeb128IntChunks(bwi(Buffer, I +Int 1)))
requires 0 <=Int I andBool I <Int lengthBytes(Buffer)
andBool bit8IsSet(Buffer[I])
rule parseLeb128IntChunks(bwi(Buffer:Bytes, I:Int))
=> intListResult(Buffer[I] : .IntList, bwi(Buffer, I +Int 1))
requires 0 <=Int I andBool I <Int lengthBytes(Buffer)
andBool notBool bit8IsSet(Buffer[I])

rule #parseLeb128IntChunks(Value:Int, intListResult(L:IntList, BWI:BytesWithIndex))
=> intListResult(Value : L, BWI)
rule #parseLeb128IntChunks(_Value:Int, E:ParseError) => E


syntax IntResult ::= #parseLeb128UInt(IntListResult) [function, total]

rule parseLeb128UInt(BWI:BytesWithIndex) => #parseLeb128UInt(parseLeb128IntChunks(BWI))
rule #parseLeb128UInt(intListResult(L:IntList, BWI:BytesWithIndex))
=> intResult(buildLeb128UInt(L), BWI)
rule #parseLeb128UInt(E:ParseError) => E

syntax Int ::= buildLeb128UInt(IntList) [function, total]
rule buildLeb128UInt(.IntList) => 0
rule buildLeb128UInt(Value:Int : L:IntList) => Value +Int 128 *Int buildLeb128UInt(L)

endmodule
```
74 changes: 74 additions & 0 deletions pykwasm/src/pykwasm/kdist/wasm-semantics/binary-parsing/module.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
[Module](https://webassembly.github.io/spec/core/binary/modules.html) parsing

```k
module BINARY-PARSER-MODULE-SYNTAX
imports BINARY-PARSER-BASE-SYNTAX
imports WASM

syntax ModuleResult ::= moduleResult(ModuleDecl, BytesWithIndex) | ParseError

syntax ModuleResult ::= parseModule(BytesWithIndex) [function, total]
endmodule

module BINARY-PARSER-MODULE-TEST-SYNTAX
imports BOOL-SYNTAX
imports WASM

syntax ModuleDecl ::= addDefnToModule(Bool, Defn, ModuleDecl) [function, total]
endmodule

module BINARY-PARSER-MODULE [private]
imports BINARY-PARSER-CONSTANT-SYNTAX
imports BINARY-PARSER-MODULE-SYNTAX
imports BINARY-PARSER-MODULE-TEST-SYNTAX
imports BINARY-PARSER-SECTION-SYNTAX
imports BINARY-PARSER-TAGS

rule parseModule(BWI:BytesWithIndex)
=> parseModuleSections(reverseSections(splitSections(parseConstant(parseConstant(BWI, MAGIC), VERSION))))

syntax ModuleResult ::= parseModuleSections(UnparsedSectionsResult) [function, total]
| #parseModuleSections(ParsedSectionsResult, BytesWithIndex) [function, total]

rule parseModuleSections(unparsedSectionsResult(S:UnparsedSections, BWI:BytesWithIndex))
=> #parseModuleSections(parseSections(S), BWI)
rule parseModuleSections(E:ParseError) => E
rule #parseModuleSections(S:Sections, BWI:BytesWithIndex)
=> moduleResult
( addSectionsToModule
( S // Do not reverse, as required by addDefnToModule.
, #module
(... types: .Defns
, funcs: .Defns
, tables: .Defns
, mems: .Defns
, globals: .Defns
, elem: .Defns
, data: .Defns
, start: .Defns
, importDefns: .Defns
, exports: .Defns
, metadata: #meta(... id: , funcIds: .Map, filename: .String)
)
), BWI)
rule #parseModuleSections(E:ParseError, _:BytesWithIndex) => E

syntax ModuleDecl ::= addSectionsToModule(Sections, ModuleDecl) [function, total]
rule addSectionsToModule(.Sections, M:ModuleDecl) => M
rule addSectionsToModule(S:Section : Ss:Sections, M:ModuleDecl)
=> addSectionsToModule(Ss, addSectionToModule(S, M))
syntax ModuleDecl ::= addSectionToModule(Section, ModuleDecl) [function, total]
rule addSectionToModule(customSection(_:Bytes), M:ModuleDecl) => M
rule addSectionToModule(defnsSection(D:Defns), M:ModuleDecl) => addDefnsToModule(D, M)
syntax ModuleDecl ::= addDefnsToModule(Defns, ModuleDecl) [function, total]
rule addDefnsToModule(.Defns, M:ModuleDecl) => M
rule addDefnsToModule(D:Defn Ds:Defns, M:ModuleDecl)
=> addDefnsToModule(Ds, addDefnToModule(false, D, M))

rule addDefnToModule(true, _, M) => M
// The following add the defn at the top of the existing defns (e.g. T Ts), so
// addDefnToModule should be called with Defns in reverse order
// (the last defn should be processed in the first call).
rule addDefnToModule(false => true, T:TypeDefn, #module(... types: Ts => T Ts))
endmodule
```
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
Parsing a [result type](https://webassembly.github.io/spec/core/binary/types.html#binary-resulttype).

```k
module BINARY-PARSER-RESULTTYPE-SYNTAX
imports BINARY-PARSER-BASE-SYNTAX
imports WASM-DATA-COMMON

syntax ResultTypeResult ::= resultTypeResult(VecType, BytesWithIndex) | ParseError
syntax ResultTypeResult ::= parseResultType(BytesWithIndex) [function, total]
endmodule

module BINARY-PARSER-RESULTTYPE [private]
imports BINARY-PARSER-INT-SYNTAX
imports BINARY-PARSER-RESULTTYPE-SYNTAX
imports BINARY-PARSER-VALTYPE-SYNTAX

syntax ResultTypeResult ::= #parseResultType(IntResult) [function, total]
| #parseResultType1(ValTypesResult) [function, total]

rule parseResultType(BWI:BytesWithIndex) => #parseResultType(parseLeb128UInt(BWI))
rule #parseResultType(intResult(Count:Int, BWI:BytesWithIndex))
=> #parseResultType1(parseValTypes(Count, .ValTypes, BWI))
rule #parseResultType(E:ParseError) => E
rule #parseResultType1(valTypesResult(V:ValTypes, BWI:BytesWithIndex))
=> resultTypeResult([V], BWI)
rule #parseResultType1(E:ParseError) => E
endmodule
```
106 changes: 106 additions & 0 deletions pykwasm/src/pykwasm/kdist/wasm-semantics/binary-parsing/section.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
Splitting a module into [sections](https://webassembly.github.io/spec/core/binary/modules.html#sections).

```k
module BINARY-PARSER-SECTION-SYNTAX
imports BINARY-PARSER-BASE-SYNTAX
imports WASM-COMMON-SYNTAX

syntax Section ::= customSection(Bytes)
| defnsSection(reverseDefns:Defns)

syntax SectionResult ::= sectionResult(Section, BytesWithIndex) | ParseError
syntax SectionResult ::= parseSection(UnparsedSection) [function, total]

syntax Sections ::= List{Section, ":"}
syntax ParsedSectionsResult ::= Sections | ParseError
syntax ParsedSectionsResult ::= parseSections(UnparsedSections) [function, total]

syntax UnparsedSection ::= unparsedSection(sectionId:Int, sectionData:Bytes)

syntax UnparsedSections ::= List{UnparsedSection, ":"}
syntax UnparsedSectionsResult ::= unparsedSectionsResult(UnparsedSections, BytesWithIndex) | ParseError
syntax UnparsedSectionsResult ::= splitSections(BytesWithIndexOrError) [function, total]

syntax UnparsedSectionsResult ::= reverseSections(UnparsedSectionsResult) [function, total]
endmodule

module BINARY-PARSER-SECTION [private]
imports BOOL
imports BINARY-PARSER-INT-SYNTAX
imports BINARY-PARSER-SECTION-SYNTAX
imports BINARY-PARSER-TAGS
imports BINARY-PARSER-TYPE-SECTION-SYNTAX
imports K-EQUAL-SYNTAX

rule parseSection(unparsedSection(CUSTOM_SEC, Data:Bytes))
=> sectionResult(customSection(Data), bwi(Data, lengthBytes(Data)))
rule parseSection(unparsedSection(TYPE_SEC, Data:Bytes))
=> parseTypeSection(bwi(Data, 0))
// rule parseSection(A) => parseError("parseSection", ListItem(A))
// [owise]


syntax ParsedSectionsResult ::= #parseSections(SectionResult, ParsedSectionsResult) [function, total]
rule parseSections(.UnparsedSections) => .Sections
rule parseSections(S:UnparsedSection : Ss:UnparsedSections)
=> #parseSections(parseSection(S), parseSections(Ss))
rule #parseSections(sectionResult(S:Section, bwi(B:Bytes, I:Int)), Ss:Sections)
=> S : Ss
requires I ==K lengthBytes(B)
rule #parseSections(E:ParseError, _) => E
rule #parseSections(_, E:ParseError) => E
[owise]


syntax UnparsedSectionResult ::= unparsedSectionResult(UnparsedSection, BytesWithIndex) | ParseError
syntax UnparsedSectionResult ::= splitSection(BytesWithIndex) [function, total]

syntax UnparsedSectionResult ::= #splitSection(sectionId:Int, IntResult) [function, total]

rule splitSection(bwi(Buffer:Bytes, Index:Int))
=> #splitSection(Buffer[Index], parseLeb128UInt(bwi(Buffer, Index +Int 1)))
requires Index <Int lengthBytes(Buffer)

rule #splitSection(SectionId, intResult(SectionLength:Int, bwi(Buffer:Bytes, Index:Int)))
=> unparsedSectionResult
( unparsedSection(SectionId, substrBytes(Buffer, Index, Index +Int SectionLength))
, bwi(Buffer, Index +Int SectionLength)
)
requires 0 <=Int Index andBool Index +Int SectionLength <=Int lengthBytes(Buffer)
rule #splitSection(SectionId:Int, intResult(SectionLength:Int, bwi(Buffer:Bytes, Index:Int)))
=> parseError("splitSection", ListItem(SectionId) ListItem(SectionLength) ListItem(Index) ListItem(lengthBytes(Buffer)) ListItem(Buffer))
[owise]
rule #splitSection(_SectionId:Int, E:ParseError) => E


syntax UnparsedSections ::= reverse(UnparsedSections) [function, total]
| #reverse(UnparsedSections, UnparsedSections) [function, total]
rule reverse(S:UnparsedSections) => #reverse(S, .UnparsedSections)
rule #reverse(.UnparsedSections, S:UnparsedSections) => S
rule #reverse(S:UnparsedSection : Ss1:UnparsedSections, Ss2:UnparsedSections) => #reverse(Ss1, S: Ss2)

rule reverseSections(unparsedSectionsResult(S:UnparsedSections, BWI:BytesWithIndex))
=> unparsedSectionsResult(reverse(S), BWI)
rule reverseSections(E:ParseError) => E

syntax UnparsedSectionsResult ::= #splitSections(UnparsedSectionResult) [function, total]
| concatOrError(UnparsedSection, UnparsedSectionsResult) [function, total]

rule splitSections(bwi(B:Bytes, Index:Int))
=> unparsedSectionsResult(.UnparsedSections, bwi(B:Bytes, Index:Int))
requires Index ==K lengthBytes(B)
rule splitSections(BWI:BytesWithIndex)
=> #splitSections(splitSection(BWI))
[owise]
rule splitSections(E:ParseError) => E

rule #splitSections(unparsedSectionResult(S:UnparsedSection, BWI:BytesWithIndex))
=> concatOrError(S, splitSections(BWI))
rule #splitSections(E:ParseError) => E

rule concatOrError(S:UnparsedSection, unparsedSectionsResult(Ss:UnparsedSections, BWI:BytesWithIndex))
=> unparsedSectionsResult(S : Ss, BWI)
rule concatOrError(_:UnparsedSection, E:ParseError) => E

endmodule
```
Loading
Loading