diff --git a/src/charparse.sig b/src/charparse.sig index c9607e5..247f66b 100644 --- a/src/charparse.sig +++ b/src/charparse.sig @@ -3,100 +3,99 @@ signature CHAR_PARSER = sig - (* type synonym for Parsing.parser working on character streams *) - type 'a charParser = ('a, char) ParserCombinators.parser - type message = char ParserCombinators.message - - (* (oneOf cs) succeeds if the current character is in the supplied list of - characters cs. Returns the parsed character. See also satisfy. - - vowel = oneOf "aeiou" - *) - val oneOf : char list -> char charParser - - (* As the dual of oneOf, (noneOf cs) succeeds if the current character not - in the supplied list of characters cs. Returns the parsed character. - - consonant = noneOf "aeiou" - *) - val noneOf : char list -> char charParser - - (* (char c) parses a single character c. Returns the parsed character - (i.e. c). - - semiColon = char #";" - *) - val char : char -> char charParser - - (* (string s) parses a sequence of characters given by s. Returns the - parsed string (i.e. s). - - divOrMod = string "div" - || string "mod" - *) - val string : string -> string charParser - - (* This parser succeeds for any character. Returns the parsed character. *) - val anyChar : char charParser - - (* Parses an upper case letter (a character between #"A" and #"Z"). Returns - the parsed character. - *) - val upper : char charParser - - (* Parses a lower case character (a character between #"a" and #"z"). - Returns the parsed character. - *) - val lower : char charParser - - (* Parses a letter (an upper case or lower case character). Returns the - parsed character. - *) - val letter : char charParser - - (* Parses a letter or digit. Returns the parsed character. *) - val alphaNum : char charParser - (* Parses a digit (a character between '0' and '9'). Returns the parsed - character. - *) - val digit : char charParser - - (* Parses a hexadecimal digit (a digit or a letter between 'a' and 'f' or - 'A' and 'F'). Returns the parsed character. - *) - val hexDigit : char charParser - - (* Parses an octal digit (a character between '0' and '7'). Returns the - parsed character. - *) - val octDigit : char charParser - - (* Parses a newline character (#"\n"). Returns a newline character. *) - val newLine : char charParser - - (* Parses a tab character (#"\t"). Returns a newline character. *) - val tab : char charParser - - (* Parses a white space character (any character in " \v\f\t\r\n"). Returns - the parsed character. - *) - val space : char charParser - - (* Skips zero or more white space characters. See also repeati - *) - val spaces : unit charParser - - (* The parser (satisfy f) succeeds for any character for which the supplied - function f returns true. Returns the character that is actually parsed. - *) - val satisfy : (char -> bool) -> char charParser - - (* Formatter for messages over char streams *) - val messageToString : message -> string - - (* Parse function that uses the default formatter for char streams *) - val parseChars : 'a charParser -> (char * Coord.t) Stream.stream -> - (string, 'a) Sum.sum - val parseString : 'a charParser -> string -> (string, 'a) Sum.sum + (* type synonym for Parsing.parser working on character streams *) + type 'a charParser = ('a, char) ParserCombinators.parser + type message = char ParserCombinators.message + + (* (oneOf cs) succeeds if the current character is in the supplied list of + characters cs. Returns the parsed character. See also satisfy. + + vowel = oneOf "aeiou" + *) + val oneOf : char list -> char charParser + + (* As the dual of oneOf, (noneOf cs) succeeds if the current character not + in the supplied list of characters cs. Returns the parsed character. + + consonant = noneOf "aeiou" + *) + val noneOf : char list -> char charParser + + (* (char c) parses a single character c. Returns the parsed character + (i.e. c). + + semiColon = char #";" + *) + val char : char -> char charParser + + (* (string s) parses a sequence of characters given by s. Returns the + parsed string (i.e. s). + + divOrMod = string "div" || string "mod" + *) + val string : string -> string charParser + + (* This parser succeeds for any character. Returns the parsed character. *) + val anyChar : char charParser + + (* Parses an upper case letter (a character between #"A" and #"Z"). Returns + the parsed character. + *) + val upper : char charParser + + (* Parses a lower case character (a character between #"a" and #"z"). + Returns the parsed character. + *) + val lower : char charParser + + (* Parses a letter (an upper case or lower case character). Returns the + parsed character. + *) + val letter : char charParser + + (* Parses a letter or digit. Returns the parsed character. *) + val alphaNum : char charParser + + (* Parses a digit (a character between '0' and '9'). Returns the parsed + character. + *) + val digit : char charParser + + (* Parses a hexadecimal digit (a digit or a letter between 'a' and 'f' or + 'A' and 'F'). Returns the parsed character. + *) + val hexDigit : char charParser + + (* Parses an octal digit (a character between '0' and '7'). Returns the + parsed character. + *) + val octDigit : char charParser + + (* Parses a newline character (#"\n"). Returns a newline character. *) + val newLine : char charParser + + (* Parses a tab character (#"\t"). Returns a newline character. *) + val tab : char charParser + + (* Parses a white space character (any character in " \v\f\t\r\n"). Returns + the parsed character. + *) + val space : char charParser + + (* Skips zero or more white space characters. See also repeati + *) + val spaces : unit charParser + + (* The parser (satisfy f) succeeds for any character for which the supplied + function f returns true. Returns the character that is actually parsed. + *) + val satisfy : (char -> bool) -> char charParser + + (* Formatter for messages over char streams *) + val messageToString : message -> string + + (* Parse function that uses the default formatter for char streams *) + val parseChars : 'a charParser -> (char * Coord.t) Stream.stream -> (string, 'a) Sum.sum + val parseString : 'a charParser -> string -> (string, 'a) Sum.sum end diff --git a/src/charparse.sml b/src/charparse.sml index 43622b0..dace3e3 100644 --- a/src/charparse.sml +++ b/src/charparse.sml @@ -3,49 +3,57 @@ structure CharParser :> CHAR_PARSER = struct - open ParserCombinators - infixr 4 << >> - infixr 1 ?? - - type 'a charParser = ('a, char) parser - type message = char message - - fun oneOf xs = try (satisfy (fn x => List.exists (fn y => x = y) xs)) - fun noneOf xs = try (satisfy (fn x => List.all (fn y => x <> y) xs)) - fun char x = try (satisfy (fn y => x = y)) ?? "'" ^ str x ^ "'" - fun string s = - let fun string_aux xs = case xs of - nil => succeed s - | (x :: xs') => char x >> string_aux xs' - in string_aux (String.explode s) end - - val anyChar = any - val upper = try (satisfy Char.isUpper) ?? "upper case letter" - val lower = try (satisfy Char.isLower) ?? "lower case letter" - val letter = try (satisfy Char.isAlpha) ?? "letter" - val alphaNum = try (satisfy Char.isAlphaNum) ?? "alphanumeric character" - val digit = try (satisfy Char.isDigit) ?? "digit" - val hexDigit = try (satisfy Char.isHexDigit) ?? "hexadecimal digit" - val octDigit = try (satisfy (fn x => Char.isDigit x - andalso Char.<= (x, #"7"))) ?? "octal digit" - val newLine = char #"\n" ?? "'\n'" - val tab = char #"\t" ?? "'\t'" - val space = try (satisfy Char.isSpace) - val spaces = repeatSkip space - val satisfy = satisfy - - fun messageToString m = - case m of - Unexpected (SOME t) => "unexpected '" ^ str t ^ "'" - | Unexpected NONE => "unexpected end of stream" - | Expected s => s - | Message m => m - - fun parseChars p = parse messageToString p - fun parseString p s = - let val s = CoordinatedStream.coordinate (fn x => Stream.hd x = #"\n" handle Stream.Empty => false) (Coord.init "-") - (Stream.fromString s) - in parseChars p s - end + open ParserCombinators + infixr 4 << >> + infixr 1 ?? + + type 'a charParser = ('a, char) parser + type message = char message + + fun oneOf xs = try (satisfy (fn x => List.exists (fn y => x = y) xs)) + fun noneOf xs = try (satisfy (fn x => List.all (fn y => x <> y) xs)) + fun char x = try (satisfy (fn y => x = y)) ?? "'" ^ str x ^ "'" + fun string s = + let + fun string_aux xs = + case xs of + [] => succeed s + | (x :: xs') => char x >> string_aux xs' + in + string_aux (String.explode s) + end + + val anyChar = any + val upper = try (satisfy Char.isUpper) ?? "upper case letter" + val lower = try (satisfy Char.isLower) ?? "lower case letter" + val letter = try (satisfy Char.isAlpha) ?? "letter" + val alphaNum = try (satisfy Char.isAlphaNum) ?? "alphanumeric character" + val digit = try (satisfy Char.isDigit) ?? "digit" + val hexDigit = try (satisfy Char.isHexDigit) ?? "hexadecimal digit" + val octDigit = try (satisfy (fn x => Char.isDigit x andalso Char.<= (x, #"7"))) ?? "octal digit" + val newLine = char #"\n" ?? "'\n'" + val tab = char #"\t" ?? "'\t'" + val space = try (satisfy Char.isSpace) + val spaces = repeatSkip space + val satisfy = satisfy + + fun messageToString m = + case m of + Unexpected (SOME t) => "unexpected '" ^ str t ^ "'" + | Unexpected NONE => "unexpected end of stream" + | Expected s => s + | Message m => m + + fun parseChars p = parse messageToString p + fun parseString p s = + let + val s = + CoordinatedStream.coordinate + (fn x => Stream.hd x = #"\n" handle Stream.Empty => false) + (Coord.init "-") + (Stream.fromString s) + in + parseChars p s + end end diff --git a/src/langparse.sml b/src/langparse.sml index 503f5d6..cd6456d 100644 --- a/src/langparse.sml +++ b/src/langparse.sml @@ -3,79 +3,79 @@ functor SimpleStyle (Def : MINI_LANGUAGE_DEF) :> LANGUAGE_DEF = struct - open ParserCombinators - open CharParser - infixr 1 <|> + open ParserCombinators + open CharParser + infixr 1 <|> - type scanner = char CharParser.charParser + type scanner = char CharParser.charParser - val commentStart = NONE - val commentEnd = NONE - val commentLine = NONE - val nestedComments = false + val commentStart = NONE + val commentEnd = NONE + val commentLine = NONE + val nestedComments = false - val identStart = letter <|> char #"_" - val identLetter = alphaNum <|> oneOf (String.explode "_'") - val opLetter = oneOf (String.explode ":!#$%&*+./< =>?@\\^|-~") - val opStart = opLetter + val identStart = letter <|> char #"_" + val identLetter = alphaNum <|> oneOf (String.explode "_'") + val opLetter = oneOf (String.explode ":!#$%&*+./< =>?@\\^|-~") + val opStart = opLetter - val reservedNames = Def.reservedNames - val reservedOpNames = Def.reservedOpNames + val reservedNames = Def.reservedNames + val reservedOpNames = Def.reservedOpNames - val caseSensitive = true + val caseSensitive = true end functor JavaStyle (Def : MINI_LANGUAGE_DEF) :> LANGUAGE_DEF = struct - open ParserCombinators - open CharParser - infixr 1 <|> + open ParserCombinators + open CharParser + infixr 1 <|> - type scanner = char CharParser.charParser + type scanner = char CharParser.charParser - val commentStart = SOME "/*" - val commentEnd = SOME "*/" - val commentLine = SOME "//" - val nestedComments = true + val commentStart = SOME "/*" + val commentEnd = SOME "*/" + val commentLine = SOME "//" + val nestedComments = true - val identStart = letter - val identLetter = alphaNum <|> oneOf (String.explode "_'") - val opLetter = oneOf (String.explode ":!#$%&*+./< =>?@\\^|-~") - val opStart = opLetter + val identStart = letter + val identLetter = alphaNum <|> oneOf (String.explode "_'") + val opLetter = oneOf (String.explode ":!#$%&*+./< =>?@\\^|-~") + val opStart = opLetter - val reservedNames = Def.reservedNames - val reservedOpNames = Def.reservedOpNames + val reservedNames = Def.reservedNames + val reservedOpNames = Def.reservedOpNames - val caseSensitive = false + val caseSensitive = false end functor MLStyle (Def : MINI_LANGUAGE_DEF) :> LANGUAGE_DEF = struct - open ParserCombinators - open CharParser - infixr 1 <|> + open ParserCombinators + open CharParser + infixr 1 <|> - type scanner = char CharParser.charParser + type scanner = char CharParser.charParser - val commentStart = SOME "(*" - val commentEnd = SOME "*)" - val commentLine = NONE - val nestedComments = true + val commentStart = SOME "(*" + val commentEnd = SOME "*)" + val commentLine = NONE + val nestedComments = true - val identStart = letter - val identLetter = alphaNum <|> oneOf (String.explode "_'") + val identStart = letter + val identLetter = alphaNum <|> oneOf (String.explode "_'") - (* did I miss anything? add to much? *) - val opLetter = oneOf (String.explode ":!#$%&*+./<=>?@\\^|-~") - val opStart = opLetter + (* did I miss anything? add to much? *) + val opLetter = oneOf (String.explode ":!#$%&*+./<=>?@\\^|-~") + val opStart = opLetter - val reservedNames = Def.reservedNames - val reservedOpNames = Def.reservedOpNames + val reservedNames = Def.reservedNames + val reservedOpNames = Def.reservedOpNames - val caseSensitive = true + val caseSensitive = true end diff --git a/src/parse.sig b/src/parse.sig index 227f97b..81e1c80 100644 --- a/src/parse.sig +++ b/src/parse.sig @@ -3,71 +3,70 @@ signature BASIC_PARSER = sig - (* type for error messages *) - datatype 't message = Unexpected of 't option | Expected of string - | Message of string - (* Parser with token type 't, result type 'a *) - type ('a, 't) parser - - (* succeed with given value *) - val succeed : 'a -> ('a, 't) parser - (* fail immediately *) - val fail : string -> ('a, 't) parser - - (* check for end of input *) - val eos : (unit, 't) parser - (* admit anything, provided there's something on the input *) - val any : ('t, 't) parser - - (* sequential successful composition of parsers *) - val -- : ('a, 't) parser * ('a -> ('b, 't) parser) -> ('b, 't) parser - (* sequential failing composition of parsers *) - (* val ## : ('a, 't) parser * (Pos.t -> ('a, 't) parser) -> ('a, 't) parser*) - (* fail-fast composition of parsers *) - val <|> : ('a, 't) parser * ('a, 't) parser -> ('a, 't) parser - (* error reporting combinator *) - val ?? : ('a, 't) parser * string -> ('a, 't) parser - - (* doesn't consume input if fails *) - val try : ('a, 't) parser -> ('a, 't) parser - - (* grab position *) - val !! : ('a, 't) parser -> ('a * Pos.t, 't) parser - - (* get position *) - (*val get : (Pos.t -> ('a, 't) parser) -> ('a, 't) parser*) - - (* to handle mutually-recursive parsers *) - val $ : (unit -> ('a, 't) parser) -> ('a, 't) parser - - (* to construct a recursive parser *) - val fix : (('a, 't) parser -> ('a, 't) parser) -> ('a, 't) parser - - (* re-parse same input, given result of first parse *) - (*val lookahead : ('a, 't) parser -> ('a -> ('b, 't) parser) -> - ('b, 't) parser*) - - (* parse this stream before reading any other input *) - (*val push : ('t * Pos.t) Stream.stream -> - ('a, 't) parser -> ('a, 't) parser *) - - (* parse a stream *) - val runParser : ('a, 't) parser -> ('t * Coord.t) Stream.stream -> - (Pos.t * 't message list, 'a) Sum.sum - val parse : ('t message -> string) -> ('a, 't) parser -> - ('t * Coord.t) Stream.stream -> (string, 'a) Sum.sum - val simpleParse : ('a, 't) parser -> ('t * Coord.t) Stream.stream -> - (string, 'a) Sum.sum - - (* default message printer *) - val messageToString : 't message -> string - - (* transform p s - - parses consecutive maximal prefixes of s with p as many times - as possible, outputting the results as a stream *) - val transform : ('a, 't) parser -> ('t * Coord.t) Stream.stream -> - 'a Stream.stream + (* type for error messages *) + datatype 't message = + Unexpected of 't option + | Expected of string + | Message of string + + (* Parser with token type 't, result type 'a *) + type ('a, 't) parser + + (* succeed with given value *) + val succeed : 'a -> ('a, 't) parser + (* fail immediately *) + val fail : string -> ('a, 't) parser + + (* check for end of input *) + val eos : (unit, 't) parser + (* admit anything, provided there's something on the input *) + val any : ('t, 't) parser + + (* sequential successful composition of parsers *) + val -- : ('a, 't) parser * ('a -> ('b, 't) parser) -> ('b, 't) parser + (* sequential failing composition of parsers *) + (* val ## : ('a, 't) parser * (Pos.t -> ('a, 't) parser) -> ('a, 't) parser*) + (* fail-fast composition of parsers *) + val <|> : ('a, 't) parser * ('a, 't) parser -> ('a, 't) parser + (* error reporting combinator *) + val ?? : ('a, 't) parser * string -> ('a, 't) parser + + (* doesn't consume input if fails *) + val try : ('a, 't) parser -> ('a, 't) parser + + (* grab position *) + val !! : ('a, 't) parser -> ('a * Pos.t, 't) parser + + (* get position *) + (*val get : (Pos.t -> ('a, 't) parser) -> ('a, 't) parser*) + + (* to handle mutually-recursive parsers *) + val $ : (unit -> ('a, 't) parser) -> ('a, 't) parser + + (* to construct a recursive parser *) + val fix : (('a, 't) parser -> ('a, 't) parser) -> ('a, 't) parser + + (* re-parse same input, given result of first parse *) + (*val lookahead : ('a, 't) parser -> ('a -> ('b, 't) parser) -> + ('b, 't) parser*) + + (* parse this stream before reading any other input *) + (*val push : ('t * Pos.t) Stream.stream -> + ('a, 't) parser -> ('a, 't) parser *) + + (* parse a stream *) + val runParser : ('a, 't) parser -> ('t * Coord.t) Stream.stream -> (Pos.t * 't message list, 'a) Sum.sum + val parse : ('t message -> string) -> ('a, 't) parser -> ('t * Coord.t) Stream.stream -> (string, 'a) Sum.sum + val simpleParse : ('a, 't) parser -> ('t * Coord.t) Stream.stream -> (string, 'a) Sum.sum + + (* default message printer *) + val messageToString : 't message -> string + + (* transform p s + + parses consecutive maximal prefixes of s with p as many times + as possible, outputting the results as a stream *) + val transform : ('a, 't) parser -> ('t * Coord.t) Stream.stream -> 'a Stream.stream end @@ -85,101 +84,96 @@ sig *) (* sequential composition *) - val && : ('a, 't) parser * ('b, 't) parser -> ('a * 'b, 't) parser + val && : ('a, 't) parser * ('b, 't) parser -> ('a * 'b, 't) parser (* alternation *) - val || : ('a, 't) parser * ('a, 't) parser -> ('a, 't) parser + val || : ('a, 't) parser * ('a, 't) parser -> ('a, 't) parser (* apply function to success value *) - val wth : ('a, 't) parser * ('a -> 'b) -> ('b, 't) parser + val wth : ('a, 't) parser * ('a -> 'b) -> ('b, 't) parser (* succeed only if check on successful is true *) val suchthat : ('a, 't) parser * ('a -> bool) -> ('a, 't) parser (* specify success value *) - val return : ('b, 't) parser * 'a -> ('a, 't) parser + val return : ('b, 't) parser * 'a -> ('a, 't) parser (* end of stream with specific result *) val done : 'a -> ('a, 't) parser (* n-ary sequential composition *) - val seq : ('a, 't) parser list -> ('a list, 't) parser + val seq : ('a, 't) parser list -> ('a list, 't) parser (* n-ary alternation *) - val alt : ('a, 't) parser list -> ('a, 't) parser + val alt : ('a, 't) parser list -> ('a, 't) parser (* ensure that next token satisfies condition, yielding that token *) - val satisfy : ('t -> bool) -> ('t, 't) parser + val satisfy : ('t -> bool) -> ('t, 't) parser (* succeed only if function returns SOME a *) - val maybe : ('t -> 'a option) -> ('a, 't) parser + val maybe : ('t -> 'a option) -> ('a, 't) parser (* succeed with mapped result if SOME, otherwise fail. *) - val when : ('a, 't) parser * ('a -> 'b option) -> ('b, 't) parser + val when : ('a, 't) parser * ('a -> 'b option) -> ('b, 't) parser (* XXX these require equality on tokens; yech! *) (* check for a given token *) - val literal : ''t -> (''t, ''t) parser + val literal : ''t -> (''t, ''t) parser (* check for a given list of tokens *) - val string : ''t list -> (''t list, ''t) parser + val string : ''t list -> (''t list, ''t) parser (* check for one of a list of tokens *) - val oneof : ''t list -> (''t, ''t) parser + val oneof : ''t list -> (''t, ''t) parser (* optional parse, yielding an optional result *) - val opt : ('a, 't) parser -> ('a option, 't) parser + val opt : ('a, 't) parser -> ('a option, 't) parser (* optional parse, with given action on success *) val optional : ('a -> 'b) -> 'b -> ('a, 't) parser -> ('b, 't) parser (* zero or more copies *) - val repeat : ('a, 't) parser -> ('a list, 't) parser + val repeat : ('a, 't) parser -> ('a list, 't) parser (* one or more *) - val repeat1 : ('a, 't) parser -> ('a list, 't) parser + val repeat1 : ('a, 't) parser -> ('a list, 't) parser (* exact number *) - val repeatn : int -> ('a, 't) parser -> ('a list, 't) parser + val repeatn : int -> ('a, 't) parser -> ('a list, 't) parser (* skip zero or more copies *) - val repeatSkip : ('a, 't) parser -> (unit, 't) parser + val repeatSkip : ('a, 't) parser -> (unit, 't) parser (* skip one or more copies *) val repeatSkip1 : ('a, 't) parser -> (unit, 't) parser (* parse two things, yielding value of first *) - val first : ('a, 't) parser -> ('b, 't) parser -> ('a, 't) parser - val << : ('a, 't) parser * ('b, 't) parser -> ('a, 't) parser + val first : ('a, 't) parser -> ('b, 't) parser -> ('a, 't) parser + val << : ('a, 't) parser * ('b, 't) parser -> ('a, 't) parser (* ... second *) - val second : ('a, 't) parser -> ('b, 't) parser -> ('b, 't) parser - val >> : ('a, 't) parser * ('b, 't) parser -> ('b, 't) parser + val second : ('a, 't) parser -> ('b, 't) parser -> ('b, 't) parser + val >> : ('a, 't) parser * ('b, 't) parser -> ('b, 't) parser (* .... middle of three *) - val middle : ('a, 't) parser -> ('b, 't) parser -> ('c, 't) parser - -> ('b, 't) parser + val middle : ('a, 't) parser -> ('b, 't) parser -> ('c, 't) parser -> ('b, 't) parser (* parse one or more, with given separator between items *) - val separate1: ('a, 't) parser -> ('b, 't) parser -> ('a list, 't) parser + val separate1 : ('a, 't) parser -> ('b, 't) parser -> ('a list, 't) parser (* ... zero or more *) - val separate : ('a, 't) parser -> ('b, 't) parser -> ('a list, 't) parser + val separate : ('a, 't) parser -> ('b, 't) parser -> ('a list, 't) parser (* one or more, obligatory trailing separator *) - val sepEnd1 : ('a, 't) parser -> ('b, 't) parser -> ('a list, 't) parser + val sepEnd1 : ('a, 't) parser -> ('b, 't) parser -> ('a list, 't) parser (* zero or more, obligatory trailing separator *) - val sepEnd : ('a, 't) parser -> ('b, 't) parser -> ('a list, 't) parser + val sepEnd : ('a, 't) parser -> ('b, 't) parser -> ('a list, 't) parser (* one or more, allowing trailing separator *) - val sepEnd1' : ('a, 't) parser -> ('b, 't) parser -> ('a list, 't) parser + val sepEnd1' : ('a, 't) parser -> ('b, 't) parser -> ('a list, 't) parser (* zero or more, allowing trailing separator *) - val sepEnd' : ('a, 't) parser -> ('b, 't) parser -> ('a list, 't) parser + val sepEnd' : ('a, 't) parser -> ('b, 't) parser -> ('a list, 't) parser (* parse with the first parser until the other parser succeeds *) - val until : ('a, 't) parser -> ('b, 't) parser -> ('a list, 't) parser + val until : ('a, 't) parser -> ('b, 't) parser -> ('a list, 't) parser (* nested parsers *) - val join : (('a, 't) parser, 't) parser -> ('a, 't) parser + val join : (('a, 't) parser, 't) parser -> ('a, 't) parser (* chaining of parsers *) - val chainr : ('a, 't) parser -> ('a * 'a -> 'a, 't) parser -> - 'a -> ('a, 't) parser - val chainr1 : ('a, 't) parser -> ('a * 'a -> 'a, 't) parser -> - ('a, 't) parser - val chainl : ('a, 't) parser -> ('a * 'a -> 'a, 't) parser -> - 'a -> ('a, 't) parser - val chainl1 : ('a, 't) parser -> ('a * 'a -> 'a, 't) parser -> - ('a, 't) parser + val chainr : ('a, 't) parser -> ('a * 'a -> 'a, 't) parser -> 'a -> ('a, 't) parser + val chainr1 : ('a, 't) parser -> ('a * 'a -> 'a, 't) parser -> ('a, 't) parser + val chainl : ('a, 't) parser -> ('a * 'a -> 'a, 't) parser -> 'a -> ('a, 't) parser + val chainl1 : ('a, 't) parser -> ('a * 'a -> 'a, 't) parser -> ('a, 't) parser (* succeeds without consuming anything if the given parser fails *) - val not : ('a, 't) parser -> (unit, 't) parser + val not : ('a, 't) parser -> (unit, 't) parser (***** Pre/In/Post-fix utilities *****) @@ -198,10 +192,9 @@ sig (* Same, but also look for adjacent tokens, combining them with the supplied function and associativity. *) - val parsefixityadj : ('a fixityitem, 't) parser -> - associativity -> ('a * 'a -> 'a) -> ('a, 't) parser + val parsefixityadj : ('a fixityitem, 't) parser -> associativity -> ('a * 'a -> 'a) -> ('a, 't) parser - (* Utilities for manipulating intermediate results, + (* Utilities for manipulating intermediate results, ie. (IF >> $exp) && (THEN >> $exp) && (ELSE >> $exp) wth If o flat3 *) diff --git a/src/parse.sml b/src/parse.sml index a6027d7..b694fef 100644 --- a/src/parse.sml +++ b/src/parse.sml @@ -9,10 +9,12 @@ struct type 't stream = ('t * coord) Stream.stream open Sum - datatype 't message = Unexpected of 't option | Expected of string - | Message of string - type ('a, 't) parser = bool ref -> coord * 't stream -> - (pos * 't message list, 'a * pos * coord * 't stream) sum + datatype 't message = + Unexpected of 't option + | Expected of string + | Message of string + + type ('a, 't) parser = bool ref -> coord * 't stream -> (pos * 't message list, 'a * pos * coord * 't stream) sum infix 2 -- ## infixr 1 <|> ?? @@ -24,50 +26,50 @@ struct fun eos _ (c, ts) = case Stream.front ts of - Stream.Nil => INR ((), Pos.pos c c, c, ts) - | Stream.Cons ((x, c), _) => INL (Pos.pos c c, [Unexpected (SOME x)]) + Stream.Nil => INR ((), Pos.pos c c, c, ts) + | Stream.Cons ((x, c), _) => INL (Pos.pos c c, [Unexpected (SOME x)]) fun any b (c, ts) = - case Stream.front ts of - Stream.Nil => INL (Pos.pos c c, [Unexpected NONE]) - | Stream.Cons ((x, c'), ts) => (b := true; INR (x, Pos.pos c c', c', ts)) + case Stream.front ts of + Stream.Nil => INL (Pos.pos c c, [Unexpected NONE]) + | Stream.Cons ((x, c'), ts) => (b := true; INR (x, Pos.pos c c', c', ts)) fun (p -- q) b (c, ts) = - bindR (p b (c, ts)) - (fn (x, posx, c, ts) => - let val nb = ref false - in map (fn (posy, ms) => (b := (!b orelse !nb); ((*Pos.union posx*) posy, ms))) - (fn (y, posy, c, ts) => (y, Pos.union posx posy, c, ts)) - (q x nb (c, ts)) - end) + bindR (p b (c, ts)) + (fn (x, posx, c, ts) => + let val nb = ref false + in map (fn (posy, ms) => (b := (!b orelse !nb); ((*Pos.union posx*) posy, ms))) + (fn (y, posy, c, ts) => (y, Pos.union posx posy, c, ts)) + (q x nb (c, ts)) + end) fun (p ## q) b (c, ts) = - case p b (c, ts) of - INL (pf, errs) => q pf b (c, ts) - | INR x => INR x + case p b (c, ts) of + INL (pf, errs) => q pf b (c, ts) + | INR x => INR x fun (p <|> q) b (c, ts) = - bindL (p b (c, ts)) (fn e => if !b then INL e else q b (c, ts)) + bindL (p b (c, ts)) (fn e => if !b then INL e else q b (c, ts)) fun try p b (c, ts) = - mapL (fn e => (b := false; e)) (p b (c, ts)) + mapL (fn e => (b := false; e)) (p b (c, ts)) fun (p ?? s) b (c, ts) = - mapL (fn (pos, errs) => (pos, errs @ [Expected s])) (p b (c, ts)) + mapL (fn (pos, errs) => (pos, errs @ [Expected s])) (p b (c, ts)) fun lookahead p q b (c, ts) = - bindR (p b (c, ts)) (fn (x, _, _, _) => q x b (c, ts)) + bindR (p b (c, ts)) (fn (x, _, _, _) => q x b (c, ts)) fun !! p b (c, ts) = - mapR (fn (x, posx, c, ts) => ((x, posx), posx, c, ts)) (p b (c, ts)) + mapR (fn (x, posx, c, ts) => ((x, posx), posx, c, ts)) (p b (c, ts)) fun get f b (c, ts) = f (Pos.pos c c) b (c, ts) - + fun $ p b (c, ts) = p () b (c, ts) fun fix f b (c, ts) = f (fix f) b (c, ts) - val initc = Coord.init "-" + val initc = Coord.init "-" fun runParser (p : ('a, 't) parser) ts = mapR #1 (p (ref false) (initc, ts)) @@ -77,44 +79,46 @@ struct (* fun push ns p (pos, ts) = p (initpos, Stream.append ns ts)*) fun messageToString m = - case m of - Unexpected (SOME t) => "unexpected token" - | Unexpected NONE => "unexpected end of stream" - | Expected s => s - | Message m => m + case m of + Unexpected (SOME t) => "unexpected token" + | Unexpected NONE => "unexpected end of stream" + | Expected s => s + | Message m => m fun printError fmt (p, msgs) = - let fun unex msgs = - case List.filter (fn Unexpected _ => true | _ => false) msgs of - x :: _ => fmt x ^ ". " - | _ => "" - fun exps xs = case xs of - [] => "" (* impossible case *) - | [x] => " or " ^ fmt x ^ ". " - | x :: xs => ", " ^ fmt x ^ exps xs - fun exp msgs = - case List.filter (fn Expected _ => true | _ => false) msgs of - [] => "" - | [x] => "Expected " ^ fmt x ^ ". " - | x :: xs => "Expected " ^ fmt x ^ exps xs - fun msg msgs = (String.concatWith ". " o List.map (fn Message m => m)) - (List.filter (fn Message _ => true | _ => false) msgs) - in "Parse error at " ^ Pos.toString p ^ ": " ^ - unex msgs ^ exp msgs ^ msg msgs ^ "\n" - end + let + fun unex msgs = + case List.filter (fn Unexpected _ => true | _ => false) msgs of + x :: _ => fmt x ^ ". " + | _ => "" + fun exps xs = + case xs of + [] => "" (* impossible case *) + | [x] => " or " ^ fmt x ^ ". " + | x :: xs => ", " ^ fmt x ^ exps xs + fun exp msgs = + case List.filter (fn Expected _ => true | _ => false) msgs of + [] => "" + | [x] => "Expected " ^ fmt x ^ ". " + | x :: xs => "Expected " ^ fmt x ^ exps xs + fun msg msgs = + (String.concatWith ". " o List.map (fn Message m => m)) + (List.filter (fn Message _ => true | _ => false) msgs) + in + "Parse error at " ^ Pos.toString p ^ ": " ^ unex msgs ^ exp msgs ^ msg msgs ^ "\n" + end fun parse fmt p = mapL (printError fmt) o runParser p fun simpleParse p = parse messageToString p fun transform p ts = let - fun trans (pos, ts) () = - case p (ref false) (pos, ts) of - INR (x, _, pos', ts') => - Stream.Cons (x, Stream.lazy (trans (pos', ts'))) - | INL _ => Stream.Nil + fun trans (pos, ts) () = + case p (ref false) (pos, ts) of + INR (x, _, pos', ts') => Stream.Cons (x, Stream.lazy (trans (pos', ts'))) + | INL _ => Stream.Nil in - Stream.lazy (trans (initc, ts)) + Stream.lazy (trans (initc, ts)) end end @@ -139,9 +143,9 @@ struct fun p wth f = p -- succeed o f fun p suchthat g = - p -- (fn x => if g x then succeed x else fail "") - fun p when f = - p -- (fn x => case f x of SOME r => succeed r | NONE => fail "") + p -- (fn x => if g x then succeed x else fail "") + fun p when f = + p -- (fn x => case f x of SOME r => succeed r | NONE => fail "") fun p return x = p -- (fn _ => succeed x) fun seq ps = foldr (fn (ph, pt) => ph && pt wth op::) (succeed []) ps @@ -160,8 +164,8 @@ struct fun optional f x p = p wth f || succeed x - fun first p q = p -- (fn x => q return x) - fun second p q = p -- (fn _ => q) + fun first p q = p -- (fn x => q return x) + fun second p q = p -- (fn _ => q) fun middle p q r = p -- (fn _ => q -- (fn x => r return x)) fun (p << q) = first p q @@ -185,27 +189,34 @@ struct fun repeatSkip1 p = p >> repeatSkip p fun separate1 p q = p && repeat (second q p) wth op:: - fun separate p q = separate1 p q || succeed [] - fun sepEnd' p q = first (separate p q) (opt q) - fun sepEnd1' p q = separate1 p q << opt q - fun sepEnd p q = repeat (p << q) - fun sepEnd1 p q = repeat1 (p << q) + fun separate p q = separate1 p q || succeed [] + fun sepEnd' p q = first (separate p q) (opt q) + fun sepEnd1' p q = separate1 p q << opt q + fun sepEnd p q = repeat (p << q) + fun sepEnd1 p q = repeat1 (p << q) fun join p = p -- (fn q => q) - fun until p q = - let fun aux _ = (q return []) <|> p >> $ aux - in $ aux end + fun until p q = + let + fun aux _ = (q return []) <|> p >> $ aux + in + $ aux + end (* chaining of parsers *) fun chainr1 p opp = - p -- (fn v => (opp && chainr1 p opp wth (fn (f, v') => f (v, v'))) - <|> succeed v) - fun chainr p opp d = chainr1 p opp <|> succeed d + p -- (fn v => (opp && chainr1 p opp wth (fn (f, v') => f (v, v'))) <|> succeed v) + + fun chainr p opp d = + chainr1 p opp <|> succeed d + fun chainl1 p opp = - p && (repeat (opp && p)) wth - (fn (v, ts) => List.foldl (fn ((f, vr), vl) => f (vl, vr)) v ts) - fun chainl p opp d = chainl1 p opp <|> succeed d + p && (repeat (opp && p)) wth + (fn (v, ts) => List.foldl (fn ((f, vr), vl) => f (vl, vr)) v ts) + + fun chainl p opp d = + chainl1 p opp <|> succeed d fun not p = ((try p) >> fail "unexpected token") <|> succeed () @@ -231,84 +242,96 @@ struct | prec (Postfix(n, _)) = n fun resolvefixity ys = - let fun resolve (xs, c as Atm _, ys) = - next (c::xs, ys) - | resolve (xs, c as Opr(Prefix _), ys) = - next (c::xs, ys) - | resolve (x::[], c as Opr(Infix _), ys) = - next (c::x::[], ys) - | resolve (x::(c' as Opr(f'))::xs, c as Opr(f as Infix _), ys) = - if prec(f) > prec(f') then next (c::x::c'::xs, ys) - else if prec(f') > prec(f) then reduce (x::c'::xs, c::ys) - else (case (assoc(f'), assoc(f)) - of (Left, Left) => reduce (x::c'::xs, c::ys) - | (Right, Right) => next (c::x::c'::xs, ys) - | _ => fail "Operator ambiguous") - | resolve(x::[], c as Opr(Postfix _), ys) = - reduce (c::x::[], ys) - | resolve (x::(c' as Opr(f'))::xs, - c as Opr(f as Postfix _), ys) = - if prec(f) > prec(f') then reduce (c::x::c'::xs, ys) - else if prec(f') > prec(f) then reduce (x::c'::xs, c::ys) - else fail "Operator ambiguous" - | resolve _ = fail "Atom/operator mismatch" - - and reduce (Atm(a)::Opr(Prefix(_, cprefix))::xs, ys) = - next(Atm(cprefix(a))::xs, ys) - | reduce (Atm(a)::Opr(Infix(_, _, cinfix))::Atm(a')::xs, ys) = - next(Atm(cinfix(a', a))::xs, ys) - | reduce (Opr(Postfix(_, cpostfix))::Atm(a)::xs, ys) = - next(Atm(cpostfix(a))::xs, ys) - | reduce _ = fail "Atom/operator mismatch" - - and next (Atm(a)::[], []) = succeed a - | next (xs, []) = reduce (xs, []) - | next (xs, y::ys) = resolve (xs, y, ys) - - in next ([], ys) end + let + fun resolve (xs, c as Atm _, ys) = + next (c::xs, ys) + | resolve (xs, c as Opr(Prefix _), ys) = + next (c::xs, ys) + | resolve (x::[], c as Opr(Infix _), ys) = + next (c::x::[], ys) + | resolve (x::(c' as Opr(f'))::xs, c as Opr(f as Infix _), ys) = + if prec(f) > prec(f') then + next (c::x::c'::xs, ys) + else if prec(f') > prec(f) then + reduce (x::c'::xs, c::ys) + else + (case (assoc(f'), assoc(f)) of + (Left, Left) => reduce (x::c'::xs, c::ys) + | (Right, Right) => next (c::x::c'::xs, ys) + | _ => fail "Operator ambiguous") + | resolve(x::[], c as Opr(Postfix _), ys) = + reduce (c::x::[], ys) + | resolve (x::(c' as Opr(f'))::xs, c as Opr(f as Postfix _), ys) = + if prec(f) > prec(f') then + reduce (c::x::c'::xs, ys) + else if prec(f') > prec(f) then + reduce (x::c'::xs, c::ys) + else + fail "Operator ambiguous" + | resolve _ = fail "Atom/operator mismatch" + + and reduce (Atm(a)::Opr(Prefix(_, cprefix))::xs, ys) = + next(Atm(cprefix(a))::xs, ys) + | reduce (Atm(a)::Opr(Infix(_, _, cinfix))::Atm(a')::xs, ys) = + next(Atm(cinfix(a', a))::xs, ys) + | reduce (Opr(Postfix(_, cpostfix))::Atm(a)::xs, ys) = + next(Atm(cpostfix(a))::xs, ys) + | reduce _ = fail "Atom/operator mismatch" + + and next (Atm(a)::[], []) = succeed a + | next (xs, []) = reduce (xs, []) + | next (xs, y::ys) = resolve (xs, y, ys) + + in + next ([], ys) + end fun resolvefixityadj cadj cassoc ys = - let fun resolve (Atm(a)::xs, Atm(a'), ys) = - - (* treat adjacent tokens as if they have an infix operator - of high precedence between them -- Tom *) - resolve (Atm(a)::xs, Opr(Infix(cassoc, 999, cadj)), - Atm(a')::ys) - | resolve (xs, Atm(a), ys) = - next (Atm(a)::xs, ys) - | resolve (xs, c as Opr(Prefix _), ys) = - next (c::xs, ys) - | resolve (x::[], c, ys) = - next (c::x::[], ys) - | resolve ((c' as Opr _)::xs, c, ys) = - reduce (c'::xs, c::ys) - | resolve (x::(c' as Opr(f'))::xs, c as Opr(f), ys) = - if prec(f) > prec(f') then next (c::x::c'::xs, ys) - else if prec(f') > prec(f) then reduce (x::c'::xs, c::ys) - else (case (assoc(f'), assoc(f)) - of (Left, Left) => reduce (x::c'::xs, c::ys) - | (Right, Right) => next (c::x::c'::xs, ys) - | _ => fail "Operator ambiguous") - | resolve _ = fail "Operator mismatch" - - and reduce (Atm(a)::Opr(Prefix(_, cprefix))::xs, ys) = - next (Atm(cprefix(a))::xs, ys) - | reduce (Atm(a)::Opr(Infix(_, _, cinfix))::Atm(a')::xs, ys) = - next (Atm(cinfix(a', a))::xs, ys) - | reduce (Opr(Postfix(_, cpostfix))::Atm(a)::xs, ys) = - next (Atm(cpostfix(a))::xs, ys) - | reduce _ = fail "Operator mismatch" - - and next (Atm(a)::[], []) = succeed a - | next (xs, []) = reduce(xs, []) - | next (xs, y::ys) = resolve(xs, y, ys) - - in next ([], ys) end + let + fun resolve (Atm(a)::xs, Atm(a'), ys) = + (* treat adjacent tokens as if they have an infix operator + of high precedence between them -- Tom *) + resolve (Atm(a)::xs, Opr(Infix(cassoc, 999, cadj)), Atm(a')::ys) + | resolve (xs, Atm(a), ys) = + next (Atm(a)::xs, ys) + | resolve (xs, c as Opr(Prefix _), ys) = + next (c::xs, ys) + | resolve (x::[], c, ys) = + next (c::x::[], ys) + | resolve ((c' as Opr _)::xs, c, ys) = + reduce (c'::xs, c::ys) + | resolve (x::(c' as Opr(f'))::xs, c as Opr(f), ys) = + if prec(f) > prec(f') then + next (c::x::c'::xs, ys) + else if prec(f') > prec(f) then + reduce (x::c'::xs, c::ys) + else + (case (assoc(f'), assoc(f)) of + (Left, Left) => reduce (x::c'::xs, c::ys) + | (Right, Right) => next (c::x::c'::xs, ys) + | _ => fail "Operator ambiguous") + | resolve _ = fail "Operator mismatch" + + and reduce (Atm(a)::Opr(Prefix(_, cprefix))::xs, ys) = + next (Atm(cprefix(a))::xs, ys) + | reduce (Atm(a)::Opr(Infix(_, _, cinfix))::Atm(a')::xs, ys) = + next (Atm(cinfix(a', a))::xs, ys) + | reduce (Opr(Postfix(_, cpostfix))::Atm(a)::xs, ys) = + next (Atm(cpostfix(a))::xs, ys) + | reduce _ = fail "Operator mismatch" + + and next (Atm(a)::[], []) = succeed a + | next (xs, []) = reduce(xs, []) + | next (xs, y::ys) = resolve(xs, y, ys) + + in + next ([], ys) + end fun parsefixity p = - (repeat1 p) -- (fn ys => resolvefixity ys) + (repeat1 p) -- (fn ys => resolvefixity ys) fun parsefixityadj p assoc adj = - (repeat1 p) -- (resolvefixityadj adj assoc) + (repeat1 p) -- (resolvefixityadj adj assoc) end diff --git a/src/tokparse.sig b/src/tokparse.sig index d943a2c..da0e1ef 100644 --- a/src/tokparse.sig +++ b/src/tokparse.sig @@ -4,79 +4,79 @@ signature MINI_LANGUAGE_DEF = sig - val reservedNames : string list - val reservedOpNames : string list + val reservedNames : string list + val reservedOpNames : string list end signature LANGUAGE_DEF = sig - type scanner = char CharParser.charParser + type scanner = char CharParser.charParser - (* multiline comment start/end sequence *) - val commentStart : string option - val commentEnd : string option + (* multiline comment start/end sequence *) + val commentStart : string option + val commentEnd : string option - (* single line comment start *) - val commentLine : string option + (* single line comment start *) + val commentLine : string option - (* do the multiline comments support nesting *) - val nestedComments : bool + (* do the multiline comments support nesting *) + val nestedComments : bool - (* parsers for first and subsequent letters of identifiers *) - val identStart : scanner - val identLetter : scanner + (* parsers for first and subsequent letters of identifiers *) + val identStart : scanner + val identLetter : scanner - (* parsers for first and subsequent chars of operators *) - val opStart : scanner - val opLetter : scanner + (* parsers for first and subsequent chars of operators *) + val opStart : scanner + val opLetter : scanner - (* reserved keywords and operators *) - val reservedNames : string list - val reservedOpNames : string list + (* reserved keywords and operators *) + val reservedNames : string list + val reservedOpNames : string list - (* is the language case sensitive *) - val caseSensitive : bool + (* is the language case sensitive *) + val caseSensitive : bool end signature TOKEN_PARSER = sig - type 'a charParser = 'a CharParser.charParser - - val identifier : string charParser - val reserved : string -> unit charParser - val operator : string charParser - val reservedOp : string -> unit charParser - - val charLiteral : char charParser - val stringLiteral : string charParser - (* val natural : IntInf charParser*) - val integer : int charParser - (* val float : real charParser *) - (* val naturalOrFloat : CharParser st (Either Integer Double)*) - val decimal : int charParser - val hexadecimal : int charParser - val octal : int charParser - - val symbol : string -> string charParser - val lexeme : 'a charParser -> 'a charParser - val whiteSpace : unit charParser - - val parens : 'a charParser -> 'a charParser - val braces : 'a charParser -> 'a charParser - val brackets : 'a charParser -> 'a charParser - val squares : 'a charParser -> 'a charParser - - val semi : string charParser - val comma : string charParser - val colon : string charParser - val dot : string charParser - val semiSep : ('a charParser) -> ('a list) charParser - val semiSep1 : ('a charParser) -> ('a list) charParser - val commaSep : ('a charParser) -> ('a list) charParser - val commaSep1 : ('a charParser) -> ('a list) charParser - + type 'a charParser = 'a CharParser.charParser + + val identifier : string charParser + val reserved : string -> unit charParser + val operator : string charParser + val reservedOp : string -> unit charParser + + val charLiteral : char charParser + val stringLiteral : string charParser + (* val natural : IntInf charParser*) + val integer : int charParser + (* val float : real charParser *) + (* val naturalOrFloat : CharParser st (Either Integer Double)*) + val decimal : int charParser + val hexadecimal : int charParser + val octal : int charParser + + val symbol : string -> string charParser + val lexeme : 'a charParser -> 'a charParser + val whiteSpace : unit charParser + + val parens : 'a charParser -> 'a charParser + val braces : 'a charParser -> 'a charParser + val brackets : 'a charParser -> 'a charParser + val squares : 'a charParser -> 'a charParser + + val semi : string charParser + val comma : string charParser + val colon : string charParser + val dot : string charParser + val semiSep : ('a charParser) -> ('a list) charParser + val semiSep1 : ('a charParser) -> ('a list) charParser + val commaSep : ('a charParser) -> ('a list) charParser + val commaSep1 : ('a charParser) -> ('a list) charParser + end diff --git a/src/tokparse.sml b/src/tokparse.sml index bfefed3..fd88ede 100644 --- a/src/tokparse.sml +++ b/src/tokparse.sml @@ -3,91 +3,116 @@ functor TokenParser (Lang : LANGUAGE_DEF) :> TOKEN_PARSER = struct - fun elem x = List.exists (fn y => x = y) - fun notElem x = List.all (fn y => x <> y) - - open ParserCombinators - open CharParser - infixr 4 << >> - infixr 3 && - infix 2 -- ## - infix 2 wth suchthat return guard when - infixr 1 || <|> ?? - - type 'a charParser = 'a charParser - - val lineComment = - let fun comLine _ = newLine <|> done #"\n" <|> (anyChar >> $ comLine) - in case Lang.commentLine of - SOME s => string s >> $ comLine return () - | NONE => fail "Single-line comments not supported" - end - val mlComment = - case (Lang.commentStart, Lang.commentEnd) of - (SOME st, SOME ed) => - let - fun bcNest _ = try (string st) >> $contNest - and contNest _ = try (string ed return ()) - <|> ($bcNest <|> (anyChar return ())) >> $contNest - val bcU = try (string st) >> repeat (not (string ed) >> anyChar) >> string ed return () - in if Lang.nestedComments then $ bcNest else bcU - end - | _ => fail "Multi-line comments not supported" - val comment = lineComment <|> mlComment - - val whiteSpace = repeatSkip ((space return ()) || comment) - fun lexeme p = p << whiteSpace - fun symbol s = lexeme (string s) - - val name = - Lang.identStart && repeat Lang.identLetter wth implode o op:: - val identifier = - try (lexeme (name suchthat (fn x => notElem x Lang.reservedNames))) - fun reserved kw = - if elem kw Lang.reservedNames then - try (lexeme (name suchthat (fn x => x = kw)) return ()) - else fail "Not a reserved name" - - val opName = - Lang.opStart && repeat Lang.opLetter wth implode o op:: - val operator = - try (lexeme (opName suchthat (fn x => notElem x Lang.reservedOpNames))) - fun reservedOp rop = - if elem rop Lang.reservedOpNames then - try (lexeme (opName suchthat (fn x => x = rop)) return ()) - else fail "Not a reserved operator" - - fun parens p = middle (symbol "(") p (symbol ")") - fun braces p = middle (symbol "{") p (symbol "}") - fun brackets p = middle (symbol "<") p (symbol ">") - fun squares p = middle (symbol "[") p (symbol "]") - - val semi = symbol ";" - val comma = symbol "," - val colon = symbol ":" - val dot = symbol "." - fun semiSep p = separate p semi - fun semiSep1 p = separate1 p semi - fun commaSep p = separate p comma - fun commaSep1 p = separate1 p comma - - val chrEscape = - string "\\" && (anyChar wth Char.toString) wth op^ when Char.fromString - val charLiteral = middle (char #"'") (chrEscape <|> anyChar) (symbol "'") - val stringLiteral = - (middle (char #"\"") - (repeat (chrEscape <|> (anyChar suchthat (fn x => x <> #"\"")))) - (symbol "\"")) wth String.implode - - fun dig d = if Char.isDigit d then Char.ord d - Char.ord #"0" - else Char.ord (Char.toLower d) - Char.ord #"a" + 10 - - fun transnum b = List.foldl (fn (s, d) => b*d + s) 0 - val decimal = repeat1 digit wth transnum 10 o List.map dig - val hexadecimal = repeat1 hexDigit wth transnum 16 o List.map dig - val octal = repeat1 octDigit wth transnum 8 o List.map dig - val positive = - (char #"0" >> ((char #"x" >> hexadecimal) || octal)) || decimal - val integer = lexeme ((char #"-" >> positive wth op~) || positive) + fun elem x = List.exists (fn y => x = y) + fun notElem x = List.all (fn y => x <> y) + + open ParserCombinators + open CharParser + infixr 4 << >> + infixr 3 && + infix 2 -- ## + infix 2 wth suchthat return guard when + infixr 1 || <|> ?? + + type 'a charParser = 'a charParser + + val lineComment = + let + fun comLine _ = newLine <|> done #"\n" <|> (anyChar >> $ comLine) + in + case Lang.commentLine of + SOME s => string s >> $ comLine return () + | NONE => fail "Single-line comments not supported" + end + + val mlComment = + case (Lang.commentStart, Lang.commentEnd) of + (SOME st, SOME ed) => + let + fun bcNest _ = try (string st) >> $contNest + and contNest _ = try (string ed return ()) <|> ($bcNest <|> (anyChar return ())) >> $contNest + val bcU = try (string st) >> repeat (not (string ed) >> anyChar) >> string ed return () + in + if Lang.nestedComments then $ bcNest else bcU + end + | _ => fail "Multi-line comments not supported" + val comment = lineComment <|> mlComment + + val whiteSpace = repeatSkip ((space return ()) || comment) + fun lexeme p = p << whiteSpace + fun symbol s = lexeme (string s) + + val name = + Lang.identStart + && repeat Lang.identLetter + wth implode o op:: + + val identifier = + try (lexeme (name suchthat (fn x => notElem x Lang.reservedNames))) + + fun reserved kw = + if elem kw Lang.reservedNames then + try (lexeme (name suchthat (fn x => x = kw)) return ()) + else + fail "Not a reserved name" + + val opName = + Lang.opStart + && repeat Lang.opLetter + wth implode o op:: + + val operator = + try (lexeme (opName suchthat (fn x => notElem x Lang.reservedOpNames))) + + fun reservedOp rop = + if elem rop Lang.reservedOpNames then + try (lexeme (opName suchthat (fn x => x = rop)) return ()) + else + fail "Not a reserved operator" + + fun parens p = middle (symbol "(") p (symbol ")") + fun braces p = middle (symbol "{") p (symbol "}") + fun brackets p = middle (symbol "<") p (symbol ">") + fun squares p = middle (symbol "[") p (symbol "]") + + val semi = symbol ";" + val comma = symbol "," + val colon = symbol ":" + val dot = symbol "." + fun semiSep p = separate p semi + fun semiSep1 p = separate1 p semi + fun commaSep p = separate p comma + fun commaSep1 p = separate1 p comma + + val chrEscape = + string "\\" + && (anyChar wth Char.toString) + wth op^ + when Char.fromString + + val charLiteral = + middle + (char #"'") + (chrEscape <|> anyChar) + (symbol "'") + + val stringLiteral = + (middle + (char #"\"") + (repeat (chrEscape <|> (anyChar suchthat (fn x => x <> #"\"")))) + (symbol "\"")) wth String.implode + + fun dig d = + if Char.isDigit d then + Char.ord d - Char.ord #"0" + else + Char.ord (Char.toLower d) - Char.ord #"a" + 10 + + fun transnum b = List.foldl (fn (s, d) => b*d + s) 0 + val decimal = repeat1 digit wth transnum 10 o List.map dig + val hexadecimal = repeat1 hexDigit wth transnum 16 o List.map dig + val octal = repeat1 octDigit wth transnum 8 o List.map dig + val positive = (char #"0" >> ((char #"x" >> hexadecimal) || octal)) || decimal + val integer = lexeme ((char #"-" >> positive wth op~) || positive) end diff --git a/src/util/either.sml b/src/util/either.sml index a8f1d92..ef980cd 100644 --- a/src/util/either.sml +++ b/src/util/either.sml @@ -3,24 +3,33 @@ struct datatype ('a, 'b) either = Left of 'a | Right of 'b - fun either lft rgt e = case e of - Left x => lft x - | Right y => rgt y - fun lefts xs = case xs of - [] => [] - | Left x :: xs => x :: lefts xs - | _ :: xs => lefts xs - fun rights xs = case xs of - [] => [] - | Right x :: xs => x :: rights xs - | _ :: xs => rights xs - fun part xs = case xs of - [] => ([], []) - | x :: xs => - let val (ls, rs) = part xs - in (case x of - Left l => (l :: ls, rs) - | Right r => (ls, r :: rs)) - end + fun either lft rgt e = + case e of + Left x => lft x + | Right y => rgt y + + fun lefts xs = + case xs of + [] => [] + | Left x :: xs => x :: lefts xs + | _ :: xs => lefts xs + + fun rights xs = + case xs of + [] => [] + | Right x :: xs => x :: rights xs + | _ :: xs => rights xs + + fun part xs = + case xs of + [] => ([], []) + | x :: xs => + let + val (ls, rs) = part xs + in + case x of + Left l => (l :: ls, rs) + | Right r => (ls, r :: rs) + end end