diff --git a/src/compiler/api/GF/Grammar/Grammar.hs b/src/compiler/api/GF/Grammar/Grammar.hs index 1353462b8..8729ad1ec 100644 --- a/src/compiler/api/GF/Grammar/Grammar.hs +++ b/src/compiler/api/GF/Grammar/Grammar.hs @@ -395,12 +395,12 @@ data Term = | FV [Term] -- ^ alternatives in free variation: @variants { s ; ... }@ + | Markup Ident [(Ident,Term)] [Term] + | Alts Term [(Term, Term)] -- ^ alternatives by prefix: @pre {t ; s\/c ; ...}@ | Strs [Term] -- ^ conditioning prefix strings: @strs {s ; ...}@ | TSymCat Int LIndex [(LIndex,(Ident,Type))] | TSymVar Int Int - | OpenTag Ident [Assign] -- used internally in the parser - | CloseTag Ident -- used internally in the parser deriving (Show, Eq, Ord) -- | Patterns diff --git a/src/compiler/api/GF/Grammar/Lexer.x b/src/compiler/api/GF/Grammar/Lexer.x index 979fe9088..7354277cb 100644 --- a/src/compiler/api/GF/Grammar/Lexer.x +++ b/src/compiler/api/GF/Grammar/Lexer.x @@ -2,8 +2,8 @@ { {-# LANGUAGE CPP #-} module GF.Grammar.Lexer - ( Token(..), Posn(..) - , P, runP, runPartial, token, lexer, getPosn, failLoc + ( Lang(..), Token(..), Posn(..) + , P, runP, runLangP, runPartial, token, lexer, getPosn, failLoc , isReservedWord ) where @@ -17,7 +17,7 @@ import qualified Data.ByteString.Internal as BS(w2c) import qualified Data.ByteString.UTF8 as UTF8 import qualified Data.Map as Map import Data.Word(Word8) -import Data.Char(readLitChar,isSpace) +import Data.Char(readLitChar,isSpace,isAlphaNum) import Data.Maybe(isJust) import qualified Control.Monad.Fail as Fail } @@ -31,7 +31,10 @@ $i = [$l $d _ '] -- identifier character $u = [.\n] -- universal: any character @rsyms = -- symbols and non-identifier-like reserved words - \; | \= | \{ | \} | \( | \) | \~ | \* \* | \: | \- \> | \, | \[ | \] | \- | \. | \| | \% | \? | \> | \@ | \# | \! | \* | \+ | \+ \+ | \\ | \\\\ | \= \> | \_ | \$ | \/ | \: \= | \: \: \= + \; | \= | \{ | \} | \( | \) | \~ | \* \* | \: | \- \> | \, | \[ | \] | \- | \. | \| | \% | \? | \< | \> | \@ | \# | \! | \* | \+ | \+ \+ | \\ | \\\\ | \= \> | \_ | \$ | \/ | \: \= | \: \: \= + +@ident = + (\_ | $l)($l | $d | \_ | \')* :- "--" [.]* ; -- Toss single line comments @@ -39,11 +42,39 @@ $u = [.\n] -- universal: any character $white+ ; @rsyms { tok ident } -\< { \_ _ s -> if start_of_tag (BS.tail s) - then T_less_tag - else T_less } +\< $white* @ident $white* (\/ | \>) + { \lang inp@(AI pos s) inp' _ -> + let inp0 = AI (alexMove pos '<') (BS.tail s) + in case lang of { + NLG -> case getTag inp0 of { + Just (tag,inp') -> POk (inp,inp') (T_open_tag tag) ; + Nothing -> PFailed pos "matching the html tag failed" + } ; + _ -> POk (inp,inp0) T_less + } } +\< $white* @ident $white+ @ident $white* \= + { \lang inp@(AI pos s) inp' _ -> + let inp0 = AI (alexMove pos '<') (BS.tail s) + in case lang of { + NLG -> case getTag inp0 of { + Just (tag,inp') -> if tag == identS "let" + then POk (inp,inp0) T_less + else POk (inp,inp') (T_open_tag tag) ; + Nothing -> PFailed pos "matching the html tag failed" + } ; + _ -> POk (inp,inp0) T_less + } } +\< \/ $white* @ident { \lang inp@(AI pos s) inp' _ -> + case lang of { + NLG -> case getTag (AI (alexMove (alexMove pos '<') '/') (BS.drop 2 s)) of { + Just (tag,inp') -> POk (inp,inp') (T_close_tag tag) ; + Nothing -> PFailed pos "matching the html tag failed" + } ; + _ -> let inp0 = AI (alexMove pos '<') (BS.tail s) + in POk (inp,inp0) T_less + } } \' ([. # [\' \\ \n]] | (\\ (\' | \\)))+ \' { tok (T_Ident . identS . unescapeInitTail . unpack) } -(\_ | $l)($l | $d | \_ | \')* { tok ident } +@ident { tok ident } \" ([$u # [\" \\ \n]] | (\\ (\" | \\ | \' | n | t | $d+)))* \" { tok (T_String . unescapeInitTail . unpack) } @@ -53,11 +84,10 @@ $white+ ; { unpack = UTF8.toString ---unpack = id ident = res T_Ident . identC . rawIdentC -tok f p len s = f (UTF8.take len s) +tok f _ inp@(AI _ s) inp' len = POk (inp,inp') (f (UTF8.take len s)) data Token = T_exclmark @@ -78,7 +108,6 @@ data Token | T_colon | T_semicolon | T_less - | T_less_tag | T_equal | T_big_rarrow | T_great @@ -138,6 +167,8 @@ data Token | T_Integer Integer -- integer literals | T_Double Double -- double precision float literals | T_Ident Ident + | T_open_tag Ident + | T_close_tag Ident | T_EOF deriving Show -- debug @@ -171,6 +202,7 @@ resWords = Map.fromList , b ";" T_semicolon , b "=" T_equal , b "=>" T_big_rarrow + , b "<" T_less , b ">" T_great , b "?" T_questmark , b "[" T_obrack @@ -251,20 +283,14 @@ alexMove (Pn l c) '\n' = Pn (l+1) 1 alexMove (Pn l c) _ = Pn l (c+1) alexGetByte :: AlexInput -> Maybe (Word8,AlexInput) -alexGetByte (AI p _ s) = +alexGetByte (AI p s) = case WBS.uncons s of Nothing -> Nothing Just (w,s) -> - let p' = alexMove p c - c = BS.w2c w - in p' `seq` Just (w, (AI p' c s)) -{- --- Not used by this lexer: -alexInputPrevChar :: AlexInput -> Char -alexInputPrevChar (AI p c s) = c --} + let p' = alexMove p (BS.w2c w) + in p' `seq` Just (w, (AI p' s)) + data AlexInput = AI {-# UNPACK #-} !Posn -- current position, - {-# UNPACK #-} !Char -- previous char {-# UNPACK #-} !BS.ByteString -- current input string type AlexInput2 = (AlexInput,AlexInput) @@ -274,7 +300,9 @@ data ParseResult a | PFailed Posn -- The position of the error String -- The error message -newtype P a = P { unP :: AlexInput2 -> ParseResult a } +data Lang = GF | BNFC | NLG + +newtype P a = P { unP :: Lang -> AlexInput2 -> ParseResult a } instance Functor P where fmap = liftA @@ -284,10 +312,10 @@ instance Applicative P where (<*>) = ap instance Monad P where - return a = a `seq` (P $ \s -> POk s a) - (P m) >>= k = P $ \ s -> case m s of - POk s a -> unP (k a) s - PFailed posn err -> PFailed posn err + return a = a `seq` (P $ \_ s -> POk s a) + (P m) >>= k = P $ \l s -> case m l s of + POk s a -> unP (k a) l s + PFailed posn err -> PFailed posn err #if !(MIN_VERSION_base(4,13,0)) -- Monad(fail) will be removed in GHC 8.8+ @@ -295,24 +323,27 @@ instance Monad P where #endif instance Fail.MonadFail P where - fail msg = P $ \(_,AI posn _ _) -> PFailed posn msg + fail msg = P $ \_ (_,AI posn _) -> PFailed posn msg runP :: P a -> BS.ByteString -> Either (Posn,String) a -runP p bs = snd <$> runP' p (Pn 1 0,bs) +runP p bs = snd <$> runP' GF p (Pn 1 0,bs) + +runLangP :: Lang -> P a -> BS.ByteString -> Either (Posn,String) a +runLangP lang p bs = snd <$> runP' lang p (Pn 1 0,bs) -runPartial p s = conv <$> runP' p (Pn 1 0,UTF8.fromString s) +runPartial p s = conv <$> runP' GF p (Pn 1 0,UTF8.fromString s) where conv ((pos,rest),x) = (UTF8.toString rest,x) -runP' (P f) (pos,txt) = - case f (dup (AI pos ' ' txt)) of - POk (AI pos _ rest,_) x -> Right ((pos,rest),x) +runP' lang (P f) (pos,txt) = + case f lang (dup (AI pos txt)) of + POk (AI pos rest,_) x -> Right ((pos,rest),x) PFailed pos msg -> Left (pos,msg) dup x = (x,x) failLoc :: Posn -> String -> P a -failLoc pos msg = P $ \_ -> PFailed pos msg +failLoc pos msg = P $ \_ _ -> PFailed pos msg lexer :: (Token -> P a) -> P a lexer cont = cont=< POk (inp,inp) T_EOF - AlexError (AI pos _ _) -> PFailed pos "lexical error" - AlexSkip inp' len -> {-trace (show len) $-} go (inp,inp') - AlexToken inp' len act -> POk (inp,inp') (act pos len str) + AlexError (AI pos _) -> PFailed pos "lexical error" + AlexSkip inp' len -> go lang (inp,inp') + AlexToken inp' len act -> act lang inp inp' len -start_of_tag s = isJust (match s) +getTag inp = space inp where - match s = do - s <- matchSpace s - (char s '/' - `mplus` - do s <- matchIdent s - s <- matchSpace s - (char s '/' - `mplus` - do s <- matchIdent s - s <- matchSpace s - char s '=')) - - matchSpace s - | BS.null s = Just s - | isSpace (BS.head s) = matchSpace (BS.tail s) - | otherwise = Just s - - init = - BS.pack "_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ" - rest = - BS.append init (BS.pack "0123456789'") - - char s c - | BS.null s = Nothing - | BS.head s == c = Just (BS.tail s) - | otherwise = Nothing - - matchIdent s - | BS.null s = Nothing - | BS.elem (BS.head s) init = matchRest s 1 (BS.tail s) - | otherwise = Nothing - - matchRest s0 i s - | BS.null s = checkResWord (BS.take i s0) s - | BS.elem (BS.head s) rest = matchRest s0 (i+1) (BS.tail s) - | otherwise = checkResWord (BS.take i s0) s - - checkResWord w s = - case Map.lookup (identC (rawIdentC w)) resWords of - Just t -> Nothing - Nothing -> Just s + space inp = do + (w,inp') <- alexGetByte inp + if isSpace (BS.w2c w) + then space inp' + else ident [] inp + + ident cs inp = do + (w,inp') <- alexGetByte inp + let c = BS.w2c w + if isAlphaNum c || c == '_' + then ident (c:cs) inp' + else return (identS (reverse cs),inp) getPosn :: P Posn -getPosn = P $ \ai2@(_,inp@(AI pos _ _)) -> POk ai2 pos +getPosn = P $ \_ ai2@(_,inp@(AI pos _)) -> POk ai2 pos } diff --git a/src/compiler/api/GF/Grammar/Parser.y b/src/compiler/api/GF/Grammar/Parser.y index 40ff012bd..196cca276 100644 --- a/src/compiler/api/GF/Grammar/Parser.y +++ b/src/compiler/api/GF/Grammar/Parser.y @@ -2,13 +2,14 @@ { {-# OPTIONS -fno-warn-overlapping-patterns #-} module GF.Grammar.Parser - ( P, runP, runPartial + ( P, runP, Lang(..), runLangP, runPartial, Posn(..) , pModDef , pModHeader , pTerm , pTopDef , pBNFCRules , pEBNFRules + , pNLG ) where import GF.Infra.Ident @@ -33,6 +34,7 @@ import qualified Data.Map as Map %partial pTerm Exp %name pBNFCRules ListCFRule %name pEBNFRules ListEBNFRule +%name pNLG NLG -- no lexer declaration %monad { P } { >>= } { return } @@ -59,7 +61,6 @@ import qualified Data.Map as Map ':' { T_colon } ';' { T_semicolon } '<' { T_less } - '' { T_big_rarrow} '>' { T_great } @@ -118,6 +119,8 @@ Integer { (T_Integer $$) } Double { (T_Double $$) } String { (T_String $$) } Ident { (T_Ident $$) } +' mkTag id $1 Empty - _ -> return (App $1 $2) } + : Exp4 Exp5 { App $1 $2 } | Exp4 '{' Exp '}' { App $1 (ImplArg $3) } | 'case' Exp 'of' '{' ListCase '}' { let annot = case $2 of Typed _ t -> TTyped t @@ -471,18 +481,6 @@ Exp6 | '<' ListTupleComp '>' { R (tuple2record $2) } | '<' Exp ':' Exp '>' { Typed $2 $4 } | '(' Exp ')' { $2 } - | '' { OpenTag $2 $3 } - | '' { CloseTag $3 } - | '' { markup $2 $3 Empty } - -Attributes :: { [Assign] } -Attributes - : { [] } - | Attribute Attributes { $1:$2 } - -Attribute :: { Assign } -Attribute - : Ident '=' Exp6 { assign (ident2label $1) $3 } ListExp :: { [Term] } ListExp @@ -703,6 +701,42 @@ ERHS3 :: { ERHS } | Ident { ENonTerm (showIdent $1,[]) } | '(' ERHS0 ')' { $2 } +NLG :: { Map.Map Ident Info } + : ListNLGDef { Map.fromList $1 } + | Posn Tag Posn { Map.singleton (identS "main") (ResOper Nothing (Just (mkL $1 $3 (Abs Explicit (identS "qid") $2)))) } + | Posn Exp Posn { Map.singleton (identS "main") (ResOper Nothing (Just (mkL $1 $3 (Abs Explicit (identS "qid") $2)))) } + +ListNLGDef :: { [(Ident,Info)] } +ListNLGDef + : {- empty -} { [] } + | 'oper' OperDef ListNLGDef { $2 ++ $3 } + +Markup :: { Term } +Markup + : Tag { $1 } + | Exp ';' { $1 } + +Tag :: { Term } +Tag + : '' ListMarkup '' {% if $1 == $5 + then return (Markup $1 $2 $4) + else fail ("Unmatched closing tag " ++ showIdent $1) } + | '' { Markup $1 $2 [] } + +ListMarkup :: { [Term] } + : { [] } + | Exp { [$1] } + | Markup ListMarkup { $1 : $2 } + +Attributes :: { [(Ident,Term)] } +Attributes + : { [] } + | Attribute Attributes { $1:$2 } + +Attribute :: { (Ident,Term) } +Attribute + : Ident '=' Exp6 { ($1,$3) } + ModuleName :: { ModuleName } : Ident { MN $1 } @@ -828,35 +862,4 @@ mkAlts cs = case cs of mkL :: Posn -> Posn -> x -> L x mkL (Pn l1 _) (Pn l2 _) x = L (Local l1 l2) x - -mkTag ident (App t1 t2) conc = - case match ident t2 of - Just attrs -> fmap (App t1) (mkTag ident t2 conc) - Nothing -> let t = App (Q (cPredef, (identS "linearize"))) t2 - in mkTag ident t1 $ case conc of - Empty -> t - _ -> C t conc -mkTag ident t conc = - case match ident t of - Just attrs -> return (markup ident attrs conc) - Nothing -> fail ("Unmatched closing tag " ++ showIdent ident) - -match ident (OpenTag ident' attrs) - | ident == ident' = Just attrs -match ident (R [(lbl,(Nothing,Vr ident'))]) - | lbl == ident2label (identS "p1") && ident == ident' - = Just [] -match ident _ = Nothing - -markup ident attrs content = - App - (App - (App - (Q (cPredef, (identS "markup"))) - (R attrs) - ) - (K (showIdent ident)) - ) - content - } diff --git a/src/compiler/api/GF/Grammar/Predef.hs b/src/compiler/api/GF/Grammar/Predef.hs index 882d374fa..a92a31f6c 100644 --- a/src/compiler/api/GF/Grammar/Predef.hs +++ b/src/compiler/api/GF/Grammar/Predef.hs @@ -33,6 +33,7 @@ cSOFT_BIND = identS "SOFT_BIND" cSOFT_SPACE = identS "SOFT_SPACE" cCAPIT = identS "CAPIT" cALL_CAPIT = identS "ALL_CAPIT" +cHtml = identS "Html" isPredefCat :: Ident -> Bool isPredefCat c = elem c [cInt,cString,cFloat] diff --git a/src/compiler/api/GF/Grammar/Printer.hs b/src/compiler/api/GF/Grammar/Printer.hs index 8abffbf78..a2c1b3279 100644 --- a/src/compiler/api/GF/Grammar/Printer.hs +++ b/src/compiler/api/GF/Grammar/Printer.hs @@ -24,7 +24,7 @@ module GF.Grammar.Printer ) where import Prelude hiding ((<>)) -- GHC 8.4.1 clash with Text.PrettyPrint -import PGF2(Literal(..)) +import PGF2(Literal(..),pgfFilePath) import PGF2.Transactions(SeqId) import GF.Infra.Ident import GF.Infra.Option @@ -84,6 +84,8 @@ ppModule q (mn, ModInfo mtype mstat opts exts with opens _ _ mseqs jments) = ppExtends (id,MIExcept incs) = id <+> '-' <+> brackets (commaPunct pp incs) ppWith (id,ext,opens) = ppExtends (id,ext) <+> "with" <+> commaPunct ppInstSpec opens +ppModule q (mn, ModPGF pgf) = + "pgf" <+> mn <+> '=' <+> show (pgfFilePath pgf) ppOptions opts = "flags" $$ @@ -249,6 +251,11 @@ ppTerm q d (Typed e t) = '<' <> ppTerm q 0 e <+> ':' <+> ppTerm q 0 t <> '>' ppTerm q d (ImplArg e) = braces (ppTerm q 0 e) ppTerm q d (ELincat cat t) = prec d 4 ("lincat" <+> cat <+> ppTerm q 5 t) ppTerm q d (ELin cat t) = prec d 4 ("lin" <+> cat <+> ppTerm q 5 t) +ppTerm q d (Markup tag attrs children) + | null children = pp "<" <> pp tag <+> hsep (map (ppMarkupAttr q) attrs) <> pp "/>" + | otherwise = pp "<" <> pp tag <+> hsep (map (ppMarkupAttr q) attrs) <> pp ">" $$ + nest 3 (ppMarkupChildren q children) $$ + pp " pp tag <> pp ">" ppTerm q d (TSymCat i r rs) = pp '<' <> pp i <> pp ',' <> ppLinFun (pp.fst) r rs <> pp '>' ppTerm q d (TSymVar i r) = pp '<' <> pp i <> pp ',' <> pp '$' <> pp r <> pp '>' @@ -341,6 +348,16 @@ ppAltern q (x,y) = ppTerm q 0 x <+> '/' <+> ppTerm q 0 y ppParams q ps = fsep (intersperse (pp '|') (map (ppParam q) ps)) ppParam q (id,cxt) = id <+> hsep (map (ppDDecl q) cxt) +ppMarkupAttr q (id,e) = + id <> pp '=' <> ppTerm q 5 e + +ppMarkupChildren q [t] = ppTerm q 0 t +ppMarkupChildren q (t:ts) = + (case t of + Markup {} -> ppTerm q 0 t + _ -> ppTerm q 0 t <> ';') $$ + ppMarkupChildren q ts + ppSeqId :: SeqId -> Doc ppSeqId seqid = 'S' <> pp seqid diff --git a/src/compiler/gf.cabal b/src/compiler/gf.cabal index 4b9ac22c0..e92904bab 100644 --- a/src/compiler/gf.cabal +++ b/src/compiler/gf.cabal @@ -82,8 +82,6 @@ library GF.Infra.Option GF.Infra.UseIO GF.Infra.BuildInfo - - other-modules: GF.Support GF.Text.Pretty GF.Text.Lexing