                                                                      
        Case study: Parsing expressions	
        Note that this is not a monadic approach to parsing.	
                                                                      
        (c) Simon Thompson, 1995,1998.					
                                                                      

>       module ParsingBasics where
>       infixr 5 >*>
 
Syntactic types							
 
>       type Var = Char
>       data Expr = Lit Int | Var Var | Op Op Expr Expr
>       data Op   = Add | Sub | Mul | Div | Mod
 
The type of parsers.						
 
>       type Parse a b = [a] -> [(b,[a])]
 
Some basic parsers						
 
 
Fail on any input.						
 
>       none :: Parse a b
>       none inp = []
 
Succeed, returning the value supplied.				
 
>       succeed :: b -> Parse a b 
>       succeed val inp = [(val,inp)]
 
token t recognises t as the first value in the input.		
 
>       token :: Eq a => a -> Parse a a
>       token t (x:xs) 
>         | t==x 	= [(t,xs)]
>         | otherwise 	= []
>       token t []    = []
 
spot whether an element with a particular property is the 	
first element of input.						
 
>       spot :: (a -> Bool) -> Parse a a
>       spot p (x:xs) 
>         | p x 	= [(x,xs)]
>         | otherwise 	= []
>       spot p []    = []
 
Examples.							
 
>       bracket = token '('
>       dig     =  spot isDigit
 
Combining parsers						
 
 
alt p1 p2 recognises anything recogniseed by p1 or by p2.	
 
>       alt :: Parse a b -> Parse a b -> Parse a b
>       alt p1 p2 inp = p1 inp ++ p2 inp
>       exam1 = (bracket `alt` dig) "234" 
 
Apply one parser then the second to the result(s) of the first.	
 

>       (>*>) :: Parse a b -> Parse a c -> Parse a (b,c)
	
>       (>*>) p1 p2 inp 
>         = [((y,z),rem2) | (y,rem1) <- p1 inp , (z,rem2)  <- p2 rem1 ]
 
Transform the results of the parses according to the function.	
 
>       build :: Parse a b -> (b -> c) -> Parse a c
>       build p f inp = [ (f x,rem) | (x,rem) <- p inp ]
 
Recognise a list of objects.					
 
	
>       list :: Parse a b -> Parse a [b]
>       list p = (succeed []) `alt`
>                ((p >*> list p) `build` convert)
>                where
>                convert = uncurry (:)
 
From the exercises...						
 
>       neList   :: Parse a b -> Parse a [b]
>       neList = neList		 	 -- dummy definition
>       optional :: Parse a b -> Parse a [b]
>       optional = optional	 	 -- dummy definition
>       nTimes :: Int -> Parse a b -> Parse a [b]
>       nTimes = nTimes		 	 -- dummy definition
 
A parser for expressions					
 
 
The parser has three components, corresponding to the three	
clauses in the definition of the syntactic type.		
 
>       parser :: Parse Char Expr
>       parser = (litParse `alt` varParse) `alt` opExpParse
 
Spotting variables.						
 
>       varParse :: Parse Char Expr
>       varParse = spot isVar `build` Var

>       isVar :: Char -> Bool
>       isVar x = ('a' <= x && x <= 'z')
 
Parsing (fully bracketed) operator applications.		
 
>       opExpParse 
>         = (token '(' >*>
>            parser    >*>
>            spot isOp >*>
>            parser    >*>
>            token ')') 
>            `build` makeExpr

>       makeExpr (_,(e1,(bop,(e2,_)))) = Op (charToOp bop) e1 e2

>       isOp :: Char -> Bool
>       isOp = isOp		  	 -- dummy definition

>       charToOp :: Char -> Op
>       charToOp = charToOp	  	 -- dummy definition

 
A number is a list of digits with an optional ~ at the front. 
 
>       litParse 
>         = ((optional (token '~')) >*>
>            (neList (spot isDigit)))
>            `build` (charlistToExpr.join) 
>            where
>            join = uncurry (++)
 
From the exercises...						
 
>       charlistToExpr :: [Char] -> Expr
>       charlistToExpr = charlistToExpr 	 -- dummy definition
 
A grammar for unbracketed expressions.				
								
eXpr  ::= Int | Var | (eXpr Op eXpr) |				
          lexpr mop mexpr | mexpr aop eXpr			
lexpr ::= Int | Var | (eXpr Op eXpr)				
mexpr ::= Int | Var | (eXpr Op eXpr) |	lexpr mop mexpr		
mop   ::= 'a' | '/' | '\%'					
aop   ::= '+' | '-'						
 
 
The top-level parser						
 
>       topLevel :: Parse a b -> [a] -> b
>       topLevel p inp
>         = case results of
>             [] -> error "parse unsuccessful"
>             _  -> head results
>           where
>           results = [ found | (found,[]) <- p inp ]
 
The type of commands.						
 
>       data Command = Eval Expr | Assign Var Expr | Null
>       commandParse :: Parse Char Command
>       commandParse = commandParse 	 -- dummy definition
 
From the exercises.						
 
tokenList :: [a] -> Parse a [a]
spotWhile :: (a -> Bool) -> Parse a [a]

