-- A parser for simple type definitions.
-- Developed for COMP3610 (and COMP2600)

-- Clem Baker-Finch

module ParseTypeDefs where

import Char

-- The grammar is suitable for top-down predictive parsing without
-- modification:

-- <type> ::= <simple> | * ident | array [ <simple> ] of <type>
-- <simple> ::= int | char | num .. num

-- First the abstract syntax, the target of the parser:

type Name = String

data TypeDef = Simple Simple | Ptr Name | Arr Simple TypeDef
     deriving Show

data Simple  = IntType | CharType | Range Int Int
     deriving Show

-- The scanner is straightforward:

data Token = STAR | ID Name
           | LBRAK | RBRAK
           | ARRAY | OF
           | INTSYM | CHARSYM
           | NUM Int | ELIPSIS deriving Eq

scan :: String -> [Token]

scan []           = []
scan ('*':cs)     = STAR : scan cs
scan ('[':cs)     = LBRAK : scan cs
scan (']':cs)     = RBRAK : scan cs
scan ('.':'.':cs) = ELIPSIS : scan cs
scan input@(c:cs)
  | isSpace c     = scan cs
  | isAlpha c     = checkResWord word : scan afterWord
  | isDigit c     = NUM (read num)    : scan afterNum
  | c == '-'      = NUM (read num)    : scan afterNum
  | otherwise     = error (c:" : illegal character.")
  where
    (word, afterWord) = span isAlphaNum input
    (num,  afterNum)  = span isDigit    input

checkResWord :: String -> Token

checkResWord "int"   = INTSYM
checkResWord "char"  = CHARSYM
checkResWord "array" = ARRAY
checkResWord "of"    = OF
checkResWord other   = ID other

-- The parser takes a sequence of tokens and constructs a parse tree
-- (i.e. a value of TypeDef).  It must also return the sequence of
-- remaining tokens with which to continue the parse.  There will also
-- be a parser corresponding to the syntactic category Simple.

parseTypeDef :: [Token] -> (TypeDef, [Token])

parseTypeDef (STAR:ID name:toks) = (Ptr name, toks)
parseTypeDef (ARRAY:LBRAK:toks)    = (Arr ixType elType, toks4)
  where (ixType, toks1) = parseSimple toks
        toks2           = check RBRAK toks1
        toks3           = check OF toks2
        (elType, toks4) = parseTypeDef toks3
parseTypeDef toks                  = (Simple sType, toks')
  where (sType, toks')  = parseSimple toks

parseSimple :: [Token] -> (Simple, [Token])

parseSimple (INTSYM:toks)                        = (IntType, toks)
parseSimple (CHARSYM:toks)                       = (CharType, toks)
parseSimple (NUM min : ELIPSIS : NUM max : toks) = (Range min max, toks)
parseSimple _                                    = error "Unexpected symbol."

-- Check that the next token is the one expected:

check :: Token -> [Token] -> [Token]

check tok []  = error "Unexpected end of input."
check tok (t:ts)
  | tok == t  = ts
  | otherwise = error "Unexpected symbol."

-- To parse an entire type definition, simply require all tokens to be
-- consumed.

parse :: String -> TypeDef
parse input = typeDef
  where (typeDef, []) = parseTypeDef $ scan input


