/** SExpressionParser.java
 *
 *  S-Expression parser for C SC 453, Fall 2002 (Collberg)
 *  Author: Martin Stepp (stepp@cs.arizona.edu)
 *
 *  Description: Call the static method SExpressionParser.parse(InputStream)
 *               to get a Vector of Vectors of SExprToken objects.  Each token 
 *               is an atom, int, string, or real from an s-expression as
 *               output by the lexer.  This saves you from having to read in 
 *               that text and verify its correctness and all that mess.
 */

import java.io.*;
import java.util.*;


/** Parses S-Expressions.  To do so, call              <code>
 *  SExpressionParser.parse(InputStream).
 *
 *  </code><p>
 *
 *  For example,    <pre>
 *
 *  (
 *    (PROGRAM            1)
 *    (IDENT     P        3)
 *    (SEMICOLON          6)
 *  )
 *
 *  </pre>
 *
 *  parses into the following contents:
 *
 *  <pre>
 *
 *  java.util.Vector {
 *    java.util.Vector { SExprToken(kind=SExprToken.ATOM,   value="PROGRAM"),
 *                       SExprToken(kind=SExprToken.INT,    value="1")},
 *    java.util.Vector { SExprToken(kind=SExprToken.ATOM,   value="IDENT"),
 *                       SExprToken(kind=SExprToken.ATOM,   value="P"),
 *                       SExprToken(kind=SExprToken.INT,    value="3")},
 *    java.util.Vector { SExprToken(kind=SExprToken.ATOM,   value="SEMICOLON"),
 *                       SExprToken(kind=SExprToken.INT,    value="6")}
 *  }
 *
 *  </pre><p>
 *
 *  So what you probably think of as one "token" from the lexer is actually
 *  a Vector of SExprTokens.  You can pull this Vector apart and put it into
 *  your own objects, so you don't have to deal with all the vector-of-
 *  vectors overhead.  It is written this way because it has to be general
 *  enough to work for later phases of the compiler as well.
 *
 *  <p>
 *
 *  Code example: <pre>
 *
 *  Vector tokens = SExpressionParser.parse(new FileInputStream("somefile.dat"));
 *  for (int i = 0; i < tokens.size(); i++)
 *    System.out.println("token: " + tokens.get(i));
 *
 *
 *  </pre>
 */
public class SExpressionParser
{
  /** Parses a stream of characters into an S-expression, modeled by a
   *  Vector whose elements are SExprTokens (atoms, strings, ints, reals)
   *  and nested Vectors (sub-lists).
   *
   *  <p>The input must start with '('.  The outermost pair of parentheses
   *  is consumed here, so the returned Vector holds the contents of the
   *  outer list rather than a one-element wrapper around it.  Parsing stops
   *  at the matching ')'; anything after it is lexed (and so must consist of
   *  legal characters) but is otherwise ignored.
   *
   *  <p>On malformed input an error message is printed to System.err and
   *  the JVM is terminated with exit status 1.
   *
   *  <p>The stream is read to its end but is not closed; closing it is the
   *  caller's job.  The parser keeps its position in static fields, so only
   *  one parse may be in progress at a time.
   *
   *  @param in the stream to read the S-expression text from
   *  @return the contents of the outermost list
   *  @throws IOException if reading from the stream fails
   */
  public static Vector parse(InputStream in) throws IOException
  {
    Vector lexTokens = lex(in);
    itr = lexTokens.iterator();
    next();  // cannot fail: lex always appends an EOF token
    
    match(SExprToken.LPAREN);  // avoids unnecessary outer list layer
    Vector tokens = SExpr();
    match(SExprToken.RPAREN);
    
    return tokens;
  }
  
  /** Pretty-prints a Vector of Vectors of SExprTokens to System.out.
   *
   *  @param list the list to print, as returned by parse
   */
  public static void unparse(Vector list)
  {
    unparseR(list, "");
  }
  
  /* Pretty-prints one list, indented by pad.  Sub-lists are printed
   * recursively with three more spaces of indentation; every other element
   * is printed with its toString(), separated by single spaces. */
  private static void unparseR(Vector list, String pad)
  {
    System.out.print(pad + "(");
    
    // True while the cursor sits on a line that already has output on it
    // (the opening paren or a non-list element), so that a following
    // sub-list must first start a fresh line.
    boolean needNewLine = true;
    int size = list.size();
    for (int ii = 0;  ii < size;  ii++)
    {
      Object element = list.elementAt(ii);
      if (element instanceof Vector)
      {
        // a sublist
        Vector sublist = (Vector)element;
        if (needNewLine)
          System.out.println();
        unparseR(sublist, pad + "   ");  // recurse to print lists
        needNewLine = false;             // the recursive call ended its line
      }
      else
      {
        needNewLine = true;
        System.out.print(element + (ii < size - 1 ? " " : ""));
      }
    }
    
    // After a sub-list the cursor is at the start of a line, so the closing
    // paren needs the indentation; otherwise it follows the last element.
    System.out.println((!needNewLine ? pad : "") + ")");
  }
  
  // You don't need to look at the rest of the code below if you don't want to;
  // the only method you need is the parse method.
  
  /* The token the parser is currently looking at. */
  private static SExprToken curr = null;
  
  /* Iterator over the tokens produced by lex for the current parse. */
  private static Iterator itr = null;
  
  /** Test driver: parses the file named by the first command-line argument
   *  and pretty-prints the result.
   */
  public static void main(String[] args) throws Exception
  {
    if (args.length < 1)
    {
      System.err.println("Usage: java SExpressionParser <file>");
      System.exit(1);
    }
    
    InputStream in = new FileInputStream(args[0]);
    try
    {
      unparse(parse(in));
    }
    finally
    {
      in.close();  // parse does not close the stream it is given
    }
  }
  
  
  /* Moves to next internal token and returns it.
   * Throws SExprException if the token list is exhausted. */
  private static SExprToken next() throws SExprException
  {
    if (!itr.hasNext())
      throw new SExprException("no tokens left");
    
    return curr = (SExprToken)itr.next();
  }
  
  /* Matches one SExpression internal token: checks that the current token
   * has the given kind and advances past it.  Any failure is fatal (die). */
  private static void match(Object type)
  {
    // System.out.println("Matching " + type);
    if (curr.kind != type)
      die("Cannot match " + type);

    try
    {
      next();
    }
    catch (SExprException see)
    {
      die("Cannot match " + type + " - " + see.getMessage());
    }
  }
  
  /* Parser recursive helper method to parse SExpression grammar.
   * Collects elements until the ')' that closes the current list; that
   * ')' is left as the current token for the caller to match. */
  private static Vector SExpr()
  {
    Vector result = new Vector();
    while (true)
    {
      if (curr.kind == SExprToken.LPAREN)
      {
        // nested list
        match(SExprToken.LPAREN);
        result.add(SExpr());
        match(SExprToken.RPAREN);
      }
      else if (curr.kind == SExprToken.RPAREN)
        break;
      else if (curr.kind == SExprToken.ATOM)
      {
        result.addElement(new SExprToken(curr.kind, curr.value));
        match(SExprToken.ATOM);
      }
      else if (curr.kind == SExprToken.STR)
      {
        result.addElement(curr);
        match(SExprToken.STR);
      }
      else if (curr.kind == SExprToken.INT)
      {
        result.addElement(curr);
        match(SExprToken.INT);
      }
      else if (curr.kind == SExprToken.REAL)
      {
        result.addElement(curr);
        match(SExprToken.REAL);
      }
      else if (curr.kind == SExprToken.EOF)
      {
        // EOF is always the last token, so advancing past it fails inside
        // match and is reported there: the list was never closed.
        match(SExprToken.EOF);
        break;
      }
      else
      {
        result.addElement(SExprToken.EPSILON);
        break;
      }
    }
    
    return result;
  }
  
  /* Lexes the tokens so I can parse them.
   *
   * A small state machine over the characters of the stream; the state is
   * one of the SExprToken constants:
   *   NONE          between tokens
   *   ATOM          inside a letter-initial run of letters and digits
   *   STR, STR_BS   inside a quoted string / just after a backslash in one
   *   INT           inside a run of digits
   *   DOT           after a leading '.', a digit must follow
   *   REAL          inside the fraction digits of a real
   *   REAL_E, REAL_E_SIGN, REAL_E_DIGIT   inside the exponent of a real
   *
   * Returns a Vector of SExprTokens that always ends with an EOF token.
   * The quotes around a string are dropped; backslash escapes are kept
   * verbatim (backslash included) in the token's value. */
  private static Vector lex(InputStream in) throws IOException
  {
    PushbackReader reader = new PushbackReader(new InputStreamReader(in));
    
    Vector tokens = new Vector();
    StringBuffer str = new StringBuffer();  // text of the token in progress
    Object state = SExprToken.NONE;

    // Read until the real end of the stream.  Reader.ready() is not an
    // end-of-input test: it only says whether a read would block.
    int ch;
    while ((ch = reader.read()) != -1)
    {
      char c = (char)ch;
      
      if (state == SExprToken.NONE)
      {
        if (Character.isLetter(c))
        {
          str.append(c);
          state = SExprToken.ATOM;
        }
        else if (Character.isDigit(c))
        {
          str.append(c);
          state = SExprToken.INT;
        }
        else if (c == '.')
        {
          str.append(c);
          state = SExprToken.DOT;
        }
        else if (c == '(')
          tokens.addElement(new SExprToken(SExprToken.LPAREN, "("));
        else if (c == ')')
          tokens.addElement(new SExprToken(SExprToken.RPAREN, ")"));
        else if (c == '\"')
        {
          state = SExprToken.STR;
        }
        else if (!Character.isWhitespace(c))
          die("Bad character " + c);
      }
      else if (state == SExprToken.STR)
      {
        if (c == '\"')
        {
          tokens.addElement(new SExprToken(SExprToken.STR, str.toString()));
          str.setLength(0);
          state = SExprToken.NONE;
        }
        else if (c == '\\')
        {
          str.append(c);
          state = SExprToken.STR_BS;
        }
        else
          str.append(c);
      }
      else if (state == SExprToken.STR_BS)
      {
        // character after a backslash: taken literally, so an escaped
        // quote does not end the string
        str.append(c);
        state = SExprToken.STR;
      }
      else if (state == SExprToken.INT)
      {
        if (Character.isDigit(c))
          str.append(c);
        else if (c == '.')
        {
          str.append(c);
          state = SExprToken.REAL;
        }
        else
        {
          tokens.addElement(new SExprToken(SExprToken.INT, str.toString()));
          str.setLength(0);
          state = SExprToken.NONE;
          reader.unread(c);
        }
      }
      else if (state == SExprToken.DOT)
      {
        if (Character.isDigit(c))
        {
          str.append(c);
          state = SExprToken.REAL;
        }
        else
          die("Bad real number " + str + c);
      }
      else if (state == SExprToken.REAL)
      {
        if (Character.isDigit(c))
          str.append(c);
        else if (c == 'E')
        {
          str.append(c);
          state = SExprToken.REAL_E;
        }
        else
        {
          tokens.addElement(new SExprToken(SExprToken.REAL, str.toString()));
          str.setLength(0);
          state = SExprToken.NONE;
          reader.unread(c);
        }
      }
      else if (state == SExprToken.REAL_E)
      {
        if (Character.isDigit(c))
        {
          str.append(c);
          state = SExprToken.REAL_E_DIGIT;
        }
        else if (c == '+'  ||  c == '-')
        {
          str.append(c);
          state = SExprToken.REAL_E_SIGN;
        }
        else
          die("Bad real number " + str + c);
      }
      else if (state == SExprToken.REAL_E_SIGN)
      {
        if (Character.isDigit(c))
        {
          str.append(c);
          state = SExprToken.REAL_E_DIGIT;
        }
        else
          die("Bad real number " + str + c);
      }
      else if (state == SExprToken.REAL_E_DIGIT)
      {
        if (Character.isDigit(c))
          str.append(c);
        else
        {
          tokens.addElement(new SExprToken(SExprToken.REAL, str.toString()));
          str.setLength(0);
          state = SExprToken.NONE;
          reader.unread(c);
        }
      }
      else if (state == SExprToken.ATOM)
      {
        if (Character.isLetterOrDigit(c))
          str.append(c);
        else
        {
          tokens.addElement(new SExprToken(SExprToken.ATOM, str.toString()));
          str.setLength(0);
          state = SExprToken.NONE;
          // Push the terminator back, as the number states do: it may be a
          // token of its own, e.g. the ')' in "(foo)".
          reader.unread(c);
        }
      }
    }
    
    // End of input: emit a token that was complete but not yet terminated,
    // and report one that was cut off half-way.
    if (state == SExprToken.ATOM)
      tokens.addElement(new SExprToken(SExprToken.ATOM, str.toString()));
    else if (state == SExprToken.INT)
      tokens.addElement(new SExprToken(SExprToken.INT, str.toString()));
    else if (state == SExprToken.REAL  ||  state == SExprToken.REAL_E_DIGIT)
      tokens.addElement(new SExprToken(SExprToken.REAL, str.toString()));
    else if (state == SExprToken.STR  ||  state == SExprToken.STR_BS)
      die("Unterminated string \"" + str);
    else if (state != SExprToken.NONE)
      die("Bad real number " + str);  // DOT, REAL_E or REAL_E_SIGN
    
    tokens.addElement(new SExprToken(SExprToken.EOF, "EOF"));
    return tokens;
  }
  
  /* Reports a fatal parse error on System.err and terminates the JVM
   * with exit status 1. */
  private static void die(String s)
  {
    System.err.println("S-expression parse error: " + s);
    System.exit(1);
  }
  
  /* Forbids SExpressionParser object construction. */
  private SExpressionParser() {}

  /* Signals that the parser ran out of tokens; thrown by next and
   * turned into a fatal error by match. */
  private static class SExprException extends RuntimeException
  {
    public SExprException(String m)
    {
      super(m);
    }
  }
}