/*
 * LpParser.java
 *
 * Copyright (C) 2006 - 2007 Martin Slota
 *
 * This program is free software; you can redistribute it and/or modify it under
 * the terms of the GNU General Public License as published by the Free Software
 * Foundation; either version 2 of the License, or (at your option) any later
 * version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 51
 * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */

/*
 * History:
 * v0.1 (2007-01-02): initial version
 * v0.1.3 (2007-01-03): initial implementation
 * v0.2 (2007-01-06):
 * - documentation added
 * - some private methods made protected
 * - tests finished
 * v0.2.1 (2007-01-12):
 * - some refactoring performed
 * - constructors modified to throw IllegalArgumentException instead of
 *   NullPointerException
 * - a bug in parseTerm fixed (kept creating new variables instead of calling
 *   getVariable)
 * v0.2.2 (2007-01-15):
 * - the constructor with a String argument accepts any CharSequence now
 * v0.2.3 (2007-01-28):
 * - checked exceptions eliminated (see 
 *   http://www.mindview.net/Etc/Discussions/CheckedExceptions)
 * - documentation updated
 * v0.2.4 (2007-02-08):
 * - made reusable (setInput instead of constructors)
 * - documentation updated
 * v0.2.5 (2007-02-11):
 * - constant, variable, predicate and function pools moved directly to their 
 *   classes
 * - support for integrity constraints added
 * - parseArguments added (to remove code duplication)
 * - some changes because of the addition of LpAtom class
 * v0.2.6 (2007-02-12):
 * - nextRule deleted; parseRule, parseLiteral, parseTerm made public (now the 
 *   class is more universal, can be used to parse models, etc.)
 * - hasMoreTokens added
 * - parseAllRules methods added
 * - changes because LogicProgram converted to an interface, implementation is 
 *   now in DefaultLogicProgram 
 * v0.2.7 (2007-02-13):
 * - parseAtom added so that it can also be used to parse models
 * v0.2.8 (2007-03-05):
 * - changes because DefaultLogicProgram was renamed to GeneralizedLogicProgram
 * - a part of parseRule() moved to separate methods: parseOnlyRule() and 
 *   parseRuleBody() (used by EvolpParser)
 * - now uses new getInstance methods in LpFunction and LpPredicate
 */

package lp.parse;

import java.io.*;
import java.util.ArrayList;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import lp.struct.LpAtom;

import lp.struct.LpCompoundTerm;
import lp.struct.LpConstant;
import lp.struct.LpFunction;
import lp.struct.LpLiteral;
import lp.struct.LpPredicate;
import lp.struct.LpRule;
import lp.struct.LpTerm;
import lp.struct.LpVariable;

import lp.unit.GeneralizedLogicProgram;
import lp.unit.LogicProgram;

import static lp.parse.LpTokenType.*;

/**
 * Processes textual input with a logic program or logic programming constructs
 * and produces {@link LpRule}, {@link LpLiteral} and {@link LpTerm} objects. An
 * {@link LpLexer} is used to tokenize the input first. The constructs are
 * parsed according to the following grammar:
 *
 *<pre>
 *Rule ---&gt; OrdinaryRule | Constraint
 *OrdinaryRule ---&gt; Literal (RULE_ARROW (Literal (COMMA Literal)*)?)? DOT
 *Constraint ---&gt; RULE_ARROW Literal (COMMA Literal)* DOT
 *Literal ---&gt; 'not'? Atom
 *Atom ---&gt; PredicateName Arguments?
 *PredicateName ---&gt; LOWERCASE_WORD
 *Arguments ---&gt; LEFT_PAREN Term (COMMA Term)* RIGHT_PAREN
 *Term ---&gt; Constant | Variable | CompoundTerm
 *Constant ---&gt; LOWERCASE_WORD
 *Variable ---&gt; UPPERCASE_WORD
 *CompoundTerm ---&gt; FunctionName Arguments
 *FunctionName ---&gt; LOWERCASE_WORD
 *</pre>
 *
 * In this grammar
 * <ul>
 * <li>Rule, OrdinaryRule, Constraint, Literal, Atom, PredicateName, Arguments, 
 * Term, Constant, Variable, CompoundTerm and FunctionName are non-terminals 
 * corresponding to the structure of the input</li>
 * <li>DOT, RULE_ARROW, COMMA, LEFT_PAREN, RIGHT_PAREN, LOWERCASE_WORD and
 * UPPERCASE_WORD are tokens as defined in {@link LpTokenType}</li>
 * <li>'not' is a LOWERCASE_WORD token whose lexeme is "not"</li>
 * </ul>
 *
 * @author Martin Slota
 * @version 0.2.8
 * @see LpLexer
 * @see LpRule
 * @see #parseRule()
 * @see #parseLiteral()
 * @see #parseTerm()
 */
public class LpParser implements Closeable {
	/**
	 * The underlying lexer instance that tokenizes the input being parsed.
	 */
	private final LpLexer lexer;
	
	/**
	 * Creates a new {@code LpParser} instance that uses a new {@link LpLexer} 
	 * instance to tokenize the character input before parsing.
	 */
	public LpParser() {
		this(new LpLexer());
	}
	
	/**
	 * Creates a new {@code LpParser} instance that uses {@code lexer} to 
	 * tokenize the character input before parsing.
	 *
	 * @param lexer the {@code LpLexer} instance used to tokenize the character 
	 * input before parsing
	 * @throws IllegalArgumentException if {@code lexer} is {@code null}
	 */
	public LpParser(LpLexer lexer) {
		// Fail fast: without this check a null lexer would only surface later
		// as a NullPointerException on the first use of the parser.
		if (lexer == null) {
			throw new IllegalArgumentException("The lexer must not be null!");
		}
		this.lexer = lexer;
	}
	
	/**
	 * Sets the character input of this {@code LpParser} to the given 
	 * {@link CharSequence} and reads the first token from it.
	 *
	 * @param input the character sequence to be parsed
	 * @throws IllegalArgumentException if {@code input} is {@code null}
	 * @see LpLexer#setInput(CharSequence)
	 */
	public void setInput(CharSequence input) {
		lexer.setInput(input);
		// prime the lookahead token so that the parse methods can inspect it
		nextToken();
	}
	
	/**
	 * Sets the character input of this {@code LpParser} to the contents of the 
	 * given file and reads the first token from it. The default system 
	 * character encoding is used to read the file.
	 *
	 * @param input the file to be parsed
	 * @throws IOException (wrapped in an {@link lp.util.ExceptionAdapter}) in 
	 * case an I/O exception occurs while opening or reading the file
	 * @throws IllegalArgumentException if {@code input} is {@code null}
	 * @see LpLexer#setInput(File)
	 */
	public void setInput(File input) {
		lexer.setInput(input);
		// prime the lookahead token so that the parse methods can inspect it
		nextToken();
	}
	
	/**
	 * Sets the character input of this {@code LpParser} to the given 
	 * {@link Reader} and reads the first token from it.
	 *
	 * @param input the input reader
	 * @throws IOException (wrapped in an {@link lp.util.ExceptionAdapter}) 
	 * in case an I/O exception occurs while reading from the {@link Reader}
	 * @throws IllegalArgumentException if {@code input} is {@code null}
	 * @see LpLexer#setInput(Reader)
	 */
	public void setInput(Reader input) {
		lexer.setInput(input);
		// prime the lookahead token so that the parse methods can inspect it
		nextToken();
	}
	
	/**
	 * Closes the underlying lexer. If {@link #setInput(CharSequence)} or
	 * {@link #setInput(File)} was used to set the current character source,
	 * this method should be called when parsing is finished. In other cases it 
	 * is up to the programmer whether to call this method or to take care of
	 * closing the input reader in some other way.
	 *
	 * @throws IOException (wrapped in an {@link lp.util.ExceptionAdapter}) in 
	 * case an I/O exception occurs while closing the underlying lexer
	 * @see LpLexer#close()
	 */
	public void close() {
		getLexer().close();
	}

	/**
	 * Returns {@code true} if there are more tokens on the input and 
	 * {@code false} if there are no more tokens or if {@link #close()} has 
	 * already been called.
	 *
	 * @return {@code true} if the current token type is neither {@code null}
	 * nor EOF, {@code false} otherwise
	 */
	public boolean hasMoreTokens() {
		LpTokenType type = getLexer().getTokenType();
		return (type != null && type != EOF);
	}
	
	/**
	 * Parses the whole input as logic programming rules and returns them in a 
	 * new {@link LogicProgram} object.
	 *
	 * @return a {@code LogicProgram} instance containing the parsed rules
	 * @throws IOException (wrapped in an {@link lp.util.ExceptionAdapter}) in 
	 * case an I/O error occurs while tokenizing the input
	 * @throws LpParserException if the input doesn't match the expression 
	 * (Rule* EOF) (Rule is a non-terminal from the grammar in class 
	 * description)
	 * @see #parseRule()
	 */
	public LogicProgram parseAllRules() {
		return parseAllRules(new GeneralizedLogicProgram());
	}
	
	/**
	 * Parses the whole input as logic programming rules and adds them to the
	 * given {@link LogicProgram} object.
	 *
	 * @param program the {@code LogicProgram} to which the parsed rules are 
	 * added
	 * @return {@code program} itself, after the parsed rules have been added 
	 * to it
	 * @throws IOException (wrapped in an {@link lp.util.ExceptionAdapter}) in 
	 * case an I/O error occurs while tokenizing the input
	 * @throws LpParserException if the input doesn't match the expression 
	 * (Rule* EOF) (Rule is a non-terminal from the grammar in class 
	 * description)
	 * @see #parseRule()
	 */
	public LogicProgram parseAllRules(LogicProgram program) {
		while (hasMoreTokens()) {
			program.add(parseRule());
		}
		return program;
	}
	
	/**
	 * Corresponds to the non-terminal Rule from the class description.
	 * Parses the next rule in the input and returns a {@link LpRule}
	 * instance&#8212;an object model of the parsed rule.
	 *
	 * @return the parsed rule
	 * @throws IOException (wrapped in an {@link lp.util.ExceptionAdapter}) in 
	 * case an I/O error occurs while tokenizing the input
	 * @throws LpParserException if the input doesn't match the grammar from
	 * class description
	 */
	public LpRule parseRule() {
		LpRule result = parseOnlyRule();
		match(DOT);
		return result;
	}
	
	/**
	 * Parses the next rule on input but doesn't require the trailing DOT. For
	 * internal use by inheriting classes.
	 *
	 * @return the parsed rule; its head is {@code null} if the rule starts 
	 * with a RULE_ARROW (i.e. it is a constraint)
	 * @throws IOException (wrapped in an {@link lp.util.ExceptionAdapter}) in 
	 * case an I/O error occurs while tokenizing the input
	 * @throws LpParserException if the input doesn't match the grammar from
	 * class description
	 */
	protected LpRule parseOnlyRule() {
		// a rule that starts directly with the arrow is a constraint and has 
		// no head
		LpLiteral head = null;
		if (getLexer().getTokenType() != RULE_ARROW) {
			head = parseLiteral();
		}
		
		Set<LpLiteral> body = parseRuleBody(head == null);
		return new LpRule(head, body);
	}
	
	/**
	 * Parses a rule's body and returns its object model&#8212;a set of 
	 * {@link LpLiteral} objects. If {@code constraint} is false, the input must
	 * match the expression
	 *
	 *<pre>(RULE_ARROW (Literal (COMMA Literal)*)?)?</pre>
	 *
	 * If {@code constraint} is {@code true}, the input must match the 
	 * expression
	 *
	 *<pre>(RULE_ARROW Literal (COMMA Literal)*)?</pre>
	 *
	 * @param constraint if {@code true}, at least one literal is required after
	 * the arrow (in case there is one)
	 * @return the parsed body of a rule as a set of {@link LpLiteral} objects
	 * (in the order in which they were parsed) or {@code null} if no literal 
	 * was parsed, i.e. if there is no arrow or no literal follows it
	 * @throws IOException (wrapped in an {@link lp.util.ExceptionAdapter}) in 
	 * case an I/O error occurs while tokenizing the input
	 * @throws LpParserException if the input doesn't match the grammar from
	 * class description
	 */
	protected Set<LpLiteral> parseRuleBody(boolean constraint) {
		Set<LpLiteral> result = null;
		if (getLexer().getTokenType() == RULE_ARROW) {
			nextToken();
			// a constraint must have at least one body literal and every 
			// literal starts with a LOWERCASE_WORD ('not' or a predicate name)
			if (constraint) {
				expect(LOWERCASE_WORD);
			}
			if (getLexer().getTokenType() == LOWERCASE_WORD) {
				result = new LinkedHashSet<LpLiteral>();
				result.add(parseLiteral());
				while (getLexer().getTokenType() == COMMA) {
					nextToken();
					result.add(parseLiteral());
				}
			}
		}
		return result;
	}
	
	/**
	 * Corresponds to the non-terminal Literal from the class description.
	 * Parses the next literal in the input and returns a {@link LpLiteral}
	 * instance&#8212;an object model of the parsed literal.
	 *
	 * @return the parsed literal
	 * @throws IOException (wrapped in an {@link lp.util.ExceptionAdapter}) 
	 * in case an I/O error occurs while tokenizing the input
	 * @throws LpParserException if the input doesn't match the grammar from
	 * class description
	 */
	public LpLiteral parseLiteral() {
		boolean positive = true;
		// a LOWERCASE_WORD with the lexem "not" marks a negative literal; the
		// constant is the receiver so that the comparison is null-safe
		if (getLexer().getTokenType() == LOWERCASE_WORD
				&& "not".equals(getLexer().getLexem())) {
			positive = false;
			nextToken();
		}
		
		return parseAtom().getLiteral(positive);
	}
	
	/**
	 * Corresponds to the non-terminal Atom from the class description.
	 * Parses the next atom on the input and returns a {@link LpAtom}
	 * instance&#8212;an object model of the parsed atom.
	 *
	 * @return the parsed atom
	 * @throws IOException (wrapped in an {@link lp.util.ExceptionAdapter}) 
	 * in case an I/O error occurs while tokenizing the input
	 * @throws LpParserException if the input doesn't match the grammar from
	 * class description
	 */
	public LpAtom parseAtom() {
		expect(LOWERCASE_WORD);
		String name = getLexer().getLexem();
		nextToken();
		
		// null if the predicate name is not followed by an argument list
		List<LpTerm> arguments = parseArguments();
		
		LpPredicate pred = LpPredicate.getInstance(name, arguments);
		return LpAtom.getInstance(pred, arguments);
	}
	
	/**
	 * Corresponds to the non-terminal Term from the class description.
	 * Parses the next term in the input and returns an instance of a class
	 * implementing the {@link LpTerm} interface&#8212;an object model of the
	 * parsed term.
	 *
	 * @return the parsed term: a variable for an UPPERCASE_WORD, a constant 
	 * for a LOWERCASE_WORD without an argument list and a compound term for a 
	 * LOWERCASE_WORD followed by an argument list
	 * @throws IOException (wrapped in an {@link lp.util.ExceptionAdapter}) 
	 * in case an I/O error occurs while tokenizing the input
	 * @throws LpParserException if the input doesn't match the grammar from
	 * class description
	 */
	public LpTerm parseTerm() {
		if (getLexer().getTokenType() == UPPERCASE_WORD) {
			LpTerm result = LpVariable.getInstance(getLexer().getLexem());
			nextToken();
			return result;
		}
		
		expect(LOWERCASE_WORD);
		String name = getLexer().getLexem();
		nextToken();
		
		List<LpTerm> arguments = parseArguments();
		
		// no argument list means the name denotes a constant
		if (arguments == null) {
			return LpConstant.getInstance(name);
		}
		return LpCompoundTerm.getInstance(
				LpFunction.getInstance(name, arguments),
				arguments);
	}
	
	/**
	 * Corresponds to the non-terminal Arguments from the class description.
	 * Parses a comma separated list of terms enclosed in parentheses.
	 *
	 * @return the parsed argument list or {@code null} if the current token is 
	 * not a LEFT_PAREN, i.e. if there is no argument list on the input
	 * @throws IOException (wrapped in an {@link lp.util.ExceptionAdapter}) 
	 * in case an I/O error occurs while tokenizing the input
	 * @throws LpParserException if the input doesn't match the grammar from
	 * the class description
	 */
	protected List<LpTerm> parseArguments() {
		List<LpTerm> arguments = null;
		if (getLexer().getTokenType() == LEFT_PAREN) {
			match(LEFT_PAREN);
			arguments = new ArrayList<LpTerm>();
			arguments.add(parseTerm());
			while (getLexer().getTokenType() == COMMA) {
				match(COMMA);
				arguments.add(parseTerm());
			}
			match(RIGHT_PAREN);
		}
		return arguments;
	}
	
	/**
	 * Compares the current token (read through 
	 * {@link LpLexer#getTokenType()}) with an expected token and reads a new 
	 * token. Throws a {@link LpParserException} if the tokens differ. Has the 
	 * same effects as calling
	 *
	 *<pre>
	 *expect(token);
	 *nextToken();
	 *</pre>
	 *
	 * @param token the expected token
	 * @throws LpParserException in case the current token is not as expected
	 * @throws IOException (wrapped in an {@link lp.util.ExceptionAdapter}) 
	 * in case an I/O exception occurs while identifying the next token
	 */
	protected void match(LpTokenType token) {
		expect(token);
		nextToken();
	}
	
	/**
	 * Compares the current token (read through 
	 * {@link LpLexer#getTokenType()}) with an expected token. Throws a 
	 * {@link LpParserException} if the tokens differ. The current token is not
	 * consumed.
	 *
	 * @param token the expected token
	 * @throws LpParserException in case the current token is not as expected
	 */
	protected void expect(LpTokenType token) {
		if (getLexer().getTokenType() != token) {
			throw new LpParserException(token, getLexer());
		}
	}
	
	/**
	 * Asks for the next token from the underlying {@link LpLexer}. Does 
	 * nothing if the current token is already EOF.
	 *
	 * @throws IOException (wrapped in an {@link lp.util.ExceptionAdapter}) in 
	 * case an I/O exception occurs while identifying the next token
	 * @see LpLexer#nextToken()
	 */
	protected final void nextToken() {
		if (getLexer().getTokenType() != EOF) {
			getLexer().nextToken();
		}
	}
	
	/**
	 * An accessor method for {@link #lexer}.
	 *
	 * @return {@link #lexer}
	 */
	protected LpLexer getLexer() {
		return lexer;
	}
}