// $Id$
/*
 * Copyright 2010 Institute for Systems Biology
 *                Seattle, Washington, USA.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package biotextEngine.xmlparsers;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintStream;
import java.io.PrintWriter;
import java.nio.charset.Charset;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
import java.util.Stack;
import java.util.logging.Level;
import java.util.logging.Logger;

import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import biotextEngine.util.Scrivener;
import org.xml.sax.ContentHandler;
import org.xml.sax.ErrorHandler;
import org.xml.sax.SAXException;
import org.xml.sax.SAXNotRecognizedException;
import org.xml.sax.SAXParseException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;

/**
 * Generic SAX-based parser built on {@link DefaultHandler}. Parsing is event
 * driven, so no DOM tree is built in memory, which makes this suitable for
 * large XML files.
 * <p>
 * Concrete parsers extend this class and are launched through
 * {@link #main(String[], Class)}. Nested element handlers are tracked on a
 * shared stack via {@link #addChildHandler(ContentHandler)} and
 * {@link #removeChildHandler()}. All parser state (reader, handler stack,
 * output file, database connection) is held in static fields, so only one
 * parse per JVM is supported at a time.
 * 
 * @author Ariel Schwartz
 * @author Gaurav Bhalotia
 */
public class GenericXMLParser extends DefaultHandler {
    private static final Logger LOGGER = Scrivener.getLogger(GenericXMLParser.class);

    /** Constants used for JAXP 1.2 */
    static final String JAXP_SCHEMA_LANGUAGE = "http://java.sun.com/xml/jaxp/properties/schemaLanguage";
    static final String W3C_XML_SCHEMA = "http://www.w3.org/2001/XMLSchema";
    static final String JAXP_SCHEMA_SOURCE = "http://java.sun.com/xml/jaxp/properties/schemaSource";
    
    /* The SAX reader driving the current parse; set by main() before the parser is instantiated */
    private static XMLReader xmlReader;

    /* Active handlers, innermost element on top; the root parser sits at the bottom */
    private static Stack<ContentHandler> childHandlers = new Stack<ContentHandler>();

    protected String currentElement;

    /* Simple name (no directory part) of the XML file being parsed */
    protected String xmlFileName = null;

    /* Input path exactly as given on the command line */
    private static String XML_INPUT_FILENAME = null;
    
    /* Destination for generated SQL when writing to an intermediate file; null otherwise */
    static PrintWriter OUTFILE = null;

    /* If the parser needs to be validated */
    static boolean parseValidate = false;

    /* If the output needs to go to a intermediate file */
    static boolean IS_TO_FILE = false;

    private static Connection DBCONNECT;
    private static final String DB_DRIVER_NAME = "com.mysql.jdbc.Driver";
    private static String DB_URL;

    /**
     * Returns the shared database connection, opening it on first use.
     * <p>
     * If no URL was supplied through the <code>-url=</code> option, a local
     * MySQL <code>medline</code> database is assumed. If the driver class
     * cannot be loaded or the connection cannot be opened, the error is
     * logged and the JVM exits with status 1.
     * 
     * @return the open connection
     */
    static public synchronized Connection getDbConnection() {
        if (null == DBCONNECT) {
            // if url not set from params, assume local
            if (null == DB_URL) {
                DB_URL = "jdbc:mysql://localhost:3306/medline";
            }
            
            try {
                Class.forName(DB_DRIVER_NAME);
                LOGGER.config("Opening DB connection to: " + DB_URL);
                DBCONNECT = DriverManager.getConnection(DB_URL);
                
            } catch (ClassNotFoundException ex) {
                // attach the exception so the stack trace is not lost
                LOGGER.log(Level.SEVERE,
                        "Unable to find database driver classes: " + ex.getMessage(), ex);
                System.exit(1);

            } catch (SQLException ex) {
                LOGGER.log(Level.SEVERE,
                        "Cannot connect to this database: " + ex.getMessage(), ex);
                System.exit(1);
            }
        }
        
        return DBCONNECT;
    }
    
    /**
     * The default constructor. Records the simple name of the input file and
     * registers this parser as the root content handler.
     * <p>
     * No database connection is opened here; that happens lazily in
     * {@link #getDbConnection()}. When no input file has been set (the parser
     * was not started through {@link #main(String[], Class)}),
     * {@link #xmlFileName} stays <code>null</code>.
     */
    public GenericXMLParser() {

        if (XML_INPUT_FILENAME != null) {
            xmlFileName = new File(XML_INPUT_FILENAME).getName();
        }

        addChildHandler(this);
    }

    /**
     * Convert from a filename to a file URL.
     * 
     * @param filename path of the file, relative or absolute
     * @return the <code>file:</code> URL string for that path
     * @throws Exception if the path cannot be expressed as a URL
     */
    protected static String convertToFileURL(String filename) throws Exception {

        File file = new File(filename);
        String path = file.toURI().toURL().toString();
        return path;
    }

    /**
     * Logs the correct command-line usage and exits the JVM with status 1.
     */
    protected static void usage() {


        /* functionality from the original; untested, obscuring for now
        System.err.println("\t-dtd = DTD validation");
        System.err.println("\t-validate = Parse validation (Checks that all tags are being handled)");
        System.err.println("\t-xsd | -xsdss <file.xsd> = W3C XML Schema validation using xsi: hints");
        System.err.println("\t\tin instance document or schema source <file.xsd>");
        System.err.println("\t-xsdss <file> = W3C XML Schema validation using schema source <file>");
        */
        
        LOGGER.info("Usage: [-options] file.xml");
        LOGGER.info("-file => Output to an intermediate file [if " +
                "'-file=filename.sql' then will write to that file; else " +
                "will write to the input file name with '-insert.sql' appended]");
        LOGGER.info("-url=jdbc:mysql://localhost:3306/medline [Replace with appropriate MySQL url]");
        LOGGER.info("-usage or -help => this message");
        System.exit(1);
    }
    
    /**
     * Command-line entry point shared by all concrete parsers.
     * <p>
     * Recognized options are <code>-file</code> / <code>-file=NAME</code>,
     * <code>-url=jdbc:mysql://...</code> and <code>-usage</code> /
     * <code>-help</code>; the XML input file must be the last argument. If
     * SQL output is directed to a file, that file is flushed and closed when
     * parsing finishes, whether it succeeds or throws.
     * 
     * @param args command-line arguments
     * @param parserClass concrete parser to instantiate; it must have an
     *            accessible no-argument constructor
     * @throws Exception on I/O, parser configuration, instantiation or
     *             parse failure
     */
    static public void main(String[] args, Class<? extends GenericXMLParser> parserClass) throws Exception {

        // The validation switches below can currently never be enabled,
        // because the options that set them are commented out in the loop.
        boolean dtdValidate = false;
        boolean xsdValidate = false;
        String schemaSource = null;
        
        String outputFileName = null;

        /* Parse arguments to get the supplied options */
        for (int ii = 0; ii < args.length; ii++) {
            String arg = args[ii];

            /* former functionality; untested, obscuring for now
            if (args[ii].equals("-dtd")) {
                dtdValidate = true;
            } else if (args[ii].equals("-xsd")) {
                xsdValidate = true;
            } else if (args[ii].equals("-validate")) {
                parseValidate = true;
            } else if (args[ii].equals("-xsdss")) {
                if (ii == args.length - 1) {
                    usage();
                }
                xsdValidate = true;
                schemaSource = args[++ii];
            */
            
            if (arg.startsWith("-file")) {
                IS_TO_FILE = true;
                
                // an empty name after '=' falls back to the default file name
                if (arg.startsWith("-file=") && arg.length() > "-file=".length()) {
                    outputFileName = arg.substring("-file=".length());
                }
                
            } else if (arg.startsWith("-url=")) {
                String url = arg.substring("-url=".length());
                
                if (!url.startsWith("jdbc:mysql://")) {
                    LOGGER.severe("URL must begin 'jdbc:mysql://'; exiting");
                    System.exit(1);
                }
                
                DB_URL = url;
            
            } else if (arg.equals("-usage") || arg.equals("-help")) {
                usage();
                
            } else {
                XML_INPUT_FILENAME = arg;

                /* Must be last arg */
                if (ii != args.length - 1) {
                    usage();
                }
            }
        }
        
        if (XML_INPUT_FILENAME == null) {
            usage();
            return; // usage() exits the JVM; this only guards against a blocked exit
        }

        try {
            if (IS_TO_FILE) {
                /* If intermediate file chosen then open it to write */
                if (null == outputFileName) {
                    outputFileName = XML_INPUT_FILENAME + "-insert.sql";
                }
                OUTFILE = openSqlOutput(outputFileName);
            }

            parseInputFile(parserClass, dtdValidate, xsdValidate, schemaSource);

        } finally {
            // flush and close the output even when parsing failed, so that
            // the file handle is released and buffered SQL is not lost
            if (null != OUTFILE) {
                OUTFILE.flush();
                OUTFILE.close();
            }
        }
    }

    /**
     * Opens the intermediate SQL file as a buffered UTF-8 writer and emits
     * the leading <code>SET NAMES</code> statement.
     * 
     * @param outputFileName path of the file to create or overwrite
     * @return the writer, ready for SQL statements
     * @throws java.io.FileNotFoundException if the file cannot be opened for
     *             writing
     */
    private static PrintWriter openSqlOutput(String outputFileName)
            throws java.io.FileNotFoundException {

        LOGGER.config("Writing SQL output to file: " + outputFileName);

        // oh the joys of charsets -- never assume utf-8 is default
        // first, the file stream must be wrapped and told the encoder
        OutputStreamWriter osw = new OutputStreamWriter(
                new FileOutputStream(outputFileName),
                Charset.forName("utf-8") );

        PrintWriter writer = new PrintWriter( new BufferedWriter( osw ) );

        // TODO MySQL specific?
        writer.println("SET NAMES 'utf8';");
        return writer;
    }

    /**
     * Configures a JAXP SAX parser, instantiates the concrete handler and
     * parses the input file named on the command line.
     * 
     * @param parserClass concrete parser to instantiate
     * @param dtdValidate whether DTD validation is requested
     * @param xsdValidate whether W3C XML Schema validation is requested
     * @param schemaSource schema file to validate against, or
     *            <code>null</code> for none
     * @throws Exception on parser configuration, instantiation or parse
     *             failure
     */
    private static void parseInputFile(Class<? extends GenericXMLParser> parserClass,
            boolean dtdValidate, boolean xsdValidate, String schemaSource) throws Exception {

        /* Create a JAXP SAXParserFactory and configure it */
        SAXParserFactory spf = SAXParserFactory.newInstance();

        /* Namespace awareness is explicitly switched off for this parser. */
        spf.setNamespaceAware(false);

        /* Validation part 1: set whether validation is on */
        spf.setValidating(dtdValidate || xsdValidate);

        /* Create a JAXP SAXParser */
        SAXParser saxParser = spf.newSAXParser();

        /* Validation part 2a: set the schema language if necessary */
        if (xsdValidate) {
            try {
                saxParser.setProperty(JAXP_SCHEMA_LANGUAGE, W3C_XML_SCHEMA);
            } catch (SAXNotRecognizedException ex) {
                /* This can happen if the parser does not support JAXP 1.2 */
                LOGGER.log(Level.SEVERE, 
                        "Error: JAXP SAXParser property not recognized: "
                        + JAXP_SCHEMA_LANGUAGE 
                        + "; Check to see if parser conforms to JAXP 1.2 spec.", ex);
                System.exit(1);
            }
        }

        /*
         * Validation part 2b: Set the schema source, if any. See the JAXP 1.2
         * maintenance update specification for more complex usages of this
         * feature.
         */
        if (schemaSource != null) {
            saxParser.setProperty(JAXP_SCHEMA_SOURCE, new File(schemaSource));
        }

        /*
         * Get the encapsulated SAX XMLReader. This must happen before the
         * parser is instantiated, because its constructor registers itself
         * with the reader through addChildHandler().
         */
        xmlReader = saxParser.getXMLReader();

        /* Get an instance of the parser for the specific class */
        GenericXMLParser parser = parserClass.getDeclaredConstructor().newInstance();

        /* Set the ContentHandler of the XMLReader */
        xmlReader.setContentHandler(parser);

        /* Set an ErrorHandler before parsing */
        xmlReader.setErrorHandler(new MyErrorHandler(System.err));

        LOGGER.info("Going to parse the File " + XML_INPUT_FILENAME);

        /* Tell the XMLReader to parse the XML document */
        xmlReader.parse(convertToFileURL(XML_INPUT_FILENAME));
    }

    /**
     * Error handler to report errors and warnings. Warnings are printed to
     * the supplied stream; errors and fatal errors abort the parse.
     */
    private static class MyErrorHandler implements ErrorHandler {
        /* Error handler output goes here */
        private final PrintStream out;

        MyErrorHandler(PrintStream out) {
            this.out = out;
        }

        /**
         * Returns a string describing parse exception details
         */
        private String getParseExceptionInfo(SAXParseException spe) {
            String systemId = spe.getSystemId();
            if (systemId == null) {
                systemId = "null";
            }
            String info = "URI=" + systemId + " Line=" + spe.getLineNumber()
                    + ": " + spe.getMessage();
            return info;
        }

        /**
         * The following methods are standard SAX ErrorHandler methods. See SAX
         * documentation for more info.
         */
        public void warning(SAXParseException spe) throws SAXException {
            out.println("Warning: " + getParseExceptionInfo(spe));
        }

        public void error(SAXParseException spe) throws SAXException {
            String message = "Error: " + getParseExceptionInfo(spe);
            // keep the original exception as the cause for diagnostics
            throw new SAXException(message, spe);
        }

        public void fatalError(SAXParseException spe) throws SAXException {
            String message = "Fatal Error: " + getParseExceptionInfo(spe);
            throw new SAXException(message, spe);
        }
    }

    /**
     * Stores the handler for the current node, it also sets the handler in the
     * XML reader. If no reader has been created yet, the handler is only
     * recorded on the stack.
     * 
     * @param childHandler handler that should receive subsequent SAX events
     */
    static public void addChildHandler(ContentHandler childHandler) {
        childHandlers.push(childHandler);
        if (xmlReader != null) {
            xmlReader.setContentHandler(childHandler);
        }
    }

    /**
     * Removes the current child handler from the stack and sets the parent
     * handler as the current handler. When the removed handler was the last
     * one there is no parent to restore, and the reader is left unchanged.
     * 
     * @throws java.util.EmptyStackException if no handler is registered
     */
    static public void removeChildHandler() {
        childHandlers.pop();
        if (!childHandlers.isEmpty() && xmlReader != null) {
            ContentHandler parentHandler = childHandlers.peek();
            xmlReader.setContentHandler(parentHandler);
        }
    }
}
