// $Id$
/*
 * Copyright 2010 Institute for Systems Biology
 *                Seattle, Washington, USA.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package biotextEngine.xmlparsers.medline;

import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.TimeUnit;
import java.util.logging.Level;
import java.util.logging.Logger;

import biotextEngine.util.Scrivener;
import biotextEngine.xmlparsers.GenericXMLParser;
import biotextEngine.xmlparsers.NodeHandler;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;

/**
 * Parses a MEDLINE document; extends the generic XML parser.
 * 
 * @author Ariel Schwartz
 * @author Gaurav Bhalotia
 */
public class MedlineParser extends GenericXMLParser {
    private static final Logger LOGGER = Scrivener.getLogger(MedlineParser.class);

    /** Name of the element that opens a single citation record. */
    private static final String CITATION_ELEMENT = "MedlineCitation";

    /** Name of the element that must be current for a citation to be processed. */
    private static final String CITATION_SET_ELEMENT = "MedlineCitationSet";

    /**
     * Throttle flag for progress logging. The scheduler thread raises it
     * periodically; the parsing thread lowers it after emitting a progress
     * message. Declared volatile because it is written from two threads.
     */
    private static volatile boolean IS_TIME_TO_REPORT = true;

    /**
     * Single-threaded scheduler that raises {@link #IS_TIME_TO_REPORT}.
     * Its thread is a daemon so it never keeps the JVM alive on its own.
     */
    private static final ScheduledExecutorService SCHEDULER = Executors
            .newScheduledThreadPool(1, new ThreadFactory() {
                @Override
                public Thread newThread(Runnable r) {
                    Thread thread = new Thread(r);
                    thread.setDaemon(true);
                    return thread;
                }
            });

    static {
        // Raise the reporting flag immediately and then every five seconds;
        // the actual log line is written when the next citation is seen.
        SCHEDULER.scheduleAtFixedRate(new Runnable() {
                @Override
                public void run() {
                    IS_TIME_TO_REPORT = true;
                }
            }, 0, 5, TimeUnit.SECONDS);
    }

    /*
     * For patching errors. Not modified anywhere in this class; presumably
     * updated by collaborating classes -- TODO confirm against callers.
     */
    public static int eCount = 0;

    /* Number of citations seen by this parser instance, for status output */
    private int numCitations = 0;

    /**
     * Command-line entry point: delegates to
     * {@link GenericXMLParser#main(String[], Class)} with this class, then
     * stops the reporting scheduler.
     *
     * @param args command-line arguments, passed through unchanged
     * @throws Exception whatever the delegated parser run throws
     */
    public static void main(String[] args) throws Exception {
        try {
            GenericXMLParser.main(args, MedlineParser.class);
        } finally {
            // Stop the scheduler even when parsing fails, so the timer is
            // never left running behind a propagated exception.
            SCHEDULER.shutdownNow();
        }
        LOGGER.info("Parsing complete");
    }

    /**
     * Parser calls this for each element in a document.
     *
     * <p>Every element other than {@code MedlineCitation} (and any element
     * seen while no current element is set) becomes the current element and
     * is passed to {@link NodeHandler#pushElement}. A {@code MedlineCitation}
     * element encountered while the current element is
     * {@code MedlineCitationSet} creates a new {@link MedlineCitation} and
     * hands the element over to it. A {@code MedlineCitation} seen under any
     * other current element is ignored.
     *
     * @param namespaceURI namespace URI of the element
     * @param localName local name of the element
     * @param qName qualified name of the element
     * @param atts attributes attached to the element
     * @throws SAXException declared for the parser contract; failures while
     *         handling a citation are logged rather than rethrown
     */
    public void startElement(String namespaceURI, String localName,
            String qName, Attributes atts) throws SAXException {

        if (currentElement == null || !CITATION_ELEMENT.equals(qName)) {
            currentElement = qName;
            NodeHandler.pushElement(qName);
            return;
        }

        if (!currentElement.equals(CITATION_SET_ELEMENT)) {
            return;
        }

        try {
            MedlineCitation medlineCitation = new MedlineCitation(xmlFileName);
            this.numCitations++;

            if (IS_TIME_TO_REPORT) {
                IS_TIME_TO_REPORT = false;
                LOGGER.fine("Parsed " + numCitations + " citations");
            }

            // NOTE(review): presumably makes the citation object the active
            // handler for this element's content -- confirm in MedlineCitation.
            medlineCitation.setContentHandler(medlineCitation,
                    namespaceURI, localName, qName, atts);

        } catch (Exception e) {
            // Best effort: one bad citation must not abort the whole run, but
            // the failure goes through the logger instead of raw stderr.
            LOGGER.log(Level.SEVERE, "Failed to process citation #"
                    + numCitations + " in " + xmlFileName, e);
        }
    }
}
