/*
 * Copyright (c) 2007-2011 by The Broad Institute, Inc. and the Massachusetts Institute of
 * Technology.  All Rights Reserved.
 *
 * This software is licensed under the terms of the GNU Lesser General Public License (LGPL),
 * Version 2.1 which is available at http://www.opensource.org/licenses/lgpl-2.1.php.
 *
 * THE SOFTWARE IS PROVIDED "AS IS." THE BROAD AND MIT MAKE NO REPRESENTATIONS OR
 * WARRANTIES OF ANY KIND CONCERNING THE SOFTWARE, EXPRESS OR IMPLIED, INCLUDING,
 * WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
 * PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER
 * OR NOT DISCOVERABLE.  IN NO EVENT SHALL THE BROAD OR MIT, OR THEIR RESPECTIVE
 * TRUSTEES, DIRECTORS, OFFICERS, EMPLOYEES, AND AFFILIATES BE LIABLE FOR ANY DAMAGES
 * OF ANY KIND, INCLUDING, WITHOUT LIMITATION, INCIDENTAL OR CONSEQUENTIAL DAMAGES,
 * ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER
 * THE BROAD OR MIT SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT
 * SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
 */

package org.broad.igv.util.sigma;

import jsc.independentsamples.MannWhitneyTest;
import net.sf.samtools.util.CloseableIterator;
import org.apache.commons.math.stat.StatUtils;
import org.broad.igv.feature.AbstractFeatureParser;
import org.broad.igv.feature.IGVFeature;
import org.broad.igv.feature.Locus;
import org.broad.igv.sam.Alignment;
import org.broad.igv.sam.PairedAlignment;
import org.broad.igv.sam.reader.AlignmentQueryReader;
import org.broad.igv.sam.reader.SamQueryReaderFactory;
import org.broad.igv.util.ParsingUtils;
import org.broad.igv.util.ResourceLocator;
import org.broad.igv.util.collections.DoubleArrayList;
import org.broad.igv.util.stats.Distribution;
import org.broad.tribble.Feature;
import org.broad.tribble.readers.AsciiLineReader;

import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

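/**
 * Stand-alone analysis that compares the insert sizes of read pairs whose template spans a repeat region
 * with the insert sizes of all proper pairs found near that region, pooled over a set of BAM files. The
 * two pooled samples are compared with a Mann-Whitney test and the result is written as tab-delimited
 * text and as a BED record.
 *
 * @author jrobinso
 * @date Feb 16, 2011
 */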
public class InsertSizeAnalysis {
    // Chromosome used for every alignment query and for the locus string written to the reports.
    static String chr = "chr1";

    // Two-sample BAM list (a subset of wgsBams). Not referenced elsewhere in this file.
    static String[] testBams = {
            "http://iwww.broadinstitute.org/igvdata/sigma/aggregation/G8552/259/v2/259.bam",
            "http://iwww.broadinstitute.org/igvdata/sigma/aggregation/G4202/265/v2/265.bam"};

    // BAM list passed to run(...) by both runCodingRepeats() and runKCCN3().
    // Presumably whole-genome sequencing samples, going by the name -- TODO confirm.
    static String[] wgsBams = {
            "http://iwww.broadinstitute.org/igvdata/sigma/aggregation/G8552/259/v2/259.bam",
            "http://iwww.broadinstitute.org/igvdata/sigma/aggregation/G4202/265/v2/265.bam",
            "http://iwww.broadinstitute.org/igvdata/sigma/aggregation/G4200/266/v4/266.bam",
            "http://iwww.broadinstitute.org/igvdata/sigma/aggregation/G4203/352/v6/352.bam",
            "http://iwww.broadinstitute.org/igvdata/sigma/aggregation/G4204/414/v5/414.bam",
            "http://iwww.broadinstitute.org/igvdata/sigma/aggregation/G8551/4/v1/4.bam",
            "http://iwww.broadinstitute.org/igvdata/sigma/aggregation/G4198/8/v3/8.bam",
            "http://iwww.broadinstitute.org/igvdata/sigma/aggregation/G4206/13/v4/13.bam",
            "http://iwww.broadinstitute.org/igvdata/sigma/aggregation/G4205/15/v3/15.bam",
            "http://iwww.broadinstitute.org/igvdata/sigma/aggregation/G8556/564/v2/564.bam",
            "http://iwww.broadinstitute.org/igvdata/sigma/aggregation/G8558/563/v3/563.bam"
    };

    // Exome BAM list. Not referenced elsewhere in this file.
    static String[] exomeBams = {
            "http://iwww.broadinstitute.org/igvdata/sigma/exome/259/v1/259.bam",
            "http://iwww.broadinstitute.org/igvdata/sigma/exome/265/v4/265.bam",
            "http://iwww.broadinstitute.org/igvdata/sigma/exome/266/v1/266.bam",
            "http://iwww.broadinstitute.org/igvdata/sigma/exome/352/v2/352.bam",
            "http://iwww.broadinstitute.org/igvdata/sigma/exome/375/v2/375.bam",
            "http://iwww.broadinstitute.org/igvdata/sigma/exome/414/v3/414.bam",
            "http://iwww.broadinstitute.org/igvdata/sigma/exome/15/v3/15.bam",
            "http://iwww.broadinstitute.org/igvdata/sigma/exome/8/v3/8.bam",
            "http://iwww.broadinstitute.org/igvdata/sigma/exome/564/v2/564.bam",
            "http://iwww.broadinstitute.org/igvdata/sigma/exome/701/v1/701.bam",
            "http://iwww.broadinstitute.org/igvdata/sigma/exome/4/v2/4.bam",
            "http://iwww.broadinstitute.org/igvdata/sigma/exome/13/v1/13.bam",
            "http://iwww.broadinstitute.org/igvdata/sigma/exome/563/v1/563.bam",
            "http://iwww.broadinstitute.org/igvdata/sigma/exome/566/v1/566.bam",
            "http://iwww.broadinstitute.org/igvdata/sigma/exome/469/v1/469.bam",
            "http://iwww.broadinstitute.org/igvdata/sigma/exome/479/v1/479.bam",
            "http://iwww.broadinstitute.org/igvdata/sigma/exome/491/v1/491.bam"
    };

    // Additional BAM list (one aggregation sample plus two bwa validation controls).
    // Not referenced elsewhere in this file; presumably normal/control samples -- TODO confirm.
    static String[] normalBams = {
            "http://iwww.broadinstitute.org/igvdata/sigma/aggregation/G4201/375/v2/375.bam",
            "http://iwww.broadinstitute.org/igvdata/sigma/bams/controls/bwa_validation/bwa_hg18_control1.bam",
            "http://iwww.broadinstitute.org/igvdata/sigma/bams/controls/bwa_validation/bwa_hg18_control2.bam"
    };


    /**
     * Program entry point. Runs the single hard-coded locus analysis ({@code runKCCN3()}).
     * The BED-driven variant, {@code runCodingRepeats()}, is available in this class but is not
     * invoked here.
     *
     * @param args command-line arguments; not used
     * @throws IOException if the analysis fails while reading alignments or writing its reports
     */
    public static void main(String[] args) throws IOException {
        // Only the single-locus analysis is enabled; call runCodingRepeats() here instead to
        // process every feature of the coding-repeats BED file.
        runKCCN3();
    }

    /**
     * Runs the insert-size analysis once per feature of the coding-repeats BED file, using the
     * {@code wgsBams} sample list. Each feature is analysed with a flanking window of 50,000 bases
     * on either side.
     * <p/>
     * Output is written to three files in the working directory: {@code codingregions_isize.xls}
     * (tab-delimited summary), {@code codingregions_isize.bed} (BED scores) and
     * {@code CodingRegionRepeats.sam} (alignments emitted by {@code run}). The writers are flushed
     * after every feature so partial results survive an aborted run, and all resources are closed
     * even if an exception is thrown.
     *
     * @throws IOException if the BED file cannot be read or an output file cannot be created
     */
    private static void runCodingRepeats() throws IOException {
        String bedFile = "http://iwww.broadinstitute.org/igvdata/sigma/annotations/CodingRepeats.bed";

        // Load all features up front and release the remote reader before the long-running analysis.
        List<Feature> features;
        AsciiLineReader reader = ParsingUtils.openAsciiReader(new ResourceLocator(bedFile));
        try {
            features = AbstractFeatureParser.getInstanceFor(bedFile).loadFeatures(reader);
        } finally {
            reader.close();
        }

        PrintWriter pw = null;
        PrintWriter bedWriter = null;
        PrintWriter samWriter = null;
        try {
            pw = new PrintWriter("codingregions_isize.xls");
            bedWriter = new PrintWriter("codingregions_isize.bed");
            samWriter = new PrintWriter("CodingRegionRepeats.sam");

            for (Feature f : features) {
                // NOTE(review): assumes the BED parser only yields IGVFeature instances -- confirm.
                IGVFeature feat = (IGVFeature) f;
                int repeatStart = feat.getStart();
                int repeatEnd = feat.getEnd();

                int flankLeft = repeatStart - 50000;
                int flankRight = repeatEnd + 50000;

                System.out.println(feat.getName());
                run(feat.getName(), repeatStart, repeatEnd, flankLeft, flankRight, wgsBams, pw, bedWriter, samWriter);

                pw.flush();
                samWriter.flush();
                bedWriter.flush();
            }
        } finally {
            // Any writer may still be null if an earlier constructor threw.
            if (pw != null) {
                pw.close();
            }
            if (samWriter != null) {
                samWriter.close();
            }
            if (bedWriter != null) {
                bedWriter.close();
            }
        }
    }

    /**
     * Runs the insert-size analysis for the single hard-coded repeat at chr1:154705256-154705348,
     * using the {@code wgsBams} sample list and a flanking window of 100,000 bases on either side.
     * <p/>
     * Output is written to {@code mef2d_wgs.xls} (tab-delimited summary), {@code mef2d.bed}
     * (BED score) and {@code mef2d_wgs.sam} (alignments emitted by {@code run}). All writers are
     * closed even if the analysis throws.
     * <p/>
     * NOTE(review): the method name and the repeat label say KCCN3 while the output files are named
     * mef2d -- confirm which locus is intended.
     *
     * @throws IOException if an output file cannot be created or the alignments cannot be read
     */
    private static void runKCCN3() throws IOException {

        int repeatStart = 154705256;
        int repeatEnd = 154705348;

        int flankLeft = repeatStart - 100000;
        int flankRight = repeatEnd + 100000;

        PrintWriter bedWriter = null;
        PrintWriter samWriter = null;
        PrintWriter pw = null;
        try {
            bedWriter = new PrintWriter("mef2d.bed");
            samWriter = new PrintWriter(new FileWriter("mef2d_wgs.sam"));
            pw = new PrintWriter(new FileWriter("mef2d_wgs.xls"));

            run("KCCN3 2", repeatStart, repeatEnd, flankLeft, flankRight, wgsBams, pw, bedWriter, samWriter);
        } finally {
            // Any writer may still be null if an earlier constructor threw.
            if (pw != null) {
                pw.close();
            }
            if (samWriter != null) {
                samWriter.close();
            }
            if (bedWriter != null) {
                bedWriter.close();
            }
        }
    }

    /**
     * Compares the insert sizes of read pairs whose template spans a repeat against the insert
     * sizes of all accepted pairs near the repeat, pooled over the given BAM files.
     * <p/>
     * For each BAM, accepted pairs are collected from the window {@code repeatStart - 10000} to
     * {@code repeatEnd + 10000} on {@code chr}. The absolute insert size of every pair is shifted by
     * that sample's median so that samples can be pooled. Every shifted value goes into the control
     * pool; values from pairs accepted by {@code isSpanningTemplate} also go into the spanning pool,
     * and both alignments of such pairs are written to {@code samWriter}.
     * <p/>
     * When more than two spanning pairs were found, the two pools are compared with a Mann-Whitney
     * test; otherwise (or if the test throws) the significance is reported as 1.0. One tab-delimited
     * line is written to {@code pw} (name, locus, spanning count, spanning median, significance) and
     * one to {@code bedWriter} (chr, start, end, -log10(significance), spanning median).
     *
     * @param repeatName  label written to the summary line
     * @param repeatStart start coordinate of the repeat
     * @param repeatEnd   end coordinate of the repeat
     * @param flankLeft   a spanning pair's left alignment must start after this position
     * @param flankRight  a spanning pair's right alignment must end before this position
     * @param bams        paths or URLs of the BAM files to pool
     * @param pw          receives the tab-delimited summary line
     * @param bedWriter   receives the BED line
     * @param samWriter   receives the string form of selected alignments
     * @throws IOException if a BAM file cannot be opened, queried or closed
     */
    private static void run(String repeatName, int repeatStart, int repeatEnd, int flankLeft, int flankRight,
                            String[] bams, PrintWriter pw, PrintWriter bedWriter, PrintWriter samWriter) throws IOException {

        DoubleArrayList pooledControlISizes = new DoubleArrayList(12000);
        DoubleArrayList pooledSpanningISizes = new DoubleArrayList(1000);

        // For each sample
        for (String bamfile : bams) {

            List<PairedAlignment> pairs = loadProperPairs(bamfile, repeatStart, repeatEnd, samWriter);
            if (pairs.isEmpty()) {
                System.out.println(bamfile + "  NO PAIRS");
                continue;
            }

            double[] isizes = new double[pairs.size()];
            for (int i = 0; i < isizes.length; i++) {
                isizes[i] = Math.abs(pairs.get(i).getInferredInsertSize());
            }
            int medianISize = (int) StatUtils.percentile(isizes, 50);

            // Shift each insert size by the sample median so that all samples are comparable.
            for (PairedAlignment pair : pairs) {
                int iSize = Math.abs(pair.getInferredInsertSize()) - medianISize;
                pooledControlISizes.add(iSize);

                Alignment leftAlignment = pair.getFirstAlignment();
                Alignment rightAlignment = pair.getSecondAlignment();

                if (isSpanningTemplate(leftAlignment, rightAlignment, repeatStart, repeatEnd, flankLeft, flankRight)) {
                    pooledSpanningISizes.add(iSize);

                    samWriter.println(leftAlignment.toString());
                    samWriter.println(rightAlignment.toString());
                }
            }
        }

        double[] controls = pooledControlISizes.toArray();
        double[] spanning = pooledSpanningISizes.toArray();

        double spanningMedian = StatUtils.percentile(spanning, 50);

        // Default to "not significant" when there are too few spanning pairs or the test fails.
        double significance = 1.0;
        if (spanning.length > 2) {
            try {
                significance = new MannWhitneyTest(controls, spanning).getSP();
            } catch (Exception e) {
                // Best effort: a failed test must not abort the remaining output.
                System.err.println("Mann-Whitney test failed for " + repeatName + "; reporting significance 1.0");
                e.printStackTrace();
            }
        }

        String locusString = chr + ":" + repeatStart + "-" + repeatEnd;
        printTabDelimited(pw, repeatName, locusString, String.valueOf(spanning.length), String.valueOf(spanningMedian),
                String.valueOf(significance));

        // Guard against log10(0), which would be -Infinity.
        double score = significance == 0 ? Double.MAX_VALUE : -Math.log10(significance);
        printTabDelimited(bedWriter, chr, String.valueOf(repeatStart), String.valueOf(repeatEnd), String.valueOf(score),
                String.valueOf(spanningMedian));
    }

    /**
     * Collects complete read pairs from one BAM in the window {@code repeatStart - 10000} to
     * {@code repeatEnd + 10000} on {@code chr}.
     * <p/>
     * An alignment is accepted when it is paired, its mate is mapped, it is flagged as a proper
     * pair, it is not a vendor-failed read and the absolute value of its inferred insert size is
     * below 2000. The absolute value matters because the insert size is signed: testing the raw
     * value would let every alignment with a negative insert size through, however long the template.
     * Accepted alignments are matched by read name; only pairs with both mates accepted are returned.
     * A rejected alignment that itself covers the whole repeat is written to {@code samWriter}.
     *
     * @return the matched pairs, in the order their second mate was encountered
     * @throws IOException if the BAM cannot be opened, queried or closed
     */
    private static List<PairedAlignment> loadProperPairs(String bamfile, int repeatStart, int repeatEnd,
                                                         PrintWriter samWriter) throws IOException {

        Map<String, PairedAlignment> pairCache = new HashMap<String, PairedAlignment>();
        List<PairedAlignment> pairs = new ArrayList<PairedAlignment>(15000);

        int qStart = repeatStart - 10000;
        int qEnd = repeatEnd + 10000;

        AlignmentQueryReader reader = SamQueryReaderFactory.getReader(bamfile, true);
        try {
            CloseableIterator<Alignment> iter = reader.query(chr, qStart, qEnd, false);
            try {
                while (iter.hasNext()) {
                    Alignment al = iter.next();
                    // NOTE(review): assumes getMate() is non-null whenever isPaired() is true -- confirm.
                    boolean accepted = al.isPaired() && al.getMate().isMapped() && al.isProperPair() &&
                            !al.isVendorFailedRead() && Math.abs(al.getInferredInsertSize()) < 2000;

                    if (accepted) {
                        // First mate seen: park it. Second mate seen: complete the pair.
                        PairedAlignment pair = pairCache.remove(al.getReadName());
                        if (pair == null) {
                            pairCache.put(al.getReadName(), new PairedAlignment(al));
                        } else {
                            pair.setSecondAlignment(al);
                            pairs.add(pair);
                        }
                    } else if (al.getStart() < repeatStart && al.getEnd() > repeatEnd) {
                        samWriter.println(al.toString());
                    }
                }
            } finally {
                iter.close();
            }
        } finally {
            reader.close();
        }
        return pairs;
    }

    /**
     * Tests whether a pair's template spans the repeat while neither alignment does: the left
     * alignment starts more than 20 bases before the repeat start and ends before the repeat end,
     * the right alignment ends more than 20 bases after the repeat end and starts after the repeat
     * start, and the pair lies strictly between {@code flankLeft} and {@code flankRight}.
     */
    private static boolean isSpanningTemplate(Alignment left, Alignment right, int repeatStart, int repeatEnd,
                                              int flankLeft, int flankRight) {
        return left.getStart() < (repeatStart - 20) && left.getEnd() < repeatEnd &&
                right.getEnd() > (repeatEnd + 20) && right.getStart() > repeatStart &&
                left.getStart() > flankLeft && right.getEnd() < flankRight;
    }


    /**
     * Writes the given values to {@code pw} as a single line, separated by tab characters and
     * followed by a line terminator. With no values, only the line terminator is written.
     *
     * @param pw   destination writer
     * @param args values to write, in order
     */
    static void printTabDelimited(PrintWriter pw, String... args) {
        StringBuilder line = new StringBuilder();
        String separator = "";
        for (String field : args) {
            line.append(separator).append(field);
            // Every field after the first is preceded by a tab.
            separator = "\t";
        }
        pw.println(line.toString());
    }
}

