/*
 * Decompiled with CFR 0.152.
 */
package org.halophiles.assembly.qc;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.RandomAccessFile;
import java.text.NumberFormat;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.StringTokenizer;
import java.util.Vector;
import org.halophiles.assembly.Contig;
import org.halophiles.assembly.ReadPair;
import org.halophiles.assembly.ReadSet;
import org.halophiles.assembly.qc.EMClusterer;
import org.halophiles.assembly.qc.ReadCluster;
import org.halophiles.assembly.qc.ScaffoldExporter;
import org.halophiles.assembly.qc.SpatialClusterer;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
public class MisassemblyBreaker {
    /**
     * Orders blocks (two-element arrays {@code {start, end}}) by ascending
     * end coordinate, i.e. by element 1.
     *
     * An explicit three-way comparison is used instead of subtracting the
     * two values: a subtraction can overflow for operands of opposite sign
     * and then reports the wrong ordering.
     */
    private static final Comparator<int[]> BLOCK_COMP = new Comparator<int[]>(){

        @Override
        public int compare(int[] arg0, int[] arg1) {
            if (arg0[1] < arg1[1]) {
                return -1;
            }
            return arg0[1] > arg1[1] ? 1 : 0;
        }
    };
    // Not referenced by name in the visible part of this class.
    // NOTE(review): the literals 100L and 21500 (= 100 * 215) in readSubsetByChunk
    // look like compiler-inlined uses of HANDFUL_SIZE and SAM_LINE_LEN - confirm.
    private static final int HANDFUL_SIZE = 100;
    private static final int SAM_LINE_LEN = 215;
    // Value that column 12 (index 11) of both SAM lines must equal for a pair
    // to be counted towards contig orientation in readSubsetByChunk.
    private static final String TAG_KEEP = "XT:A:U";
    // Not referenced by name in the visible part of this class.
    // NOTE(review): loadData compares per-contig counts against a literal 3 - possibly this constant.
    private static final int MIN_PTS = 3;
    // Upper bound on the number of read pairs collected by readSubsetByChunk.
    private static int N_ESTREADPAIRS = 100000;
    // Shared number formatter; created and configured in main, reconfigured in getLibraryStats.
    private static NumberFormat NF;
    // Set to true by setOrientation when more than 10% of the oriented pairs are inward / outward.
    private static boolean INWARD;
    private static boolean OUTWARD;
    // Counts of inward and outward pairs, recomputed by setOrientation.
    private static double NIN;
    private static double NOUT;
    // Read by setParameters. No assignment is visible in this part of the file;
    // NOTE(review): confirm it is initialised elsewhere (otherwise it is 0.0).
    private static double ALPHA;
    // Set by loadData: smallest non-zero per-window read count divided by the window length.
    private static double P;
    // The following four values are derived in setParameters / setMAXBLOCKLEN.
    public static int MAX_INTERPOINT_DIST;
    private static int MAX_INTERBLOCK_DIST;
    static int MEAN_BLOCK_LEN;
    static int MIN_BLOCK_LEN;
    static int MAX_BLOCK_LEN;
    // Clusterers (one per contig pair) retained by loadData; consumed by main.
    private static Collection<SpatialClusterer> matches;

    /**
     * Command-line entry point.
     *
     * <p>Arguments: {@code <sam_file> <contig_file> <output_file> [num_libs]}.
     * The optional fourth argument is the number of libraries passed to
     * {@code getLibraryStats}; it defaults to 1.
     *
     * <p>Steps performed:
     * <ol>
     *   <li>read the contigs and a sample of read pairs from the SAM file;</li>
     *   <li>derive one insert-size range per signal cluster: mean +/- nSd * sd
     *       when the mean exceeds 1000, otherwise [1, 2 * mean];</li>
     *   <li>load the SAM file into per-contig-pair clusterers and collect the
     *       blocks they report for each contig;</li>
     *   <li>re-read the contig FASTA file and export every sequence, cut
     *       between consecutive blocks when the contig has at least two.</li>
     * </ol>
     *
     * Any exception is printed and terminates the JVM with status -1.
     *
     * @param args command-line arguments as described above
     */
    public static void main(String[] args) {
        if (args.length != 4 && args.length != 3) {
            System.err.println("Usage: java -jar A5qc.jar <sam_file> <contig_file> <output_file> [num_libs]");
            System.exit(-1);
        }
        try {
            NF = NumberFormat.getInstance();
            NF.setMaximumFractionDigits(0);
            NF.setGroupingUsed(false);
            String samPath = args[0];
            int numLibs = args.length == 4 ? Integer.parseInt(args[3]) : 1;
            System.out.println("[a5_qc] Reading " + samPath);

            // --- Header and read-pair sample -------------------------------
            Map<String, Contig> contigs;
            Map<String, ReadPair> reads;
            long before;
            RandomAccessFile raf = new RandomAccessFile(new File(samPath), "r");
            try {
                contigs = MisassemblyBreaker.readContigs(raf);
                System.out.println("[a5_qc] Found " + contigs.size() + " contigs");
                System.out.println("[a5_qc] Reading in a subset of reads for insert size estimation.");
                before = System.currentTimeMillis();
                reads = MisassemblyBreaker.readSubsetByChunk(raf, contigs);
            } finally {
                // Release the file handle even when parsing fails.
                raf.close();
            }
            long after = System.currentTimeMillis();
            System.out.println("[a5_qc] Took " + (after - before) / 1000L + " seconds to read in " + reads.size() + " read pairs.");
            if (reads.size() <= 0) {
                System.err.println("[a5_qc] No paired reads found. Cannot generate match files for running FISH misassembly detection.");
                System.exit(-1);
            }
            MisassemblyBreaker.setOrientation(reads.values());
            if (INWARD && !OUTWARD) {
                System.out.println("[a5_qc] Found a substantial amount of innies, but found no outties.");
            } else if (!INWARD && OUTWARD) {
                System.out.println("[a5_qc] Found a substantial amount of outties, but found no innies.");
            } else {
                System.out.println("[a5_qc] Found both innies and outties.");
            }

            // --- Insert-size ranges per library ----------------------------
            // clusterStats rows: [0]=mean, [1]=sd, [3]=number of sds (see getLibraryStats).
            double[][] clusterStats = MisassemblyBreaker.getLibraryStats(reads, numLibs);
            double[][] ranges = new double[clusterStats.length][2];
            for (int i = 0; i < clusterStats.length; ++i) {
                double mean = clusterStats[i][0];
                if (mean > 1000.0) {
                    double halfWidth = clusterStats[i][3] * clusterStats[i][1];
                    ranges[i][0] = mean - halfWidth;
                    ranges[i][1] = mean + halfWidth;
                } else {
                    ranges[i][0] = 1.0;
                    ranges[i][1] = mean * 2.0;
                }
            }
            MisassemblyBreaker.setMAXBLOCKLEN(clusterStats);
            MisassemblyBreaker.loadData(samPath, contigs, ranges);
            MisassemblyBreaker.setParameters(clusterStats);
            MisassemblyBreaker.printParams();

            // --- Collect blocks per contig ---------------------------------
            HashMap<String, Vector<int[]>> blocks = new HashMap<String, Vector<int[]>>();
            for (SpatialClusterer pc : matches) {
                Vector<int[]> xBlocks = new Vector<int[]>();
                Vector<int[]> yBlocks = new Vector<int[]>();
                pc.buildReadPairClusters();
                MisassemblyBreaker.addBlocks(pc, xBlocks, yBlocks);
                MisassemblyBreaker.removeTerminalBlocks(pc.getContig1(), xBlocks);
                MisassemblyBreaker.removeTerminalBlocks(pc.getContig2(), yBlocks);
                // Contig 1 is registered first so that, when both sides are the
                // same contig, the y blocks are appended to the x blocks.
                Vector<int[]> known = blocks.get(pc.getContig1().name);
                if (known != null) {
                    known.addAll(xBlocks);
                } else {
                    blocks.put(pc.getContig1().name, xBlocks);
                }
                known = blocks.get(pc.getContig2().name);
                if (known != null) {
                    known.addAll(yBlocks);
                } else {
                    blocks.put(pc.getContig2().name, yBlocks);
                }
            }
            for (Vector<int[]> ctgBlocks : blocks.values()) {
                MisassemblyBreaker.removeRepeats(ctgBlocks);
            }
            Vector<String> toRm = new Vector<String>();
            for (Map.Entry<String, Vector<int[]>> entry : blocks.entrySet()) {
                String tmpCtg = entry.getKey();
                Vector<int[]> tmpBlocks = entry.getValue();
                System.out.println("[a5_qc] Found " + tmpBlocks.size() + " blocks on contig " + contigs.get(tmpCtg).getId());
                if (tmpBlocks.isEmpty()) {
                    toRm.add(tmpCtg);
                } else {
                    Collections.sort(tmpBlocks, BLOCK_COMP);
                }
                for (int[] tmp : tmpBlocks) {
                    System.out.println("        " + tmp[0] + " - " + tmp[1]);
                }
            }
            MisassemblyBreaker.removeKeys(blocks, toRm);
            if (blocks.isEmpty()) {
                System.out.println("[a5_qc] No blocks were found. Not breaking scaffolds.");
                System.exit(0);
            }

            // --- Export (possibly broken) scaffolds ------------------------
            File brokenScafFile = new File(args[2]);
            brokenScafFile.createNewFile();
            // NOTE(review): the exporter is never closed/flushed here; its API is
            // not visible in this file - confirm that export() writes through.
            ScaffoldExporter out = new ScaffoldExporter(brokenScafFile);
            BufferedReader br = new BufferedReader(new FileReader(new File(args[1])));
            try {
                // Consume the first character of the file (the '>' of the first header).
                br.read();
                while (br.ready()) {
                    // The rest of the header line is used as the contig key.
                    String tmpCtg = br.readLine();
                    StringBuilder sb = new StringBuilder();
                    // Accumulate nucleotide characters up to the next '>' or end of input.
                    char c = (char)br.read();
                    while (c != '>') {
                        if (MisassemblyBreaker.isNuc(c)) {
                            sb.append(c);
                        }
                        if (!br.ready()) break;
                        c = (char)br.read();
                    }
                    Vector<int[]> tmpBlks = blocks.get(tmpCtg);
                    if (tmpBlks == null || tmpBlks.size() < 2) {
                        // Fewer than two blocks: export the sequence unbroken.
                        out.export(tmpCtg, sb);
                        continue;
                    }
                    int[][] tmpAr = tmpBlks.toArray(new int[tmpBlks.size()][]);
                    Arrays.sort(tmpAr, BLOCK_COMP);
                    int left = 1;
                    int right;
                    for (int i = 1; i < tmpAr.length; ++i) {
                        int prevEnd = tmpAr[i - 1][1];
                        int currStart = tmpAr[i][0];
                        if (prevEnd > currStart) {
                            // Overlapping blocks: cut at the midpoint of the overlap.
                            right = (prevEnd + currStart) / 2;
                            System.out.println("[a5_qc] Exporting " + tmpCtg + " at " + left + "-" + right);
                            out.export(tmpCtg, sb, left, right);
                            left = right + 1;
                            continue;
                        }
                        // Blocks too far apart are not treated as a break point.
                        if (currStart - prevEnd >= MAX_INTERBLOCK_DIST) continue;
                        right = prevEnd;
                        System.out.println("[a5_qc] Exporting " + tmpCtg + " at " + left + "-" + right);
                        out.export(tmpCtg, sb, left, right);
                        left = currStart;
                    }
                    // Remainder of the sequence after the last cut.
                    right = sb.length();
                    System.out.println("[a5_qc] Exporting " + tmpCtg + " at " + left + "-" + right);
                    out.export(tmpCtg, sb, left, right);
                }
            } finally {
                br.close();
            }
        }
        catch (Exception e) {
            e.printStackTrace();
            System.exit(-1);
        }
    }

    /**
     * Appends the extents of every acceptable read cluster of {@code sc} to
     * the two output lists.
     *
     * A cluster is accepted when both its x extent ({@code xMax - xMin}) and
     * its y extent ({@code yMax - yMin}) lie within
     * {@code [MIN_BLOCK_LEN, MAX_BLOCK_LEN]}. For each accepted cluster
     * {@code {xMin, xMax}} is added to {@code xBlocks} and the y interval to
     * {@code yBlocks}; when {@code yMin} is negative the y interval is stored
     * as {@code {|yMax|, |yMin|}}.
     *
     * The per-cluster density values and the empty {@code print("")} calls of
     * the previous version had no observable effect and were removed.
     *
     * @param sc      clusterer whose read clusters are inspected
     * @param xBlocks receives one {@code {start, end}} entry per accepted cluster
     * @param yBlocks receives the matching y interval, same order as {@code xBlocks}
     */
    private static void addBlocks(SpatialClusterer sc, Vector<int[]> xBlocks, Vector<int[]> yBlocks) {
        for (ReadCluster clump : sc.getReadClusters()) {
            int xlen = clump.xMax - clump.xMin;
            int ylen = clump.yMax - clump.yMin;
            boolean xOk = xlen >= MIN_BLOCK_LEN && xlen <= MAX_BLOCK_LEN;
            boolean yOk = ylen >= MIN_BLOCK_LEN && ylen <= MAX_BLOCK_LEN;
            if (!xOk || !yOk) {
                continue;
            }
            int[] y;
            if (clump.yMin < 0) {
                y = new int[]{Math.abs(clump.yMax), Math.abs(clump.yMin)};
            } else {
                y = new int[]{clump.yMin, clump.yMax};
            }
            xBlocks.add(new int[]{clump.xMin, clump.xMax});
            yBlocks.add(y);
        }
    }

    /**
     * Drops, in place, every block that ends before {@code MAX_BLOCK_LEN} or
     * that starts less than {@code MAX_BLOCK_LEN} before {@code contig.len}.
     *
     * @param contig contig the blocks belong to; only its {@code len} is read
     * @param blocks {@code {start, end}} blocks; filtered in place, order kept
     */
    private static void removeTerminalBlocks(Contig contig, Vector<int[]> blocks) {
        Iterator<int[]> cursor = blocks.iterator();
        while (cursor.hasNext()) {
            int[] block = cursor.next();
            if (block[1] < MAX_BLOCK_LEN || contig.len - block[0] < MAX_BLOCK_LEN) {
                cursor.remove();
            }
        }
    }

    /**
     * Sorts {@code blocks} by end coordinate and removes every pair of
     * neighbouring blocks whose overlap exceeds half of their combined span
     * (intersection / union &gt; 0.5). Both members of such a pair are removed.
     *
     * The intersection and union are computed from the true interval bounds
     * (max of starts / min of ends, and min of starts / max of ends). The
     * previous version assumed the second block always starts after the first;
     * when the first block was nested inside the second this overstated the
     * intersection and understated the union, so the ratio could exceed 1.
     *
     * @param blocks {@code {start, end}} blocks of one contig; sorted and
     *               filtered in place
     */
    private static void removeRepeats(Vector<int[]> blocks) {
        Collections.sort(blocks, BLOCK_COMP);
        int i = 0;
        while (i < blocks.size() - 1) {
            int[] block1 = blocks.get(i);
            int[] block2 = blocks.get(i + 1);
            boolean overlapping = block1[0] < block2[1] && block2[0] < block1[1];
            if (overlapping) {
                double intersection = Math.min(block1[1], block2[1]) - Math.max(block1[0], block2[0]);
                double union = Math.max(block1[1], block2[1]) - Math.min(block1[0], block2[0]);
                if (intersection / union > 0.5) {
                    // Remove both blocks; the element now at i is examined next.
                    blocks.remove(i);
                    blocks.remove(i);
                    continue;
                }
            }
            ++i;
        }
    }

    /**
     * Tells whether {@code c} is one of the accepted nucleotide symbols:
     * {@code a c g t n} in lower or upper case.
     *
     * @param c character to classify
     * @return {@code true} for an accepted symbol, {@code false} otherwise
     */
    private static boolean isNuc(char c) {
        return "acgtnACGTN".indexOf(c) >= 0;
    }

    /**
     * Reads the whole SAM file and builds one {@code SpatialClusterer} per
     * contig pair from the read pairs that pass the filters below. The result
     * is stored in the static {@code matches} collection.
     *
     * <p>Processing, per pair of consecutive SAM lines with the same
     * (trimmed) read name:
     * <ul>
     *   <li>pairs with a position of 0 on either read are skipped;</li>
     *   <li>both read positions are counted in fixed-size windows over the
     *       concatenated contigs (offsets taken from the {@code @SQ} header);</li>
     *   <li>pairs where either read carries {@code XT:A:R} in column 12 are skipped;</li>
     *   <li>pairs on the same contig are skipped when their insert size falls
     *       inside one of {@code ranges};</li>
     *   <li>remaining pairs are added to the clusterer of their contig pair,
     *       with the lexicographically smaller contig name (or, on a single
     *       contig, the smaller position) first.</li>
     * </ul>
     *
     * <p>Side effects: sets {@code ReadCluster.RDLEN} to the longest CIGAR
     * length seen, sets {@code P} to the smallest non-zero window count
     * divided by the window length, removes from {@code ctgs} every contig
     * with a count below 3, and drops the clusterers that involve such a contig.
     *
     * <p>Compared with the previous version this one closes the input stream,
     * stops cleanly on a trailing unpaired line instead of failing or treating
     * two unrelated reads as a pair, does not reuse a contig name from an
     * earlier {@code @SQ} line, and reports 0% rather than NaN when no pair
     * was read.
     *
     * @param samPath path of the SAM file
     * @param ctgs    contigs by name; entries may be removed (see above)
     * @param ranges  insert-size ranges, one {@code {min, max}} row per library
     * @throws IOException if the SAM file cannot be read
     */
    public static void loadData(String samPath, Map<String, Contig> ctgs, double[][] ranges) throws IOException {
        for (int i = 0; i < ranges.length; ++i) {
            System.out.println("[a5_qc] Filtering read pairs with inserts between " + NF.format(ranges[i][0]) + "-" + NF.format(ranges[i][1]));
        }
        HashMap<String, SpatialClusterer> clusterers = new HashMap<String, SpatialClusterer>();
        // contig name -> keys (in clusterers) of every clusterer that received a pair touching it
        HashMap<String, Vector<String>> ctgClusterers = new HashMap<String, Vector<String>>();
        HashMap<String, Integer> counts = new HashMap<String, Integer>();
        // contig name -> start of the contig in the concatenated coordinate system
        HashMap<String, Integer> coordOffset = new HashMap<String, Integer>();
        int windowLen;
        int[][] readCounts;
        int rdLen = 0;
        int numKeep = 0;
        int total = 0;
        FileInputStream fis = new FileInputStream(new File(samPath));
        try {
            long start = fis.getChannel().position();
            long len = fis.getChannel().size() - start;
            BufferedReader br = new BufferedReader(new InputStreamReader(fis));

            // --- Header: cumulative contig offsets and total length --------
            int genomeLen = 0;
            while (MisassemblyBreaker.nextCharIs(br, '@')) {
                String[] hdr = br.readLine().split("\t");
                int offset = genomeLen;
                if (!hdr[0].equals("@SQ")) continue;
                String contigName = null;
                for (int i = 1; i < hdr.length; ++i) {
                    if (hdr[i].startsWith("LN:")) {
                        genomeLen += Integer.parseInt(hdr[i].substring(3));
                    } else if (hdr[i].startsWith("SN:")) {
                        contigName = hdr[i].substring(3);
                    }
                }
                if (contigName != null) {
                    coordOffset.put(contigName, offset);
                }
            }

            // --- Windows: row 0 holds upper bounds, row 1 the read counts --
            windowLen = Math.max(1000, MEAN_BLOCK_LEN);
            int numWindow = genomeLen / windowLen;
            if (genomeLen % windowLen != 0) {
                readCounts = new int[2][numWindow + 1];
                readCounts[0][numWindow] = genomeLen;
            } else {
                readCounts = new int[2][numWindow];
            }
            for (int i = 0; i < numWindow; ++i) {
                readCounts[0][i] = windowLen * (i + 1);
            }

            // --- Alignment records -----------------------------------------
            System.out.print("[a5_qc] Reading SAM file...");
            double ten = 1.0;
            long before = System.currentTimeMillis();
            while (br.ready()) {
                long currPos = fis.getChannel().position() - start;
                if ((double)currPos / (double)len * 10.0 > ten) {
                    System.out.print(".." + NF.format(10.0 * ten) + "%");
                    ten += 1.0;
                }
                String raw1 = br.readLine();
                String raw2 = br.readLine();
                if (raw1 == null || raw2 == null) {
                    // A single trailing line cannot form a pair.
                    break;
                }
                String[] line1 = raw1.split("\t");
                String[] line2 = raw2.split("\t");
                line1[0] = MisassemblyBreaker.trimPairNumber(line1[0]);
                line2[0] = MisassemblyBreaker.trimPairNumber(line2[0]);
                // Slide forward one line at a time until two consecutive lines share a name.
                while (!line1[0].equals(line2[0]) && br.ready()) {
                    String raw = br.readLine();
                    if (raw == null) break;
                    line1 = line2;
                    line2 = raw.split("\t");
                    line2[0] = MisassemblyBreaker.trimPairNumber(line2[0]);
                }
                if (!line1[0].equals(line2[0])) {
                    // Input exhausted without finding a mate.
                    break;
                }
                ++total;
                int left1 = Integer.parseInt(line1[3]);
                int left2 = Integer.parseInt(line2[3]);
                if (left1 == 0 || left2 == 0) continue;
                Contig ctg1 = ctgs.get(line1[2]);
                Contig ctg2 = ctgs.get(line2[2]);
                int len1 = MisassemblyBreaker.cigarLength(line1[5]);
                int len2 = MisassemblyBreaker.cigarLength(line2[5]);
                rdLen = Math.max(rdLen, Math.max(len1, len2));

                // Count both reads in the window containing their position;
                // a negative binarySearch result encodes the insertion point.
                int index = Arrays.binarySearch(readCounts[0], coordOffset.get(ctg1.name) + left1);
                if (index < 0) {
                    index = -(index + 1);
                }
                readCounts[1][index]++;
                index = Arrays.binarySearch(readCounts[0], coordOffset.get(ctg2.name) + left2);
                if (index < 0) {
                    index = -(index + 1);
                }
                readCounts[1][index]++;

                int ctgNameComp = line1[2].compareTo(line2[2]);
                if (line1.length >= 12 && line2.length >= 12 && (line1[11].equals("XT:A:R") || line2[11].equals("XT:A:R"))) continue;
                boolean sameContig = ctgNameComp == 0;
                if (sameContig) {
                    int ins = left2 > left1 ? left2 + len2 - left1 : left1 + len1 - left2;
                    // Pairs with an expected insert size are not evidence of misassembly.
                    if (MisassemblyBreaker.inRange(ranges, ins)) continue;
                }

                // Decide which read supplies the first (x) coordinate.
                boolean firstLeads = sameContig ? left1 <= left2 : ctgNameComp < 0;
                String nameA = firstLeads ? line1[2] : line2[2];
                String nameB = firstLeads ? line2[2] : line1[2];
                Contig ctgA = firstLeads ? ctg1 : ctg2;
                Contig ctgB = firstLeads ? ctg2 : ctg1;
                int posA = firstLeads ? left1 : left2;
                int posB = firstLeads ? left2 : left1;
                String ctgStr = nameA + "-" + nameB;
                SpatialClusterer pc = clusterers.get(ctgStr);
                if (pc == null) {
                    pc = new SpatialClusterer(ctgA, ctgB);
                    clusterers.put(ctgStr, pc);
                }
                pc.addMatch(posA, posB);

                Integer seen = counts.get(ctg1.name);
                counts.put(ctg1.name, seen == null ? 2 : seen + 2);
                if (!sameContig) {
                    seen = counts.get(ctg2.name);
                    counts.put(ctg2.name, seen == null ? 2 : seen + 2);
                }

                Vector<String> keys = ctgClusterers.get(ctg1.name);
                if (keys == null) {
                    keys = new Vector<String>();
                    ctgClusterers.put(ctg1.name, keys);
                }
                keys.add(ctgStr);
                keys = ctgClusterers.get(ctg2.name);
                if (keys == null) {
                    keys = new Vector<String>();
                    ctgClusterers.put(ctg2.name, keys);
                }
                keys.add(ctgStr);
                ++numKeep;
            }
            long after = System.currentTimeMillis();
            System.out.println("..100%... done!... Took " + (after - before) / 1000L + " seconds.");
        } finally {
            // Closing the stream also releases the channel and the reader's source.
            fis.close();
        }
        double perc = total == 0 ? 0.0 : (double)numKeep / (double)total * 100.0;
        System.out.println("[a5_qc] Keeping " + NF.format(perc) + "% (" + numKeep + "/" + total + ") of reads.");
        ReadCluster.RDLEN = rdLen;

        // P = smallest non-zero window count, per base of window.
        P = Double.POSITIVE_INFINITY;
        for (int i = 0; i < readCounts[1].length; ++i) {
            if (readCounts[1][i] != 0 && P > (double)readCounts[1][i]) {
                P = readCounts[1][i];
            }
        }
        P /= (double)windowLen;

        // Discard contigs with fewer than 3 counted reads and every clusterer touching them.
        HashSet<String> ctgToRm = new HashSet<String>();
        HashSet<String> psPairsToRm = new HashSet<String>();
        for (Map.Entry<String, Vector<String>> entry : ctgClusterers.entrySet()) {
            if (counts.get(entry.getKey()) >= 3) continue;
            ctgToRm.add(entry.getKey());
            psPairsToRm.addAll(entry.getValue());
        }
        MisassemblyBreaker.removeKeys(ctgs, ctgToRm);
        MisassemblyBreaker.removeKeys(clusterers, psPairsToRm);
        matches = clusterers.values();
    }

    /**
     * Derives the clustering parameters from {@code ALPHA}, {@code P},
     * {@code MEAN_BLOCK_LEN} and {@code ReadCluster.RDLEN}:
     * <ul>
     *   <li>{@code MAX_INTERPOINT_DIST} = max(RDLEN, (int)(ln ALPHA / ln max(1 - P, 0)) - 1)</li>
     *   <li>{@code MIN_BLOCK_LEN} = 2 * MAX_INTERPOINT_DIST</li>
     *   <li>{@code MAX_INTERBLOCK_DIST} = 2 * MEAN_BLOCK_LEN</li>
     *   <li>{@code SpatialClusterer.MIN_PTS} = (int)(P * MIN_BLOCK_LEN)</li>
     *   <li>{@code SpatialClusterer.EPS} = MAX_INTERPOINT_DIST</li>
     * </ul>
     *
     * The previous version also scanned {@code ranges} for a maximum that was
     * never used and computed an alternative {@code MAX_INTERBLOCK_DIST},
     * {@code 2 * ((1 - ALPHA)^(1 / (P * MEAN_BLOCK_LEN)) * MEAN_BLOCK_LEN - 1)},
     * which was overwritten on the very next line; both were removed as dead code.
     *
     * @param ranges currently unused; kept so existing callers compile
     */
    private static void setParameters(double[][] ranges) {
        MAX_INTERPOINT_DIST = Math.max(ReadCluster.RDLEN, (int)(Math.log(ALPHA) / Math.log(Math.max(1.0 - P, 0.0))) - 1);
        MIN_BLOCK_LEN = 2 * MAX_INTERPOINT_DIST;
        MAX_INTERBLOCK_DIST = 2 * MEAN_BLOCK_LEN;
        SpatialClusterer.MIN_PTS = (int)(P * (double)MIN_BLOCK_LEN);
        SpatialClusterer.EPS = MAX_INTERPOINT_DIST;
    }

    /**
     * Raises {@code MEAN_BLOCK_LEN} to the largest cluster mean found in
     * {@code clusterStats} and sets {@code MAX_BLOCK_LEN} for that cluster to
     * {@code mean + stats[1] * stats[3]}. Rows whose mean does not exceed the
     * current {@code MEAN_BLOCK_LEN} leave both values untouched.
     *
     * @param clusterStats one row per cluster; columns 0, 1 and 3 are read
     */
    private static void setMAXBLOCKLEN(double[][] clusterStats) {
        for (int row = 0; row < clusterStats.length; ++row) {
            double mean = clusterStats[row][0];
            if (mean > (double)MEAN_BLOCK_LEN) {
                MEAN_BLOCK_LEN = (int)mean;
                MAX_BLOCK_LEN = (int)(mean + clusterStats[row][1] * clusterStats[row][3]);
            }
        }
    }

    /**
     * Writes the current parameter values to standard output, one
     * {@code label = value} line per parameter after a heading line.
     */
    private static void printParams() {
        String[] labels = new String[]{
            "        P                   = ",
            "        MIN_BLOCK_LEN       = ",
            "        MEAN_BLOCK_LEN      = ",
            "        MAX_BLOCK_LEN       = ",
            "        MAX_INTERBLOCK_DIST = ",
            "        MAX_INTERPOINT_DIST = ",
            "        EPSILON             = ",
            "        MIN_POINTS          = "
        };
        Object[] values = new Object[]{
            P,
            MIN_BLOCK_LEN,
            MEAN_BLOCK_LEN,
            MAX_BLOCK_LEN,
            MAX_INTERBLOCK_DIST,
            MAX_INTERPOINT_DIST,
            SpatialClusterer.EPS,
            SpatialClusterer.MIN_PTS
        };
        System.out.println("[a5_qc] parameters:");
        for (int row = 0; row < labels.length; ++row) {
            System.out.println(labels[row] + values[row]);
        }
    }

    /**
     * Counts inward and outward read pairs into {@code NIN} / {@code NOUT}
     * (a pair flagged inward is never also counted as outward) and switches
     * {@code INWARD} / {@code OUTWARD} on when the respective share of the
     * counted pairs exceeds 10%. The flags are only ever set, never cleared.
     *
     * @param reads read pairs to tally
     */
    private static void setOrientation(Collection<ReadPair> reads) {
        NIN = 0.0;
        NOUT = 0.0;
        for (ReadPair pair : reads) {
            if (pair.inward) {
                NIN += 1.0;
            } else if (pair.outward) {
                NOUT += 1.0;
            }
        }
        double oriented = NIN + NOUT;
        boolean manyInward = NIN / oriented > 0.1;
        boolean manyOutward = NOUT / oriented > 0.1;
        if (manyInward) {
            INWARD = true;
        }
        if (manyOutward) {
            OUTWARD = true;
        }
    }

    /**
     * Tests whether {@code ins} lies inside at least one closed interval.
     *
     * @param ranges intervals as {@code {min, max}} rows, bounds inclusive
     * @param ins    value to test
     * @return {@code true} if some row satisfies {@code min <= ins <= max}
     */
    private static boolean inRange(double[][] ranges, double ins) {
        for (double[] range : ranges) {
            if (ins >= range[0] && ins <= range[1]) {
                return true;
            }
        }
        return false;
    }

    /**
     * Clusters the insert sizes of the sampled read pairs with EM and returns
     * statistics for the clusters classified as signal.
     *
     * <p>Only pairs flagged {@code paired} whose two reads lie on the same
     * contig are clustered. Models with K = 2, 3, ... (at most 21) clusters
     * are fitted; fitting stops early after three models score below the
     * first one, and the model with the highest likelihood is kept. Its
     * clusters are ordered by sd / mean; a cluster is "signal" when its sd
     * does not exceed its mean and it is among the first {@code numLibs}.
     *
     * <p>Side effects: the reads of every signal cluster are removed from
     * {@code reads}, and {@code NF} is left with two fraction digits.
     *
     * <p>Fixes over the previous version: no division by zero when a cluster's
     * sd truncates to 0, a total ordering for clusters whose sd / mean is NaN,
     * and a reported cluster count that matches the clusters actually listed.
     *
     * @param reads   sampled read pairs by header; signal reads are removed
     * @param numLibs maximum number of clusters that may be labelled signal
     * @return one row per signal cluster:
     *         {@code {mean, sd, size, nSd, min insert, max insert}} where
     *         {@code nSd = min(mean / sd, 6)} in integer arithmetic and the
     *         min/max are running values over this and all earlier signal clusters
     */
    private static double[][] getLibraryStats(Map<String, ReadPair> reads, int numLibs) {
        final int maxK = 20;
        final double delta = 5.0E-8;
        final int maxNSd = 6;
        Vector<ReadPair> toFilt = new Vector<ReadPair>();
        for (ReadPair candidate : reads.values()) {
            if (candidate.paired && candidate.ctg1.equals(candidate.ctg2)) {
                toFilt.add(candidate);
            }
        }
        // Result is replaced below; the call is kept in case it has side effects
        // that are not visible from this file.
        double[] ins = ReadPair.estimateInsertSize(reads.values());

        // --- Model selection -------------------------------------------
        int bestModel = 0;
        EMClusterer[] models = new EMClusterer[maxK];
        models[0] = MisassemblyBreaker.runEM(toFilt, 2, delta);
        double maxL = models[0].likelihood();
        // NOTE(review): prevL is never updated, so "worse" means worse than the
        // K=2 model rather than worse than the previous step - confirm intent.
        double prevL = maxL;
        int numWorseSteps = 0;
        for (int i = 1; i < maxK && numWorseSteps < 3; ++i) {
            models[i] = MisassemblyBreaker.runEM(toFilt, i + 2, delta);
            double currL = models[i].likelihood();
            if (currL > maxL) {
                bestModel = i;
                maxL = currL;
            }
            if (prevL > currL) {
                ++numWorseSteps;
            }
        }
        ReadSet[] clusters = new ReadSet[models[bestModel].getClusters().size()];
        models[bestModel].getClusters().toArray(clusters);
        System.out.println("[a5_qc] Found " + clusters.length + " clusters.");

        // Tightest clusters (smallest coefficient of variation) first.
        Arrays.sort(clusters, new Comparator<ReadSet>(){

            @Override
            public int compare(ReadSet x, ReadSet y) {
                return Double.compare(x.sd() / x.mean(), y.sd() / y.mean());
            }
        });

        // --- Report and classify ---------------------------------------
        System.out.println("[a5_qc] Found the following clusters:");
        Vector<ReadSet> signal = new Vector<ReadSet>();
        for (int i = 0; i < clusters.length; ++i) {
            NF.setMaximumFractionDigits(0);
            System.out.print("[a5_qc] cluster" + NF.format(clusters[i].getId()) + ": mu=" + MisassemblyBreaker.pad(NF.format(clusters[i].mean()), 10) + "sd=" + MisassemblyBreaker.pad(NF.format(clusters[i].sd()), 10) + "n=" + MisassemblyBreaker.pad(NF.format(clusters[i].size()), 10));
            NF.setMaximumFractionDigits(2);
            double perc = 100.0 * (double)clusters[i].size() / (double)toFilt.size();
            System.out.print("perc=" + MisassemblyBreaker.pad(NF.format(perc), 10));
            if (clusters[i].sd() <= clusters[i].mean() && i < numLibs) {
                signal.add(clusters[i]);
                System.out.println("  (signal)");
            } else {
                System.out.println("  (noise)");
            }
        }

        // --- Statistics per signal cluster ------------------------------
        String rmClusters = "";
        double[][] ret = new double[signal.size()][];
        double min = Double.POSITIVE_INFINITY;
        double max = Double.NEGATIVE_INFINITY;
        int row = 0;
        for (ReadSet sigSet : signal) {
            rmClusters = rmClusters + " cluster" + sigSet.getId();
            for (ReadPair tmpRp : sigSet.getReads()) {
                if ((double)tmpRp.getInsert() < min) {
                    min = tmpRp.getInsert();
                }
                if ((double)tmpRp.getInsert() > max) {
                    max = tmpRp.getInsert();
                }
            }
            // An sd below 1 truncates to 0; the ratio is then unbounded, so use the cap.
            int sdInt = (int)sigSet.sd();
            int nSd = sdInt == 0 ? maxNSd : Math.min((int)sigSet.mean() / sdInt, maxNSd);
            ret[row] = new double[6];
            ret[row][0] = sigSet.mean();
            ret[row][1] = sigSet.sd();
            ret[row][2] = sigSet.size();
            ret[row][3] = nSd;
            ret[row][4] = min;
            ret[row][5] = max;
            MisassemblyBreaker.removeKeys(reads, sigSet.getReadHdrs());
            ++row;
        }
        if (rmClusters.length() > 0) {
            System.out.println("[a5_qc] Removing " + rmClusters);
        }
        ins = ReadPair.estimateInsertSize(reads.values());
        System.out.println("[a5_qc] Final stats for sample after filtering: mu=" + NF.format(ins[0]) + " sd=" + NF.format(ins[1]) + " n=" + NF.format(ins[2]));
        return ret;
    }

    /**
     * Fits an EM model with {@code K} clusters to the given read pairs,
     * running at most 1000 iterations with convergence threshold
     * {@code delta}, and logs the iteration count, likelihood and duration.
     *
     * @param toFilt read pairs to cluster
     * @param K      number of clusters
     * @param delta  threshold passed to {@code EMClusterer.iterate}
     * @return the fitted clusterer
     */
    private static EMClusterer runEM(Collection<ReadPair> toFilt, int K, double delta) {
        System.out.print("[a5_qc] EM-clustering insert sizes with K=" + K + "... ");
        EMClusterer model = new EMClusterer(toFilt, K);
        long startedAt = System.currentTimeMillis();
        int iterations = model.iterate(1000, delta);
        long elapsedSeconds = (System.currentTimeMillis() - startedAt) / 1000L;
        double likelihood = model.likelihood();
        StringBuilder report = new StringBuilder();
        report.append("stopping after ").append(iterations);
        report.append(" iterations with delta=").append(delta);
        report.append(". L = ").append(likelihood);
        report.append(". Took ").append(elapsedSeconds).append(" seconds.");
        System.out.println(report.toString());
        return model;
    }

    /**
     * Removes from {@code reads} every key listed in {@code torm}. Keys that
     * are not present in the map are ignored.
     *
     * @param reads map to modify in place
     * @param torm  keys to remove
     */
    private static <V> void removeKeys(Map<String, V> reads, Collection<String> torm) {
        for (String key : torm) {
            reads.remove(key);
        }
    }

    /**
     * Reads the SAM header from the current position of {@code raf} and
     * returns one {@code Contig} per {@code @SQ} line, keyed by its
     * {@code SN:} name. Reading stops at the first line that does not start
     * with {@code '@'}; the file pointer is left at the start of that line.
     *
     * <p>An {@code @SQ} line without {@code SN:} is reported and skipped. The
     * previous version reused the name of the preceding {@code @SQ} line in
     * that case (overwriting that contig) or stored the contig under a
     * {@code null} key. A line without {@code LN:} is reported and stored with
     * length -1, as before.
     *
     * @param raf SAM file positioned at the start of the header
     * @return contigs by name
     * @throws IOException if the file cannot be read
     */
    private static Map<String, Contig> readContigs(RandomAccessFile raf) throws IOException {
        HashMap<String, Contig> contigs = new HashMap<String, Contig>();
        while (MisassemblyBreaker.nextCharIs(raf, '@')) {
            String[] hdr = raf.readLine().split("\t");
            if (!hdr[0].equals("@SQ")) continue;
            // Both values are per line and must not leak into the next one.
            String name = null;
            int len = -1;
            for (String s : hdr) {
                if (s.startsWith("SN:")) {
                    name = s.substring(3);
                } else if (s.startsWith("LN:")) {
                    len = Integer.parseInt(s.substring(3));
                }
            }
            if (name == null) {
                System.err.println("[a5_qc] Found nameless contig in SAM header");
                continue;
            }
            if (len == -1) {
                System.err.println("[a5_qc] Found contig of unknown length in SAM header");
            }
            contigs.put(name, new Contig(name, len));
        }
        return contigs;
    }

    /**
     * Collects up to {@code N_ESTREADPAIRS} read pairs from the SAM file via
     * an {@code EfficientSAMFileSampler} and returns them keyed by the read
     * name of the first line of each pair.
     *
     * <p>Pairs with a position of 0 on either read are skipped and do not
     * count towards the limit. When both reads map to equal contigs, lie on
     * opposite strands and both carry {@code TAG_KEEP} in column 12, the pair
     * is also tallied on its contigs as inward or outward: inward when the
     * leftmost read is on the forward strand, outward otherwise.
     *
     * @param raf     SAM file positioned at the first alignment line
     * @param contigs contigs by name; their in/out tallies are updated
     * @return sampled read pairs by read name
     * @throws IOException if the file cannot be read
     */
    private static Map<String, ReadPair> readSubsetByChunk(RandomAccessFile raf, Map<String, Contig> contigs) throws IOException {
        long pos = raf.getFilePointer();
        long len = raf.length();
        long step = (len - pos) * 100L / (long)N_ESTREADPAIRS;
        HashMap<String, ReadPair> reads = new HashMap<String, ReadPair>();
        EfficientSAMFileSampler esfs = new EfficientSAMFileSampler(raf, 21500, step);
        int kept = 0;
        while (kept < N_ESTREADPAIRS && esfs.hasNextPair()) {
            String[][] pairLines = esfs.nextPair();
            String[] line1 = pairLines[0];
            String[] line2 = pairLines[1];
            int left1 = Integer.parseInt(line1[3]);
            int left2 = Integer.parseInt(line2[3]);
            if (left1 == 0 || left2 == 0) {
                continue;
            }
            ReadPair pair = new ReadPair(line1[0]);
            boolean rev1 = MisassemblyBreaker.isReverse(line1[1]);
            boolean rev2 = MisassemblyBreaker.isReverse(line2[1]);
            Contig ctg1 = contigs.get(line1[2]);
            Contig ctg2 = contigs.get(line2[2]);
            boolean countable = ctg1.equals(ctg2) && rev1 != rev2
                    && line1.length > 11 && line2.length > 11
                    && line1[11].equals(TAG_KEEP) && line2[11].equals(TAG_KEEP);
            if (countable) {
                // Inward exactly when the leftmost read is the forward-strand one.
                boolean inward = (left1 < left2) != rev1;
                if (inward) {
                    ctg1.addIn();
                    ctg2.addIn();
                } else {
                    ctg1.addOut();
                    ctg2.addOut();
                }
            }
            pair.addRead(left1, rev1, MisassemblyBreaker.cigarLength(line1[5]), ctg1, Integer.parseInt(line1[4]), line1[5]);
            pair.addRead(left2, rev2, MisassemblyBreaker.cigarLength(line2[5]), ctg2, Integer.parseInt(line2[4]), line2[5]);
            reads.put(line1[0], pair);
            ++kept;
        }
        return reads;
    }

    /**
     * Peeks at the next byte of {@code raf} without consuming it.
     *
     * <p>The file pointer is restored to where it was before the call. At end of
     * file nothing is read, the pointer is left untouched and {@code false} is
     * returned (previously the pointer was moved back one byte even though the
     * read had not advanced it).
     *
     * @param raf file to peek into
     * @param c   character to compare the next byte against
     * @return {@code true} if a next byte exists and equals {@code c}
     * @throws IOException if reading or seeking fails
     */
    private static boolean nextCharIs(RandomAccessFile raf, char c) throws IOException {
        long start = raf.getFilePointer();
        int next = raf.read();
        if (next < 0) {
            // read() does not advance at end of file, so there is nothing to undo.
            return false;
        }
        raf.seek(start);
        return (char)next == c;
    }

    /**
     * Peeks at the next character of {@code br} without consuming it.
     *
     * @param br reader to peek into; it is marked and reset around the read
     * @param c  character to compare against
     * @return {@code false} if the reader is not ready, otherwise whether the
     *         next character read equals {@code c}
     * @throws IOException if the reader fails while checking, reading or resetting
     */
    private static boolean nextCharIs(BufferedReader br, char c) throws IOException {
        if (br.ready()) {
            // Mark, read one character, then rewind so the caller still sees it.
            br.mark(1);
            char peeked = (char)br.read();
            boolean matches = peeked == c;
            br.reset();
            return matches;
        }
        return false;
    }

    /**
     * Right-pads {@code s} with spaces until it is {@code len} characters long.
     *
     * @param s   text to pad
     * @param len desired minimum length
     * @return {@code s} followed by enough spaces to reach {@code len}; a string
     *         equal to {@code s} when it is already at least that long
     */
    private static String pad(String s, int len) {
        // Build in one buffer instead of re-concatenating a new String per added space.
        StringBuilder padded = new StringBuilder(Math.max(len, s.length()));
        padded.append(s);
        for (int i = s.length(); i < len; ++i) {
            padded.append(' ');
        }
        return padded.toString();
    }

    /**
     * Sums the lengths of the aligned operations in a SAM CIGAR string.
     *
     * <p>{@code M} operations are counted, together with {@code =} and {@code X},
     * which the SAM format uses to split an alignment match into sequence matches
     * and mismatches. All other operations are parsed but contribute nothing.
     * The placeholder {@code *} (CIGAR unavailable) yields 0; previously both
     * {@code *} and the {@code =}/{@code X} operations failed with a
     * {@link NumberFormatException}.
     *
     * @param cig CIGAR string, e.g. {@code 5S20M3D10M}
     * @return total length of the {@code M}, {@code =} and {@code X} operations
     * @throws NumberFormatException if an operation length is not an integer
     */
    private static int cigarLength(String cig) {
        if ("*".equals(cig)) {
            return 0;
        }
        // Operation letters act as delimiters and are returned as tokens, so the
        // stream alternates length, operation, length, operation, ...
        StringTokenizer tok = new StringTokenizer(cig, "MIDNSHP=X", true);
        int alignLen = 0;
        while (tok.hasMoreTokens()) {
            int len = Integer.parseInt(tok.nextToken());
            char op = tok.nextToken().charAt(0);
            if (op == 'M' || op == '=' || op == 'X') {
                alignLen += len;
            }
        }
        return alignLen;
    }

    /**
     * Tells whether a SAM FLAG value has bit 4 (0x10, decimal 16) set.
     *
     * <p>The bit is tested directly with a mask. The earlier digit-by-digit
     * extraction reported {@code true} for any nonzero flag smaller than 16
     * (for example {@code "1"}), because it stopped at the highest set bit.
     *
     * @param flag decimal text of the FLAG column
     * @return {@code true} if bit 0x10 is set
     * @throws NumberFormatException if {@code flag} is not a decimal integer
     */
    private static boolean isReverse(String flag) {
        int iflag = Integer.parseInt(flag);
        return (iflag & 0x10) != 0;
    }

    /**
     * Returns the value of one bit of {@code flag}.
     *
     * <p>The previous loop divided {@code flag} by two until it reached zero or
     * the requested position, so for a flag whose highest set bit lies below
     * {@code bit} it returned that highest bit (always 1) instead of 0.
     *
     * @param bit  zero-based bit position, 0 being the least significant bit
     * @param flag value to inspect
     * @return 1 if the bit is set, otherwise 0; 0 for positions outside 0..31
     */
    private static int getBit(int bit, int flag) {
        if (bit < 0 || bit > 31) {
            // Java masks shift distances to five bits, so reject these explicitly.
            return 0;
        }
        return (flag >>> bit) & 1;
    }

    /**
     * Strips everything from the last {@code '/'} onward, e.g. {@code "read/1"}
     * becomes {@code "read"}.
     *
     * @param s text to trim
     * @return the part of {@code s} before its last slash, or {@code s} itself
     *         when it contains no slash
     */
    private static String trimPairNumber(String s) {
        int lastSlash = s.lastIndexOf("/");
        return lastSlash < 0 ? s : s.substring(0, lastSlash);
    }

    // Class-load defaults for the static fields that have no inline initializer
    // assigned here; the assignments are independent of one another.
    static {
        ALPHA = 1.0E-4;
        NIN = NOUT = 0.0;
        INWARD = OUTWARD = false;
    }

    /**
     * Hands out consecutive pairs of lines from a SAM file by reading
     * fixed-size buffers and, after every {@code PAIRS_PER_CHUNK} pairs, seeking
     * forward by {@code step} bytes before reading again.
     *
     * <p>Each buffer is split on newlines. The first line of a buffer is always
     * discarded (a buffer may start in the middle of a line) and the last line is
     * never counted as available (it may be cut off by the end of the buffer).
     *
     * <p>Changes relative to the earlier implementation:
     * <ul>
     *   <li>only the bytes actually read are tokenized, so a short read near the
     *       end of the file no longer turns stale or zero bytes into lines;</li>
     *   <li>the per-chunk counter is reset after each skip, so the skip is applied
     *       after every chunk rather than only after the first one;</li>
     *   <li>{@link #nextPair()} no longer fails merely because the file pointer is
     *       at the end of the file while complete lines are still buffered, and
     *       reports exhaustion with an {@link IOException} instead of a
     *       {@code NoSuchElementException}.</li>
     * </ul>
     */
    private static class EfficientSAMFileSampler {
        /** Number of pairs returned between two forward skips. */
        private static final int PAIRS_PER_CHUNK = 100;

        private final RandomAccessFile raf;
        private final byte[] buf;
        private StringTokenizer tok;
        /** Lines still available in the current buffer, not counting its last line. */
        private int tokLeft;
        /** Pairs returned since the last skip. */
        private int currChunkSize;
        private final long step;

        /**
         * @param file    file to sample, positioned where sampling should begin
         * @param bufSize number of bytes read per buffer
         * @param step    number of bytes to skip after each chunk of pairs
         * @throws IOException if the first buffer cannot be read
         */
        public EfficientSAMFileSampler(RandomAccessFile file, int bufSize, long step) throws IOException {
            this.raf = file;
            this.buf = new byte[bufSize];
            this.tokLeft = 0;
            this.currChunkSize = 0;
            this.step = step;
            this.resetTok();
        }

        /**
         * Returns the next two lines, each split on tabs.
         *
         * @return a two-element array holding the fields of the two lines
         * @throws IOException if the file cannot be read or fewer than two lines
         *                     remain to be read
         */
        public String[][] nextPair() throws IOException {
            if (this.currChunkSize == PAIRS_PER_CHUNK) {
                // Chunk complete: jump ahead and start a fresh chunk.
                this.raf.seek(this.raf.getFilePointer() + this.step);
                this.resetTok();
                this.currChunkSize = 0;
            } else if (this.tokLeft < 3) {
                this.resetTok();
            }
            if (this.tok.countTokens() < 2) {
                throw new IOException("Reached end of file.");
            }
            String[] line1 = this.tok.nextToken().split("\t");
            String[] line2 = this.tok.nextToken().split("\t");
            this.tokLeft -= 2;
            // NOTE(review): this only slides forward while the names differ AND both lines
            // have fewer than 12 fields; two full-length lines with different names are
            // returned as they are. Condition kept unchanged - confirm the intended rule.
            while (!line1[0].equals(line2[0]) && line1.length < 12 && line2.length < 12 && this.tok.hasMoreTokens()) {
                line1 = line2;
                line2 = this.tok.nextToken().split("\t");
                --this.tokLeft;
                if (this.tok.hasMoreTokens()) continue;
                this.resetTok();
            }
            ++this.currChunkSize;
            return new String[][]{line1, line2};
        }

        /**
         * Tells whether {@link #nextPair()} can be expected to succeed.
         *
         * @return {@code true} if at least three lines are buffered or a full
         *         buffer can still be read; when the next call will skip ahead
         *         first, {@code true} only if a full buffer remains after the skip
         * @throws IOException if the file length or position cannot be queried
         */
        public boolean hasNextPair() throws IOException {
            long bytesLeft = this.raf.length() - this.raf.getFilePointer();
            if (this.currChunkSize == PAIRS_PER_CHUNK) {
                // The buffered lines will be dropped by the upcoming skip, so they do not count.
                return bytesLeft - this.step >= (long)this.buf.length;
            }
            return this.tokLeft >= 3 || bytesLeft >= (long)this.buf.length;
        }

        /**
         * Refills the buffer from the current file position and re-tokenizes it,
         * discarding the first line.
         */
        private void resetTok() throws IOException {
            int filled = 0;
            while (filled < this.buf.length) {
                int n = this.raf.read(this.buf, filled, this.buf.length - filled);
                if (n < 0) {
                    break;
                }
                filled += n;
            }
            // Tokenize only what was read; the rest of buf may hold bytes from an earlier fill.
            this.tok = new StringTokenizer(new String(this.buf, 0, filled), "\n");
            if (this.tok.hasMoreTokens()) {
                this.tok.nextToken();
            }
            this.tokLeft = Math.max(this.tok.countTokens() - 1, 0);
        }
    }
}

