/*
 * Decompiled with CFR 0.152.
 */
package picard.vcf;

import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.metrics.MetricBase;
import htsjdk.samtools.metrics.MetricsFile;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.IntervalList;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.ProgressLogger;
import htsjdk.samtools.util.SequenceUtil;
import htsjdk.tribble.Tribble;
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.vcf.VCFFileReader;
import java.io.File;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import picard.PicardException;
import picard.cmdline.CommandLineProgram;
import picard.cmdline.CommandLineProgramProperties;
import picard.cmdline.Option;
import picard.cmdline.programgroups.VcfOrBcf;
import picard.vcf.ByIntervalListVariantContextIterator;
import picard.vcf.GenotypeConcordanceContingencyMetrics;
import picard.vcf.GenotypeConcordanceCounts;
import picard.vcf.GenotypeConcordanceDetailMetrics;
import picard.vcf.GenotypeConcordanceScheme;
import picard.vcf.GenotypeConcordanceSchemeFactory;
import picard.vcf.GenotypeConcordanceStates;
import picard.vcf.GenotypeConcordanceSummaryMetrics;
import picard.vcf.OrderedSet;
import picard.vcf.PairedVariantSubContextIterator;
import picard.vcf.VcTuple;

@CommandLineProgramProperties(usage="Calculates the concordance between genotype data for two samples in two different VCFs - one being considered the truth (or reference) the other being considered the call.  The concordance is broken into separate results sections for SNPs and indels.  Summary and detailed statistics are reported\n\nNote that for any pair of variants to compare, only the alleles for the samples under interrogation are considered and MNP, Symbolic, and Mixed classes of variants are not included.", usageShort="Calculates the concordance between genotype data for two samples in two different VCFs", programGroup=VcfOrBcf.class)
public class GenotypeConcordance
extends CommandLineProgram {
    @Option(shortName="TV", doc="The VCF containing the truth sample")
    public File TRUTH_VCF;
    @Option(shortName="CV", doc="The VCF containing the call sample")
    public File CALL_VCF;
    @Option(shortName="O", doc="Basename for the two metrics files that are to be written. Resulting files will be <OUTPUT>.genotype_concordance_summary_metrics  and <OUTPUT>.genotype_concordance_detail_metrics.")
    public File OUTPUT;
    @Option(shortName="TS", doc="The name of the truth sample within the truth VCF")
    public String TRUTH_SAMPLE;
    @Option(shortName="CS", doc="The name of the call sample within the call VCF")
    public String CALL_SAMPLE;
    @Option(doc="One or more interval list files that will be used to limit the genotype concordance.  Note - if intervals are specified, the VCF files must be indexed.")
    public List<File> INTERVALS;
    @Option(doc="If true, multiple interval lists will be intersected. If false multiple lists will be unioned.")
    public boolean INTERSECT_INTERVALS = true;
    @Option(doc="Genotypes below this genotype quality will have genotypes classified as LowGq.")
    public int MIN_GQ = 0;
    @Option(doc="Genotypes below this depth will have genotypes classified as LowDp.")
    public int MIN_DP = 0;
    @Option(doc="If true, output all rows in detailed statistics even when count == 0.  When false only output rows with non-zero counts.")
    public boolean OUTPUT_ALL_ROWS = false;
    @Option(doc="If true, use the VCF index, else iterate over the entire VCF.", optional=true)
    public boolean USE_VCF_INDEX = false;
    @Option(shortName="MISSING_HOM", doc="Default is false, which follows the GA4GH Scheme. If true, missing sites in the truth set will be treated as HOM_REF sites and sites missing in both the truth and call sets will be true negatives. Useful when hom ref sites are left out of the truth set. This flag can only be used with a high confidence interval list.")
    public boolean MISSING_SITES_HOM_REF = false;
    private final Log log = Log.getInstance(GenotypeConcordance.class);
    private final ProgressLogger progress = new ProgressLogger(this.log, 10000, "checked", "variants");
    public static final String SUMMARY_METRICS_FILE_EXTENSION = ".genotype_concordance_summary_metrics";
    public static final String DETAILED_METRICS_FILE_EXTENSION = ".genotype_concordance_detail_metrics";
    public static final String CONTINGENCY_METRICS_FILE_EXTENSION = ".genotype_concordance_contingency_metrics";
    protected GenotypeConcordanceCounts snpCounter;
    protected GenotypeConcordanceCounts indelCounter;

    public GenotypeConcordanceCounts getSnpCounter() {
        return this.snpCounter;
    }

    public GenotypeConcordanceCounts getIndelCounter() {
        return this.indelCounter;
    }

    public static void main(String[] args) {
        new GenotypeConcordance().instanceMainWithExit(args);
    }

    @Override
    protected String[] customCommandLineValidation() {
        IOUtil.assertFileIsReadable((File)this.TRUTH_VCF);
        IOUtil.assertFileIsReadable((File)this.CALL_VCF);
        boolean usingIntervals = this.INTERVALS != null && this.INTERVALS.size() > 0;
        ArrayList<String> errors = new ArrayList<String>();
        if (usingIntervals) {
            this.USE_VCF_INDEX = true;
        }
        if (this.USE_VCF_INDEX) {
            if (!this.indexExists(this.TRUTH_VCF)) {
                errors.add("The index file was not found for the TRUTH VCF.  Note that if intervals are specified, the VCF files must be indexed.");
            }
            if (!this.indexExists(this.CALL_VCF)) {
                errors.add("The index file was not found for the CALL VCF.  Note that if intervals are specified, the VCF files must be indexed.");
            }
        }
        if (this.MISSING_SITES_HOM_REF && !usingIntervals) {
            errors.add("You cannot use the MISSING_HOM option without also supplying an interval list over which missing sites are considered confident homozygous reference calls.");
        }
        if (errors.isEmpty()) {
            return null;
        }
        return errors.toArray(new String[errors.size()]);
    }

    private boolean indexExists(File vcf) {
        return Tribble.indexFile((File)vcf).exists() || Tribble.tabixIndexFile((File)vcf).exists();
    }

    @Override
    protected int doWork() {
        Object callIterator;
        Object truthIterator;
        File summaryMetricsFile = new File(this.OUTPUT + SUMMARY_METRICS_FILE_EXTENSION);
        File detailedMetricsFile = new File(this.OUTPUT + DETAILED_METRICS_FILE_EXTENSION);
        File contingencyMetricsFile = new File(this.OUTPUT + CONTINGENCY_METRICS_FILE_EXTENSION);
        IOUtil.assertFileIsWritable((File)summaryMetricsFile);
        IOUtil.assertFileIsWritable((File)detailedMetricsFile);
        IOUtil.assertFileIsWritable((File)contingencyMetricsFile);
        boolean usingIntervals = this.INTERVALS != null && this.INTERVALS.size() > 0;
        IntervalList intervals = null;
        SAMSequenceDictionary intervalsSamSequenceDictionary = null;
        if (usingIntervals) {
            this.log.info(new Object[]{"Starting to load intervals list(s)."});
            long genomeBaseCount = 0L;
            for (File f : this.INTERVALS) {
                IOUtil.assertFileIsReadable((File)f);
                IntervalList tmpIntervalList = IntervalList.fromFile((File)f);
                if (genomeBaseCount == 0L) {
                    intervalsSamSequenceDictionary = tmpIntervalList.getHeader().getSequenceDictionary();
                    genomeBaseCount = intervalsSamSequenceDictionary.getReferenceLength();
                }
                if (intervals == null) {
                    intervals = tmpIntervalList;
                    continue;
                }
                if (this.INTERSECT_INTERVALS) {
                    intervals = IntervalList.intersection((IntervalList)intervals, (IntervalList)tmpIntervalList);
                    continue;
                }
                intervals = IntervalList.union((IntervalList)intervals, (IntervalList)tmpIntervalList);
            }
            if (intervals != null) {
                intervals = intervals.uniqued();
            }
            this.log.info(new Object[]{"Finished loading up intervals list(s)."});
        }
        VCFFileReader truthReader = new VCFFileReader(this.TRUTH_VCF, this.USE_VCF_INDEX);
        VCFFileReader callReader = new VCFFileReader(this.CALL_VCF, this.USE_VCF_INDEX);
        if (!truthReader.getFileHeader().getGenotypeSamples().contains(this.TRUTH_SAMPLE)) {
            throw new PicardException("File " + this.TRUTH_VCF.getAbsolutePath() + " does not contain genotypes for sample " + this.TRUTH_SAMPLE);
        }
        if (!callReader.getFileHeader().getGenotypeSamples().contains(this.CALL_SAMPLE)) {
            throw new PicardException("File " + this.CALL_VCF.getAbsolutePath() + " does not contain genotypes for sample " + this.CALL_SAMPLE);
        }
        SequenceUtil.assertSequenceDictionariesEqual((SAMSequenceDictionary)truthReader.getFileHeader().getSequenceDictionary(), (SAMSequenceDictionary)callReader.getFileHeader().getSequenceDictionary());
        if (usingIntervals) {
            SequenceUtil.assertSequenceDictionariesEqual((SAMSequenceDictionary)intervalsSamSequenceDictionary, (SAMSequenceDictionary)truthReader.getFileHeader().getSequenceDictionary());
        }
        if (usingIntervals) {
            truthIterator = new ByIntervalListVariantContextIterator(truthReader, intervals);
            callIterator = new ByIntervalListVariantContextIterator(callReader, intervals);
        } else {
            truthIterator = truthReader.iterator();
            callIterator = callReader.iterator();
        }
        PairedVariantSubContextIterator pairedIterator = new PairedVariantSubContextIterator((Iterator<VariantContext>)truthIterator, this.TRUTH_SAMPLE, (Iterator<VariantContext>)callIterator, this.CALL_SAMPLE, truthReader.getFileHeader().getSequenceDictionary());
        this.snpCounter = new GenotypeConcordanceCounts();
        this.indelCounter = new GenotypeConcordanceCounts();
        HashMap<String, Integer> unClassifiedStatesMap = new HashMap<String, Integer>();
        this.log.info(new Object[]{"Starting iteration over variants."});
        while (pairedIterator.hasNext()) {
            VcTuple tuple = pairedIterator.next();
            VariantContext.Type truthVariantContextType = tuple.truthVariantContext != null ? tuple.truthVariantContext.getType() : VariantContext.Type.NO_VARIATION;
            VariantContext.Type callVariantContextType = tuple.callVariantContext != null ? tuple.callVariantContext.getType() : VariantContext.Type.NO_VARIATION;
            boolean stateClassified = false;
            GenotypeConcordanceStates.TruthAndCallStates truthAndCallStates = this.determineState(tuple.truthVariantContext, this.TRUTH_SAMPLE, tuple.callVariantContext, this.CALL_SAMPLE, this.MIN_GQ, this.MIN_DP);
            if (truthVariantContextType == VariantContext.Type.SNP) {
                if (callVariantContextType == VariantContext.Type.SNP || callVariantContextType == VariantContext.Type.MIXED || callVariantContextType == VariantContext.Type.NO_VARIATION) {
                    this.snpCounter.increment(truthAndCallStates);
                    stateClassified = true;
                }
            } else if (truthVariantContextType == VariantContext.Type.INDEL) {
                if (callVariantContextType == VariantContext.Type.INDEL || callVariantContextType == VariantContext.Type.MIXED || callVariantContextType == VariantContext.Type.NO_VARIATION) {
                    this.indelCounter.increment(truthAndCallStates);
                    stateClassified = true;
                }
            } else if (truthVariantContextType == VariantContext.Type.MIXED) {
                if (callVariantContextType == VariantContext.Type.SNP) {
                    this.snpCounter.increment(truthAndCallStates);
                    stateClassified = true;
                } else if (callVariantContextType == VariantContext.Type.INDEL) {
                    this.indelCounter.increment(truthAndCallStates);
                    stateClassified = true;
                }
            } else if (truthVariantContextType == VariantContext.Type.NO_VARIATION) {
                if (callVariantContextType == VariantContext.Type.SNP) {
                    this.snpCounter.increment(truthAndCallStates);
                    stateClassified = true;
                } else if (callVariantContextType == VariantContext.Type.INDEL) {
                    this.indelCounter.increment(truthAndCallStates);
                    stateClassified = true;
                }
            }
            if (!stateClassified) {
                String condition = truthVariantContextType + " " + callVariantContextType;
                Integer count = (Integer)unClassifiedStatesMap.get(condition);
                if (count == null) {
                    count = 0;
                }
                count = count + 1;
                unClassifiedStatesMap.put(condition, count);
            }
            VariantContext variantContextForLogging = tuple.truthVariantContext != null ? tuple.truthVariantContext : tuple.callVariantContext;
            this.progress.record(variantContextForLogging.getContig(), variantContextForLogging.getStart());
        }
        if (this.MISSING_SITES_HOM_REF) {
            long intervalBaseCount = intervals.getBaseCount();
            this.addMissingTruthAndMissingCallStates(this.snpCounter.getCounterSize(), intervalBaseCount, this.snpCounter);
            this.addMissingTruthAndMissingCallStates(this.indelCounter.getCounterSize(), intervalBaseCount, this.indelCounter);
        }
        MetricsFile genotypeConcordanceSummaryMetricsFile = this.getMetricsFile();
        GenotypeConcordanceSummaryMetrics summaryMetrics = new GenotypeConcordanceSummaryMetrics(VariantContext.Type.SNP, this.snpCounter, this.TRUTH_SAMPLE, this.CALL_SAMPLE, this.MISSING_SITES_HOM_REF);
        genotypeConcordanceSummaryMetricsFile.addMetric((MetricBase)summaryMetrics);
        summaryMetrics = new GenotypeConcordanceSummaryMetrics(VariantContext.Type.INDEL, this.indelCounter, this.TRUTH_SAMPLE, this.CALL_SAMPLE, this.MISSING_SITES_HOM_REF);
        genotypeConcordanceSummaryMetricsFile.addMetric((MetricBase)summaryMetrics);
        genotypeConcordanceSummaryMetricsFile.write(summaryMetricsFile);
        MetricsFile genotypeConcordanceDetailMetrics = this.getMetricsFile();
        this.outputDetailMetricsFile(VariantContext.Type.SNP, genotypeConcordanceDetailMetrics, this.snpCounter, this.TRUTH_SAMPLE, this.CALL_SAMPLE);
        this.outputDetailMetricsFile(VariantContext.Type.INDEL, genotypeConcordanceDetailMetrics, this.indelCounter, this.TRUTH_SAMPLE, this.CALL_SAMPLE);
        genotypeConcordanceDetailMetrics.write(detailedMetricsFile);
        MetricsFile genotypeConcordanceContingencyMetricsFile = this.getMetricsFile();
        GenotypeConcordanceContingencyMetrics contingencyMetrics = new GenotypeConcordanceContingencyMetrics(VariantContext.Type.SNP, this.snpCounter, this.TRUTH_SAMPLE, this.CALL_SAMPLE, this.MISSING_SITES_HOM_REF);
        genotypeConcordanceContingencyMetricsFile.addMetric((MetricBase)contingencyMetrics);
        contingencyMetrics = new GenotypeConcordanceContingencyMetrics(VariantContext.Type.INDEL, this.indelCounter, this.TRUTH_SAMPLE, this.CALL_SAMPLE, this.MISSING_SITES_HOM_REF);
        genotypeConcordanceContingencyMetricsFile.addMetric((MetricBase)contingencyMetrics);
        genotypeConcordanceContingencyMetricsFile.write(contingencyMetricsFile);
        for (String condition : unClassifiedStatesMap.keySet()) {
            this.log.info(new Object[]{"Uncovered truth/call Variant Context Type Counts: " + condition + " " + unClassifiedStatesMap.get(condition)});
        }
        return 0;
    }

    private void addMissingTruthAndMissingCallStates(double numVariants, long intervalBaseCount, GenotypeConcordanceCounts counter) {
        double countMissingMissing = (double)intervalBaseCount - numVariants;
        GenotypeConcordanceStates.TruthAndCallStates missingMissing = new GenotypeConcordanceStates.TruthAndCallStates(GenotypeConcordanceStates.TruthState.MISSING, GenotypeConcordanceStates.CallState.MISSING);
        counter.increment(missingMissing, countMissingMissing);
    }

    private void outputDetailMetricsFile(VariantContext.Type variantType, MetricsFile<GenotypeConcordanceDetailMetrics, ?> genotypeConcordanceDetailMetricsFile, GenotypeConcordanceCounts counter, String truthSampleName, String callSampleName) {
        GenotypeConcordanceSchemeFactory schemeFactory = new GenotypeConcordanceSchemeFactory();
        GenotypeConcordanceScheme scheme = schemeFactory.getScheme(this.MISSING_SITES_HOM_REF);
        scheme.validateScheme();
        for (GenotypeConcordanceStates.TruthState truthState : GenotypeConcordanceStates.TruthState.values()) {
            for (GenotypeConcordanceStates.CallState callState : GenotypeConcordanceStates.CallState.values()) {
                int count = counter.getCount(truthState, callState);
                String contingencyValues = scheme.getContingencyStateString(truthState, callState);
                if (count <= 0 && !this.OUTPUT_ALL_ROWS) continue;
                GenotypeConcordanceDetailMetrics detailMetrics = new GenotypeConcordanceDetailMetrics();
                detailMetrics.VARIANT_TYPE = variantType;
                detailMetrics.TRUTH_SAMPLE = truthSampleName;
                detailMetrics.CALL_SAMPLE = callSampleName;
                detailMetrics.TRUTH_STATE = truthState;
                detailMetrics.CALL_STATE = callState;
                detailMetrics.COUNT = count;
                detailMetrics.CONTINGENCY_VALUES = contingencyValues;
                genotypeConcordanceDetailMetricsFile.addMetric((MetricBase)detailMetrics);
            }
        }
    }

    final GenotypeConcordanceStates.TruthAndCallStates determineState(VariantContext truthContext, String truthSample, VariantContext callContext, String callSample, int minGq, int minDp) {
        int allele1idx;
        int allele0idx;
        GenotypeConcordanceStates.TruthState truthState = null;
        GenotypeConcordanceStates.CallState callState = null;
        Genotype truthGenotype = null;
        Genotype callGenotype = null;
        if (truthContext == null) {
            truthState = GenotypeConcordanceStates.TruthState.MISSING;
        } else if (truthContext.isMixed()) {
            truthState = GenotypeConcordanceStates.TruthState.IS_MIXED;
        } else if (truthContext.isFiltered()) {
            truthState = GenotypeConcordanceStates.TruthState.VC_FILTERED;
        } else {
            truthGenotype = truthContext.getGenotype(truthSample);
            if (truthGenotype.isNoCall()) {
                truthState = GenotypeConcordanceStates.TruthState.NO_CALL;
            } else if (truthGenotype.isFiltered()) {
                truthState = GenotypeConcordanceStates.TruthState.GT_FILTERED;
            } else if (truthGenotype.getGQ() != -1 && truthGenotype.getGQ() < minGq) {
                truthState = GenotypeConcordanceStates.TruthState.LOW_GQ;
            } else if (truthGenotype.getDP() != -1 && truthGenotype.getDP() < minDp) {
                truthState = GenotypeConcordanceStates.TruthState.LOW_DP;
            } else if (truthGenotype.isMixed()) {
                truthState = GenotypeConcordanceStates.TruthState.NO_CALL;
            }
        }
        if (callContext == null) {
            callState = GenotypeConcordanceStates.CallState.MISSING;
        } else if (callContext.isMixed()) {
            callState = GenotypeConcordanceStates.CallState.IS_MIXED;
        } else if (callContext.isFiltered()) {
            callState = GenotypeConcordanceStates.CallState.VC_FILTERED;
        } else {
            callGenotype = callContext.getGenotype(callSample);
            if (callGenotype.isNoCall()) {
                callState = GenotypeConcordanceStates.CallState.NO_CALL;
            } else if (callGenotype.isFiltered()) {
                callState = GenotypeConcordanceStates.CallState.GT_FILTERED;
            } else if (callGenotype.getGQ() != -1 && callGenotype.getGQ() < minGq) {
                callState = GenotypeConcordanceStates.CallState.LOW_GQ;
            } else if (callGenotype.getDP() != -1 && callGenotype.getDP() < minDp) {
                callState = GenotypeConcordanceStates.CallState.LOW_DP;
            } else if (callGenotype.isMixed()) {
                callState = GenotypeConcordanceStates.CallState.NO_CALL;
            }
        }
        String truthRef = truthContext != null ? truthContext.getReference().getBaseString() : null;
        String callRef = callContext != null ? callContext.getReference().getBaseString() : null;
        String truthAllele1 = null;
        String truthAllele2 = null;
        if (null == truthState) {
            if (truthGenotype.getAlleles().size() != 2) {
                throw new IllegalStateException("Genotype for Variant Context: " + truthContext + " does not have exactly 2 alleles");
            }
            truthAllele1 = truthGenotype.getAllele(0).getBaseString();
            truthAllele2 = truthGenotype.getAllele(1).getBaseString();
        }
        String callAllele1 = null;
        String callAllele2 = null;
        if (null == callState) {
            if (callGenotype.getAlleles().size() != 2) {
                throw new IllegalStateException("Genotype for Variant Context: " + callContext + " does not have exactly 2 alleles");
            }
            callAllele1 = callGenotype.getAllele(0).getBaseString();
            callAllele2 = callGenotype.getAllele(1).getBaseString();
        }
        if (truthRef != null && callRef != null && !truthRef.equals(callRef)) {
            String suffix;
            if (truthRef.length() < callRef.length()) {
                suffix = this.getStringSuffix(callRef, truthRef, "Ref alleles mismatch between: " + truthContext + " and " + callContext);
                truthRef = truthRef + suffix;
                if (null == truthState) {
                    truthAllele1 = truthGenotype.getAllele(0).getBaseString() + suffix;
                    truthAllele2 = truthGenotype.getAllele(1).getBaseString() + suffix;
                }
            } else if (truthRef.length() > callRef.length()) {
                suffix = this.getStringSuffix(truthRef, callRef, "Ref alleles mismatch between: " + truthContext + " and " + callContext);
                callRef = callRef + suffix;
                if (null == callState) {
                    callAllele1 = callGenotype.getAllele(0).getBaseString() + suffix;
                    callAllele2 = callGenotype.getAllele(1).getBaseString() + suffix;
                }
            } else {
                throw new IllegalStateException("Ref alleles mismatch between: " + truthContext + " and " + callContext);
            }
        }
        OrderedSet<String> allAlleles = new OrderedSet<String>();
        if (truthContext != null || callContext != null) {
            allAlleles.smartAdd(truthContext == null ? callRef : truthRef);
        }
        if (null == truthState) {
            allAlleles.smartAdd(truthAllele1);
            allAlleles.smartAdd(truthAllele2);
        }
        if (null == callState) {
            if (allAlleles.indexOf(callAllele1) > 1 || allAlleles.indexOf(callAllele2) > 1) {
                allAlleles.remove(2);
                allAlleles.remove(1);
                allAlleles.smartAdd(truthAllele2);
                allAlleles.smartAdd(truthAllele1);
            }
            allAlleles.smartAdd(callAllele1);
            allAlleles.smartAdd(callAllele2);
        }
        if (null == truthState) {
            allele0idx = allAlleles.indexOf(truthAllele1);
            truthState = allele0idx == (allele1idx = allAlleles.indexOf(truthAllele2)) ? GenotypeConcordanceStates.TruthState.getHom(allele0idx) : GenotypeConcordanceStates.TruthState.getVar(allele0idx, allele1idx);
        }
        if (null == callState && null == (callState = (allele0idx = allAlleles.indexOf(callAllele1)) == (allele1idx = allAlleles.indexOf(callAllele2)) ? GenotypeConcordanceStates.CallState.getHom(allele0idx) : GenotypeConcordanceStates.CallState.getHet(allele0idx, allele1idx))) {
            throw new IllegalStateException("This should never happen...  Could not classify the call variant: " + callGenotype);
        }
        return new GenotypeConcordanceStates.TruthAndCallStates(truthState, callState);
    }

    final String getStringSuffix(String longerString, String shorterString, String errorMsg) {
        if (!longerString.startsWith(shorterString)) {
            throw new IllegalStateException(errorMsg);
        }
        return longerString.substring(shorterString.length());
    }
}

