/*
 * Decompiled with CFR 0.152.
 */
package edu.msu.cme.rdp.alignment.pairwise;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintStream;
import java.util.HashMap;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.PosixParser;

/**
 * Summarizes pairwise alignment result files into per-sample taxon abundance tables.
 *
 * <p>Each input file is treated as one sample. Every accepted record contributes its
 * coverage (1.0 unless the sequence ID carries a {@code cov_<value>} pair) to every
 * prefix of the lineage found in its last column, and to one extra "terminal" entry
 * built from columns 10 and 11. {@link #print(File)} then writes two tab-separated
 * tables: relative abundance (count divided by the sample total) followed by raw counts.
 *
 * <p>NOTE(review): the {@code -c/--seqCoverage} option is registered but never read
 * by {@link #main(String[])}.
 */
public class PwToTaxonAbund {
    private static final String dformat = "%1$.4f";
    private int min_length = 1;
    private float ident_cutoff = 0.0f;
    /** Lineage string -> rank label (text before the first '_', "NA", or "terminal"). */
    private HashMap<String, String> rankMap = new HashMap<String, String>();
    /** Sample name -> (lineage string -> accumulated coverage). */
    private HashMap<String, HashMap<String, Double>> sampleTaxaMap = new HashMap<String, HashMap<String, Double>>();
    /** Sample name -> total coverage of all records whose second column is "1". */
    private HashMap<String, Double> sampleCountMap = new HashMap<String, Double>();
    private static Options options = new Options();

    /**
     * Extracts the coverage encoded in a sequence ID as {@code ..._cov_<value>_...}.
     *
     * @param str the sequence ID
     * @return the value following the first {@code cov} token, or 1.0 when the ID has
     *         no {@code cov} token or nothing follows it
     * @throws NumberFormatException if the token after {@code cov} is not a number
     */
    public static float findCoverage(String str) {
        if (!str.contains("cov")) {
            return 1.0f;
        }
        String[] vals = str.split("_");
        // Stop one short of the end: a "cov" token is only usable when a value follows it.
        for (int i = 0; i + 1 < vals.length; ++i) {
            if (vals[i].equals("cov")) {
                return Float.parseFloat(vals[i + 1]);
            }
        }
        // "cov" occurred only inside another token (e.g. "discovery") or as the last token.
        return 1.0f;
    }

    /**
     * Creates an accumulator with the given acceptance thresholds.
     *
     * @param min_length   minimum query length a record needs to be counted per taxon
     * @param ident_cutoff minimum identity a record needs to be counted per taxon
     */
    public PwToTaxonAbund(int min_length, float ident_cutoff) throws FileNotFoundException, IOException {
        this.min_length = min_length;
        this.ident_cutoff = ident_cutoff;
    }

    /**
     * Reads one result file and stores its taxon counts under the sample name derived
     * from the file name. A second file that maps to the same sample name replaces the
     * earlier entry.
     *
     * <p>Only lines that start with '@' and whose second tab-separated column equals "1"
     * are used. Column 4 is parsed as the identity, column 7 as the query length, and the
     * last column is split on ';' as the lineage. The sample total includes every such
     * line, also those rejected by the identity/length thresholds.
     *
     * @param infile the result file to read
     * @throws FileNotFoundException if {@code infile} cannot be opened
     * @throws IOException           if reading fails
     */
    public void process(File infile) throws FileNotFoundException, IOException {
        String sampleName = sampleNameOf(infile.getName());
        HashMap<String, Double> taxaMap = new HashMap<String, Double>();
        double totalCount = 0.0;
        BufferedReader reader = new BufferedReader(new FileReader(infile));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                if (!line.startsWith("@")) {
                    continue;
                }
                String[] vals = line.split("\t");
                if (vals.length <= 1 || !vals[1].equals("1")) {
                    continue;
                }
                // Drop the leading '@' to get the sequence ID.
                String seqID = vals[0].substring(1);
                double cov = PwToTaxonAbund.findCoverage(seqID);
                int query_length = Integer.parseInt(vals[7]);
                float identity = Float.parseFloat(vals[4]);
                if (identity >= this.ident_cutoff && query_length >= this.min_length) {
                    this.countLineage(taxaMap, vals, cov);
                }
                totalCount += cov;
            }
        } finally {
            // Close even when a malformed line aborts parsing.
            reader.close();
        }
        this.sampleTaxaMap.put(sampleName, taxaMap);
        this.sampleCountMap.put(sampleName, totalCount);
    }

    /**
     * Writes the relative-abundance table, a blank line, then the raw-count table.
     *
     * @param outfile the file to write (overwritten if it exists)
     * @throws IOException if the file cannot be opened for writing
     */
    public void print(File outfile) throws IOException {
        PrintStream outstream = new PrintStream(outfile);
        try {
            this.printTable(outstream, true);
            outstream.print("\n");
            this.printTable(outstream, false);
        } finally {
            outstream.close();
        }
    }

    /**
     * Command-line entry point: {@code <options> out.txt in_pw1.txt in_pw2.txt ...}.
     * Prints usage and returns when the options are invalid or fewer than two
     * positional arguments are given.
     */
    public static void main(String[] args) throws FileNotFoundException, IOException {
        int min_length = 1;
        float min_pct = 0.0f;
        try {
            CommandLine line = new PosixParser().parse(options, args);
            if (line.hasOption("e") && (min_pct = Float.parseFloat(line.getOptionValue("e"))) > 1.0f) {
                throw new Exception("min_pct should be less than 1, entered " + min_pct);
            }
            if (line.hasOption("l")) {
                min_length = Integer.parseInt(line.getOptionValue("l"));
            }
            if ((args = line.getArgs()).length < 2) {
                throw new Exception("Unexpected number of command line arguments");
            }
        }
        catch (Exception e) {
            new HelpFormatter().printHelp(" <options> out.txt in_pw1.txt in_pw2.txt ...\n in_pw.txt is the output from AlignmentTool pairwise-knn \nthe last column contains the reference lineage: domain__xx; phylum__xx; class__x; order__xx; family__xx; genus__xx\nIf the seqID contains the coverage, the coverage is used to get the count of assignments", options);
            System.err.println("ERROR: " + e.getMessage());
            e.printStackTrace();
            return;
        }
        PwToTaxonAbund theObj = new PwToTaxonAbund(min_length, min_pct);
        // args[0] is the output file; every remaining argument is an input file.
        for (int i = 1; i < args.length; ++i) {
            theObj.process(new File(args[i]));
        }
        theObj.print(new File(args[0]));
    }

    /** Returns the file name up to its first '.' or '_', or the whole name if that would be empty. */
    private static String sampleNameOf(String filename) {
        int dotIndex = filename.indexOf(".");
        int underscoreIndex = filename.indexOf("_");
        int cut;
        if (dotIndex == -1) {
            cut = underscoreIndex;
        } else if (underscoreIndex == -1) {
            cut = dotIndex;
        } else {
            cut = Math.min(dotIndex, underscoreIndex);
        }
        return cut > 0 ? filename.substring(0, cut) : filename;
    }

    /** Adds {@code cov} to every lineage prefix of the record and to its terminal entry. */
    private void countLineage(HashMap<String, Double> taxaMap, String[] vals, double cov) {
        String fullLineage = vals[vals.length - 1];
        String lineage = "";
        for (String taxon : fullLineage.split(";")) {
            lineage = lineage.equals("") ? taxon : lineage + ";" + taxon;
            addCount(taxaMap, lineage, cov);
            // The rank label is whatever precedes the first underscore, e.g. "genus" in "genus__xx".
            int underscore = taxon.indexOf("_");
            this.rankMap.put(lineage, underscore == -1 ? "NA" : taxon.substring(0, underscore));
        }
        String matchID = fullLineage + ";terminal__" + vals[10];
        if (!vals[11].startsWith("domain") && !vals[11].startsWith("superkingdom")) {
            // Append at most the first two whitespace-separated words of column 11.
            String[] words = vals[11].split("\\s+");
            matchID = matchID + "_" + words[0];
            if (words.length > 1) {
                matchID = matchID + "_" + words[1];
            }
        }
        addCount(taxaMap, matchID, cov);
        this.rankMap.put(matchID, "terminal");
    }

    /** Adds {@code amount} to the value stored under {@code key}, starting from zero. */
    private static void addCount(HashMap<String, Double> counts, String key, double amount) {
        Double previous = counts.get(key);
        counts.put(key, previous == null ? amount : amount + previous);
    }

    /** Writes one header row plus one row per taxon, as fractions of the sample total or raw counts. */
    private void printTable(PrintStream outstream, boolean relative) {
        outstream.print("Taxon\tRank\tTaxonName");
        for (String sample : this.sampleTaxaMap.keySet()) {
            outstream.print("\t" + sample);
        }
        outstream.print("\n");
        for (String taxonname : this.rankMap.keySet()) {
            outstream.print(taxonname + "\t" + this.rankMap.get(taxonname) + "\t" + shortName(taxonname));
            for (String sample : this.sampleTaxaMap.keySet()) {
                Double stored = this.sampleTaxaMap.get(sample).get(taxonname);
                double count = stored == null ? 0.0 : stored;
                if (!relative) {
                    outstream.print("\t" + String.format(dformat, count));
                    continue;
                }
                double totalCount = this.sampleCountMap.get(sample);
                if (totalCount > 0.0) {
                    outstream.print("\t" + String.format(dformat, count / totalCount));
                } else {
                    // A sample without any counted record has no meaningful fraction.
                    outstream.print("\t0");
                }
            }
            outstream.print("\n");
        }
    }

    /** Returns the last ';'-separated element of a lineage with its {@code rank__} prefix removed. */
    private static String shortName(String taxonname) {
        String[] parts = taxonname.split(";");
        String last = parts.length == 0 ? "" : parts[parts.length - 1];
        int index = last.indexOf("__");
        return index == -1 ? last : last.substring(index + 2);
    }

    static {
        options.addOption(new Option("c", "seqCoverage", true, "contains the ID and coverage separated by space or tab. Used to adjust the sequence abundance"));
        options.addOption(new Option("e", "identity", true, "the minimum protein identity, default is 0, range [0-1]"));
        options.addOption("l", "seqlength", true, "min length of query");
    }
}

