/*
 * Decompiled with CFR 0.152.
 */
package proteogenomicUtils;

import basicUtils.Utils;
import java.util.ArrayList;
import java.util.Hashtable;
import ms2dbUtils.MS2DB;
import proteogenomicUtils.GenomicLocator;
import sixFrameUtils.SixFrameBuilder;

/**
 * Command-line tool that histograms novel peptide locations by the length of
 * the six-frame ORF they fall in.
 *
 * <p>For every location whose protein name is a six-frame FASTA header, the
 * ORF length is computed as {@code (End - Start) / 3} from the header and the
 * location is counted in a length bin. Locations flagged as unique also
 * contribute their peptide sequence to the bin. When a splice-graph directory
 * is supplied ({@code -d}), the tool additionally reports how many of those
 * peptides are found in at least one splice graph.
 */
public class ORFUseAnalyzer {
    public static String usageInfo = "proteogenomicUtils.ORFUseAnalyzer\nAnalyzes novel peptide locations for ORF use.  Determines\nhistogram of ORF lengths containing locations, and histogram of unique peptides\nas a function of ORF length.  Can determine if peptide is found in splice graph.\n[REQUIRED]:\n -r [FILE/DIR] File or directory containing novel locations\n[OPTIONAL]:\n -d [DIR] Directory containing splice graphs.";

    /** Number of ORF-length histogram bins. */
    private static final int NUM_BINS = 10;
    /** Width of each histogram bin, in units of {@code (End - Start) / 3}. */
    private static final int BIN_WIDTH = 10;
    /** Number of bin widths skipped below the first bin (the first bin is labelled 40-49). */
    private static final int BIN_OFFSET = 4;

    /** Location files to analyze (a single file, or the listing of a directory). */
    private String[] locationFileNames;
    /** Splice-graph database files; stays {@code null} when no -d directory was given. */
    private String[] dbFileNames;

    /**
     * Resolves the input and (optional) database arguments into file lists.
     * Prints an error and terminates the JVM with status -1 when
     * {@code inputFile} is neither a file nor a directory, or when a non-null
     * {@code dbDir} is not a directory.
     *
     * @param inputFile file or directory containing novel locations
     * @param dbDir     directory containing splice graphs, or {@code null} for none
     */
    public ORFUseAnalyzer(String inputFile, String dbDir) {
        if (Utils.IsDir(inputFile)) {
            this.locationFileNames = Utils.ListDir(inputFile);
        } else if (Utils.IsFile(inputFile)) {
            this.locationFileNames = new String[]{inputFile};
        } else {
            System.err.println("ERROR: Invalid input file/directory '" + inputFile + "'");
            System.exit(-1);
        }
        if (dbDir != null && !Utils.IsDir(dbDir)) {
            System.err.println("ERROR: Invalid database directory '" + dbDir + "'!");
            System.exit(-1);
        } else if (dbDir != null) {
            this.dbFileNames = Utils.ListDir(dbDir);
        }
    }

    /**
     * Entry point. Requires {@code -r}; {@code -d} is optional.
     *
     * @param args command-line arguments, see {@link #usageInfo}
     */
    public static void main(String[] args) {
        String[] options = new String[]{"-r", "-d"};
        boolean[] values = new boolean[]{true, true};
        Hashtable commandLineArgs = Utils.ParseCommandLine(args, options, values);
        if (!commandLineArgs.containsKey("-r")) {
            System.err.println("ERROR: Must specify an input file or directory!");
            System.err.println(usageInfo);
            // A missing required argument is an error: exit non-zero, matching
            // the constructor's error paths (this previously exited with 0).
            System.exit(-1);
        }
        String inputFile = (String) commandLineArgs.get("-r");
        // Hashtable.get yields null for an absent key, which is exactly the
        // "no database directory" value the constructor expects.
        String dbDir = (String) commandLineArgs.get("-d");
        ORFUseAnalyzer a = new ORFUseAnalyzer(inputFile, dbDir);
        a.analyze();
    }

    /**
     * Builds the per-bin histograms from all location files and prints one
     * summary line per bin to standard output.
     */
    private void analyze() {
        int[] locations = new int[NUM_BINS];
        int[] peptides = new int[NUM_BINS];
        // Raw ArrayList is kept on purpose: the element type expected by
        // Utils.ConvertArraylistToStringArray is not visible from this file.
        ArrayList[] pepSeqs = new ArrayList[NUM_BINS];
        MS2DB[] spliceGraphs = this.loadSpliceGraphs();

        for (String fileName : this.locationFileNames) {
            tallyFile(fileName, locations, peptides, pepSeqs);
        }

        for (int k = 0; k < NUM_BINS; ++k) {
            // A bin that never received a unique peptide has no list at all;
            // treat it as empty instead of dereferencing null.
            int seqCount = pepSeqs[k] == null ? 0 : pepSeqs[k].size();
            if (peptides[k] != seqCount) {
                System.err.println("Counting " + peptides[k] + " but only have " + seqCount + " sequences!!");
            }
            int recoveredPeps = countRecovered(pepSeqs[k], spliceGraphs);
            int binStart = (k + BIN_OFFSET) * BIN_WIDTH;
            int binEnd = binStart + BIN_WIDTH - 1;
            System.out.println("[" + binStart + "-" + binEnd + "]: " + locations[k] + " locations with " + peptides[k] + " peptides with " + recoveredPeps + " recovered!");
        }
    }

    /**
     * Opens one {@code MS2DB} per database file.
     *
     * @return the opened databases, or {@code null} when no database
     *         directory was supplied
     */
    private MS2DB[] loadSpliceGraphs() {
        if (this.dbFileNames == null) {
            return null;
        }
        MS2DB[] graphs = new MS2DB[this.dbFileNames.length];
        for (int i = 0; i < graphs.length; ++i) {
            graphs[i] = new MS2DB(this.dbFileNames[i]);
        }
        return graphs;
    }

    /**
     * Loads one location file and adds its six-frame locations to the
     * histograms. Locations whose protein name is not a six-frame FASTA
     * header are ignored.
     *
     * @param fileName  location file to load
     * @param locations per-bin count of locations (updated in place)
     * @param peptides  per-bin count of unique peptides (updated in place)
     * @param pepSeqs   per-bin list of unique peptide sequences (updated in
     *                  place; lists are created on first use)
     */
    private static void tallyFile(String fileName, int[] locations, int[] peptides, ArrayList[] pepSeqs) {
        System.out.println("Looking at locations in " + fileName);
        ArrayList locs = GenomicLocator.GenomicColumns.LoadFromFile(fileName);
        System.out.println(" - loaded " + locs.size() + " locations");
        for (int j = 0; j < locs.size(); ++j) {
            Object[] loc = (Object[]) locs.get(j);
            String protName = (String) loc[GenomicLocator.GenomicColumns.ProteinName];
            if (!SixFrameBuilder.IsSixFrameFASTAHeader(protName)) {
                continue;
            }
            String[] headerBits = SixFrameBuilder.ParseSixFrameHeader(protName);
            int start = Integer.parseInt(headerBits[1]);
            int end = Integer.parseInt(headerBits[2]);
            // NOTE(review): if reverse-strand headers can list End < Start this
            // length is negative and lands in the first bin - confirm against
            // SixFrameBuilder whether an absolute value is needed here.
            int bin = binFor((end - start) / 3);
            ++locations[bin];

            boolean isUnique = (Boolean) loc[GenomicLocator.GenomicColumns.IsUnique];
            if (!isUnique) {
                continue;
            }
            String pep = stripFlanks((String) loc[GenomicLocator.GenomicColumns.Peptide]);
            if (pepSeqs[bin] == null) {
                pepSeqs[bin] = new ArrayList();
            }
            pepSeqs[bin].add(pep);
            ++peptides[bin];
        }
    }

    /**
     * Maps an ORF length to a histogram bin index, clamped to the valid
     * range. Lengths beyond the last bin are counted in the last bin;
     * lengths below the first bin are counted in the first bin (previously a
     * length under 40 produced a negative index and crashed).
     *
     * @param length ORF length
     * @return bin index in {@code [0, NUM_BINS - 1]}
     */
    private static int binFor(int length) {
        int bin = length / BIN_WIDTH - BIN_OFFSET;
        if (bin < 0) {
            return 0;
        }
        if (bin >= NUM_BINS) {
            return NUM_BINS - 1;
        }
        return bin;
    }

    /**
     * Drops the first two and last two characters of a peptide that contains
     * a '.'; presumably these are the flanking residues of an
     * {@code X.PEPTIDE.Y} annotation (TODO confirm the input format).
     * Peptides without a '.' - or too short to strip safely - are returned
     * unchanged.
     *
     * @param pep peptide string as read from the location file
     * @return the peptide with flanking characters removed where applicable
     */
    private static String stripFlanks(String pep) {
        if (pep.indexOf('.') >= 0 && pep.length() >= 4) {
            return pep.substring(2, pep.length() - 2);
        }
        return pep;
    }

    /**
     * Counts how many of the given peptides have at least one location in at
     * least one splice graph. Each list entry is counted at most once, no
     * matter how many graphs contain it.
     *
     * @param seqs         peptide sequences of one bin, or {@code null} if the
     *                     bin is empty
     * @param spliceGraphs splice-graph databases, or {@code null} if none were
     *                     supplied
     * @return number of recovered peptides; 0 when either argument is
     *         {@code null}
     */
    private static int countRecovered(ArrayList seqs, MS2DB[] spliceGraphs) {
        if (seqs == null || spliceGraphs == null) {
            return 0;
        }
        String[] peps = Utils.ConvertArraylistToStringArray(seqs);
        boolean[] found = new boolean[peps.length];
        int recovered = 0;
        for (MS2DB graph : spliceGraphs) {
            Hashtable spliceLocs = graph.GetAllLocations(peps);
            for (int n = 0; n < peps.length; ++n) {
                if (found[n]) {
                    continue;
                }
                // Hashtable.get returns null for a missing key, so a separate
                // containsKey lookup is unnecessary.
                ArrayList hits = (ArrayList) spliceLocs.get(peps[n]);
                if (hits != null && !hits.isEmpty()) {
                    found[n] = true;
                    ++recovered;
                }
            }
        }
        return recovered;
    }
}

