/*
 * Decompiled with CFR 0.152.
 */
package proteogenomicUtils;

import basicUtils.Utils;
import errorUtils.ErrorThrower;
import java.io.BufferedReader;
import java.io.FileWriter;
import java.util.Hashtable;
import trieUtils.TrieDB;

/**
 * Command-line tool that annotates an AUGUSTUS prediction table with matches
 * against a known proteome.
 *
 * <p>For every data row of the input table, the predicted protein sequence
 * (column 7, zero-based index 6) is compared to every protein in the proteome
 * database, and six columns are appended to the row: the count and
 * comma-separated names of exact matches, of known proteins contained in the
 * prediction ("substring" matches), and of known proteins that contain the
 * prediction ("superstring" matches).  Blank lines and lines starting with
 * {@code #} are copied through (trimmed) without annotation.
 *
 * <p>NOTE(review): exact matching is case-insensitive
 * ({@code equalsIgnoreCase}) whereas the containment tests use the
 * case-sensitive {@code indexOf}; this asymmetry is preserved from the
 * original implementation.
 */
public class CompareAUGUSTUSToKnown {
    /** Usage text printed when required command-line options are missing. */
    public static String usageInfo = "proteogenomicUtils.CompareAUGUSTUSToKnown version 2012.03.26\n"
            + "CompareAUGUSTUSToKnown is designed to compare the AUGUSTUS predictions to the known proteome, to identify cases where AUGUSTUS\n"
            + "predicts an annotated gene.  This is useful for identifying predicted genes that are fusions or extensions of known genes\n"
            + "Usage:\n"
            + " -r [FILE] The augustus output table from AUGUSTUSRunner (the protein sequence is assumed to be in column 7)\n"
            + " -t [FILE] The FASTA or trie file containing the Known Proteome\n"
            + " -w [FILE] An output file containing the proteins that have a known protein supported\n"
            + "(-d Run in debug mode)\n";

    /** Zero-based index of the table column holding the predicted protein sequence. */
    private static final int SEQUENCE_COLUMN = 6;

    /** Path of the AUGUSTUS prediction table to read. */
    private final String tableFile;
    /** Known-proteome database the predictions are compared against. */
    private final TrieDB proteomeDB;
    /** Path of the annotated table to write. */
    private final String outputFileName;
    /** When true, per-row match details are printed and the tool waits for Enter after each row. */
    private boolean debug = false;

    /**
     * Creates a comparer for the given table and proteome.
     *
     * @param tableFile    path of the AUGUSTUS table; reported through
     *                     {@code ErrorThrower.ThrowError(1, ...)} if it is not a file
     * @param proteomeFile path of the known proteome; a file with the
     *                     {@code .trie} extension is opened directly, anything else
     *                     is first converted with {@code TrieDB.prepDB}
     * @param outputFile   path of the output table to write
     */
    public CompareAUGUSTUSToKnown(String tableFile, String proteomeFile, String outputFile) {
        if (!Utils.IsFile(tableFile)) {
            ErrorThrower.ThrowError(1, tableFile);
        }
        this.tableFile = tableFile;
        if (!Utils.IsFile(proteomeFile)) {
            ErrorThrower.ThrowError(1, proteomeFile);
        }
        if (Utils.GetFileExtension(proteomeFile).equalsIgnoreCase(".trie")) {
            this.proteomeDB = new TrieDB(proteomeFile);
        } else {
            // Not a trie yet: build the database files and open the first one returned.
            String[] fileNames = TrieDB.prepDB(proteomeFile, null);
            this.proteomeDB = new TrieDB(fileNames[0]);
        }
        this.outputFileName = outputFile;
    }

    /**
     * Command-line entry point.  Requires {@code -r}, {@code -t} and {@code -w};
     * {@code -d} enables debug mode.  Prints the usage text and exits with a
     * non-zero status when a required option is missing.
     *
     * @param args raw command-line arguments
     */
    public static void main(String[] args) {
        String[] options = {"-r", "-t", "-w", "-d"};
        // Parallel to options; presumably flags which options take a value
        // (-d being a bare switch) -- confirm against Utils.ParseCommandLine.
        boolean[] values = {true, true, true, false};
        Hashtable<?, ?> commandLineArgs = Utils.ParseCommandLine(args, options, values);
        boolean hasRequired = commandLineArgs.containsKey("-r")
                && commandLineArgs.containsKey("-t")
                && commandLineArgs.containsKey("-w");
        if (!hasRequired) {
            System.err.println("ERROR: Must specify an augustus table file, a proteome file, and an output file");
            System.err.println(usageInfo);
            // A usage error is a failure: report it with a non-zero exit status.
            System.exit(1);
        }
        String tableFile = (String) commandLineArgs.get("-r");
        String proteomeFile = (String) commandLineArgs.get("-t");
        String outputFile = (String) commandLineArgs.get("-w");
        CompareAUGUSTUSToKnown c = new CompareAUGUSTUSToKnown(tableFile, proteomeFile, outputFile);
        if (commandLineArgs.containsKey("-d")) {
            c.debug = true;
        }
        c.compare();
    }

    /**
     * Opens the input table and output file, annotates every row, and closes
     * both files even if processing fails part-way through.
     */
    private void compare() {
        BufferedReader buf = Utils.openBufferedReader(this.tableFile);
        try {
            FileWriter outF = Utils.openFileWriter(this.outputFileName);
            try {
                annotateTable(buf, outF);
            } finally {
                Utils.closeFileWriter(outF, this.outputFileName);
            }
        } finally {
            Utils.closeBufferedReader(buf, this.tableFile);
        }
    }

    /**
     * Reads the table line by line and writes each line to the output, with
     * match columns appended to data rows.
     *
     * @param buf  reader positioned at the start of the table
     * @param outF writer for the annotated table
     * @throws IllegalArgumentException if a data row has too few tab-separated
     *                                  columns to contain the sequence column
     */
    private void annotateTable(BufferedReader buf, FileWriter outF) {
        // The database is not modified here, so its size is read once.
        int numProteins = this.proteomeDB.getNumProteins();
        int count = 0;
        for (String line = Utils.readNextLine(buf, this.tableFile);
                line != null;
                line = Utils.readNextLine(buf, this.tableFile)) {
            if (count % 100 == 0) {
                System.out.println("Processing line " + count + "...");
            }
            ++count;

            // trim() also strips leading/trailing tabs; both the echoed line and
            // the column split below operate on the trimmed text.
            line = line.trim();
            if (line.isEmpty() || line.charAt(0) == '#') {
                // Blank and comment lines are passed through unannotated.
                Utils.writeLine(outF, this.outputFileName, line + "\n");
                continue;
            }

            String[] bits = line.split("\t");
            if (bits.length <= SEQUENCE_COLUMN) {
                throw new IllegalArgumentException("Line " + count + " of " + this.tableFile + " has "
                        + bits.length + " tab-separated column(s); expected at least "
                        + (SEQUENCE_COLUMN + 1) + " (protein sequence in column " + (SEQUENCE_COLUMN + 1) + ")");
            }
            String predictedSeq = bits[SEQUENCE_COLUMN];

            MatchTally exact = new MatchTally();
            MatchTally substring = new MatchTally();
            MatchTally superString = new MatchTally();
            // An empty prediction is trivially "contained" in every sequence
            // (indexOf("") is 0), so it is reported as having no matches.
            if (!predictedSeq.isEmpty()) {
                for (int i = 0; i < numProteins; ++i) {
                    String knownSeq = this.proteomeDB.getProteinSequence(i);
                    String knownName = this.proteomeDB.getProteinName(i);
                    // Likewise an empty (or missing) known sequence would match
                    // every prediction as a substring; skip it.
                    if (knownSeq == null || knownSeq.isEmpty()) {
                        continue;
                    }
                    if (predictedSeq.equalsIgnoreCase(knownSeq)) {
                        exact.add(knownName);
                    } else if (predictedSeq.indexOf(knownSeq) >= 0) {
                        // Known protein lies inside the prediction.
                        substring.add(knownName);
                    } else if (knownSeq.indexOf(predictedSeq) >= 0) {
                        // Prediction lies inside the known protein.
                        superString.add(knownName);
                    }
                }
            }

            if (this.debug) {
                System.out.println(bits[0] + "." + bits[2] + "." + bits[3]);
                System.out.println("predicted: " + predictedSeq);
                System.out.println("exactMatches: " + exact.count);
                System.out.println("  " + exact.names);
                System.out.println("substringMatches: " + substring.count);
                System.out.println("  " + substring.names);
                System.out.println("superstringMatches: " + superString.count);
                System.out.println("  " + superString.names);
                Utils.WaitForEnter();
            }

            Utils.writeLine(outF, this.outputFileName, line
                    + "\t" + exact.count + "\t" + exact.names
                    + "\t" + substring.count + "\t" + substring.names
                    + "\t" + superString.count + "\t" + superString.names + "\n");
        }
    }

    /** Running count and comma-separated name list for one category of match. */
    private static final class MatchTally {
        private final StringBuilder names = new StringBuilder();
        private int count = 0;

        /** Records one more match, appending its name to the comma-separated list. */
        void add(String name) {
            if (this.names.length() > 0) {
                this.names.append(',');
            }
            this.names.append(name);
            ++this.count;
        }
    }
}

