/*
 * Decompiled with CFR 0.152.
 */
package proteogenomicUtils;

import basicUtils.GFFFile;
import basicUtils.Utils;
import errorUtils.ErrorThrower;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileWriter;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Hashtable;
import proteogenomicUtils.GenomicLocator;
import proteogenomicUtils.ProteogenomicUtils;
import trieUtils.TrieDB;

public class NovelPredictionAggregator {
    // Numeric identifiers of the pipeline steps. aggregate() passes the same
    // values (1-4) as literals to the corresponding step methods.
    private static final int AUGUSTUS_SHIFT_STEP = 1;
    private static final int REDUCE_STEP = 2;
    private static final int MAP_NOVEL_STEP = 3;
    private static final int FIND_KNOWN_STEP = 4;
    // Upper bound accepted by the constructor for the resume step (-s).
    private static int NUM_STEPS = 5;
    public static String usageInfo = "proteogenomicUtils.NovelPredictionAggregator version 2012.04.18\nThe goal of the script is to create a non-redundant list of predicted transcripts.  The process proceeds\nin several steps: \n[STEP 1]: Shift instance-specific gene coordinates to global coordinates\nGiven a directory of input files, in this step the script finds all augustus prediction results (SEQNAME_START-END.augustus.gff) and \nshifts the START.  The shifted GFF files are written to the output directory\n[STEP 2]: Compile non-redundant list of transcripts\nA unique locus identifier is created for each 'gene' feature in the augustus predictions (SEQNAME_START_END).  Transcripts are defined as\nCDS coordinate list at a gene locus.  This step also removes proteins with stop codons\n[STEP 3]: Find supporting peptides.\nGiven a directory of novel peptide location files, find peptides that support the predicted results\nOnly sequence matching is done, not coordinate matching\n[STEP 4]: Alignment to known proteins\nMatching to known genes is done to determine if each predicted transcript is a completely \nnovel transcript, a superstring of a known transcript, a gene fusion, or a gene fusion.  
The union of transcripts is performed\nto arrive at a complete gene model with both novel and known isoforms\n[STEP 5]: Expression of isoforms\nIdentify genes with two or more isoforms identified\n\nArguments:\n -a [DIR] Directory containing augustus prediction results\n -o [DIR] An output directory to write all intermediate results to\n -n [DIR] Directory of novel peptide locations\n(-k [DIR] Directory of known peptide sequences (InsPecT format))\n(-p [FILE] FASTA or trie file of known protein sequences)\n(-g [FILE] GFF file of known protein coordinates)\n(-t [FILE] Translation file for mapping GFF transcript names to FASTA protein names)\n(-y [NUM] Column in the translation file containing the GFF transcript name (default: 2))\n(-u [NUM] Column in the translation file containing the FASTA protein name (default: 3))\n(-s [NUM] Resume from a particular step)\n";
    // Directory of augustus prediction results (-a); must exist.
    private String inputDir = null;
    // Directory for intermediate and final results (-o); created by the constructor if missing.
    private String outputDir = null;
    // Directory of novel peptide location files (-n); must exist.
    private String novelDir = null;
    // Optional directory of known peptide sequences (-k); only stored by the visible code.
    private String knownDir = null;
    // Database object built from the optional known-protein file (-p); null when not supplied.
    private TrieDB knownDB = null;
    // Path of the optional GFF file of known protein coordinates (-g).
    private String knownGFF = null;
    // NOTE(review): the visible constructor never assigns this field; only the
    // map loaded from the translation file is kept.
    private String tranFile = null;
    // Map loaded from the translation file (-t); findOverlappingKnownGenes looks up
    // lower-cased GFF transcript names in it. Null when no file is given.
    private Hashtable tranToProtMap = null;
    // First step to execute; validated against 1..NUM_STEPS by the constructor.
    private int startStep = 1;

    public NovelPredictionAggregator(String inputDir, String outputDir, String novelDir, String knownDir, String knownDB, String knownGFF, String tranFile, int tranCol, int protCol, int startStep) {
        System.out.println("[STEP 0]: Initialization");
        if (!Utils.IsDir(inputDir)) {
            ErrorThrower.ThrowError(1, inputDir);
        }
        this.inputDir = inputDir;
        if (!Utils.IsDir(outputDir)) {
            System.out.println(" Creating directory " + outputDir);
            Utils.MakeDir(outputDir);
        }
        this.outputDir = outputDir;
        if (!Utils.IsDir(novelDir)) {
            ErrorThrower.ThrowError(1, novelDir);
        }
        this.novelDir = novelDir;
        if (knownDir != null && !Utils.IsDir(knownDir)) {
            ErrorThrower.ThrowError(1, knownDir);
        }
        this.knownDir = knownDir;
        if (knownDB != null && !Utils.IsFile(knownDB)) {
            ErrorThrower.ThrowError(1, knownDB);
        }
        if (knownDB != null) {
            this.knownDB = TrieDB.createDatabaseObject(knownDB);
            if (this.knownDB == null) {
                ErrorThrower.ThrowError(1, knownDB);
            }
        }
        if (knownGFF != null && !Utils.IsFile(knownGFF)) {
            ErrorThrower.ThrowError(1, knownGFF);
        }
        this.knownGFF = knownGFF;
        if (tranCol < 0) {
            ErrorThrower.ThrowError(4, "GFF transcript column must be non-negative");
        }
        if (protCol < 0) {
            ErrorThrower.ThrowError(4, "FASTA protein column must be non-negative");
        }
        if (tranFile != null && !Utils.IsFile(tranFile)) {
            ErrorThrower.ThrowError(1, tranFile);
        }
        if (tranFile != null) {
            System.out.println(" Loading translation file " + tranFile);
            this.tranToProtMap = ProteogenomicUtils.loadTranslationFile(tranFile, tranCol, protCol);
            System.out.println(" - loaded " + this.tranToProtMap.size() + " entries");
        }
        if (startStep < 1 || startStep > NUM_STEPS) {
            ErrorThrower.ThrowError(4, "Start step must be between 1 and " + NUM_STEPS);
        }
        this.startStep = startStep;
    }

    /**
     * Command-line entry point: parses the options, applies the defaults
     * (translation columns 2 and 3, start step 1), builds the aggregator and runs it.
     *
     * @param args raw command-line arguments; see {@code usageInfo}
     */
    public static void main(String[] args) {
        String[] options = new String[]{"-a", "-o", "-n", "-k", "-p", "-g", "-t", "-y", "-u", "-s"};
        boolean[] values = new boolean[]{true, true, true, true, true, true, true, true, true, true};
        Hashtable parsed = Utils.ParseCommandLine(args, options, values);

        // The three directory options are mandatory.
        if (!parsed.containsKey("-a") || !parsed.containsKey("-o") || !parsed.containsKey("-n")) {
            System.err.println(usageInfo);
            ErrorThrower.ThrowError(2, "Must specify an augustus prediction directory (-a), an output directory (-o), and a novel peptide location directory (-n)");
        }
        String inputDir = (String)parsed.get("-a");
        String outputDir = (String)parsed.get("-o");
        String novelDir = (String)parsed.get("-n");

        // Optional path-valued options stay null when absent.
        String knownDir = parsed.containsKey("-k") ? (String)parsed.get("-k") : null;
        String knownDB = parsed.containsKey("-p") ? (String)parsed.get("-p") : null;
        String knownGFF = parsed.containsKey("-g") ? (String)parsed.get("-g") : null;
        String tranFile = parsed.containsKey("-t") ? (String)parsed.get("-t") : null;

        // Optional integer options keep their default when absent.
        int tranCol = 2;
        if (parsed.containsKey("-y")) {
            String rawTranCol = (String)parsed.get("-y");
            try {
                tranCol = Integer.parseInt(rawTranCol);
            }
            catch (Exception E) {
                ErrorThrower.ThrowError(4, "GFF transcript column (-y) must be an integer");
            }
        }
        int protCol = 3;
        if (parsed.containsKey("-u")) {
            String rawProtCol = (String)parsed.get("-u");
            try {
                protCol = Integer.parseInt(rawProtCol);
            }
            catch (Exception E) {
                ErrorThrower.ThrowError(4, "FASTA protein column (-u) must be an integer");
            }
        }
        int startStep = 1;
        if (parsed.containsKey("-s")) {
            String rawStartStep = (String)parsed.get("-s");
            try {
                startStep = Integer.parseInt(rawStartStep);
            }
            catch (Exception E) {
                ErrorThrower.ThrowError(4, "Start step (-s) must be an integer");
            }
        }

        NovelPredictionAggregator aggregator = new NovelPredictionAggregator(inputDir, outputDir, novelDir, knownDir, knownDB, knownGFF, tranFile, tranCol, protCol, startStep);
        aggregator.aggregate();
    }

    /**
     * Runs the pipeline steps in order, starting at {@code startStep}.
     *
     * <p>The output directory is prepared only when starting from step 1. Every
     * step that is not executed is reported as skipped. Step 4 additionally
     * requires both the known protein database and the known GFF file. No fifth
     * step is dispatched from this method.
     */
    private void aggregate() {
        final int first = this.startStep;
        if (first == 1) {
            this.prepareOutputDirectory();
        }

        if (first > 1) {
            System.out.println("Skipping STEP 1");
        } else {
            this.shiftAugustusCoords(1);
        }

        if (first > 2) {
            System.out.println("Skipping STEP 2");
        } else {
            this.createNonRedundantList(2);
        }

        if (first > 3) {
            System.out.println("Skipping STEP 3");
        } else {
            this.mapNovelPeptides(3);
        }

        boolean haveKnownAnnotation = this.knownDB != null && this.knownGFF != null;
        if (first > 4 || !haveKnownAnnotation) {
            System.out.println("Skipping STEP 4");
        } else {
            this.findOverlappingKnownGenes(4);
        }
    }

    /**
     * Step 4: annotates every predicted transcript with the known transcripts it
     * relates to.
     *
     * <p>Reads {@code allTranscripts.nr.withPeptides.txt} from the output directory
     * and writes {@code allTranscripts.nr.withPeptides.withKnown.txt}, appending four
     * columns to every data row (each a comma-separated list of known transcript
     * names, or {@code *} when empty):
     * <ul>
     *   <li>ExactMatchKnown - overlapping same-strand transcripts whose protein equals the prediction</li>
     *   <li>SubStringKnown - overlapping same-strand transcripts whose protein is contained in the prediction</li>
     *   <li>OverlapKnown - all other overlapping same-strand transcripts</li>
     *   <li>AntisenseOverlapKnown - overlapping transcripts on the opposite strand</li>
     * </ul>
     * Blank lines are copied through and {@code #} header lines get the new column names.
     *
     * <p>The locus name in the first column has the form
     * {@code SEQNAME_START_END_STRAND}. It is parsed from the right so that sequence
     * names which themselves contain underscores are handled correctly.
     *
     * @param currStep step number, used only for the progress message
     */
    private void findOverlappingKnownGenes(int currStep) {
        boolean debug = false;
        System.out.println("[STEP " + currStep + "]: Finding relationships with known genes");
        System.out.println("Loading gene coordinates from " + this.knownGFF);
        Hashtable GeneCoordHash = this.loadGeneCoords("mrna");
        String tableFileName = this.outputDir + File.separator + "allTranscripts.nr.withPeptides.txt";
        String outFile = this.outputDir + File.separator + "allTranscripts.nr.withPeptides.withKnown.txt";
        BufferedReader buf = Utils.openBufferedReader(tableFileName);
        String Line = Utils.readNextLine(buf, tableFileName);
        FileWriter outF = Utils.openFileWriter(outFile);
        while (Line != null) {
            Line = Line.trim();
            if (Line.length() == 0) {
                Utils.writeLine(outF, outFile, Line + "\n");
                Line = Utils.readNextLine(buf, tableFileName);
                continue;
            }
            if (Line.charAt(0) == '#') {
                Utils.writeLine(outF, outFile, Line + "\tExactMatchKnown\tSubStringKnown\tOverlapKnown\tAntisenseOverlapKnown\n");
                Line = Utils.readNextLine(buf, tableFileName);
                continue;
            }
            String[] bits = Line.split("\t");
            String sequence = bits[3];

            // Locus name is SEQNAME_START_END_STRAND; the last three fields are
            // fixed, everything before them (underscores included) is the sequence name.
            String[] nameBits = bits[0].split("_");
            if (nameBits.length < 4) {
                ErrorThrower.ThrowError(4, "Locus name '" + bits[0] + "' must have the form SEQNAME_START_END_STRAND");
            }
            int last = nameBits.length - 1;
            String strand = nameBits[last];
            int end = Integer.parseInt(nameBits[last - 1]);
            int start = Integer.parseInt(nameBits[last - 2]);
            StringBuilder seqNameBuf = new StringBuilder(nameBits[0]);
            for (int k = 1; k < last - 2; ++k) {
                seqNameBuf.append('_').append(nameBits[k]);
            }
            String seqName = seqNameBuf.toString();

            String overlapSameStrand = "";
            String subString = "";
            String exactString = "";
            String overlapOppStrand = "";

            // Same-strand known transcripts: classify by protein sequence relationship.
            String key = seqName + "_" + strand;
            if (!GeneCoordHash.containsKey(key)) {
                ErrorThrower.ThrowWarning(13, key);
            } else {
                ArrayList genes = (ArrayList)GeneCoordHash.get(key);
                for (int i = 0; i < genes.size(); ++i) {
                    String[] geneInfo = (String[])genes.get(i);
                    int geneStart = Integer.parseInt(geneInfo[1]);
                    int geneEnd = Integer.parseInt(geneInfo[2]);
                    if (!Utils.HasOverlap(geneStart, geneEnd, start, end)) {
                        continue;
                    }
                    // Translate the GFF transcript name to the FASTA protein name when a map is available.
                    String pName = geneInfo[0];
                    if (this.tranToProtMap != null) {
                        String lookup = geneInfo[0].toLowerCase();
                        if (!this.tranToProtMap.containsKey(lookup)) {
                            ErrorThrower.ThrowWarning(13, "Unable to find protein name for '" + geneInfo[0] + "'!");
                        } else {
                            pName = (String)this.tranToProtMap.get(lookup);
                        }
                    }
                    int protID = this.knownDB.getProteinIDFromPartial(pName);
                    if (protID < 0) {
                        ErrorThrower.ThrowError(15, "Cannot find protein sequence for '" + pName + "'!");
                    }
                    String geneSeq = this.knownDB.getProteinSequence(protID);
                    if (geneSeq.equals(sequence)) {
                        exactString = NovelPredictionAggregator.appendGeneName(exactString, geneInfo[0]);
                    } else if (sequence.indexOf(geneSeq) >= 0) {
                        subString = NovelPredictionAggregator.appendGeneName(subString, geneInfo[0]);
                    } else {
                        overlapSameStrand = NovelPredictionAggregator.appendGeneName(overlapSameStrand, geneInfo[0]);
                    }
                }
            }

            // Opposite-strand known transcripts: coordinate overlap only.
            String oppStrand = "";
            if (strand.equals("1")) {
                oppStrand = "0";
            } else if (strand.equals("0")) {
                oppStrand = "1";
            } else {
                ErrorThrower.ThrowError(4, "Strand must be '1', or '0'");
            }
            String oppKey = seqName + "_" + oppStrand;
            if (!GeneCoordHash.containsKey(oppKey)) {
                ErrorThrower.ThrowWarning(13, oppKey);
            } else {
                ArrayList genes = (ArrayList)GeneCoordHash.get(oppKey);
                for (int i = 0; i < genes.size(); ++i) {
                    String[] geneInfo = (String[])genes.get(i);
                    int geneStart = Integer.parseInt(geneInfo[1]);
                    int geneEnd = Integer.parseInt(geneInfo[2]);
                    if (Utils.HasOverlap(geneStart, geneEnd, start, end)) {
                        overlapOppStrand = NovelPredictionAggregator.appendGeneName(overlapOppStrand, geneInfo[0]);
                    }
                }
            }

            // Empty columns are written as "*".
            if (exactString.length() == 0) {
                exactString = "*";
            }
            if (subString.length() == 0) {
                subString = "*";
            }
            if (overlapSameStrand.length() == 0) {
                overlapSameStrand = "*";
            }
            if (overlapOppStrand.length() == 0) {
                overlapOppStrand = "*";
            }
            String newLine = Line + "\t" + exactString + "\t" + subString + "\t" + overlapSameStrand + "\t" + overlapOppStrand + "\n";
            if (debug) {
                System.out.println(bits[0]);
                System.out.println("exact: " + exactString);
                System.out.println("sub: " + subString);
                System.out.println("overlap: " + overlapSameStrand);
                System.out.println("antisense: " + overlapOppStrand);
                Utils.WaitForEnter();
            }
            Utils.writeLine(outF, outFile, newLine);
            Line = Utils.readNextLine(buf, tableFileName);
        }
        Utils.closeFileWriter(outF, outFile);
        Utils.closeBufferedReader(buf, tableFileName);
    }

    /** Appends {@code name} to a comma-separated list without producing a leading comma. */
    private static String appendGeneName(String list, String name) {
        return list.length() == 0 ? String.valueOf(name) : list + "," + name;
    }

    /**
     * Loads the coordinates of all features of the given type from the known GFF file.
     *
     * <p>Blank lines and lines starting with {@code #} are ignored. For every
     * matching feature the transcript name is taken from the first {@code key=value}
     * attribute whose key contains "id" (case-insensitive), with double quotes removed.
     *
     * @param featureType GFF feature type to load, compared case-insensitively
     * @return map from {@code SEQNAME_STRAND} (strand as returned by
     *         {@code GFFFile.ParseStrand}) to an {@code ArrayList} of
     *         {@code String[]{name, start, end}} entries
     */
    private Hashtable loadGeneCoords(String featureType) {
        Hashtable<String, ArrayList> ret = new Hashtable<String, ArrayList>();
        BufferedReader buf = Utils.openBufferedReader(this.knownGFF);
        int count = 0;
        for (String Line = Utils.readNextLine(buf, this.knownGFF); Line != null; Line = Utils.readNextLine(buf, this.knownGFF)) {
            Line = Line.trim();
            if (Line.length() == 0 || Line.charAt(0) == '#') {
                continue;
            }
            String[] Bits = Line.split("\t");
            // Only parse name/coordinates for the requested feature type.
            if (!Bits[GFFFile.GFFColumns.FeatureType].equalsIgnoreCase(featureType)) {
                continue;
            }
            String SeqName = Bits[GFFFile.GFFColumns.SequenceName];
            String[] NoteElements = Bits[GFFFile.GFFColumns.Attributes].split(";");

            // NOTE(review): any key containing "id" matches (e.g. "gene_id"), and the
            // first match wins - confirm this looseness is intended.
            String GeneName = null;
            for (int j = 0; j < NoteElements.length; ++j) {
                String[] Bits2 = NoteElements[j].split("=");
                if (Bits2.length < 2) {
                    // Attribute without a value; it cannot supply a name.
                    continue;
                }
                if (Bits2[0].toLowerCase().indexOf("id") >= 0) {
                    GeneName = Bits2[1].replaceAll("\"", "");
                    break;
                }
            }
            if (GeneName == null) {
                ErrorThrower.ThrowError(17, "Require 'ID' to identify transcript name");
            }

            int Start = Integer.parseInt(Bits[GFFFile.GFFColumns.Start]);
            int End = Integer.parseInt(Bits[GFFFile.GFFColumns.End]);
            String[] info = new String[]{GeneName, String.valueOf(Start), String.valueOf(End)};
            String key = SeqName + "_" + GFFFile.ParseStrand(Bits[GFFFile.GFFColumns.Strand]);
            ArrayList list = ret.get(key);
            if (list == null) {
                list = new ArrayList();
                ret.put(key, list);
            }
            list.add(info);
            ++count;
        }
        Utils.closeBufferedReader(buf, this.knownGFF);
        System.out.println(" Loaded " + count + " transcript locations");
        return ret;
    }

    /**
     * Step 3: maps the novel peptides onto the non-redundant predicted proteins.
     *
     * <p>Peptides are loaded from the novel peptide directory and located in
     * {@code allTranscripts.nr.fasta}. Each located peptide is classified as
     * <ul>
     *   <li>protein-unique - exactly one location,</li>
     *   <li>locus-unique - several locations, all in transcripts of one gene locus,</li>
     *   <li>shared - locations in more than one gene locus.</li>
     * </ul>
     * {@code allTranscripts.nr.txt} is then rewritten as
     * {@code allTranscripts.nr.withPeptides.txt} with three extra columns
     * (NovelUnique, NovelLocusUnique, NovelShared), each a comma-separated peptide
     * list or {@code *} when empty.
     *
     * <p>Protein names have the form {@code LOCUS.TRANSCRIPTNUM}; the locus is
     * everything before the last dot, so sequence names containing dots are kept intact.
     *
     * @param currStep step number, used only for the progress message
     */
    private void mapNovelPeptides(int currStep) {
        System.out.println("[STEP " + currStep + "]: Mapping novel peptides");
        String fastaFileName = this.outputDir + File.separator + "allTranscripts.nr.fasta";
        String tableFileName = this.outputDir + File.separator + "allTranscripts.nr.txt";
        TrieDB novelDB = TrieDB.createDatabaseObject(fastaFileName);
        // Report the directory that is actually read.
        System.out.println(" Loading novel peptides from " + this.novelDir);
        String[] peptides = GenomicLocator.GenomicColumns.LoadAllPeptides(this.novelDir);
        System.out.println(" - Loaded " + peptides.length + " novel peptide sequences");
        Hashtable locations = novelDB.GetAllLocations(peptides);

        // protein name -> {protein-unique, locus-unique, shared} comma-separated peptide lists
        Hashtable<String, String[]> protein2Peptides = new Hashtable<String, String[]>();
        int peptidesNotInPrediction = 0;
        int peptidesInOnePrediction = 0;
        int peptidesInOneGene = 0;
        for (int i = 0; i < peptides.length; ++i) {
            ArrayList locs = (ArrayList)locations.get(peptides[i]);
            if (locs == null || locs.size() == 0) {
                ++peptidesNotInPrediction;
                continue;
            }
            HashSet<String> geneNames = new HashSet<String>();
            ArrayList<String> proteins = new ArrayList<String>();
            for (int j = 0; j < locs.size(); ++j) {
                Object[] loc = (Object[])locs.get(j);
                String proteinName = (String)loc[3];
                proteins.add(proteinName);
                // Strip only the trailing ".TRANSCRIPTNUM" to obtain the locus name.
                int dot = proteinName.lastIndexOf('.');
                geneNames.add(dot < 0 ? proteinName : proteinName.substring(0, dot));
            }

            // Column index of the list this peptide belongs to.
            // NOTE(review): uniqueness is decided by the number of locations, so a peptide
            // repeated inside a single protein is not counted as protein-unique.
            int column;
            if (locs.size() == 1) {
                ++peptidesInOnePrediction;
                column = 0;
            } else if (geneNames.size() == 1) {
                ++peptidesInOneGene;
                column = 1;
            } else {
                column = 2;
            }
            for (int j = 0; j < proteins.size(); ++j) {
                String currProtein = proteins.get(j);
                String[] peptidesForProtein = protein2Peptides.get(currProtein);
                if (peptidesForProtein == null) {
                    peptidesForProtein = new String[]{"", "", ""};
                    protein2Peptides.put(currProtein, peptidesForProtein);
                }
                peptidesForProtein[column] = NovelPredictionAggregator.appendPeptide(peptidesForProtein[column], peptides[i]);
            }
        }
        System.out.println(" - Peptides not in a prediction: " + peptidesNotInPrediction);
        System.out.println(" - Peptides in a single protein: " + peptidesInOnePrediction);
        System.out.println(" - Peptides in a single gene: " + peptidesInOneGene);
        System.out.println(" - Peptides in multiple genes: " + (peptides.length - peptidesNotInPrediction - peptidesInOnePrediction - peptidesInOneGene));

        String outFile = this.outputDir + File.separator + "allTranscripts.nr.withPeptides.txt";
        System.out.println(" Writing updated table " + outFile);
        FileWriter outF = Utils.openFileWriter(outFile);
        BufferedReader buf = Utils.openBufferedReader(tableFileName);
        int proteinsWithUnique = 0;
        int proteinsWithNone = 0;
        int proteinsWithLocUnique = 0;
        int proteinsWithShared = 0;
        for (String Line = Utils.readNextLine(buf, tableFileName); Line != null; Line = Utils.readNextLine(buf, tableFileName)) {
            Line = Line.trim();
            if (Line.length() == 0) {
                Utils.writeLine(outF, outFile, Line + "\n");
                continue;
            }
            if (Line.charAt(0) == '#') {
                Utils.writeLine(outF, outFile, Line + "\tNovelUnique\tNovelLocusUnique\tNovelShared\n");
                continue;
            }
            String[] bits = Line.split("\t");
            String pName = bits[0] + "." + bits[1];
            String[] peptidesForProtein = protein2Peptides.get(pName);
            if (peptidesForProtein == null) {
                ++proteinsWithNone;
                Utils.writeLine(outF, outFile, Line + "\t*\t*\t*\n");
                continue;
            }
            // Classify by the best evidence BEFORE empty columns are rendered as "*".
            if (peptidesForProtein[0].length() > 0) {
                ++proteinsWithUnique;
            } else if (peptidesForProtein[1].length() > 0) {
                ++proteinsWithLocUnique;
            } else {
                ++proteinsWithShared;
            }
            String unique = peptidesForProtein[0].length() == 0 ? "*" : peptidesForProtein[0];
            String locUnique = peptidesForProtein[1].length() == 0 ? "*" : peptidesForProtein[1];
            String shared = peptidesForProtein[2].length() == 0 ? "*" : peptidesForProtein[2];
            Utils.writeLine(outF, outFile, Line + "\t" + unique + "\t" + locUnique + "\t" + shared + "\n");
        }
        Utils.closeBufferedReader(buf, tableFileName);
        Utils.closeFileWriter(outF, outFile);
        System.out.println(" - Proteins with unique peptide(s): " + proteinsWithUnique);
        System.out.println(" - Proteins with loc-unique peptide(s): " + proteinsWithLocUnique);
        System.out.println(" - Proteins with shared peptide(s): " + proteinsWithShared);
        System.out.println(" - Proteins with no peptide(s): " + proteinsWithNone);
    }

    /** Appends {@code peptide} to a comma-separated list without producing a leading comma. */
    private static String appendPeptide(String list, String peptide) {
        return list.length() == 0 ? String.valueOf(peptide) : list + "," + peptide;
    }

    /**
     * Step 2: compiles the non-redundant list of predicted transcripts.
     *
     * <p>Every file in {@code <outputDir>/augustus} is scanned. A gene locus is
     * named {@code SEQNAME_START_END_STRAND} from its "gene" feature, CDS features
     * are collected as a comma-separated {@code start-end} list, and the protein is
     * read from the {@code protein sequence = [...]} comment block, which may span
     * several lines or open and close on a single line. A transcript is kept when
     * its protein contains no 'X' and no transcript with the same CDS coordinate
     * list is already stored for the locus.
     *
     * <p>Results are written to {@code allTranscripts.nr.txt} (table) and
     * {@code allTranscripts.nr.fasta} (proteins named {@code LOCUS.TRANSCRIPTNUM}).
     *
     * @param currStep step number, used only for the progress message
     */
    private void createNonRedundantList(int currStep) {
        System.out.println("[STEP " + currStep + "]: Compiling non-redundant database");
        boolean debug = false;
        String augustusDir = this.outputDir + File.separator + "augustus";
        if (!Utils.IsDir(augustusDir)) {
            ErrorThrower.ThrowErrorCustum(100, "Expected directory '" + augustusDir + "' does not exist!");
        }
        String tableFileName = this.outputDir + File.separator + "allTranscripts.nr.txt";
        FileWriter outF = Utils.openFileWriter(tableFileName);
        Utils.writeLine(outF, tableFileName, "#GeneName\tTranscriptNum\tCDSCoords\tSequence\n");
        String fastaFileName = this.outputDir + File.separator + "allTranscripts.nr.fasta";
        FileWriter outFasta = Utils.openFileWriter(fastaFileName);
        String[] augustusFiles = Utils.ListDir(augustusDir);
        Hashtable<String, ArrayList> geneToTranscript = new Hashtable<String, ArrayList>();

        for (int i = 0; i < augustusFiles.length; ++i) {
            String fileName = augustusFiles[i];
            if (debug) {
                System.out.println("[DEBUG] Reading file " + fileName);
            }
            BufferedReader buf = Utils.openBufferedReader(fileName);
            int transNum = 1;
            String currGeneName = "";
            String currCDSCoords = "";
            String currProteinSequence = "";
            boolean readingProtein = false;
            for (String line = Utils.readNextLine(buf, fileName); line != null; line = Utils.readNextLine(buf, fileName)) {
                line = line.trim();
                if (line.length() == 0) {
                    continue;
                }
                // True when this line contributed protein residues.
                boolean sequenceLine = false;
                if (line.indexOf("start gene") >= 0) {
                    if (debug) {
                        System.out.println("[DEBUG] Found the start of a gene");
                        System.out.println("[DEBUG] " + line);
                    }
                    transNum = 1;
                    currProteinSequence = "";
                    currCDSCoords = "";
                    currGeneName = "";
                    readingProtein = false;
                } else if (line.indexOf("protein sequence") >= 0) {
                    if (debug) {
                        System.out.println("[DEBUG] Found the start of a protein sequence");
                        System.out.println("[DEBUG] " + line);
                    }
                    currProteinSequence = line.substring(line.indexOf("[") + 1);
                    readingProtein = true;
                    sequenceLine = true;
                } else if (readingProtein) {
                    if (debug) {
                        System.out.println("[DEBUG] Continuing a protein sequence");
                        System.out.println("[DEBUG] " + line);
                    }
                    // Continuation lines carry the residues in the second space-separated token.
                    currProteinSequence = currProteinSequence + line.split(" ")[1];
                    sequenceLine = true;
                } else if (line.indexOf("end gene") >= 0) {
                    if (debug) {
                        System.out.println("[DEBUG] Found the end of a gene");
                        System.out.println("[DEBUG] " + line);
                    }
                    if (currProteinSequence.length() > 0) {
                        this.recordTranscript(geneToTranscript, currGeneName, transNum, currCDSCoords, currProteinSequence, debug);
                    }
                    currGeneName = "";
                    currProteinSequence = "";
                    currCDSCoords = "";
                    transNum = 1;
                    readingProtein = false;
                } else if (line.charAt(0) != '#') {
                    String[] bits = line.split("\t");
                    String featureType = bits[GFFFile.GFFColumns.FeatureType];
                    if (featureType.equalsIgnoreCase("gene")) {
                        currGeneName = bits[GFFFile.GFFColumns.SequenceName] + "_" + bits[GFFFile.GFFColumns.Start] + "_" + bits[GFFFile.GFFColumns.End] + "_" + GFFFile.ParseStrand(bits[GFFFile.GFFColumns.Strand]);
                    } else if (featureType.equalsIgnoreCase("cds")) {
                        String segment = bits[GFFFile.GFFColumns.Start] + "-" + bits[GFFFile.GFFColumns.End];
                        currCDSCoords = currCDSCoords.length() == 0 ? segment : currCDSCoords + "," + segment;
                        if (debug) {
                            System.out.println("[DEBUG] Found a CDS coord");
                            System.out.println("[DEBUG] " + line);
                        }
                    }
                }

                // The closing bracket ends the protein. Checking it on the opening line as
                // well handles sequences that fit on one line without consuming the next line.
                if (sequenceLine && currProteinSequence.indexOf(']') >= 0) {
                    currProteinSequence = currProteinSequence.substring(0, currProteinSequence.indexOf(']'));
                    this.recordTranscript(geneToTranscript, currGeneName, transNum, currCDSCoords, currProteinSequence, debug);
                    currProteinSequence = "";
                    currCDSCoords = "";
                    ++transNum;
                    readingProtein = false;
                }
            }
            Utils.closeBufferedReader(buf, fileName);
        }

        int count = 0;
        System.out.println(" Loaded " + geneToTranscript.size() + " genes");
        String[] genes = Utils.GetHashtableKeysString(geneToTranscript);
        for (int g = 0; g < genes.length; ++g) {
            ArrayList transcripts = (ArrayList)geneToTranscript.get(genes[g]);
            for (int j = 0; j < transcripts.size(); ++j) {
                String[] currTran = (String[])transcripts.get(j);
                String outLine = genes[g] + "\t" + currTran[0] + "\t" + currTran[1] + "\t" + currTran[2] + "\n";
                Utils.writeLine(outF, tableFileName, outLine);
                Utils.writeLine(outFasta, fastaFileName, ">" + genes[g] + "." + currTran[0] + "\n");
                Utils.writeLine(outFasta, fastaFileName, currTran[2] + "\n");
                ++count;
            }
        }
        Utils.closeFileWriter(outF, tableFileName);
        // Close the FASTA writer itself (not the table writer a second time) so its contents are flushed.
        Utils.closeFileWriter(outFasta, fastaFileName);
        System.out.println(" Loaded " + count + " transcripts");
    }

    /**
     * Stores a transcript for a gene locus unless its protein contains 'X' or a
     * transcript with the same CDS coordinate list is already stored for that locus.
     */
    private void recordTranscript(Hashtable<String, ArrayList> geneToTranscript, String geneName, int transNum, String cdsCoords, String proteinSequence, boolean debug) {
        ArrayList transcripts = geneToTranscript.containsKey(geneName) ? (ArrayList)geneToTranscript.get(geneName) : new ArrayList();
        String[] newTranscript = new String[]{String.valueOf(transNum), cdsCoords, proteinSequence};
        if (debug) {
            System.out.println("[DEBUG] Considering a new transcript: " + geneName + " " + newTranscript[0] + " " + newTranscript[1] + " " + newTranscript[2]);
        }
        if (proteinSequence.indexOf('X') < 0 && !this.transcriptInList(newTranscript, transcripts)) {
            if (debug) {
                System.out.println("[DEBUG] This is a new transcript being added to the other " + transcripts.size() + " transcripts");
                Utils.WaitForEnter();
            }
            transcripts.add(newTranscript);
            geneToTranscript.put(geneName, transcripts);
        } else if (debug) {
            System.out.println("[DEBUG] This transcripts is a duplicate!");
            Utils.WaitForEnter();
        }
    }

    /**
     * Reports whether the list already holds a transcript whose element at
     * index 1 matches that of {@code newTranscript}, ignoring case.
     * Only index 1 is compared (presumably the CDS coordinate string; confirm
     * against the code that builds the transcript arrays); the other elements
     * play no part in the duplicate check.
     *
     * @param newTranscript candidate transcript record; index 1 is read
     * @param transcripts   previously accepted transcript records (each a
     *                      {@code String[]}); may be {@code null} or empty
     * @return {@code true} if some stored record matches at index 1,
     *         {@code false} otherwise (including for a null or empty list)
     */
    private boolean transcriptInList(String[] newTranscript, ArrayList transcripts) {
        if (transcripts != null) {
            for (Object entry : transcripts) {
                String[] known = (String[]) entry;
                if (known[1].equalsIgnoreCase(newTranscript[1])) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Runs the coordinate-shifting step over every augustus result file found
     * in {@code this.inputDir}.
     *
     * <p>A file is processed only when its base name has the shape
     * {@code NAME_START-END.augustus.gff}: splitting on '.' must give exactly
     * three parts (the last two being "augustus" and "gff", case-insensitive)
     * and splitting the first part on '_' must give exactly two parts. The
     * text before the first '-' of the second part is parsed as the integer
     * start and handed to {@code GFFFile.shiftGFFCoords} together with the
     * input path and an output path of the same base name inside
     * {@code <outputDir>/augustus}.
     *
     * <p>Files that do not match the pattern are skipped. A file that matches
     * the pattern but whose start is missing or not an integer is also skipped
     * (with a warning) instead of aborting the whole step with an exception.
     *
     * @param currStep step number, used only in the progress message
     */
    private void shiftAugustusCoords(int currStep) {
        System.out.println("[STEP " + currStep + "]: Shifting augustus coordintates");
        String newOutputDir = String.valueOf(this.outputDir) + File.separator + "augustus";
        System.out.println(" Making directory " + newOutputDir);
        Utils.MakeDir(newOutputDir);
        boolean debug = false;
        String[] fileList = Utils.ListDir(this.inputDir);
        int count = 0;
        for (String inputFile : fileList) {
            String baseName = Utils.GetBaseName(inputFile);
            if (debug) {
                System.out.println("[DEBUG] Considering file " + baseName);
            }

            // Expect exactly <prefix>.augustus.gff
            String[] fileBits = baseName.split("[.]");
            if (fileBits.length != 3) {
                if (debug) {
                    System.out.println("[DEBUG] fileBits has length=" + fileBits.length);
                    Utils.WaitForEnter();
                }
                continue;
            }
            if (!fileBits[1].equalsIgnoreCase("augustus") || !fileBits[2].equalsIgnoreCase("gff")) {
                if (debug) {
                    System.out.println("[DEBUG] file does not contain correct names at " + fileBits[1] + " and " + fileBits[2]);
                    Utils.WaitForEnter();
                }
                continue;
            }

            // Expect the prefix to be exactly <name>_<start>-<end>
            String[] nameBits = fileBits[0].split("_");
            if (nameBits.length != 2) {
                if (debug) {
                    System.out.println("[DEBUG] nameBits has length=" + nameBits.length);
                    Utils.WaitForEnter();
                }
                continue;
            }

            // split("-") drops trailing empty strings, so a bare "-" yields an
            // empty array; treat that like any other unparseable start.
            String[] coords = nameBits[1].split("-");
            String startText = coords.length > 0 ? coords[0] : "";
            int start;
            try {
                start = Integer.parseInt(startText);
            } catch (NumberFormatException e) {
                System.out.println(" WARNING: Skipping " + baseName + ": start coordinate '" + startText + "' is not an integer");
                continue;
            }

            String newFileName = String.valueOf(newOutputDir) + File.separator + baseName;
            GFFFile.shiftGFFCoords(inputFile, newFileName, start);
            ++count;
        }
        System.out.println(" Wrote " + count + " shifted files");
    }

    /**
     * Makes sure {@code this.outputDir} is ready for a fresh run: when
     * {@code Utils.IsDir} reports it as an existing directory it is passed to
     * {@code Utils.ClearDir}; otherwise it is created via {@code Utils.MakeDir}.
     * A progress line is printed in either case.
     */
    private void prepareOutputDirectory() {
        if (Utils.IsDir(this.outputDir)) {
            System.out.println(" Clearing output directory " + this.outputDir);
            Utils.ClearDir(this.outputDir);
            return;
        }
        System.out.println(" Creating output directory " + this.outputDir);
        Utils.MakeDir(this.outputDir);
    }
}

