/*
 * Decompiled with CFR 0.152.
 */
package maizeUtils;

import basicUtils.Utils;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.HashSet;
import java.util.Hashtable;
import trieUtils.TrieDB;

/**
 * Removes exact-duplicate protein sequences from one or more TRIE ORF databases
 * and writes the surviving sequences as FASTA.
 *
 * <p>Processing happens in two passes:
 * <ol>
 *   <li>{@link #hashORFs()} buckets every protein by a cheap key made of its first
 *       {@value #HASH_KEY_LEN} residues and its length, so that identical sequences
 *       always share a bucket.</li>
 *   <li>{@link #removeRedundancy()} keeps only the first occurrence of each distinct
 *       sequence inside every bucket and writes it out, starting a new numbered
 *       output file whenever the current one has received at least
 *       {@value #MAX_AAS_PER_FILE} residues.</li>
 * </ol>
 */
public class RepeatORFFinder {
    /** Number of leading residues used in the bucket key. */
    private static final int HASH_KEY_LEN = 5;
    /** Residue count after which the current FASTA output file is closed and a new one started. */
    private static final int MAX_AAS_PER_FILE = 100000000;
    /** A progress line is printed every this many items. */
    private static final int PROGRESS_INTERVAL = 100000;
    /** File extension that marks a TRIE database inside an input directory. */
    private static final String TRIE_EXTENSION = ".trie";

    public static String usageInfo = "maizeUtils.RepeatORFFinder version 2011.06.17\nGiven a collection of TRIE databases of ORFs, this program\n1. Identifies ORFs that are identical to one another\n2. Creates new ORF databases with the redundant sequences removed\nREQUIRED:\n-r [FILE/DIR] TRIE file of ORF database, or directory containing TRIE files\n-w [FILE] FASTA output file with redundant sequences removed\n-d [DIR] Directory to write intermediate files\n";

    /** Input databases; a protein is addressed as {databaseIndex, proteinIndex}. */
    private TrieDB[] trieFiles;
    private String outputFileName;
    /** Created if missing and stored; not otherwise read by this class. */
    private String intermediateDir;
    /** Bucket key -> list of {databaseIndex, proteinIndex} pairs sharing that key. */
    private Hashtable<String, ArrayList<int[]>> ORFHash;

    /**
     * Opens the input database(s) and prepares the intermediate directory.
     * Terminates the JVM with status -1 when {@code inputDir} is neither a
     * directory nor a file.
     *
     * @param inputDir        a single TRIE file, or a directory whose {@code .trie} entries are all loaded
     * @param outputFile      output file name from which the numbered FASTA file names are derived
     * @param intermediateDir directory for intermediate files; created when it does not exist
     */
    public RepeatORFFinder(String inputDir, String outputFile, String intermediateDir) {
        if (Utils.IsDir(inputDir)) {
            ArrayList<TrieDB> found = new ArrayList<TrieDB>();
            for (String fileName : Utils.ListDir(inputDir)) {
                // Constant-first equals: a null extension is simply "not a trie file".
                if (TRIE_EXTENSION.equals(Utils.GetFileExtension(fileName))) {
                    found.add(new TrieDB(fileName));
                }
            }
            this.trieFiles = found.toArray(new TrieDB[found.size()]);
        } else if (Utils.IsFile(inputDir)) {
            this.trieFiles = new TrieDB[]{new TrieDB(inputDir)};
        } else {
            System.err.println("ERROR: Invalid input file/dir '" + inputDir + "'!!");
            System.exit(-1);
        }
        this.outputFileName = outputFile;
        if (!Utils.IsDir(intermediateDir)) {
            Utils.MakeDir(intermediateDir);
        }
        this.intermediateDir = intermediateDir;
        for (TrieDB db : this.trieFiles) {
            System.out.println("Added " + db.GetDBFileName());
        }
    }

    /**
     * Command-line entry point. Requires the {@code -r}, {@code -w} and {@code -d}
     * options; prints the usage text and returns when any of them is missing.
     *
     * @param args raw command-line arguments
     */
    public static void main(String[] args) {
        String[] options = new String[]{"-r", "-w", "-d"};
        boolean[] values = new boolean[]{true, true, true};
        Hashtable<?, ?> commandLineArgs = Utils.ParseCommandLine(args, options, values);
        boolean haveAll = commandLineArgs.containsKey("-r")
                && commandLineArgs.containsKey("-w")
                && commandLineArgs.containsKey("-d");
        if (!haveAll) {
            System.err.println("ERROR: Missing Arguments");
            System.out.println(usageInfo);
            return;
        }
        String inputDir = (String) commandLineArgs.get("-r");
        String outputFile = (String) commandLineArgs.get("-w");
        String intermediateDir = (String) commandLineArgs.get("-d");
        RepeatORFFinder finder = new RepeatORFFinder(inputDir, outputFile, intermediateDir);
        finder.findRepeats();
    }

    /** Runs the full pipeline: bucket all proteins, then write the de-duplicated set. */
    private void findRepeats() {
        this.hashORFs();
        this.removeRedundancy();
    }

    /**
     * Walks every bucket of {@link #ORFHash}, drops duplicate sequences inside it and
     * writes the survivors as FASTA records to {@code <base>.<n>.fasta}, where
     * {@code <base>} is {@code Utils.GetBaseName(outputFileName)} and {@code n} starts at 0.
     * Any I/O failure prints the stack trace and terminates the JVM with status -1.
     */
    private void removeRedundancy() {
        String outputFileBaseName = Utils.GetBaseName(this.outputFileName);
        int outFileIndex = 0;
        int aasInCurrentFile = 0;
        // long: the total spans many 100M-residue files and would overflow an int.
        long totalAAsWritten = 0L;
        int singletonHashes = 0;
        int currHashKeyIndex = 0;

        FileWriter out = this.openOutputFile(outputFileBaseName, outFileIndex);
        // The Hashtable enumeration order determines the record order in the output.
        Enumeration<String> hashKeys = this.ORFHash.keys();
        while (hashKeys.hasMoreElements()) {
            if (currHashKeyIndex % PROGRESS_INTERVAL == 0) {
                System.out.println("Removing redundancy... " + currHashKeyIndex);
            }
            String hashKey = hashKeys.nextElement();
            ++currHashKeyIndex;

            ArrayList<int[]> ids = this.ORFHash.get(hashKey);
            ArrayList<int[]> remainingIDs;
            if (ids.size() > 1) {
                remainingIDs = this.removeRedundancy(ids);
            } else {
                // A bucket with a single member cannot contain duplicates.
                remainingIDs = ids;
                ++singletonHashes;
            }

            for (int[] currID : remainingIDs) {
                TrieDB db = this.trieFiles[currID[0]];
                String proteinSeq = db.getProteinSequence(currID[1]);
                String proteinName = db.getProteinName(currID[1]);
                try {
                    out.write(">" + proteinName + "\n");
                    out.write(proteinSeq + "\n");
                }
                catch (IOException E) {
                    E.printStackTrace();
                    System.exit(-1);
                }
                totalAAsWritten += proteinSeq.length();
                aasInCurrentFile += proteinSeq.length();
                if (aasInCurrentFile >= MAX_AAS_PER_FILE) {
                    // Current file is full: roll over to the next numbered file.
                    aasInCurrentFile = 0;
                    ++outFileIndex;
                    this.closeOutputFile(out);
                    out = this.openOutputFile(outputFileBaseName, outFileIndex);
                }
            }
        }
        this.closeOutputFile(out);
        System.out.println("totalAAsWritten: " + totalAAsWritten);
        System.out.println("singletonHashes: " + singletonHashes);
    }

    /**
     * Announces and opens the FASTA output file with the given index.
     * Terminates the JVM with status -1 when the file cannot be opened.
     *
     * @param baseName  output base name
     * @param fileIndex zero-based index embedded in the file name
     * @return a writer for {@code <baseName>.<fileIndex>.fasta}
     */
    private FileWriter openOutputFile(String baseName, int fileIndex) {
        String fileName = baseName + "." + fileIndex + ".fasta";
        System.out.println("Writing to " + fileName);
        try {
            return new FileWriter(fileName);
        }
        catch (IOException E) {
            E.printStackTrace();
            System.exit(-1);
            // Only reached if System.exit returns abnormally; satisfies the compiler.
            throw new IllegalStateException("Could not open " + fileName);
        }
    }

    /**
     * Closes an output writer, terminating the JVM with status -1 on failure.
     *
     * @param out writer to close; {@code null} is ignored
     */
    private void closeOutputFile(FileWriter out) {
        if (out == null) {
            return;
        }
        try {
            out.close();
        }
        catch (IOException E) {
            E.printStackTrace();
            System.exit(-1);
        }
    }

    /**
     * Filters one bucket down to the first occurrence of each distinct sequence.
     * Relative order of the surviving IDs is preserved. Entries whose sequence is
     * {@code null} are dropped.
     *
     * @param ids {databaseIndex, proteinIndex} pairs that share a bucket key
     * @return a new list holding one ID per distinct protein sequence
     */
    private ArrayList<int[]> removeRedundancy(ArrayList<int[]> ids) {
        ArrayList<int[]> unique = new ArrayList<int[]>();
        HashSet<String> seenSequences = new HashSet<String>();
        for (int[] id : ids) {
            String proteinSeq = this.trieFiles[id[0]].getProteinSequence(id[1]);
            // add() returns false when the sequence was already present.
            if (proteinSeq != null && seenSequences.add(proteinSeq)) {
                unique.add(id);
            }
        }
        return unique;
    }

    /**
     * Builds {@link #ORFHash} by assigning every protein of every database to the
     * bucket {@code <first HASH_KEY_LEN residues>@<sequence length>}. Two identical
     * sequences necessarily produce the same key, so duplicates only need to be
     * searched for within a bucket.
     */
    private void hashORFs() {
        this.ORFHash = new Hashtable<String, ArrayList<int[]>>();
        for (int i = 0; i < this.trieFiles.length; ++i) {
            TrieDB db = this.trieFiles[i];
            System.out.println("Looking at " + db.getNumProteins() + " proteins in " + db.GetDBFileName());
            for (int j = 0; j < db.getNumProteins(); ++j) {
                if (j % PROGRESS_INTERVAL == 0) {
                    System.out.println("Completed " + j + " ...");
                }
                String proteinSeq = db.getProteinSequence(j);
                int prefixLen = Math.min(HASH_KEY_LEN, proteinSeq.length());
                String hashKey = proteinSeq.substring(0, prefixLen) + "@" + proteinSeq.length();
                ArrayList<int[]> ids = this.ORFHash.get(hashKey);
                if (ids == null) {
                    ids = new ArrayList<int[]>();
                    this.ORFHash.put(hashKey, ids);
                }
                ids.add(new int[]{i, j});
            }
            System.out.println("Total HashKeys: " + this.ORFHash.size());
        }
    }
}

