/*
 * Decompiled with CFR 0.152.
 */
package maizeUtils;

import basicUtils.Utils;
import java.util.ArrayList;
import java.util.Hashtable;
import trieUtils.TrieDB;

/**
 * Command-line tool that walks every n-mer (fixed-length substring) of every
 * protein sequence in a {@link TrieDB} and tallies how many of them are unique
 * and how many are repeated elsewhere in the database.
 *
 * <p>Three counters are maintained (see {@link #hashNMerLocations}):
 * <ul>
 *   <li>n-mer occurrences whose first reported location is the only location,</li>
 *   <li>n-mer occurrences that are at (or before) the first reported location
 *       but have additional locations,</li>
 *   <li>n-mer occurrences that lie after the first reported location.</li>
 * </ul>
 *
 * <p>All positions are held in {@code int}s, so the tool cannot correctly
 * address databases larger than {@link Integer#MAX_VALUE} residues.
 */
public class NMerCounter {
    private static final String USAGE_INFO = "maizeUtils.NMerCounter\nCounts the number of appearances of N-mers for the purpose of estimating compression\nRequired:\n -r [FILE] Trie file of proteins for counting\n -n [NUM] size of n-mers\n";

    /** Number of n-mers collected before they are looked up in the trie as one batch. */
    private static final int BATCH_SIZE = 5000;

    /** A progress report is printed every this many proteins. */
    private static final int PROGRESS_INTERVAL = 1000;

    /** Length of the substrings being counted; always positive. */
    private final int nMerSize;

    /** Database the protein sequences and n-mer locations are read from. */
    private final TrieDB trieFile;

    /** Occurrences that are the first reported location of an n-mer that has further locations. */
    private int numReusedNMers = 0;

    /** Occurrences of n-mers that have exactly one reported location. */
    private int numNMersWithUnique = 0;

    /** Occurrences that lie after the first reported location of their n-mer. */
    private int numReusedNMerPos;

    /** Enables verbose tracing and a wait-for-enter pause after every n-mer. Never set to true in this file. */
    private boolean debug = false;

    /**
     * Opens the trie database and records the n-mer size.
     *
     * <p>Terminates the JVM with exit code -1 if {@code inFileName} is not a
     * file (per {@code Utils.IsFile}) or if {@code size} is not positive.
     *
     * @param inFileName path of the trie file to read
     * @param size       length of the n-mers to count; must be greater than zero
     */
    public NMerCounter(String inFileName, int size) {
        if (!Utils.IsFile(inFileName)) {
            System.err.println("ERROR: Invalid input file name " + inFileName);
            System.exit(-1);
        }
        if (size <= 0) {
            System.err.println("ERROR: Invalid n-mer size '" + size + "'!!");
            System.exit(-1);
        }
        this.nMerSize = size;
        this.trieFile = new TrieDB(inFileName);
        System.out.println("Trie size: " + this.trieFile.GetNumAA());
        System.out.println("MAX SIZE : 2147483647");
        // NOTE(review): this only reports the problem and then carries on; positions
        // are stored as ints below, so results for such a database would be unreliable.
        // Confirm whether this should abort like the other ERROR paths.
        if (this.trieFile.GetNumAA() > Integer.MAX_VALUE) {
            System.err.println("ERROR: Implementation failure.  Can only store addresses up to 2147483647, but DB contains " + this.trieFile.GetNumAA());
        }
    }

    /**
     * Enumerates every n-mer of every protein, looks them up in the trie in
     * batches of {@link #BATCH_SIZE}, and prints the resulting counts (a
     * progress report every {@link #PROGRESS_INTERVAL} proteins and a final
     * summary). Proteins shorter than the n-mer size are skipped with a warning.
     */
    private void runCounter() {
        String[] currNMers = new String[BATCH_SIZE];
        int[] filePos = new int[BATCH_SIZE];
        int nMerPos = 0;
        int numProteins = this.trieFile.getNumProteins();
        if (this.debug) {
            System.out.println("DEBUG: Total proteins to consider: " + numProteins);
        }
        for (int i = 0; i < numProteins; ++i) {
            if (i != 0 && i % PROGRESS_INTERVAL == 0) {
                System.out.println("[" + i + "/" + numProteins + "]");
                this.printCounts();
            }
            String proteinSeq = this.trieFile.getProteinSequence(i);
            if (proteinSeq.length() < this.nMerSize) {
                System.err.println("WARNING: Found a protein of length " + proteinSeq.length() + " < " + this.nMerSize);
                continue;
            }
            int proteinPos = this.trieFile.GetTriePos(i);
            int lastStart = proteinSeq.length() - this.nMerSize;
            for (int j = 0; j <= lastStart; ++j) {
                String nMer = proteinSeq.substring(j, j + this.nMerSize);
                currNMers[nMerPos] = nMer;
                // Position of this occurrence: the protein's trie position plus the offset within it.
                filePos[nMerPos] = proteinPos + j;
                ++nMerPos;
                if (this.debug) {
                    System.out.println("DEBUG: Added new nMer: " + nMer);
                }
                if (nMerPos == currNMers.length) {
                    // Buffer is full: resolve this batch and start refilling from slot 0.
                    if (this.debug) {
                        System.out.println("DEBUG: Reached " + nMerPos + " strings!");
                    }
                    this.hashNMerLocations(currNMers, filePos, nMerPos);
                    nMerPos = 0;
                }
            }
        }
        if (nMerPos > 0) {
            // Flush the final, partially filled batch.
            if (this.debug) {
                System.out.println("DEBUG: Reached " + nMerPos + " strings!");
            }
            this.hashNMerLocations(currNMers, filePos, nMerPos);
        }
        this.printCounts();
    }

    /** Prints the three running counters to standard output. */
    private void printCounts() {
        System.out.println("NMers that are unique: " + this.numNMersWithUnique);
        System.out.println("NMers that are reused: " + this.numReusedNMers);
        System.out.println("Positions with resued NMers: " + this.numReusedNMerPos);
    }

    /**
     * Looks up the locations of a batch of n-mers and updates the counters.
     *
     * <p>For each of the first {@code numNMers} entries, the first location
     * returned by the trie is compared with the position the n-mer was read
     * from: if the first location is earlier, the occurrence counts as a reused
     * position; otherwise it counts as reused (more than one location) or
     * unique (exactly one location). Processed slots of {@code nMers} are set
     * to {@code null}.
     *
     * @param nMers    buffer of n-mers; only indices {@code [0, numNMers)} are read
     * @param filePos  position each n-mer was read from, parallel to {@code nMers}
     * @param numNMers number of valid entries in the two buffers
     * @throws IllegalStateException if the trie reports no location for an n-mer
     */
    private void hashNMerLocations(String[] nMers, int[] filePos, int numNMers) {
        // Only hand the valid prefix to the trie: the tail of a partially filled
        // buffer holds nulls, and it is not visible here whether the lookup accepts them.
        String[] batch = nMers;
        if (numNMers < nMers.length) {
            batch = new String[numNMers];
            System.arraycopy(nMers, 0, batch, 0, numNMers);
        }
        Hashtable<?, ?> locs = this.trieFile.GetAllLocations(batch);
        for (int i = 0; i < numNMers; ++i) {
            if (this.debug) {
                System.out.println("DEBUG: Determining locations of " + nMers[i]);
            }
            ArrayList<?> currLocs = (ArrayList<?>)locs.get(nMers[i]);
            if (currLocs == null || currLocs.isEmpty()) {
                throw new IllegalStateException("No locations returned for n-mer '" + nMers[i] + "' read from position " + filePos[i]);
            }
            // Each location is an Object[]; element 1 is used as a protein ID and
            // element 2 is added to that protein's trie position.
            // NOTE(review): assumes the first entry is the earliest location — confirm against TrieDB.
            int pos = this.locationToPos((Object[])currLocs.get(0));
            if (this.debug) {
                System.out.println("DEBUG: Found at " + currLocs.size() + " positions, first one is at " + pos + ", this one is at " + filePos[i]);
            }
            if (pos < filePos[i]) {
                ++this.numReusedNMerPos;
                if (this.debug) {
                    System.out.println("DEBUG: The first position already happened!");
                }
            } else if (currLocs.size() > 1) {
                ++this.numReusedNMers;
                if (this.debug) {
                    System.out.println("DEBUG: This is the first appearance of this nMer, but it's reused!!");
                    // The later positions are only reported, never stored, so they
                    // are resolved solely when tracing is on.
                    for (int j = 1; j < currLocs.size(); ++j) {
                        int laterPos = this.locationToPos((Object[])currLocs.get(j));
                        System.out.println("DEBUG: Marking ahead to position " + laterPos + "!!");
                    }
                }
            } else {
                ++this.numNMersWithUnique;
                if (this.debug) {
                    System.out.println("DEBUG: This is the first appearance of this nMer, but it's unique!!");
                }
            }
            if (this.debug) {
                Utils.WaitForEnter();
            }
            // Release the string so the buffer does not keep it alive.
            nMers[i] = null;
        }
    }

    /** Converts one location record into a position: trie position of protein {@code loc[1]} plus {@code loc[2]}. */
    private int locationToPos(Object[] loc) {
        int proteinID = (Integer)loc[1];
        return this.trieFile.GetTriePos(proteinID) + (Integer)loc[2];
    }

    /**
     * Entry point. Requires {@code -r <trie file>} and {@code -n <n-mer size>};
     * prints usage and returns if either is missing, and exits with code -1 if
     * the size is not an integer.
     *
     * @param args command-line arguments
     */
    public static void main(String[] args) {
        String[] commands = new String[]{"-r", "-n"};
        boolean[] takesValue = new boolean[]{true, true};
        Hashtable<?, ?> options = Utils.ParseCommandLine(args, commands, takesValue);
        if (!options.containsKey("-r") || !options.containsKey("-n")) {
            System.err.println("ERROR: Must specify an input trie file and a word size!!");
            System.out.println(USAGE_INFO);
            return;
        }
        String inFileName = (String)options.get("-r");
        String sizeArg = (String)options.get("-n");
        int nSize;
        try {
            nSize = Integer.parseInt(sizeArg);
        } catch (NumberFormatException e) {
            // Report a malformed size the same way as an out-of-range one instead of a stack trace.
            System.err.println("ERROR: Invalid n-mer size '" + sizeArg + "'!!");
            System.out.println(USAGE_INFO);
            System.exit(-1);
            return;
        }
        NMerCounter c = new NMerCounter(inFileName, nSize);
        c.runCounter();
    }
}

