/*
 * Decompiled with CFR 0.152.
 */
package proteogenomicUtils;

import basicUtils.Utils;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.Hashtable;
import trieUtils.TrieDB;

/**
 * Command-line tool that checks whether newly predicted proteins occur in a trie proteome
 * database and, when a transposable-element (TE) file is supplied, whether the gene models
 * for which no protein was found overlap a TE.
 *
 * <p>All input files are tab-separated text. Blank lines and lines whose first non-blank
 * character is {@code #} are ignored. The columns read from each file are documented on the
 * method that parses it.
 */
public class TestInclusionOfProtein {
    public static String UsageInfo = "proteogenomicsUtils.TestInclusionofProtein version 08.31.2010\nLoads a file of newly predicted proteins, and test if they are present in the trie database\nIf protein is not in database, then determines if peptides supporting the protein are in the database\n[REQUIRED]:\n -r [FILE] FIle containing newly predicted proteins\n -t [FILE] Trie file of proteome of interest\n -g [FILE] GFF file of new genes\n[OPTIONAL]:\n -e [FILE] File of Transposable elements\n -d Run in Debug mode\n";

    /** Number of protein sequences collected before one trie lookup is issued. */
    private static final int BATCH_SIZE = 500;

    private boolean Debug = false;
    private String InputFile;
    private TrieDB Proteome;
    private String GFFFile;
    private String TEFile = null;
    /** One {@code {start, end, chromosome}} triple per TE; populated by {@link #LoadTEs()}. */
    private ArrayList<int[]> TECoords = null;
    /** Gene names for which none of the listed proteins was found in the trie. */
    private ArrayList<String> UnaccountedForModels;
    /** Unaccounted-for gene names whose GFF coordinates overlap a TE. */
    private ArrayList<String> OverlapWithTE;

    /**
     * Creates a tester without TE overlap analysis.
     *
     * @param inputFile    path of the predicted-protein file
     * @param proteomeFile path handed to the {@code TrieDB} constructor
     * @param GFFFile2     path of the GFF file describing the new genes
     */
    public TestInclusionOfProtein(String inputFile, String proteomeFile, String GFFFile2) {
        this.InputFile = inputFile;
        this.Proteome = new TrieDB(proteomeFile);
        this.GFFFile = GFFFile2;
        this.UnaccountedForModels = new ArrayList<String>();
    }

    /**
     * Creates a tester that additionally checks unaccounted-for genes against a TE file.
     *
     * @param inputFile    path of the predicted-protein file
     * @param proteomeFile path handed to the {@code TrieDB} constructor
     * @param GFFFile2     path of the GFF file describing the new genes
     * @param TEFile       path of the transposable-element file
     */
    public TestInclusionOfProtein(String inputFile, String proteomeFile, String GFFFile2, String TEFile) {
        this(inputFile, proteomeFile, GFFFile2);
        this.TEFile = TEFile;
    }

    /** Runs the protein lookup and, if a TE file was given, the TE overlap analysis. */
    private void RunTest() {
        this.PopulateMissedModels();
        // NOTE(review): this pause is unconditional (not tied to Debug); kept as-is.
        Utils.WaitForEnter();
        if (this.TEFile != null) {
            this.LoadTEs();
            this.DetermineOverlapWithTE();
        }
    }

    /**
     * Reads the TE file into {@link #TECoords}.
     *
     * <p>Per data line: column 0 is a name whose character at index 2 is parsed as the
     * chromosome number, and columns 2 and 3 are parsed as start and end. An I/O failure
     * prints the stack trace and exits with status -1.
     */
    private void LoadTEs() {
        this.TECoords = new ArrayList<int[]>();
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new FileReader(this.TEFile));
            String line;
            while ((line = reader.readLine()) != null) {
                line = line.trim();
                if (IsSkippable(line)) {
                    continue;
                }
                String[] bits = line.split("\t");
                String name = bits[0];
                int chr = Integer.parseInt("" + name.charAt(2));
                int start = Integer.parseInt(bits[2]);
                int end = Integer.parseInt(bits[3]);
                this.TECoords.add(new int[]{start, end, chr});
            }
        }
        catch (IOException e) {
            e.printStackTrace();
            System.exit(-1);
        }
        finally {
            // Close even when a malformed line makes parsing throw.
            CloseReader(reader);
        }
        System.out.println("Loaded " + this.TECoords.size() + " TEs");
    }

    /**
     * Scans the GFF file for {@code gene} records of unaccounted-for models and records in
     * {@link #OverlapWithTE} those that overlap a loaded TE.
     *
     * <p>Per data line: column 2 is the feature type, column 0 is a sequence id whose
     * character at index 3 is either {@code C}/{@code M} (record counted and skipped) or a
     * digit parsed as the chromosome number, columns 3 and 4 are start and end, and column 8
     * is compared against the unaccounted-for gene names. An I/O failure prints the stack
     * trace and exits with status -1.
     */
    private void DetermineOverlapWithTE() {
        // Tracing in this method has always been switched on regardless of -d; preserved.
        final boolean localDebug = true;
        this.OverlapWithTE = new ArrayList<String>();
        int geneCount = 0;
        int cmGeneCount = 0;
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new FileReader(this.GFFFile));
            String line;
            while ((line = reader.readLine()) != null) {
                line = line.trim();
                if (IsSkippable(line)) {
                    continue;
                }
                String[] bits = line.split("\t");
                if (!"gene".equals(bits[2])) {
                    continue;
                }
                ++geneCount;
                char chrChar = bits[0].charAt(3);
                if (chrChar == 'C' || chrChar == 'M') {
                    ++cmGeneCount;
                    continue;
                }
                int chr = Integer.parseInt("" + chrChar);
                String name = bits[8];
                if (!this.UnaccountedForModels.contains(name)) {
                    continue;
                }
                if (localDebug) {
                    System.out.println("Found coords for unaccounted for gene: " + name);
                }
                int start = Integer.parseInt(bits[3]);
                int end = Integer.parseInt(bits[4]);
                if (localDebug) {
                    System.out.println(" " + start + "-" + end + " on chr " + chr);
                }
                if (this.GeneHasOverlapWithTE(start, end, chr)) {
                    if (localDebug) {
                        System.out.println("He's got overlap with a TE!!");
                    }
                    this.OverlapWithTE.add(name);
                }
            }
        }
        catch (IOException e) {
            e.printStackTrace();
            System.exit(-1);
        }
        finally {
            // Close even when a malformed line makes parsing throw.
            CloseReader(reader);
        }
        System.out.println("Total overlapping TE: " + this.OverlapWithTE.size());
        System.out.println("Total genes in GFF: " + geneCount);
        System.out.println("C and M genes: " + cmGeneCount);
    }

    /**
     * Tests a gene interval against every loaded TE.
     *
     * @param start gene start coordinate
     * @param end   gene end coordinate
     * @param chr   chromosome number of the gene
     * @return {@code true} if a TE on the same chromosome overlaps according to
     *         {@code Utils.HasOverlap}
     */
    private boolean GeneHasOverlapWithTE(int start, int end, int chr) {
        for (int[] te : this.TECoords) {
            if (chr == te[2] && Utils.HasOverlap(start, end, te[0], te[1])) {
                return true;
            }
        }
        return false;
    }

    /**
     * Looks up every protein of the input file in the trie and fills
     * {@link #UnaccountedForModels} with the gene names that had no protein found.
     *
     * <p>Per data line: column 2 is the gene name and column 3 the protein sequence.
     * Sequences are searched in batches of {@link #BATCH_SIZE}. In debug mode the method
     * waits for Enter after each data line. An I/O failure prints the stack trace and exits
     * with status -1.
     */
    private void PopulateMissedModels() {
        int foundCount = 0;
        ArrayList<String> toSearch = new ArrayList<String>();
        ArrayList<String> geneNames = new ArrayList<String>();
        Hashtable<String, Integer> foundGeneNames = new Hashtable<String, Integer>();
        Hashtable<String, Integer> allGeneNames = new Hashtable<String, Integer>();
        ArrayList<String> notFoundProteins = new ArrayList<String>();

        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new FileReader(this.InputFile));
            String line;
            while ((line = reader.readLine()) != null) {
                line = line.trim();
                if (IsSkippable(line)) {
                    continue;
                }
                String[] bits = line.split("\t");
                toSearch.add(bits[3]);
                geneNames.add(bits[2]);
                if (toSearch.size() >= BATCH_SIZE) {
                    System.out.println("Reached " + toSearch.size() + " proteins..");
                    foundCount += this.SearchBatch(toSearch, geneNames, foundGeneNames, allGeneNames, notFoundProteins);
                    toSearch.clear();
                    geneNames.clear();
                }
                if (this.Debug) {
                    Utils.WaitForEnter();
                }
            }
        }
        catch (IOException e) {
            e.printStackTrace();
            System.exit(-1);
        }
        finally {
            // Close even when a malformed line makes parsing throw.
            CloseReader(reader);
        }

        // Flush whatever is left over from the last, partially filled batch.
        if (toSearch.size() > 0) {
            System.out.println("Remaining " + toSearch.size() + " proteins..");
            foundCount += this.SearchBatch(toSearch, geneNames, foundGeneNames, allGeneNames, notFoundProteins);
        }
        System.out.println("Found " + foundCount + "/" + (foundCount + notFoundProteins.size()) + " proteins in TAIR8!");
        System.out.println("Total of " + foundGeneNames.size() + "/" + allGeneNames.size() + " gene models found at least in part");

        Enumeration<String> genes = allGeneNames.keys();
        while (genes.hasMoreElements()) {
            String gene = genes.nextElement();
            if (!foundGeneNames.containsKey(gene)) {
                this.UnaccountedForModels.add(gene);
            }
        }
        System.out.println("Total unaccounted for models: " + this.UnaccountedForModels.size());
    }

    /**
     * Searches one batch of protein sequences in the trie and updates the bookkeeping maps.
     *
     * @param proteins         sequences to search; index-aligned with {@code geneNames}
     * @param geneNames        gene name of each sequence
     * @param foundGeneNames   receives the gene name of every sequence that was found
     * @param allGeneNames     receives every gene name of the batch
     * @param notFoundProteins receives the gene name of every sequence that was not found
     * @return number of sequences in the batch with at least one location
     */
    private int SearchBatch(ArrayList<String> proteins, ArrayList<String> geneNames,
            Hashtable<String, Integer> foundGeneNames, Hashtable<String, Integer> allGeneNames,
            ArrayList<String> notFoundProteins) {
        // The result types are raw because TrieDB's declared signature is not visible here.
        Hashtable allLocations = this.Proteome.GetAllLocations(Utils.ConvertArraylistToStringArray(proteins));
        int found = 0;
        for (int i = 0; i < proteins.size(); ++i) {
            String geneName = geneNames.get(i);
            ArrayList locations = (ArrayList)allLocations.get(proteins.get(i));
            allGeneNames.put(geneName, Integer.valueOf(1));
            if (locations == null || locations.size() == 0) {
                notFoundProteins.add(geneName);
                if (this.Debug) {
                    System.out.println("Not found!!");
                }
                continue;
            }
            ++found;
            foundGeneNames.put(geneName, Integer.valueOf(1));
            if (this.Debug) {
                System.out.println("Found!!");
                for (int j = 0; j < locations.size(); ++j) {
                    Object[] loc = (Object[])locations.get(j);
                    System.out.println(" - " + (String)loc[3]);
                }
            }
        }
        return found;
    }

    /** Returns {@code true} for an (already trimmed) line that is empty or starts with {@code #}. */
    private static boolean IsSkippable(String trimmedLine) {
        return trimmedLine.length() == 0 || trimmedLine.charAt(0) == '#';
    }

    /** Closes {@code reader} if it was opened; a close failure is reported and exits with -1. */
    private static void CloseReader(BufferedReader reader) {
        if (reader == null) {
            return;
        }
        try {
            reader.close();
        }
        catch (IOException e) {
            e.printStackTrace();
            System.exit(-1);
        }
    }

    /** Prints {@code message} and the usage text to stderr, then exits with a failure status. */
    private static void ExitWithUsage(String message) {
        System.err.println(message);
        System.err.println(UsageInfo);
        // Non-zero so that calling scripts can detect the failure.
        System.exit(1);
    }

    /**
     * Returns {@code path} if {@code Utils.IsFile} accepts it; otherwise reports
     * {@code messagePrefix + path} together with the usage text and exits.
     */
    private static String RequireFile(String path, String messagePrefix) {
        if (!Utils.IsFile(path)) {
            ExitWithUsage(messagePrefix + path);
        }
        return path;
    }

    /**
     * Entry point. Requires {@code -r}, {@code -t} and {@code -g}; accepts optional
     * {@code -e} (TE file) and {@code -d} (debug mode).
     *
     * @param args command-line arguments
     */
    public static void main(String[] args) {
        String[] options = new String[]{"-r", "-t", "-g", "-d", "-e"};
        // Parallel to options: every option except -d is flagged true.
        boolean[] values = new boolean[]{true, true, true, false, true};
        Hashtable CommandLineArgs = Utils.ParseCommandLine(args, options, values);
        if (!(CommandLineArgs.containsKey("-r") && CommandLineArgs.containsKey("-t") && CommandLineArgs.containsKey("-g"))) {
            ExitWithUsage("ERROR: Must specify an predicted protein file, a proteome file, and a gff file!");
        }
        String inputFile = RequireFile((String)CommandLineArgs.get("-r"), "ERROR: Must specify a valid file for the input: ");
        String proteomeFile = RequireFile((String)CommandLineArgs.get("-t"), "ERROR: Must specify a valid proteome file: ");
        String gffFile = RequireFile((String)CommandLineArgs.get("-g"), "ERROR: Must specify a valid gff file: ");

        TestInclusionOfProtein tester;
        if (CommandLineArgs.containsKey("-e")) {
            String teFile = RequireFile((String)CommandLineArgs.get("-e"), "ERROR: Must specify a valid TE file: ");
            tester = new TestInclusionOfProtein(inputFile, proteomeFile, gffFile, teFile);
        } else {
            tester = new TestInclusionOfProtein(inputFile, proteomeFile, gffFile);
        }
        if (CommandLineArgs.containsKey("-d")) {
            tester.Debug = true;
        }
        tester.RunTest();
    }
}

