package com.compomics.marc;

import com.compomics.util.experiment.annotation.go.GOFactory;
import com.compomics.util.experiment.identification.SequenceFactory;
import com.compomics.util.protein.Header;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;

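/**
 * Annotates tab separated protein reports using the headers of a FASTA
 * database loaded in the sequence factory and the gene ontology mappings
 * loaded in the GO factory.
 *
 * @author Marc
 */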
public class ProteinAnnotation {

    /**
     * Column separator used in the input and output reports.
     */
    private static final String SEPARATOR = "\t";
    /**
     * Separator used between the accessions of a protein group, and between
     * the values aggregated for a group in the output.
     */
    private static final String GROUP_SEPARATOR = ", ";
    /**
     * Default FASTA database used by {@link #loadDb()}.
     */
    private static final String DEFAULT_FASTA_FILE = "D:\\databases\\uniprot_eukaryota_reviewed_pyrococcus_furiosus_21.10.13_concatenated_target_decoy.fasta";
    /**
     * Default GO mapping file used by {@link #loadGene()}.
     */
    private static final String DEFAULT_GO_MAPPING_FILE = "D:\\projects\\heidrun TMT\\mmusculus_gene_ensembl_go_mappings";
    /**
     * Default input of {@link #annotateProteinGroupsWithAccessions()}.
     */
    private static final String DEFAULT_PSM_FILE = "D:\\projects\\denovoGUI\\pfu\\5 05\\tags only psms.txt";
    /**
     * Default output of {@link #annotateProteinGroupsWithAccessions()}.
     */
    private static final String DEFAULT_ANNOTATED_PSM_FILE = "D:\\projects\\denovoGUI\\pfu\\5 05\\tags only psms annotated.txt";
    /**
     * Default input of {@link #annotateProteinGroupsWithGenes()}.
     */
    private static final String DEFAULT_ACCESSIONS_FILE = "D:\\projects\\heidrun TMT\\accessions.txt";
    /**
     * Default output of {@link #annotateProteinGroupsWithGenes()}.
     */
    private static final String DEFAULT_ANNOTATED_ACCESSIONS_FILE = "D:\\projects\\heidrun TMT\\annotated.txt";

    /**
     * The sequence factory providing the protein headers.
     * NOTE(review): the argument is presumably a cache size - confirm against
     * the SequenceFactory documentation.
     */
    private final SequenceFactory sequenceFactory = SequenceFactory.getInstance(1000000);

    /**
     * The GO factory providing the GO accessions and term descriptions.
     */
    private final GOFactory goFactory = GOFactory.getInstance();

    /**
     * Loads the default database and writes the entrapment annotation of the
     * default PSM report. Any exception is printed to the standard error
     * stream.
     *
     * @param args the command line arguments, not used
     */
    public static void main(String[] args) {
        ProteinAnnotation m = new ProteinAnnotation();

        try {
            m.loadDb();
            System.out.println("Database imported");
            m.annotateProteinGroupsWithAccessions();
            System.out.println("annotation finished");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Loads the default FASTA file in the sequence factory.
     *
     * @throws IOException
     * @throws FileNotFoundException
     * @throws ClassNotFoundException
     */
    public void loadDb() throws IOException, FileNotFoundException, ClassNotFoundException {
        loadDb(new File(DEFAULT_FASTA_FILE));
    }

    /**
     * Loads the given FASTA file in the sequence factory.
     *
     * @param fastaFile the FASTA file to load
     *
     * @throws IOException
     * @throws FileNotFoundException
     * @throws ClassNotFoundException
     */
    public void loadDb(File fastaFile) throws IOException, FileNotFoundException, ClassNotFoundException {
        sequenceFactory.loadFastaFile(fastaFile, null);
    }

    /**
     * Initializes the GO factory with the default mapping file.
     *
     * @throws IOException
     * @throws FileNotFoundException
     * @throws ClassNotFoundException
     */
    public void loadGene() throws IOException, FileNotFoundException, ClassNotFoundException {
        loadGene(new File(DEFAULT_GO_MAPPING_FILE));
    }

    /**
     * Initializes the GO factory with the given mapping file.
     *
     * @param mappingFile the GO mapping file
     *
     * @throws IOException
     * @throws FileNotFoundException
     * @throws ClassNotFoundException
     */
    public void loadGene(File mappingFile) throws IOException, FileNotFoundException, ClassNotFoundException {
        goFactory.initialize(mappingFile, null);
    }

    /**
     * Appends an "entrapment" column to the default PSM report. See
     * {@link #annotateProteinGroupsWithAccessions(java.io.File, java.io.File)}.
     *
     * @throws FileNotFoundException
     * @throws IOException
     * @throws InterruptedException
     * @throws ClassNotFoundException
     */
    public void annotateProteinGroupsWithAccessions() throws FileNotFoundException, IOException, InterruptedException, ClassNotFoundException {
        annotateProteinGroupsWithAccessions(new File(DEFAULT_PSM_FILE), new File(DEFAULT_ANNOTATED_PSM_FILE));
    }

    /**
     * Copies a tab separated report and appends an "entrapment" column. The
     * first line of the input is treated as the header. For every following
     * line the second column is read as a ", " separated list of accessions;
     * the line is flagged "0" when at least one of these proteins is a
     * Pyrococcus furiosus protein or has a description containing "keratin"
     * or "trypsin", and "1" otherwise. Empty lines and lines without
     * accessions are not copied to the output. An empty input file results in
     * an empty output file.
     *
     * @param inFile the report to annotate
     * @param outFile the file where to write the annotated report
     *
     * @throws FileNotFoundException
     * @throws IOException
     * @throws InterruptedException
     * @throws ClassNotFoundException
     * @throws IllegalArgumentException if no header is found for an accession
     */
    public void annotateProteinGroupsWithAccessions(File inFile, File outFile) throws FileNotFoundException, IOException, InterruptedException, ClassNotFoundException {
        BufferedReader br = new BufferedReader(new FileReader(inFile));
        try {
            BufferedWriter bw = new BufferedWriter(new FileWriter(outFile));
            try {
                String line = br.readLine();
                if (line == null) {
                    // Empty input: do not write a "null" header line.
                    return;
                }
                bw.write(line + SEPARATOR + "entrapment");
                bw.newLine();
                while ((line = br.readLine()) != null) {
                    line = line.trim();
                    if (line.equals("")) {
                        continue;
                    }
                    String[] lineContent = line.split(SEPARATOR);
                    // split() drops trailing empty columns, so a line with an
                    // empty accession column can have fewer than two elements.
                    if (lineContent.length < 2 || lineContent[1].equals("")) {
                        continue;
                    }
                    bw.write(line + SEPARATOR + (isEntrapment(lineContent[1]) ? "1" : "0"));
                    bw.newLine();
                }
            } finally {
                bw.close();
            }
        } finally {
            br.close();
        }
    }

    /**
     * Indicates whether a protein group consists only of entrapment proteins,
     * i.e. contains no Pyrococcus furiosus protein and no protein with a
     * description containing "keratin" or "trypsin".
     *
     * @param accessions the ", " separated accessions of the protein group
     *
     * @return true if none of the proteins is an expected protein
     *
     * @throws IOException
     * @throws InterruptedException
     * @throws ClassNotFoundException
     * @throws IllegalArgumentException if no header is found for an accession
     */
    private boolean isEntrapment(String accessions) throws IOException, InterruptedException, ClassNotFoundException {
        for (String accession : accessions.split(GROUP_SEPARATOR)) {
            Header header = sequenceFactory.getHeader(accession);
            if (header == null) {
                throw new IllegalArgumentException("Header not found for accession " + accession + ".");
            }
            String species = header.getTaxonomy();
            String description = header.getDescription();
            // A missing taxonomy or description simply does not match.
            boolean expectedSpecies = species != null && species.startsWith("Pyrococcus furiosus");
            boolean contaminant = description != null
                    && (description.contains("keratin") || description.contains("trypsin"));
            if (expectedSpecies || contaminant) {
                return false;
            }
        }
        return true;
    }

    /**
     * Adds gene information to the default protein report. See
     * {@link #annotateProteinGroupsWithGenes(java.io.File, java.io.File)}.
     *
     * @throws FileNotFoundException
     * @throws IOException
     * @throws InterruptedException
     * @throws ClassNotFoundException
     */
    public void annotateProteinGroupsWithGenes() throws FileNotFoundException, IOException, InterruptedException, ClassNotFoundException {
        annotateProteinGroupsWithGenes(new File(DEFAULT_ACCESSIONS_FILE), new File(DEFAULT_ANNOTATED_ACCESSIONS_FILE));
    }

    /**
     * Adds gene information to a protein report. Every non empty line of the
     * input is read as a ", " separated list of accessions and written to the
     * output followed by the protein names, gene names, descriptions and GO
     * term descriptions of the group, separated by tabs.
     *
     * NOTE(review): the GO factory presumably needs to be initialized via
     * loadGene() before calling this method - confirm.
     *
     * @param inFile the file containing one protein group per line
     * @param outFile the file where to write the annotated report
     *
     * @throws FileNotFoundException
     * @throws IOException
     * @throws InterruptedException
     * @throws ClassNotFoundException
     * @throws IllegalArgumentException if no header is found for an accession
     */
    public void annotateProteinGroupsWithGenes(File inFile, File outFile) throws FileNotFoundException, IOException, InterruptedException, ClassNotFoundException {
        BufferedReader br = new BufferedReader(new FileReader(inFile));
        try {
            BufferedWriter bw = new BufferedWriter(new FileWriter(outFile));
            try {
                bw.write("Accession(s)" + SEPARATOR + "name(s)" + SEPARATOR + "Gene(s)" + SEPARATOR + "description(s)" + SEPARATOR + "GO");
                bw.newLine();
                String line;
                while ((line = br.readLine()) != null) {
                    line = line.trim();
                    if (line.equals("")) {
                        continue;
                    }
                    bw.write(line + SEPARATOR + getGeneAnnotation(line));
                    bw.newLine();
                }
            } finally {
                bw.close();
            }
        } finally {
            br.close();
        }
    }

    /**
     * Returns the annotation columns of a protein group: names, genes,
     * descriptions and GO term descriptions, separated by tabs. Within a
     * column the values of the different proteins are separated by ", " and
     * listed in the order of the accessions; GO terms are listed once, in the
     * order of their sorted GO accessions.
     *
     * @param accessions the ", " separated accessions of the protein group
     *
     * @return the tab separated annotation columns
     *
     * @throws IOException
     * @throws InterruptedException
     * @throws ClassNotFoundException
     * @throws IllegalArgumentException if no header is found for an accession
     */
    private String getGeneAnnotation(String accessions) throws IOException, InterruptedException, ClassNotFoundException {
        StringBuilder names = new StringBuilder();
        StringBuilder genes = new StringBuilder();
        StringBuilder descriptions = new StringBuilder();
        ArrayList<String> goAccessions = new ArrayList<String>();
        // Explicit flag: an empty description of the first protein must not
        // suppress the separators of the following ones.
        boolean first = true;
        for (String accession : accessions.split(GROUP_SEPARATOR)) {
            Header header = sequenceFactory.getHeader(accession);
            if (header == null) {
                throw new IllegalArgumentException("Header not found for accession " + accession + ".");
            }
            if (!first) {
                names.append(GROUP_SEPARATOR);
                genes.append(GROUP_SEPARATOR);
                descriptions.append(GROUP_SEPARATOR);
            }
            first = false;
            names.append(header.getDescriptionProteinName());
            descriptions.append(header.getDescription());
            String geneName = header.getGeneName();
            if (geneName != null) {
                genes.append(geneName);
            }
            for (String goAccession : goFactory.getGoAccessions(accession)) {
                if (!goAccessions.contains(goAccession)) {
                    goAccessions.add(goAccession);
                }
            }
        }
        Collections.sort(goAccessions);
        StringBuilder go = new StringBuilder();
        for (String goAccession : goAccessions) {
            if (go.length() > 0) {
                go.append(GROUP_SEPARATOR);
            }
            go.append(goFactory.getTermDescription(goAccession));
        }
        return names + SEPARATOR + genes + SEPARATOR + descriptions + SEPARATOR + go;
    }

}
