public class ProteinTree
extends java.lang.Object
Modifier and Type | Class and Description |
---|---|
class |
ProteinTree.PeptideIterator
Alphabetical iterator for the tree.
|
protected class |
ProteinTree.RawNodeProcessor
Runnable used to process raw nodes and store them in the database.
|
protected class |
ProteinTree.SequenceIndexer
Runnable used for the indexing of a protein sequence.
|
Modifier and Type | Field and Description |
---|---|
protected static long |
cacheScale
Approximate number of accession*node one can store in a GB of memory
(empirical value).
|
protected int |
cacheSize
Size of the cache of the most queried peptides.
|
protected ProteinTreeComponentsFactory |
componentsFactory
The node factory when operating in indexed mode.
|
static boolean |
debugSpeed
Indicates whether a debug file with speed metrics shall be created.
|
protected java.io.BufferedWriter |
debugSpeedWriter
The writer used to send the output to a debug file.
|
protected java.util.HashMap<java.lang.String,java.util.HashMap<java.lang.String,java.util.HashMap<java.lang.String,java.util.ArrayList<java.lang.Integer>>>> |
lastQueriedPeptidesCache
Cache of the last queried peptides.
|
protected java.util.ArrayList<java.lang.String> |
lastQueriedPeptidesCacheContent
Peptide sequences in cache.
|
protected java.util.HashMap<java.lang.String,java.util.HashMap<java.lang.String,java.util.HashMap<java.lang.String,java.util.ArrayList<java.lang.Integer>>>> |
lastSlowQueriedPeptidesCache
Cache of the last queried peptides where the query took long.
|
protected java.util.ArrayList<java.lang.String> |
lastSlowQueriedPeptidesCacheContent
Peptide sequences in slow cache.
|
protected boolean |
listening
Indicates whether the main thread is listening or preparing to wait.
|
protected java.lang.Double |
massToleranceInCache
The mass tolerance of the matches in cache.
|
protected ProteinMatch.MatchingType |
matchingTypeInCache
The matching type of the matches in cache.
|
protected int |
memoryAllocation
The memory allocation in MB.
|
protected int |
queryTimeThreshold
Time in ms after which a query is considered as slow.
|
protected SequenceFactory |
sequenceFactory
Instance of the sequence factory.
|
protected java.util.ArrayList<java.lang.String> |
tagsInTree
List of the nodes in tree.
|
protected java.util.concurrent.ConcurrentHashMap<java.lang.String,Node> |
tree
The tree containing the accessions indexed by sequence tags.
|
protected long |
treeSize
The size of the tree in memory in accession*node.
|
static java.lang.String |
version
The version of the protein tree.
|
Constructor and Description |
---|
ProteinTree(int memoryAllocation)
Creates a tree based on the proteins present in the sequence factory.
|
Modifier and Type | Method and Description |
---|---|
void |
close()
Closes all connections to files.
|
void |
emptyCache()
Empties the cache.
|
int |
getCacheSize()
Returns the size of the cache used for peptide mappings (note that there
are two of them).
|
protected java.util.ArrayList<java.lang.String> |
getInitialTags(java.lang.String peptideSequence,
ProteinMatch.MatchingType matchingType,
java.lang.Double massTolerance)
Returns a list of possible initial tags.
|
java.util.HashMap<java.lang.String,java.util.ArrayList<java.lang.Integer>> |
getMatchedPeptideSequences(java.lang.String peptideSequence,
java.lang.String proteinAccession,
ProteinMatch.MatchingType matchingType,
java.lang.Double massTolerance)
Returns a list of peptides matched using the given peptide sequence in
the given protein according to the provided matching settings.
|
protected Node |
getNode(java.lang.String tag)
Returns a node related to a tag and updates the cache.
|
ProteinTree.PeptideIterator |
getPeptideIterator()
Returns a PeptideIterator which iterates alphabetically over all peptides
corresponding to the end of a branch in the tree.
|
java.util.HashMap<java.lang.String,java.util.ArrayList<java.lang.Integer>> |
getProteinMapping(java.lang.String peptideSequence)
Returns the protein mapping in the sequence factory for the given peptide
sequence based on string matching only.
|
java.util.HashMap<java.lang.String,java.util.HashMap<java.lang.String,java.util.ArrayList<java.lang.Integer>>> |
getProteinMapping(java.lang.String peptideSequence,
ProteinMatch.MatchingType matchingType,
java.lang.Double massTolerance)
Returns the protein mapping in the sequence factory for the given peptide
sequence.
|
protected java.util.HashMap<java.lang.String,java.util.HashMap<java.lang.String,java.util.ArrayList<java.lang.Integer>>> |
getProteinMapping(java.lang.String peptideSequence,
ProteinMatch.MatchingType matchingType,
java.lang.Double massTolerance,
boolean reversed)
Returns the protein mapping in the sequence factory for the given peptide
sequence.
|
protected java.util.HashMap<java.lang.String,java.util.HashMap<java.lang.String,java.util.ArrayList<java.lang.Integer>>> |
getReversedResults(java.util.HashMap<java.lang.String,java.util.HashMap<java.lang.String,java.util.ArrayList<java.lang.Integer>>> forwardResults)
Reverts the indexes and the protein accessions of the given mapping.
|
protected void |
importDb(int initialTagSize,
int maxNodeSize,
int maxPeptideSize,
Enzyme enzyme,
WaitingHandler waitingHandler,
boolean printExpectedImportTime,
boolean displayProgress)
Imports the db which is in the sequence factory into the tree and saves
it in the nodeFactory.
|
void |
initiateTree(int initialTagSize,
int maxNodeSize,
int maxPeptideSize,
Enzyme enzyme,
WaitingHandler waitingHandler,
boolean printExpectedImportTime,
boolean displayProgress)
Initiates the tree.
|
void |
initiateTree(int initialTagSize,
int maxNodeSize,
int maxPeptideSize,
WaitingHandler waitingHandler,
boolean printExpectedImportTime,
boolean displayProgress)
Initiates the tree.
|
protected void |
loadTags(java.util.ArrayList<java.lang.String> tags,
java.util.ArrayList<java.lang.String> accessions,
WaitingHandler waitingHandler,
int initialTagSize,
int maxNodeSize,
int maxPeptideSize,
Enzyme enzyme,
java.util.ArrayList<java.lang.String> loadedAccessions,
boolean displayProgress)
Loads the tags found in the given proteins in the tree and saves the end
nodes in the NodeFactory if not null.
|
void |
setCacheSize(int cacheSize)
Sets the size of the cache used for peptide mappings (note that there are
two of them).
|
protected int memoryAllocation
protected static final long cacheScale
protected SequenceFactory sequenceFactory
protected java.util.concurrent.ConcurrentHashMap<java.lang.String,Node> tree
protected java.util.ArrayList<java.lang.String> tagsInTree
protected long treeSize
public static boolean debugSpeed
protected java.io.BufferedWriter debugSpeedWriter
protected ProteinTreeComponentsFactory componentsFactory
protected int cacheSize
protected java.util.HashMap<java.lang.String,java.util.HashMap<java.lang.String,java.util.HashMap<java.lang.String,java.util.ArrayList<java.lang.Integer>>>> lastQueriedPeptidesCache
protected java.util.ArrayList<java.lang.String> lastQueriedPeptidesCacheContent
protected int queryTimeThreshold
protected java.util.HashMap<java.lang.String,java.util.HashMap<java.lang.String,java.util.HashMap<java.lang.String,java.util.ArrayList<java.lang.Integer>>>> lastSlowQueriedPeptidesCache
protected java.util.ArrayList<java.lang.String> lastSlowQueriedPeptidesCacheContent
public static final java.lang.String version
protected ProteinMatch.MatchingType matchingTypeInCache
protected java.lang.Double massToleranceInCache
protected boolean listening
public ProteinTree(int memoryAllocation) throws java.io.IOException
memoryAllocation
- the number of MB available for the tree in
memory.
java.io.IOException
public void initiateTree(int initialTagSize, int maxNodeSize, int maxPeptideSize, WaitingHandler waitingHandler, boolean printExpectedImportTime, boolean displayProgress) throws java.io.IOException, java.lang.IllegalArgumentException, java.lang.InterruptedException, java.lang.ClassNotFoundException, java.sql.SQLException
initialTagSize
- the initial tag size
maxNodeSize
- the maximal size of a node. Large nodes will be fast
to initiate but slow to query. I typically use 500 giving an approximate
query time <20ms.
maxPeptideSize
- the maximum peptide size
waitingHandler
- the waiting handler used to display progress to the
user. Can be null but strongly recommended :)
printExpectedImportTime
- if true the expected import time will be
printed to the waiting handler
displayProgress
- display progress
java.io.IOException
java.lang.IllegalArgumentException
java.lang.InterruptedException
java.lang.ClassNotFoundException
java.sql.SQLException
public void initiateTree(int initialTagSize, int maxNodeSize, int maxPeptideSize, Enzyme enzyme, WaitingHandler waitingHandler, boolean printExpectedImportTime, boolean displayProgress) throws java.io.IOException, java.lang.IllegalArgumentException, java.lang.InterruptedException, java.io.IOException, java.lang.IllegalArgumentException, java.lang.InterruptedException, java.lang.ClassNotFoundException, java.sql.SQLException
initialTagSize
- the initial size of the peptide tag. Large initial sizes
are fast to query, low initial sizes are fast to initiate. I typically use
3 for databases containing less than 100 000 proteins giving an
approximate initiation time of 60ms per accession.
maxNodeSize
- the maximal size of a node. Large nodes will be fast
to initiate but slow to query. I typically use 500 giving an approximate
query time <20ms.
maxPeptideSize
- the maximum peptide size
enzyme
- the enzyme used to select peptides. If null all possible
peptides will be indexed
waitingHandler
- the waiting handler used to display progress to the
user. Can be null.
printExpectedImportTime
- if true the expected import time will be
printed to the waiting handler
displayProgress
- display progress
java.io.IOException
java.lang.IllegalArgumentException
java.lang.InterruptedException
java.lang.ClassNotFoundException
java.sql.SQLException
protected void importDb(int initialTagSize, int maxNodeSize, int maxPeptideSize, Enzyme enzyme, WaitingHandler waitingHandler, boolean printExpectedImportTime, boolean displayProgress) throws java.io.IOException, java.lang.IllegalArgumentException, java.lang.InterruptedException, java.io.IOException, java.lang.IllegalArgumentException, java.lang.InterruptedException, java.lang.ClassNotFoundException, java.sql.SQLException
initialTagSize
- the initial size of the peptide tag. Large initial sizes
are slow to query, low initial sizes are slow to initiate. I typically use
3 for databases containing less than 100 000 proteins.
maxNodeSize
- the maximal size of a node. Large nodes will be fast
to initiate but slow to query. I typically use 5000.
maxPeptideSize
- the maximum peptide size
enzyme
- the enzyme used to select peptides. If null all possible
peptides will be indexed
waitingHandler
- the waiting handler used to display progress to the
user. Can be null.
printExpectedImportTime
- if true the expected import time will be
printed to the waiting handler
java.io.IOException
java.lang.IllegalArgumentException
java.lang.InterruptedException
java.lang.ClassNotFoundException
java.sql.SQLException
protected void loadTags(java.util.ArrayList<java.lang.String> tags, java.util.ArrayList<java.lang.String> accessions, WaitingHandler waitingHandler, int initialTagSize, int maxNodeSize, int maxPeptideSize, Enzyme enzyme, java.util.ArrayList<java.lang.String> loadedAccessions, boolean displayProgress) throws java.io.IOException, java.lang.IllegalArgumentException, java.lang.InterruptedException, java.lang.ClassNotFoundException, java.sql.SQLException
tags
- the tags of interest
accessions
- the accessions of the proteins of interest
waitingHandler
- waiting handler displaying progress to the user -
can be null
enzyme
- the enzyme restriction
saveLength
- boolean indicating whether the length of the proteins
shall be saved (mandatory when computing reverse indexes on the fly)
loadedAccessions
- the accessions already loaded in the factory
java.io.IOException
java.lang.IllegalArgumentException
java.lang.InterruptedException
java.lang.ClassNotFoundException
java.sql.SQLException
public java.util.HashMap<java.lang.String,java.util.ArrayList<java.lang.Integer>> getProteinMapping(java.lang.String peptideSequence) throws java.io.IOException, java.lang.InterruptedException, java.lang.ClassNotFoundException, java.sql.SQLException
peptideSequence
- the peptide sequence
java.io.IOException
java.lang.InterruptedException
java.lang.ClassNotFoundException
java.sql.SQLException
public java.util.HashMap<java.lang.String,java.util.HashMap<java.lang.String,java.util.ArrayList<java.lang.Integer>>> getProteinMapping(java.lang.String peptideSequence, ProteinMatch.MatchingType matchingType, java.lang.Double massTolerance) throws java.io.IOException, java.lang.InterruptedException, java.lang.ClassNotFoundException, java.sql.SQLException
peptideSequence
- the peptide sequence
matchingType
- the matching type
massTolerance
- the mass tolerance for matching type
'indistiguishibleAminoAcids'. Can be null otherwise
java.io.IOException
java.lang.InterruptedException
java.lang.ClassNotFoundException
java.sql.SQLException
protected java.util.HashMap<java.lang.String,java.util.HashMap<java.lang.String,java.util.ArrayList<java.lang.Integer>>> getProteinMapping(java.lang.String peptideSequence, ProteinMatch.MatchingType matchingType, java.lang.Double massTolerance, boolean reversed) throws java.io.IOException, java.lang.InterruptedException, java.lang.ClassNotFoundException, java.sql.SQLException
peptideSequence
- the peptide sequence
reversed
- boolean indicating whether we are looking at a reversed
peptide sequence
matchingType
- the matching type
massTolerance
- the mass tolerance for matching type
'indistiguishibleAminoAcids'. Can be null otherwise
java.io.IOException
java.lang.InterruptedException
java.lang.ClassNotFoundException
java.sql.SQLException
protected java.util.ArrayList<java.lang.String> getInitialTags(java.lang.String peptideSequence, ProteinMatch.MatchingType matchingType, java.lang.Double massTolerance) throws java.sql.SQLException, java.io.IOException, java.lang.ClassNotFoundException
peptideSequence
- the peptide sequence
matchingType
- the matching type
massTolerance
- the mass tolerance for matching type
'indistiguishibleAminoAcids'. Can be null otherwise.
java.sql.SQLException
java.io.IOException
java.lang.ClassNotFoundException
protected java.util.HashMap<java.lang.String,java.util.HashMap<java.lang.String,java.util.ArrayList<java.lang.Integer>>> getReversedResults(java.util.HashMap<java.lang.String,java.util.HashMap<java.lang.String,java.util.ArrayList<java.lang.Integer>>> forwardResults) throws java.sql.SQLException, java.lang.ClassNotFoundException, java.io.IOException
forwardResults
- the given mapping
peptideSequence
- the sequence of interest
java.sql.SQLException
java.lang.ClassNotFoundException
java.io.IOException
protected Node getNode(java.lang.String tag) throws java.sql.SQLException, java.lang.ClassNotFoundException, java.io.IOException
tag
- the tag of interest
java.sql.SQLException
java.lang.ClassNotFoundException
java.io.IOException
public void close() throws java.io.IOException, java.sql.SQLException
java.io.IOException
java.sql.SQLException
public int getCacheSize()
public void setCacheSize(int cacheSize)
cacheSize
- the size of the cache used for peptide mappings
public void emptyCache()
public java.util.HashMap<java.lang.String,java.util.ArrayList<java.lang.Integer>> getMatchedPeptideSequences(java.lang.String peptideSequence, java.lang.String proteinAccession, ProteinMatch.MatchingType matchingType, java.lang.Double massTolerance) throws java.io.IOException, java.lang.InterruptedException, java.lang.ClassNotFoundException, java.sql.SQLException
peptideSequence
- the original peptide sequence
proteinAccession
- the accession of the protein of interest
matchingType
- the matching type
massTolerance
- the mass tolerance for indistinguishable amino acids
matching mode
java.io.IOException
java.lang.InterruptedException
java.sql.SQLException
java.lang.ClassNotFoundException
public ProteinTree.PeptideIterator getPeptideIterator() throws java.sql.SQLException, java.io.IOException, java.lang.ClassNotFoundException
java.sql.SQLException
java.io.IOException
java.lang.ClassNotFoundException
Copyright © 2013. All Rights Reserved.