Neff Computation. More...
#include "flagHandler.h"
#include "msaReader.h"
#include "msaWriter.h"
#include "multimerHandler.h"
#include <iostream>
#include <vector>
#include <string>
#include <unordered_map>
#include <cmath>
#include <sstream>
#include <climits>
#include "common.h"
#include <fstream>
#include <algorithm>
#include <tuple>
#include <random>
#include <set>
#include <thread>
#include <future>
#include <mutex>
#include <chrono>
Functions | |
| int | char2num (char c, const string &standardLetters, const string &nonStandardLetters, NonStandardHandler nonStandardOption) |
| Map char residues to digit based on given 'nonStandardOption'. | |
| void | removeGappyPositions (vector< vector< int > > &sequences, float gapCutoff) |
| Remove gappy positions from sequences based on given 'gapCutoff'. | |
| vector< vector< int > > | processSequences (vector< Sequence > sequences, string standardLetters, string nonStandardLetters, NonStandardHandler nonStandardOption, float gapCutoff) |
| Map chars to digits based on provided 'nonStandardOption' and also remove gappy positions based on given 'gapCutoff'. | |
| vector< int > | computeWeights (vector< vector< int > > sequences, float threshold, bool isSymmetric, string standardLetters, NonStandardHandler nonStandardOption) |
| Compute sequence weights based on given options. | |
| float | computeNeff (vector< int > sequenceWeights, Normalization norm, int length) |
| Compute NEFF values based on sequence weights and given normalization. | |
| Alphabet | getAlphabet (FlagHandler &flagHandler) |
| Get the alphabet specified by the user. | |
| Normalization | getNormalization (FlagHandler &flagHandler) |
| Get the normalization option specified by the user. | |
| NonStandardHandler | getNonStandardOption (FlagHandler &flagHandler) |
| Get the non_standard_option value specified by the user. | |
| void | checkFlags (FlagHandler &flagHandler) |
| Check flags | |
| void | setDepth (vector< Sequence > &sequences, int depth) |
| Set MSA depth based on 'depth' flag. | |
| int | getNonGapStartPosition (string firstAlignement, int startPos) |
| Get the index of the given startPos in the original first sequence of the MSA, regardless of gaps in the MSA. | |
| int | getNonGapEndPosition (string firstAlignement, int startPos, int length) |
| Get the end index in the original sequence after a 'length' number of non-gap positions. | |
| void | getPositions (vector< Sequence > &sequences, FlagHandler flagHandler) |
| Set the desired positions for which to compute NEFF, based on the given 'pos_start' and 'pos_end' flags. | |
| void | integrateUniqueSequences (vector< Sequence > &integratedSequences, const vector< Sequence > &sequences) |
| Merge sequences and remove redundant sequences. | |
| vector< float > | computeResidueNEFF (const vector< vector< int > > &sequences, const vector< int > &sequenceWeights, Normalization norm) |
| Compute per-residue (column-wise) NEFF. | |
| int | main (int argc, char **argv) |
Variables | |
| const char * | docstr |
| unordered_map< string, FlagInfo > | Flags |
Neff Computation.
This program computes the Number of EFFective sequences (NEFF) for a multiple sequence alignment (MSA) file. NEFF is a measure of the effective number of sequences that takes into account the redundancy and similarity of sequences in the MSA. It is commonly used in bioinformatics to assess the diversity of a set of sequences.
Usage: ./neff --file=<input_file> [options]
Options:
--file=<input_file> Input files (comma-separated, no spaces) containing multiple sequence alignments (required)
--format=<input_format> Input file formats (comma-separated, no spaces) containing formats of multiple sequence alignments (optional)
--alphabet=<value> Valid alphabet of MSA; alphabet option (0: Protein, 1: RNA, 2: DNA) (default: 0)
--check_validation=<true/false> Perform validation on sequences (default: false)
--threshold=<value> Threshold value of considering two sequences similar (default: 0.8)
--norm=<value> NEFF normalization option (0: sqrt(Length of alignment), 1: Length of alignment, 2: No normalization) (default: 0)
--omit_query_gaps=<true/false> Omit gap positions of query sequence from all sequences for NEFF computation (default: true)
--is_symmetric=<true/false> Consider gaps in similarity cutoff computation (asymmetric) or not (symmetric) (default: true)
--non_standard_option=<value> Handling non-standard letters in the given alphabet (0: AsStandard, 1: ConsiderGapInCutoff, 2: ConsiderGap)
--depth=<value> Depth of MSA to be considered in computation (default: depth of given MSA)
--gap_cutoff=<value> Cutoff value for removing gappy positions, when #gaps in position >= gap_cutoff (default=1 : does not remove anything)
--pos_start=<value> Start position of each sequence to be considered in neff (inclusive (default: 1))
--pos_end=<value> Last position of each sequence to be considered in neff (inclusive (default: length of MSA sequence))
--only_weights=<true/false> Return only sequence weights, as # similar sequence, rather than the final NEFF (default: false)
--multimer_MSA=<true/false> Compute NEFF for a multimer MSA (default: false)
--stoichiom=<value> Multimer stoichiometry (default: empty)
--chain_length=<list of values> Length of the chains in heteromer multimer (default: 0)
--residue_neff=<true/false> Compute per-residue (column-wise) NEFF (default: false)
For more comprehensive instructions, please refer to the documentation at https://maryam-haghani.github.io/NEFFy.
| int char2num | ( | char | c, |
| const string & | standardLetters, | ||
| const string & | nonStandardLetters, | ||
| NonStandardHandler | nonStandardOption ) |
Map char residues to digit based on given 'nonStandardOption'.
| c | input letter |
| standardLetters | |
| nonStandardLetters | |
| nonStandardOption |
| void checkFlags | ( | FlagHandler & | flagHandler | ) |
Check flags
| flagHandler |
| float computeNeff | ( | vector< int > | sequenceWeights, |
| Normalization | norm, | ||
| int | length ) |
Compute NEFF values based on sequence weights and given normalization.
| sequenceWeights | |
| norm | |
| length |
| vector< float > computeResidueNEFF | ( | const vector< vector< int > > & | sequences, |
| const vector< int > & | sequenceWeights, | ||
| Normalization | norm ) |
Compute per-residue (column-wise) NEFF.
| sequences | |
| sequenceWeights | |
| norm |
| vector< int > computeWeights | ( | vector< vector< int > > | sequences, |
| float | threshold, | ||
| bool | isSymmetric, | ||
| string | standardLetters, | ||
| NonStandardHandler | nonStandardOption ) |
Compute sequence weights based on given options.
| sequences | |
| threshold | |
| isSymmetric | |
| standardLetters | |
| nonStandardOption |
| Alphabet getAlphabet | ( | FlagHandler & | flagHandler | ) |
Get the alphabet specified by the user.
| flagHandler |
| int getNonGapEndPosition | ( | string | firstAlignement, |
| int | startPos, | ||
| int | length ) |
Get the end index in the original sequence after a 'length' number of non-gap positions.
| firstAlignement | |
| startPos | |
| length |
| int getNonGapStartPosition | ( | string | firstAlignement, |
| int | startPos ) |
Get the index of the given startPos in the original first sequence of the MSA, regardless of gaps in the MSA.
| firstAlignement | |
| startPos |
| NonStandardHandler getNonStandardOption | ( | FlagHandler & | flagHandler | ) |
Get the non_standard_option value specified by the user.
| flagHandler |
| Normalization getNormalization | ( | FlagHandler & | flagHandler | ) |
Get the normalization option specified by the user.
| flagHandler |
| void getPositions | ( | vector< Sequence > & | sequences, |
| FlagHandler | flagHandler ) |
Set the desired positions for which to compute NEFF, based on the given 'pos_start' and 'pos_end' flags.
| sequences | |
| flagHandler |
| void integrateUniqueSequences | ( | vector< Sequence > & | integratedSequences, |
| const vector< Sequence > & | sequences ) |
Merge sequences and remove redundant sequences.
| integratedSequences | |
| sequences |
| int main | ( | int | argc, |
| char ** | argv ) |
omit gap positions of query sequence in all sequences if omitGapsInQuery=true
| vector< vector< int > > processSequences | ( | vector< Sequence > | sequences, |
| string | standardLetters, | ||
| string | nonStandardLetters, | ||
| NonStandardHandler | nonStandardOption, | ||
| float | gapCutoff ) |
Map chars to digits based on provided 'nonStandardOption' and also remove gappy positions based on given 'gapCutoff'.
| sequences | |
| standardLetters | |
| nonStandardLetters | |
| nonStandardOption | |
| gapCutoff |
| void removeGappyPositions | ( | vector< vector< int > > & | sequences, |
| float | gapCutoff ) |
Remove gappy positions from sequences based on given 'gapCutoff'.
| sequences | |
| gapCutoff |
| void setDepth | ( | vector< Sequence > & | sequences, |
| int | depth ) |
Set MSA depth based on 'depth' flag.
| sequences | |
| depth |
| const char* docstr |
| unordered_map<string, FlagInfo> Flags |