Neff Computation. More...
#include "flagHandler.h"
#include "msaReader.h"
#include "msaWriter.h"
#include "multimerHandler.h"
#include <iostream>
#include <vector>
#include <string>
#include <unordered_map>
#include <cmath>
#include <sstream>
#include <climits>
#include "common.h"
#include <fstream>
#include <algorithm>
#include <tuple>
#include <random>
#include <set>
#include <thread>
#include <future>
#include <mutex>
#include <chrono>
| Functions | |
| int | char2num (char c, const string &standardLetters, const string &nonStandardLetters, NonStandardHandler nonStandardOption) | 
| Map char residues to digit based on given 'nonStandardOption'. | |
| void | removeGappyPositions (vector< vector< int > > &sequences, float gapCutoff) | 
| Remove gappy positions from sequences based on given 'gapCutoff'. | |
| vector< vector< int > > | processSequences (vector< Sequence > sequences, string standardLetters, string nonStandardLetters, NonStandardHandler nonStandardOption, float gapCutoff) | 
| Map chars to digits based on provided 'nonStandardOption' and also remove gappy positions based on given 'gapCutoff'. | |
| vector< int > | computeWeights (vector< vector< int > > sequences, float threshold, bool isSymmetric, string standardLetters, NonStandardHandler nonStandardOption) | 
| Compute sequence weights based on given options. | |
| float | computeNeff (vector< int > sequenceWeights, Normalization norm, int length) | 
| Compute NEFF values based on sequence weights and given normalization. | |
| Alphabet | getAlphabet (FlagHandler &flagHandler) | 
| Get given alphabet by user. | |
| Normalization | getNormalization (FlagHandler &flagHandler) | 
| Get given normalization option by user. | |
| NonStandardHandler | getNonStandardOption (FlagHandler &flagHandler) | 
| Get given non_standard_option option by user. | |
| void | checkFlags (FlagHandler &flagHandler) | 
| Check flags | |
| void | setDepth (vector< Sequence > &sequences, int depth) | 
| Set MSA depth based on 'depth' flag. | |
| int | getNonGapStartPosition (string firstAlignement, int startPos) | 
| Get the index of the given startPos in the original first sequence of the MSA, regardless of gaps in the MSA. | |
| int | getNonGapEndPosition (string firstAlignement, int startPos, int length) | 
| Get the end index in the original sequence after a 'length' number of non-gap positions. | |
| void | getPositions (vector< Sequence > &sequences, FlagHandler flagHandler) | 
| Set the desired positions to compute NEFF for, based on the given 'pos_start' and 'pos_end' flags. | |
| void | integrateUniqueSequences (vector< Sequence > &integratedSequences, const vector< Sequence > &sequences) | 
| to merge sequences and remove redundant sequences | |
| vector< float > | computeResidueNEFF (const vector< vector< int > > &sequences, const vector< int > &sequenceWeights, Normalization norm) | 
| Compute per-residue (column-wise) NEFF. | |
| int | main (int argc, char **argv) | 
| Variables | |
| const char * | docstr | 
| unordered_map< string, FlagInfo > | Flags | 
Neff Computation.
This program computes Number of EFFective sequences (NEFF) for a multiple sequence alignment (MSA) file. NEFF is a measure of effective sequence number that takes into account the redundancy and similarity of sequences in the MSA. It is commonly used in bioinformatics to assess the diversity of a set of sequences.
Usage: ./neff --file=<input_file> [options]
Options:
  --file=<input_file>               Input files (comma-separated, no spaces) containing multiple sequence alignments (required)
  --format=<input_format>           Input file formats (comma-separated, no spaces) containing formats of multiple sequence alignments (optional)
  --alphabet=<value>                Valid alphabet of MSA; alphabet option (0: Protein, 1: RNA, 2: DNA) (default: 0)
  --check_validation=<true/false>   Perform validation on sequences (default: false)
  --threshold=<value>               Threshold value for considering two sequences similar (default: 0.8)
  --norm=<value>                    NEFF normalization option (0: sqrt(Length of alignment), 1: Length of alignment, 2: No normalization) (default: 0)
  --omit_query_gaps=<true/false>    Omit gap positions of query sequence from all sequences for NEFF computation (default: true)
  --is_symmetric=<true/false>       Consider gaps in similarity cutoff computation (asymmetric) or not (symmetric) (default: true)
  --non_standard_option=<value>     Handling non-standard letters in the given alphabet (0: AsStandard, 1: ConsiderGapInCutoff, 2: ConsiderGap)
  --depth=<value>                   Depth of MSA to be considered in computation (default: depth of given MSA)
  --gap_cutoff=<value>              Cutoff value for removing gappy positions, when #gaps in position >= gap_cutoff (default=1 : does not remove anything)
  --pos_start=<value>               Start position of each sequence to be considered in neff (inclusive (default: 1))
  --pos_end=<value>                 Last position of each sequence to be considered in neff (inclusive (default: length of MSA sequence))
  --only_weights=<true/false>       Return only sequence weights, as # similar sequence, rather than the final NEFF (default: false)
  --multimer_MSA=<true/false>       Compute NEFF for a multimer MSA (default: false)
  --stoichiom=<value>               Multimer stoichiometry (default: empty)
  --chain_length=<list of values>   Length of the chains in heteromer multimer (default: 0)
  --residue_neff=<true/false>       Compute per-residue (column-wise) NEFF (default: false)
For more comprehensive instructions, please refer to the documentation at https://maryam-haghani.github.io/NEFFy.
| int char2num | ( | char | c, | 
| const string & | standardLetters, | ||
| const string & | nonStandardLetters, | ||
| NonStandardHandler | nonStandardOption ) | 
Map char residues to digit based on given 'nonStandardOption'.
| c | input letter | 
| standardLetters | |
| nonStandardLetters | |
| nonStandardOption | 
| void checkFlags | ( | FlagHandler & | flagHandler | ) | 
Check flags 
 
| flagHandler | 
| float computeNeff | ( | vector< int > | sequenceWeights, | 
| Normalization | norm, | ||
| int | length ) | 
Compute NEFF values based on sequence weights and given normalization.
| sequenceWeights | |
| norm | |
| length | 
| vector< float > computeResidueNEFF | ( | const vector< vector< int > > & | sequences, | 
| const vector< int > & | sequenceWeights, | ||
| Normalization | norm ) | 
Compute per-residue (column-wise) NEFF.
| sequences | |
| sequenceWeights | |
| norm | 
| vector< int > computeWeights | ( | vector< vector< int > > | sequences, | 
| float | threshold, | ||
| bool | isSymmetric, | ||
| string | standardLetters, | ||
| NonStandardHandler | nonStandardOption ) | 
Compute sequence weights based on given options.
| sequences | |
| threshold | |
| isSymmetric | |
| standardLetters | |
| nonStandardOption | 
| Alphabet getAlphabet | ( | FlagHandler & | flagHandler | ) | 
Get given alphabet by user.
| flagHandler | 
| int getNonGapEndPosition | ( | string | firstAlignement, | 
| int | startPos, | ||
| int | length ) | 
Get the end index in the original sequence after a 'length' number of non-gap positions.
| firstAlignement | |
| startPos | |
| length | 
| int getNonGapStartPosition | ( | string | firstAlignement, | 
| int | startPos ) | 
Get the index of the given startPos in the original first sequence of the MSA, regardless of gaps in the MSA.
| firstAlignement | |
| startPos | 
| NonStandardHandler getNonStandardOption | ( | FlagHandler & | flagHandler | ) | 
Get given non_standard_option option by user.
| flagHandler | 
| Normalization getNormalization | ( | FlagHandler & | flagHandler | ) | 
Get given normalization option by user.
| flagHandler | 
| void getPositions | ( | vector< Sequence > & | sequences, | 
| FlagHandler | flagHandler ) | 
Set the desired positions to compute NEFF for, based on the given 'pos_start' and 'pos_end' flags.
| sequences | |
| flagHandler | 
| void integrateUniqueSequences | ( | vector< Sequence > & | integratedSequences, | 
| const vector< Sequence > & | sequences ) | 
to merge sequences and remove redundant sequences
| integratedSequences | |
| sequences | 
| int main | ( | int | argc, | 
| char ** | argv ) | 
omit gap positions of query sequence in all sequences if omitGapsInQuery=true
| vector< vector< int > > processSequences | ( | vector< Sequence > | sequences, | 
| string | standardLetters, | ||
| string | nonStandardLetters, | ||
| NonStandardHandler | nonStandardOption, | ||
| float | gapCutoff ) | 
Map chars to digits based on provided 'nonStandardOption' and also remove gappy positions based on given 'gapCutoff'.
| sequences | |
| standardLetters | |
| nonStandardLetters | |
| nonStandardOption | |
| gapCutoff | 
| void removeGappyPositions | ( | vector< vector< int > > & | sequences, | 
| float | gapCutoff ) | 
Remove gappy positions from sequences based on given 'gapCutoff'.
| sequences | |
| gapCutoff | 
| void setDepth | ( | vector< Sequence > & | sequences, | 
| int | depth ) | 
Set MSA depth based on 'depth' flag.
| sequences | |
| depth | 
| const char* docstr | 
| unordered_map<string, FlagInfo> Flags |