Neff Computation. More...
#include "flagHandler.h"
#include "msaReader.h"
#include "msaWriter.h"
#include "multimerHandler.h"
#include <iostream>
#include <vector>
#include <string>
#include <unordered_map>
#include <cmath>
#include <sstream>
#include <climits>
#include "common.h"
#include <fstream>
#include <algorithm>
#include <tuple>
#include <random>
#include <set>
#include <thread>
#include <future>
#include <mutex>
#include <chrono>
Functions | |
int | char2num (char c, const string &standardLetters, const string &nonStandardLetters, NonStandardHandler nonStandardOption) |
Map char residues to digit based on given 'nonStandardOption'. | |
void | removeGappyPositions (vector< vector< int > > &sequences, float gapCutoff) |
Remove gappy positions from sequences based on given 'gapCutoff'. | |
vector< vector< int > > | processSequences (vector< Sequence > sequences, string standardLetters, string nonStandardLetters, NonStandardHandler nonStandardOption, float gapCutoff) |
Map chars to digits based on provided 'nonStandardOption' and also remove gappy positions based on given 'gapCutoff'. | |
vector< int > | computeWeights (vector< vector< int > > sequences, float threshold, bool isSymmetric, string standardLetters, NonStandardHandler nonStandardOption) |
Compute sequence weights based on given options. | |
float | computeNeff (vector< int > sequenceWeights, Normalization norm, int length) |
Compute NEFF values based on sequence weights and given normalization. | |
Alphabet | getAlphabet (FlagHandler &flagHandler) |
Get given alphabet by user. | |
Normalization | getNormalization (FlagHandler &flagHandler) |
Get given normalization option by user. | |
NonStandardHandler | getNonStandardOption (FlagHandler &flagHandler) |
Get the non_standard_option value given by the user. | |
void | checkFlags (FlagHandler &flagHandler) |
Check flags | |
void | setDepth (vector< Sequence > &sequences, int depth) |
Set MSA depth based on 'depth' flag. | |
int | getNonGapStartPosition (string firstAlignement, int startPos) |
Get the index of the given startPos in the original first sequence of the MSA, regardless of gaps in the MSA. | |
int | getNonGapEndPosition (string firstAlignement, int startPos, int length) |
Get the end index in the original sequence after a 'length' number of non-gap positions. | |
void | getPositions (vector< Sequence > &sequences, FlagHandler flagHandler) |
Set the desired positions to compute NEFF for, based on the given 'pos_start' and 'pos_end' flags. | |
void | integrateUniqueSequences (vector< Sequence > &integratedSequences, const vector< Sequence > &sequences) |
Merge sequences and remove redundant sequences. | |
vector< float > | computeResidueNEFF (const vector< vector< int > > &sequences, const vector< int > &sequenceWeights, Normalization norm) |
Compute per-residue (column-wise) NEFF. | |
int | main (int argc, char **argv) |
Variables | |
const char * | docstr |
unordered_map< string, FlagInfo > | Flags |
Neff Computation.
This program computes Number of EFFective sequences (NEFF) for a multiple sequence alignment (MSA) file. NEFF is a measure of effective sequence number that takes into account the redundancy and similarity of sequences in the MSA. It is commonly used in bioinformatics to assess the diversity of a set of sequences.
Usage: ./neff --file=<input_file> [options]
Options:
--file=<input_file> Input files (comma-separated, no spaces) containing multiple sequence alignments (required)
--alphabet=<value> Valid alphabet of MSA; alphabet option (0: Protein, 1: RNA, 2: DNA) (default: 0)
--check_validation=<true/false> Perform validation on sequences (default: false)
--threshold=<value> Threshold value of considering two sequences similar (default: 0.8)
--norm=<value> NEFF normalization option (0: sqrt(Length of alignment), 1: Length of alignment, 2: No normalization) (default: 0)
--omit_query_gaps=<true/false> Omit gap positions of query sequence from all sequences for NEFF computation (default: true)
--is_symmetric=<true/false> Consider gaps in similarity cutoff computation (asymmetric) or not (symmetric) (default: true)
--non_standard_option=<value> Handling non-standard letters in the given alphabet (0: AsStandard, 1: ConsiderGapInCutoff, 2: ConsiderGap)
--depth=<value> Depth of MSA to be considered in computation (default: depth of given MSA)
--gap_cutoff=<value> Cutoff value for removing gappy positions, when #gaps in position >= gap_cutoff (default=1 : does not remove anything)
--pos_start=<value> Start position of each sequence to be considered in neff (inclusive (default: 1))
--pos_end=<value> Last position of each sequence to be considered in neff (inclusive (default: length of MSA sequence))
--only_weights=<true/false> Return only sequence weights, as # similar sequence, rather than the final NEFF (default: false)
--multimer_MSA=<true/false> Compute NEFF for a multimer MSA (default: false)
--stoichiom=<value> Multimer stoichiometry (default: empty)
--chain_length=<value> Length of the chains in heteromer multimer (default: 0)
--residue_neff=<true/false> Compute per-residue (column-wise) NEFF (default: false)
For detailed instructions, please refer to the documentation at https://maryam-haghani.github.io/NEFFy.
int char2num | ( | char | c, |
const string & | standardLetters, | ||
const string & | nonStandardLetters, | ||
NonStandardHandler | nonStandardOption ) |
Map char residues to digit based on given 'nonStandardOption'.
c | input letter |
standardLetters | |
nonStandardLetters | |
nonStandardOption |
void checkFlags | ( | FlagHandler & | flagHandler | ) |
Check flags
flagHandler |
float computeNeff | ( | vector< int > | sequenceWeights, |
Normalization | norm, | ||
int | length ) |
Compute NEFF values based on sequence weights and given normalization.
sequenceWeights | |
norm | |
length |
vector< float > computeResidueNEFF | ( | const vector< vector< int > > & | sequences, |
const vector< int > & | sequenceWeights, | ||
Normalization | norm ) |
Compute per-residue (column-wise) NEFF.
sequences | |
sequenceWeights | |
norm |
vector< int > computeWeights | ( | vector< vector< int > > | sequences, |
float | threshold, | ||
bool | isSymmetric, | ||
string | standardLetters, | ||
NonStandardHandler | nonStandardOption ) |
Compute sequence weights based on given options.
sequences | |
threshold | |
isSymmetric | |
standardLetters | |
nonStandardOption |
Alphabet getAlphabet | ( | FlagHandler & | flagHandler | ) |
Get given alphabet by user.
flagHandler |
int getNonGapEndPosition | ( | string | firstAlignement, |
int | startPos, | ||
int | length ) |
Get the end index in the original sequence after a 'length' number of non-gap positions.
firstAlignement | |
startPos | |
length |
int getNonGapStartPosition | ( | string | firstAlignement, |
int | startPos ) |
Get the index of the given startPos in the original first sequence of the MSA, regardless of gaps in the MSA.
firstAlignement | |
startPos |
NonStandardHandler getNonStandardOption | ( | FlagHandler & | flagHandler | ) |
Get the non_standard_option value given by the user.
flagHandler |
Normalization getNormalization | ( | FlagHandler & | flagHandler | ) |
Get given normalization option by user.
flagHandler |
void getPositions | ( | vector< Sequence > & | sequences, |
FlagHandler | flagHandler ) |
Set the desired positions to compute NEFF for, based on the given 'pos_start' and 'pos_end' flags.
sequences | |
flagHandler |
void integrateUniqueSequences | ( | vector< Sequence > & | integratedSequences, |
const vector< Sequence > & | sequences ) |
Merge sequences and remove redundant sequences.
integratedSequences | |
sequences |
int main | ( | int | argc, |
char ** | argv ) |
omit gap positions of query sequence in all sequences if omitGapsInQuery=true
vector< vector< int > > processSequences | ( | vector< Sequence > | sequences, |
string | standardLetters, | ||
string | nonStandardLetters, | ||
NonStandardHandler | nonStandardOption, | ||
float | gapCutoff ) |
Map chars to digits based on provided 'nonStandardOption' and also remove gappy positions based on given 'gapCutoff'.
sequences | |
standardLetters | |
nonStandardLetters | |
nonStandardOption | |
gapCutoff |
void removeGappyPositions | ( | vector< vector< int > > & | sequences, |
float | gapCutoff ) |
Remove gappy positions from sequences based on given 'gapCutoff'.
sequences | |
gapCutoff |
void setDepth | ( | vector< Sequence > & | sequences, |
int | depth ) |
Set MSA depth based on 'depth' flag.
sequences | |
depth |
const char* docstr |
unordered_map<string, FlagInfo> Flags |