CMAPLE 1 — CMaple phylogenetic software
Source listing: alignment.h
1#include "../utils/timeutil.h"
2#include "sequence.h"
3
4#ifndef CMAPLE_ALIGNMENT_H
5#define CMAPLE_ALIGNMENT_H
6
7namespace cmaple {
// Alignment: stores an alignment as a vector of Sequence (`data`) together
// with a reference sequence held as states (`ref_seq`), the number of states,
// and the alignment format. Per the symbol index that follows this listing,
// the public API reads and writes alignments in FASTA, PHYLIP, or MAPLE format.
// NOTE(review): this is a rendered listing; several original lines (doc
// comments, some declarations) are elided, as the gaps in the listing line
// numbers show.
9class Alignment {
10 public:
// Alignment file formats.
// NOTE(review): only IN_MAPLE is visible here; the symbol index also lists
// IN_FASTA (line 15), IN_PHYLIP (line 16), IN_AUTO (line 19) and
// IN_UNKNOWN (line 20), whose lines are elided from this listing.
14 enum InputType {
17 IN_MAPLE,
21 };
22
23 // ----------------- BEGIN OF PUBLIC APIs ------------------------------------
24 // //
28
// Constructor from a stream of an alignment in FASTA, PHYLIP, or MAPLE format.
// NOTE(review): the opening `Alignment(` line and the trailing `seqtype`
// parameter are elided here; the symbol index gives the full signature, with
// seqtype defaulting to cmaple::SeqRegion::SEQ_AUTO.
45 std::istream& aln_stream,
46 const std::string& ref_seq = "",
47 const InputType format = IN_AUTO,
49
// Constructor from an alignment file in FASTA, PHYLIP, or MAPLE format.
// NOTE(review): same elision as above (opening line and `seqtype` parameter).
68 const std::string& aln_filename,
69 const std::string& ref_seq = "",
70 const InputType format = IN_AUTO,
72
76
// Read an alignment from a stream in FASTA, PHYLIP, or MAPLE format.
// ref_seq defaults to an empty string and format to IN_AUTO.
// NOTE(review): the trailing `seqtype` parameter (default SEQ_AUTO per the
// symbol index) and the closing `);` are elided from this listing.
91 void read(
92 std::istream& aln_stream,
93 const std::string& ref_seq = "",
94 const InputType format = IN_AUTO,
96
// Read an alignment from a file in FASTA, PHYLIP, or MAPLE format.
// NOTE(review): trailing `seqtype` parameter and closing `);` elided as above.
113 void read(
114 const std::string& aln_filename,
115 const std::string& ref_seq = "",
116 const InputType format = IN_AUTO,
118
// Write the alignment to a stream in FASTA, PHYLIP, or MAPLE format
// (format defaults to IN_MAPLE).
127 void write(std::ostream& aln_stream, const InputType& format = IN_MAPLE);
128
// Write the alignment to a file in FASTA, PHYLIP, or MAPLE format
// (format defaults to IN_MAPLE; overwrite defaults to false).
// NOTE(review): presumably `overwrite` controls whether an existing file may
// be replaced — confirm against the implementation.
143 void write(const std::string& aln_filename,
144 const InputType& format = IN_MAPLE,
145 const bool overwrite = false);
146
147 // ----------------- END OF PUBLIC APIs ------------------------------------
148 // //
149
// Returns the stored sequence type (seq_type_).
// NOTE(review): the declaration of seq_type_ is not visible in this listing.
154 inline cmaple::SeqRegion::SeqType getSeqType() const {
155 return seq_type_;
156 }
157
// Stores the given sequence type in seq_type_, then calls updateNumStates().
161 inline void setSeqType(cmaple::SeqRegion::SeqType seq_type) {
162 seq_type_ = seq_type;
163 updateNumStates();
164 }
165
// Takes a reference file name and a reference name; returns a std::string.
// NOTE(review): presumably reads the named reference sequence from the file —
// confirm against the implementation.
175 auto readRefSeq(const std::string& ref_filename,
176 const std::string& ref_name) -> std::string;
177
// Static helper: maps a state to a char for the given sequence type.
184 static char convertState2Char(const cmaple::StateType& state,
185 const cmaple::SeqRegion::SeqType& seqtype);
186
// Static helper: maps a format name string to an InputType value.
192 static InputType parseAlnFormat(const std::string& n_format);
193
// The sequences of the alignment.
197 std::vector<Sequence>
198 data; // note: this is inefficient, but only used briefly
199
// The reference sequence, stored as a vector of states.
203 std::vector<cmaple::StateType> ref_seq;
204
// Number of states.
// NOTE(review): presumably kept in sync with the sequence type by
// updateNumStates() — confirm.
208 cmaple::StateType num_states;
209
// Format of the alignment; initialised to IN_AUTO.
213 InputType aln_format = IN_AUTO;
214
// Set of untyped pointers.
// NOTE(review): presumably the trees currently using this alignment — confirm.
218 std::unordered_set<void*> attached_trees;
219
220 private:
225
// NOTE(review): the private helpers below are declarations only; the purposes
// noted are inferred from names and signatures and should be confirmed
// against the implementation file.
229 void reset();
230
// Called by setSeqType() after the sequence type changes.
234 void updateNumStates();
235
// Returns a SeqType for the given sequences (presumably auto-detection).
241 cmaple::SeqRegion::SeqType detectSequenceType(cmaple::StrVector& sequences);
242
// Returns a PositionType distance for one sequence, given a hamming weight.
250 cmaple::PositionType computeSeqDistance(Sequence& sequence,
251 cmaple::RealNumType hamming_weight);
252
259 void sortSeqsByDistances();
260
// Maps a character to a state (counterpart of convertState2Char by name).
267 cmaple::StateType convertChar2State(char state);
268
// Takes sequences, their names, and a reference sequence string.
278 void extractMutations(const cmaple::StrVector& sequences,
279 const cmaple::StrVector& seq_names,
280 const std::string& ref_sequence);
281
// Format-specific readers operating on an input stream.
290 void readMaple(std::istream& aln_stream);
291
300 void readFastaOrPhylip(std::istream& aln_stream,
301 const std::string& ref_seq = "");
302
// Takes the reference sequence string by non-const reference.
308 void parseRefSeq(std::string& ref_sequence);
309
// Reads from the stream into the caller-supplied `sequences` and `seq_names`
// vectors; check_min_seqs defaults to true.
318 void readFasta(std::istream& aln_stream,
319 cmaple::StrVector& sequences,
320 cmaple::StrVector& seq_names,
321 bool check_min_seqs = true);
322
// Same parameter list as readFasta.
331 void readPhylip(std::istream& aln_stream,
332 cmaple::StrVector& sequences,
333 cmaple::StrVector& seq_names,
334 bool check_min_seqs = true);
335
// As above, with an explicit format (default IN_AUTO).
345 void readSequences(std::istream& aln_stream,
346 cmaple::StrVector& sequences,
347 cmaple::StrVector& seq_names,
348 InputType aln_format = IN_AUTO,
349 bool check_min_seqs = true);
350
// Returns a reference sequence string derived from the given sequences.
359 std::string generateRef(cmaple::StrVector& sequences);
360
// Takes the sequence being built, the current input line, and its line number.
365 void processSeq(std::string& sequence,
366 std::string& line,
367 cmaple::PositionType line_num);
368
// Takes a target sequence, a state character, a position, and an optional
// length (default -1).
// NOTE(review): the meaning of length == -1 is not visible here — confirm.
373 void addMutation(Sequence* sequence,
374 char state_char,
375 cmaple::PositionType pos,
376 cmaple::PositionType length = -1);
377
// Format-specific writers operating on an output stream.
382 void writeMAPLE(std::ostream& aln_stream);
383
388 void writeFASTA(std::ostream& aln_stream);
389
394 void writePHYLIP(std::ostream& aln_stream);
395
// Returns the reference sequence as a string.
399 auto getRefSeqStr() -> std::string;
400
// Returns a string for one sequence, given the reference sequence string.
404 auto getSeqString(const std::string& ref_seq_str, Sequence* sequence) -> std::string;
405
// Format detection on an input stream; both return an InputType.
413 InputType detectMAPLEorFASTA(std::istream& aln_stream);
414
424 InputType detectInputFile(std::istream& aln_stream);
425
427};
428
// Stream-insertion operator for an Alignment: takes an output stream and an
// alignment, returns the output stream.
// NOTE(review): presumably writes the alignment through Alignment::write —
// confirm against the implementation.
437auto operator<<(std::ostream& out_stream, cmaple::Alignment& aln) -> std::ostream&;
438
// Stream-extraction operator for an Alignment: takes an input stream and an
// alignment, returns the input stream.
// NOTE(review): presumably reads the alignment through Alignment::read —
// confirm against the implementation.
441auto operator>>(std::istream& in_stream, cmaple::Alignment& aln) -> std::istream&;
442
// Character arrays declared here and defined in another translation unit.
// NOTE(review): presumably the printable state symbols for protein, DNA, RNA
// and morphological data respectively — confirm against the definitions.
447extern char symbols_protein[];
448extern char symbols_dna[];
449extern char symbols_rna[];
450extern char symbols_morph[];
451} // namespace cmaple
452#endif
cmaple::Alignment
Definition alignment.h:9
Alignment(const std::string &aln_filename, const std::string &ref_seq="", const InputType format=IN_AUTO, const cmaple::SeqRegion::SeqType seqtype=cmaple::SeqRegion::SEQ_AUTO)
Constructor from an alignment file in FASTA, PHYLIP, or MAPLE format.
void read(std::istream &aln_stream, const std::string &ref_seq="", const InputType format=IN_AUTO, const cmaple::SeqRegion::SeqType seqtype=cmaple::SeqRegion::SEQ_AUTO)
Read an alignment from a stream in FASTA, PHYLIP, or MAPLE format.
~Alignment()
Destructor.
Alignment()
Default constructor.
void read(const std::string &aln_filename, const std::string &ref_seq="", const InputType format=IN_AUTO, const cmaple::SeqRegion::SeqType seqtype=cmaple::SeqRegion::SEQ_AUTO)
Read an alignment from a file in FASTA, PHYLIP, or MAPLE format.
void write(const std::string &aln_filename, const InputType &format=IN_MAPLE, const bool overwrite=false)
Write the alignment to a file in FASTA, PHYLIP, or MAPLE format.
void write(std::ostream &aln_stream, const InputType &format=IN_MAPLE)
Write the alignment to a stream in FASTA, PHYLIP, or MAPLE format.
Alignment(std::istream &aln_stream, const std::string &ref_seq="", const InputType format=IN_AUTO, const cmaple::SeqRegion::SeqType seqtype=cmaple::SeqRegion::SEQ_AUTO)
Constructor from a stream of an alignment in FASTA, PHYLIP, or MAPLE format.
InputType
Definition alignment.h:14
@ IN_UNKNOWN
Definition alignment.h:20
@ IN_FASTA
Definition alignment.h:15
@ IN_AUTO
Definition alignment.h:19
@ IN_PHYLIP
Definition alignment.h:16
@ IN_MAPLE
Definition alignment.h:17
SeqType
Definition seqregion.h:25
@ SEQ_AUTO
Definition seqregion.h:28
std::istream & operator>>(std::istream &in_stream, cmaple::Tree &tree)
Customized >> operator to read a tree from a stream.
std::ostream & operator<<(std::ostream &out_stream, cmaple::Tree &tree)
Customized << operator to output the tree string in a (bifurcating) NEWICK format to a stream.