CMAPLE 1
CMaple phylogenetic software
Loading...
Searching...
No Matches
tree.h
1#include "../alignment/alignment.h"
2#include "../model/model.h"
3#include "updatingnode.h"
4#ifdef _OPENMP
5#include <omp.h>
6#endif
7
8#pragma once
9
10namespace cmaple {
12class Tree {
13 public:
23 };
24
28 enum TreeType {
32 };
33
34 // ----------------- BEGIN OF PUBLIC APIs ------------------------------------
35 // //
62 Model* model,
63 std::istream& tree_stream,
64 const bool fixed_blengths = false,
65 std::unique_ptr<cmaple::Params>&& params = nullptr);
66
95 Model* model,
96 const std::string& tree_filename = "",
97 const bool fixed_blengths = false,
98 std::unique_ptr<cmaple::Params>&& params = nullptr);
99
103
119 void load(std::istream& tree_stream, const bool fixed_blengths = false);
120
137 void load(const std::string& tree_filename,
138 const bool fixed_blengths = false);
139
149
157 void changeModel(Model* model);
158
172 void doPlacement(std::ostream& out_stream = std::cout);
173
184 void applySPR(const TreeSearchType tree_search_type,
185 const bool shallow_tree_search, std::ostream& out_stream = std::cout);
186
194 void optimizeBranch(std::ostream& out_stream = std::cout);
195
225 const TreeSearchType tree_search_type = NORMAL_TREE_SEARCH,
226 const bool shallow_tree_search = false, std::ostream& out_stream = std::cout);
227
236 RealNumType computeLh();
237
260 void computeBranchSupport(const int num_threads = 1,
261 const int num_replicates = 1000,
262 const double epsilon = 0.1,
263 const bool allow_replacing_ML_tree = true,
264 std::ostream& out_stream = std::cout);
265
277 std::string exportNewick(const TreeType tree_type = BIN_TREE,
278 const bool show_branch_supports = false);
279
280 // ----------------- END OF PUBLIC APIs ------------------------------------
281 // //
282
287 bool fixed_blengths = false;
288
292 cmaple::RealNumType default_blength, min_blength, max_blength,
293 min_blength_mid, min_blength_sensitivity, half_max_blength,
294 half_min_blength_mid, double_min_blength;
295
299 // std::optional<cmaple::Params> params;
300 std::unique_ptr<cmaple::Params> params = nullptr;
301
305 Alignment* aln = nullptr;
306
310 ModelBase* model = nullptr;
311
315 cmaple::RealNumType* cumulative_rate = nullptr;
316
320 std::vector<std::vector<cmaple::PositionType>> cumulative_base;
321
325 std::vector<PhyloNode> nodes;
326
330 std::vector<NodeLh> node_lhs;
331
335 cmaple::NumSeqsType root_vector_index;
336
342 std::vector<std::string> seq_names;
343
348 std::vector<bool> sequence_added;
349
357 void makeTreeInOutConsistent();
358
364 static TreeSearchType parseTreeSearchType(
365 const std::string& tree_search_type);
366
372 static std::string getTreeSearchStr(const TreeSearchType tree_search_type);
373
379 static TreeType parseTreeType(const std::string& tree_type_str);
380
383 private:
387 typedef void (Tree::*LoadTreePtrType)(std::istream&, const bool);
388 LoadTreePtrType loadTreePtr;
389
393 typedef void (Tree::*ChangeModelPtrType)(Model*);
394 ChangeModelPtrType changeModelPtr;
395
399 typedef void (Tree::*ChangeAlnPtrType)(Alignment*);
400 ChangeAlnPtrType changeAlnPtr;
401
405 typedef void (Tree::*DoInferencePtrType)(const TreeSearchType,
406 const bool, std::ostream&);
407 DoInferencePtrType doInferencePtr;
408
412 typedef void (Tree::*DoPlacementPtrType)(std::ostream&);
413 DoPlacementPtrType doPlacementPtr;
414
418 typedef void (Tree::*ApplySPRPtrType)(const TreeSearchType,
419 const bool, std::ostream&);
420 ApplySPRPtrType applySPRPtr;
421
425 typedef void (Tree::*OptimizeBranchPtrType)(std::ostream&);
426 OptimizeBranchPtrType optimizeBranchPtr;
427
431 typedef RealNumType (Tree::*computeLhPtrType)();
432 computeLhPtrType computeLhPtr;
433
437 typedef void (Tree::*computeBranchSupportPtrType)(const int,
438 const int,
439 const double,
440 const bool, std::ostream&);
441 computeBranchSupportPtrType computeBranchSupportPtr;
442
443 typedef void (Tree::*MakeTreeInOutConsistentPtrType)();
444 MakeTreeInOutConsistentPtrType makeTreeInOutConsistentPtr;
445
451 template <const cmaple::StateType num_states>
452 void loadTreeTemplate(std::istream& tree_stream, const bool fixed_blengths);
453
456 template <const cmaple::StateType num_states>
457 void changeAlnTemplate(Alignment* aln);
458
461 template <const cmaple::StateType num_states>
462 void changeModelTemplate(Model* model);
463
466 template <const cmaple::StateType num_states>
467 void doInferenceTemplate(const TreeSearchType tree_search_type,
468 const bool shallow_tree_search, std::ostream& out_stream);
469
472 template <const cmaple::StateType num_states>
473 void doPlacementTemplate(std::ostream& out_stream);
474
477 template <const cmaple::StateType num_states>
478 void applySPRTemplate(const TreeSearchType tree_search_type,
479 const bool shallow_tree_search, std::ostream& out_stream);
480
483 template <const cmaple::StateType num_states>
484 void optimizeBranchTemplate(std::ostream& out_stream);
485
488 template <const cmaple::StateType num_states>
489 RealNumType computeLhTemplate();
490
493 template <const cmaple::StateType num_states>
494 void computeBranchSupportTemplate(const int num_threads,
495 const int num_replicates,
496 const double epsilon,
497 const bool allow_replacing_ML_tree,
498 std::ostream& out_stream);
499
502 template <const cmaple::StateType num_states>
503 void makeTreeInOutConsistentTemplate();
504
509 void setupFuncPtrs();
510
514 void setupBlengthThresh();
515
528 void initTree(Alignment* aln,
529 Model* model,
530 std::unique_ptr<cmaple::Params>&& params);
531
536 void computeCumulativeRate();
537
542 template <const cmaple::StateType num_states>
543 void optimizeTreeTopology(bool short_range_search = false);
544
551 template <const cmaple::StateType num_states>
552 void refreshAllNonLowerLhs();
553
560 template <const cmaple::StateType num_states>
561 cmaple::RealNumType improveSubTree(const cmaple::Index index,
562 PhyloNode& node,
563 bool short_range_search);
564
569 cmaple::RealNumType calculateDerivative(
570 const std::vector<cmaple::RealNumType>& coefficient_vec,
571 const cmaple::RealNumType delta_t);
572
576 template <const cmaple::StateType num_states>
577 void examineSamplePlacementMidBranch(
578 cmaple::Index& selected_node_index,
579 const std::unique_ptr<SeqRegions>& mid_branch_lh,
580 cmaple::RealNumType& best_lh_diff,
581 bool& is_mid_branch,
582 cmaple::RealNumType& lh_diff_mid_branch,
583 TraversingNode& current_extended_node,
584 const std::unique_ptr<SeqRegions>& sample_regions);
585
589 template <const cmaple::StateType num_states>
590 void examineSamplePlacementAtNode(
591 cmaple::Index& selected_node_index,
592 const std::unique_ptr<SeqRegions>& total_lh,
593 cmaple::RealNumType& best_lh_diff,
594 bool& is_mid_branch,
595 cmaple::RealNumType& lh_diff_at_node,
596 cmaple::RealNumType& lh_diff_mid_branch,
597 cmaple::RealNumType& best_up_lh_diff,
598 cmaple::RealNumType& best_down_lh_diff,
599 cmaple::Index& best_child_index,
600 TraversingNode& current_extended_node,
601 const std::unique_ptr<SeqRegions>& sample_regions);
602
608 template <const cmaple::StateType num_states>
609 void finetuneSamplePlacementAtNode(
610 const PhyloNode& selected_node,
611 cmaple::RealNumType& best_down_lh_diff,
612 cmaple::Index& best_child_index,
613 const std::unique_ptr<SeqRegions>& sample_regions);
614
620 template <const cmaple::StateType num_states>
621 void addStartingNodes(const cmaple::Index& node_index,
622 PhyloNode& node,
623 const cmaple::Index& other_child_node_index,
624 const cmaple::RealNumType best_lh_diff,
625 std::stack<std::unique_ptr<UpdatingNode>>& node_stack);
626
632 template <const cmaple::StateType num_states>
633 bool examineSubtreePlacementMidBranch(
634 cmaple::Index& best_node_index,
635 PhyloNode& current_node,
636 cmaple::RealNumType& best_lh_diff,
637 bool& is_mid_branch,
638 cmaple::RealNumType& lh_diff_at_node,
639 cmaple::RealNumType& lh_diff_mid_branch,
640 cmaple::RealNumType& best_up_lh_diff,
641 cmaple::RealNumType& best_down_lh_diff,
642 std::unique_ptr<UpdatingNode>& updating_node,
643 const std::unique_ptr<SeqRegions>& subtree_regions,
644 const cmaple::RealNumType threshold_prob,
645 const cmaple::RealNumType removed_blength,
646 const cmaple::Index top_node_index,
647 std::unique_ptr<SeqRegions>& bottom_regions);
648
654 template <const cmaple::StateType num_states>
655 bool examineSubTreePlacementAtNode(
656 cmaple::Index& best_node_index,
657 PhyloNode& current_node,
658 cmaple::RealNumType& best_lh_diff,
659 bool& is_mid_branch,
660 cmaple::RealNumType& lh_diff_at_node,
661 cmaple::RealNumType& lh_diff_mid_branch,
662 cmaple::RealNumType& best_up_lh_diff,
663 cmaple::RealNumType& best_down_lh_diff,
664 std::unique_ptr<UpdatingNode>& updating_node,
665 const std::unique_ptr<SeqRegions>& subtree_regions,
666 const cmaple::RealNumType threshold_prob,
667 const cmaple::RealNumType removed_blength,
668 const cmaple::Index top_node_index);
669
675 template <const cmaple::StateType num_states>
676 void addChildSeekSubtreePlacement(
677 const cmaple::Index child_1_index,
678 PhyloNode& child_1,
679 PhyloNode& child_2,
680 const cmaple::RealNumType& lh_diff_at_node,
681 const std::unique_ptr<UpdatingNode>& updating_node,
682 std::stack<std::unique_ptr<UpdatingNode>>& node_stack,
683 const cmaple::RealNumType threshold_prob);
684
691 template <const cmaple::StateType num_states>
692 bool addNeighborsSeekSubtreePlacement(
693 PhyloNode& current_node,
694 const cmaple::Index other_child_index,
695 std::unique_ptr<SeqRegions>&& bottom_regions,
696 const cmaple::RealNumType& lh_diff_at_node,
697 const std::unique_ptr<UpdatingNode>& updating_node,
698 std::stack<std::unique_ptr<UpdatingNode>>& node_stack,
699 const cmaple::RealNumType threshold_prob);
700
707 template <const cmaple::StateType num_states,
708 cmaple::RealNumType (Tree::*calculatePlacementCost)(
709 const std::unique_ptr<SeqRegions>&,
710 const std::unique_ptr<SeqRegions>&,
711 const cmaple::RealNumType)>
712 bool tryShorterBranch(
713 const cmaple::RealNumType current_blength,
714 std::unique_ptr<SeqRegions>& best_child_regions,
715 const std::unique_ptr<SeqRegions>& sample,
716 const std::unique_ptr<SeqRegions>& upper_left_right_regions,
717 const std::unique_ptr<SeqRegions>& lower_regions,
718 cmaple::RealNumType& best_split_lh,
719 cmaple::RealNumType& best_branch_length_split,
720 const cmaple::RealNumType new_branch_length,
721 const bool try_first_branch);
722
728 template <const cmaple::StateType num_states>
729 void tryShorterBranchAtRoot(const std::unique_ptr<SeqRegions>& sample,
730 const std::unique_ptr<SeqRegions>& lower_regions,
731 std::unique_ptr<SeqRegions>& best_parent_regions,
732 cmaple::RealNumType& best_root_blength,
733 cmaple::RealNumType& best_parent_lh,
734 const cmaple::RealNumType fixed_blength);
735
740 template <const cmaple::StateType num_states>
741 bool tryShorterNewBranchAtRoot(
742 const std::unique_ptr<SeqRegions>& sample,
743 const std::unique_ptr<SeqRegions>& lower_regions,
744 std::unique_ptr<SeqRegions>& best_parent_regions,
745 cmaple::RealNumType& best_length,
746 cmaple::RealNumType& best_parent_lh,
747 const cmaple::RealNumType fixed_blength);
748
755 template <const cmaple::StateType num_states>
756 bool tryLongerNewBranchAtRoot(
757 const std::unique_ptr<SeqRegions>& sample,
758 const std::unique_ptr<SeqRegions>& lower_regions,
759 std::unique_ptr<SeqRegions>& best_parent_regions,
760 cmaple::RealNumType& best_length,
761 cmaple::RealNumType& best_parent_lh,
762 const cmaple::RealNumType fixed_blength);
763
769 template <const cmaple::StateType num_states>
770 void estimateLengthNewBranchAtRoot(
771 const std::unique_ptr<SeqRegions>& sample,
772 const std::unique_ptr<SeqRegions>& lower_regions,
773 std::unique_ptr<SeqRegions>& best_parent_regions,
774 cmaple::RealNumType& best_length,
775 cmaple::RealNumType& best_parent_lh,
776 const cmaple::RealNumType fixed_blength,
777 const cmaple::RealNumType short_blength_thresh,
778 const bool optional_check);
779
784 template <cmaple::RealNumType (Tree::*calculatePlacementCost)(
785 const std::unique_ptr<SeqRegions>&,
786 const std::unique_ptr<SeqRegions>&,
787 const cmaple::RealNumType)>
788 bool tryShorterNewBranch(
789 const std::unique_ptr<SeqRegions>& best_child_regions,
790 const std::unique_ptr<SeqRegions>& sample,
791 cmaple::RealNumType& best_blength,
792 cmaple::RealNumType& new_branch_lh,
793 const cmaple::RealNumType short_blength_thresh);
794
799 template <cmaple::RealNumType (Tree::*calculatePlacementCost)(
800 const std::unique_ptr<SeqRegions>&,
801 const std::unique_ptr<SeqRegions>&,
802 const cmaple::RealNumType)>
803 void tryLongerNewBranch(const std::unique_ptr<SeqRegions>& best_child_regions,
804 const std::unique_ptr<SeqRegions>& sample,
805 cmaple::RealNumType& best_blength,
806 cmaple::RealNumType& new_branch_lh,
807 const cmaple::RealNumType long_blength_thresh);
808
812 template <cmaple::RealNumType (Tree::*calculatePlacementCost)(
813 const std::unique_ptr<SeqRegions>&,
814 const std::unique_ptr<SeqRegions>&,
815 const cmaple::RealNumType)>
816 void estimateLengthNewBranch(
817 const cmaple::RealNumType best_split_lh,
818 const std::unique_ptr<SeqRegions>& best_child_regions,
819 const std::unique_ptr<SeqRegions>& sample,
820 cmaple::RealNumType& best_blength,
821 const cmaple::RealNumType long_blength_thresh,
822 const cmaple::RealNumType short_blength_thresh,
823 const bool optional_check);
824
830 template <const cmaple::StateType num_states>
831 void connectNewSample2Branch(
832 std::unique_ptr<SeqRegions>& sample,
833 const cmaple::NumSeqsType seq_name_index,
834 const cmaple::Index sibling_node_index,
835 PhyloNode& sibling_node,
836 const cmaple::RealNumType top_distance,
837 const cmaple::RealNumType down_distance,
838 const cmaple::RealNumType best_blength,
839 std::unique_ptr<SeqRegions>& best_child_regions,
840 const std::unique_ptr<SeqRegions>& upper_left_right_regions);
841
847 template <const cmaple::StateType num_states>
848 void connectNewSample2Root(std::unique_ptr<SeqRegions>& sample,
849 const cmaple::NumSeqsType seq_name_index,
850 const cmaple::Index sibling_node_index,
851 PhyloNode& sibling_node,
852 const cmaple::RealNumType best_root_blength,
853 const cmaple::RealNumType best_length2,
854 std::unique_ptr<SeqRegions>& best_parent_regions);
855
861 template <const cmaple::StateType num_states>
862 void placeSubTreeAtNode(const cmaple::Index selected_node_index,
863 const cmaple::Index subtree_index,
864 PhyloNode& subtree,
865 const std::unique_ptr<SeqRegions>& subtree_regions,
866 const cmaple::RealNumType new_branch_length,
867 const cmaple::RealNumType new_lh);
868
874 template <const cmaple::StateType num_states>
875 void placeSubTreeMidBranch(const cmaple::Index selected_node_index,
876 const cmaple::Index subtree_index,
877 PhyloNode& subtree,
878 const std::unique_ptr<SeqRegions>& subtree_regions,
879 const cmaple::RealNumType new_branch_length,
880 const cmaple::RealNumType new_lh);
881
887 template <
888 const cmaple::StateType num_states,
889 void (Tree::*updateRegionsSubTree)(PhyloNode&,
890 PhyloNode&,
891 PhyloNode&,
892 std::unique_ptr<SeqRegions>&&,
893 const std::unique_ptr<SeqRegions>&,
894 const std::unique_ptr<SeqRegions>&,
895 const std::unique_ptr<SeqRegions>&,
896 cmaple::RealNumType&)>
897 void connectSubTree2Branch(
898 const std::unique_ptr<SeqRegions>& subtree_regions,
899 const std::unique_ptr<SeqRegions>& lower_regions,
900 const cmaple::Index subtree_index,
901 PhyloNode& subtree,
902 const cmaple::Index sibling_node_index,
903 PhyloNode& sibling_node,
904 const cmaple::RealNumType top_distance,
905 const cmaple::RealNumType down_distance,
906 cmaple::RealNumType& best_blength,
907 std::unique_ptr<SeqRegions>&& best_child_regions,
908 const std::unique_ptr<SeqRegions>& upper_left_right_regions);
909
915 template <const cmaple::StateType num_states>
916 void connectSubTree2Root(const cmaple::Index subtree_index,
917 PhyloNode& subtree,
918 const std::unique_ptr<SeqRegions>& subtree_regions,
919 const std::unique_ptr<SeqRegions>& lower_regions,
920 const cmaple::Index sibling_node_index,
921 PhyloNode& sibling_node,
922 const cmaple::RealNumType best_root_blength,
923 const cmaple::RealNumType best_length2,
924 std::unique_ptr<SeqRegions>&& best_parent_regions);
925
933 template <const cmaple::StateType num_states>
934 void updateRegionsPlaceSubTree(
935 PhyloNode& subtree,
936 PhyloNode& sibling_node,
937 PhyloNode& internal,
938 std::unique_ptr<SeqRegions>&& best_child_regions,
939 const std::unique_ptr<SeqRegions>& subtree_regions,
940 const std::unique_ptr<SeqRegions>& upper_left_right_regions,
941 const std::unique_ptr<SeqRegions>& lower_regions,
942 cmaple::RealNumType& best_blength);
943
951 template <const cmaple::StateType num_states>
952 void updateRegionsPlaceSubTreeAbove(
953 PhyloNode& subtree,
954 PhyloNode& sibling_node,
955 PhyloNode& internal,
956 std::unique_ptr<SeqRegions>&& best_child_regions,
957 const std::unique_ptr<SeqRegions>& subtree_regions,
958 const std::unique_ptr<SeqRegions>& upper_left_right_regions,
959 const std::unique_ptr<SeqRegions>& lower_regions,
960 cmaple::RealNumType& best_blength);
961
967 template <const cmaple::StateType num_states>
968 void handlePolytomyPlaceSubTree(
969 const cmaple::Index selected_node_index,
970 PhyloNode& selected_node,
971 const std::unique_ptr<SeqRegions>& subtree_regions,
972 const cmaple::RealNumType new_branch_length,
973 cmaple::RealNumType& best_down_lh_diff,
974 cmaple::Index& best_child_index,
975 cmaple::RealNumType& best_child_blength_split,
976 std::unique_ptr<SeqRegions>& best_child_regions);
977
983 template <const cmaple::StateType num_states>
984 void updateMidBranchLh(
985 const cmaple::Index node_index,
986 PhyloNode& node,
987 const std::unique_ptr<SeqRegions>& parent_upper_regions,
988 std::stack<cmaple::Index>& node_stack,
989 bool& update_blength);
990
997 template <const cmaple::StateType num_states>
998 std::unique_ptr<SeqRegions> computeUpperLeftRightRegions(
999 const cmaple::Index node_index,
1000 PhyloNode& node,
1001 const cmaple::MiniIndex next_node_mini,
1002 const std::unique_ptr<SeqRegions>& parent_upper_regions,
1003 std::stack<cmaple::Index>& node_stack,
1004 bool& update_blength);
1005
1010 bool updateNewPartialIfDifferent(
1011 PhyloNode& node,
1012 const cmaple::MiniIndex next_node_mini,
1013 std::unique_ptr<SeqRegions>& upper_left_right_regions,
1014 std::stack<cmaple::Index>& node_stack,
1015 const cmaple::PositionType seq_length);
1016
1023 template <const cmaple::StateType num_states>
1024 void inline handleNullNewRegions(const cmaple::Index index,
1025 PhyloNode& node,
1026 const bool do_update_zeroblength,
1027 std::stack<cmaple::Index>& node_stack,
1028 bool& update_blength,
1029 const std::string err_msg) {
1030 if (do_update_zeroblength) {
1031 updateZeroBlength<num_states>(index, node, node_stack);
1032 update_blength = true;
1033 } else
1034 throw std::logic_error(err_msg);
1035 }
1036
1043 template <const cmaple::StateType num_states>
1044 void updatePartialLhFromParent(
1045 const cmaple::Index index,
1046 PhyloNode& node,
1047 std::stack<cmaple::Index>& node_stack,
1048 const std::unique_ptr<SeqRegions>& parent_upper_regions,
1049 const cmaple::PositionType seq_length);
1050
1057 template <const cmaple::StateType num_states>
1058 void updatePartialLhFromChildren(
1059 const cmaple::Index index,
1060 PhyloNode& node,
1061 std::stack<cmaple::Index>& node_stack,
1062 const std::unique_ptr<SeqRegions>& parent_upper_regions,
1063 const bool is_non_root,
1064 const cmaple::PositionType seq_length);
1065
1071 template <const cmaple::StateType num_states>
1072 inline void computeMidBranchRegions(
1073 PhyloNode& node,
1074 std::unique_ptr<SeqRegions>& regions_2_update,
1075 const SeqRegions& parent_upper_lr_lh) {
1076 std::unique_ptr<SeqRegions>& lower_lh = node.getPartialLh(cmaple::TOP);
1077 cmaple::RealNumType half_branch_length = node.getUpperLength() * 0.5;
1078 parent_upper_lr_lh.mergeUpperLower<num_states>(
1079 regions_2_update, half_branch_length, *lower_lh, half_branch_length,
1080 aln, model, params->threshold_prob);
1081 }
1082
1088 template <const cmaple::StateType num_states>
1089 void refreshNonLowerLhsFromParent(cmaple::Index& node_index,
1090 cmaple::Index& last_node_index);
1091
1097 template <const cmaple::StateType num_states>
1098 void refreshUpperLR(const cmaple::Index node_index,
1099 PhyloNode& node,
1100 const cmaple::Index neighbor_index,
1101 std::unique_ptr<SeqRegions>& replaced_regions,
1102 const SeqRegions& parent_upper_lr_lh);
1103
1107 template <const cmaple::StateType num_states>
1108 void estimateBlength_R_O(const SeqRegion& seq1_region,
1109 const SeqRegion& seq2_region,
1110 const cmaple::RealNumType total_blength,
1111 const cmaple::PositionType end_pos,
1112 cmaple::RealNumType& coefficient,
1113 std::vector<cmaple::RealNumType>& coefficient_vec);
1114
1118 void estimateBlength_R_ACGT(
1119 const SeqRegion& seq1_region,
1120 const cmaple::StateType seq2_state,
1121 const cmaple::RealNumType total_blength,
1122 const cmaple::PositionType end_pos,
1123 std::vector<cmaple::RealNumType>& coefficient_vec);
1124
1129 template <const cmaple::StateType num_states>
1130 void estimateBlength_O_X(const SeqRegion& seq1_region,
1131 const SeqRegion& seq2_region,
1132 const cmaple::RealNumType total_blength,
1133 const cmaple::PositionType end_pos,
1134 cmaple::RealNumType& coefficient,
1135 std::vector<cmaple::RealNumType>& coefficient_vec);
1136
1140 template <const cmaple::StateType num_states>
1141 void estimateBlength_ACGT_O(
1142 const SeqRegion& seq1_region,
1143 const SeqRegion& seq2_region,
1144 const cmaple::RealNumType total_blength,
1145 cmaple::RealNumType& coefficient,
1146 std::vector<cmaple::RealNumType>& coefficient_vec);
1147
1152 void estimateBlength_ACGT_RACGT(
1153 const SeqRegion& seq1_region,
1154 const SeqRegion& seq2_region,
1155 const cmaple::RealNumType total_blength,
1156 const cmaple::PositionType end_pos,
1157 std::vector<cmaple::RealNumType>& coefficient_vec);
1158
1162 cmaple::RealNumType estimateBlengthFromCoeffs(
1163 cmaple::RealNumType& coefficient,
1164 const std::vector<cmaple::RealNumType> coefficient_vec);
1165
1171 template <const cmaple::StateType num_states>
1172 void handleBlengthChanged(PhyloNode& node,
1173 const cmaple::Index node_index,
1174 const cmaple::RealNumType best_blength);
1175
1179 template <const cmaple::StateType num_states>
1180 void optimizeBlengthBeforeSeekingSPR(
1181 PhyloNode& node,
1182 cmaple::RealNumType& best_blength,
1183 cmaple::RealNumType& best_lh,
1184 bool& blength_changed,
1185 const std::unique_ptr<SeqRegions>& parent_upper_lr_lh,
1186 const std::unique_ptr<SeqRegions>& lower_lh);
1187
1193 template <const cmaple::StateType num_states>
1194 void checkAndApplySPR(const cmaple::RealNumType best_lh_diff,
1195 const cmaple::RealNumType best_blength,
1196 const cmaple::RealNumType best_lh,
1197 const cmaple::Index node_index,
1198 PhyloNode& node,
1199 const cmaple::Index best_node_index,
1200 const cmaple::Index parent_node_index,
1201 const bool is_mid_node,
1202 cmaple::RealNumType& total_improvement,
1203 bool& topology_updated);
1204
1208 inline void createAnInternalNode() { nodes.emplace_back(InternalNode()); }
1209
1213 inline void createALeafNode(const cmaple::NumSeqsType new_seq_name_index) {
1214 nodes.emplace_back(LeafNode(
1215 new_seq_name_index)); //(PhyloNode(std::move(LeafNode(new_seq_name_index))));
1216 }
1217
1221 std::unique_ptr<SeqRegions>& getPartialLhAtNode(const cmaple::Index index);
1222
1228 template <const cmaple::StateType num_states>
1229 bool calculateNNILh(std::stack<cmaple::Index>& node_stack_aLRT,
1230 cmaple::RealNumType& lh_diff,
1231 PhyloNode& current_node,
1232 PhyloNode& child_1,
1233 PhyloNode& child_2,
1234 PhyloNode& sibling,
1235 PhyloNode& parent,
1236 const cmaple::Index parent_index,
1237 cmaple::RealNumType& lh_at_root,
1238 const bool allow_replacing_ML_tree);
1239
1245 template <const cmaple::StateType num_states>
1246 bool calculateNNILhRoot(std::stack<cmaple::Index>& node_stack_aLRT,
1247 cmaple::RealNumType& lh_diff,
1248 std::unique_ptr<SeqRegions>& parent_new_lower_lh,
1249 const cmaple::RealNumType& child_2_new_blength,
1250 PhyloNode& current_node,
1251 PhyloNode& child_1,
1252 PhyloNode& child_2,
1253 PhyloNode& sibling,
1254 PhyloNode& parent,
1255 const cmaple::Index parent_index,
1256 cmaple::RealNumType& lh_at_root,
1257 const bool allow_replacing_ML_tree);
1258
1265 template <const cmaple::StateType num_states>
1266 bool calculateNNILhNonRoot(std::stack<cmaple::Index>& node_stack_aLRT,
1267 cmaple::RealNumType& lh_diff,
1268 std::unique_ptr<SeqRegions>& parent_new_lower_lh,
1269 const cmaple::RealNumType& child_2_new_blength,
1270 PhyloNode& current_node,
1271 PhyloNode& child_1,
1272 PhyloNode& child_2,
1273 PhyloNode& sibling,
1274 PhyloNode& parent,
1275 const cmaple::Index parent_index,
1276 cmaple::RealNumType& lh_at_root,
1277 const bool allow_replacing_ML_tree);
1278
1284 template <const cmaple::StateType num_states>
1285 void replaceMLTreebyNNIRoot(std::stack<cmaple::Index>& node_stack_aLRT,
1286 cmaple::RealNumType& lh_diff,
1287 PhyloNode& current_node,
1288 PhyloNode& child_1,
1289 PhyloNode& child_2,
1290 PhyloNode& sibling,
1291 PhyloNode& parent,
1292 cmaple::RealNumType& lh_at_root,
1293 const cmaple::RealNumType child_1_best_blength,
1294 const cmaple::RealNumType child_2_best_blength,
1295 const cmaple::RealNumType sibling_best_blength,
1296 const cmaple::RealNumType parent_best_blength);
1297
1304 template <const cmaple::StateType num_states>
1305 void replaceMLTreebyNNINonRoot(
1306 std::stack<cmaple::Index>& node_stack_aLRT,
1307 cmaple::RealNumType& lh_diff,
1308 PhyloNode& current_node,
1309 PhyloNode& child_1,
1310 PhyloNode& child_2,
1311 PhyloNode& sibling,
1312 PhyloNode& parent,
1313 cmaple::RealNumType& lh_at_root,
1314 const cmaple::RealNumType child_1_best_blength,
1315 const cmaple::RealNumType child_2_best_blength,
1316 const cmaple::RealNumType sibling_best_blength,
1317 const cmaple::RealNumType parent_best_blength,
1318 const cmaple::RealNumType new_parent_best_blength);
1319
1327 template <const cmaple::StateType num_states>
1328 void updateUpperLR(std::stack<cmaple::Index>& node_stack,
1329 std::stack<cmaple::Index>& node_stack_aLRT);
1330
1335 void recompute_aLRT_GrandChildren(PhyloNode& parent,
1336 std::stack<cmaple::Index>& node_stack_aLRT);
1337
1345 template <const cmaple::StateType num_states>
1346 void calculate_aRLT(const bool allow_replacing_ML_tree);
1347
1353 template <const cmaple::StateType num_states>
1354 cmaple::RealNumType calculateSiteLhs(
1355 std::vector<cmaple::RealNumType>& site_lh_contributions,
1356 std::vector<cmaple::RealNumType>& site_lh_root);
1357
1363 template <const cmaple::StateType num_states>
1364 void calculate_aRLT_SH(
1365 std::vector<cmaple::RealNumType>& site_lh_contributions,
1366 std::vector<cmaple::RealNumType>& site_lh_root,
1367 const cmaple::RealNumType& LT1);
1368
1374 template <const cmaple::StateType num_states>
1375 cmaple::PositionType count_aRLT_SH_branch(
1376 std::vector<cmaple::RealNumType>& site_lh_contributions,
1377 std::vector<cmaple::RealNumType>& site_lh_root,
1378 PhyloNode& node,
1379 const cmaple::RealNumType& LT1);
1380
1387 template <const cmaple::StateType num_states>
1388 void calSiteLhDiffRoot(std::vector<cmaple::RealNumType>& site_lh_diff,
1389 std::vector<cmaple::RealNumType>& site_lh_root_diff,
1390 const std::vector<cmaple::RealNumType>& site_lh_root,
1391 std::unique_ptr<SeqRegions>& parent_new_lower_lh,
1392 const cmaple::RealNumType& child_2_new_blength,
1393 PhyloNode& current_node,
1394 PhyloNode& child_1,
1395 PhyloNode& child_2,
1396 PhyloNode& sibling,
1397 PhyloNode& parent,
1398 const cmaple::Index parent_index);
1399
1406 template <const cmaple::StateType num_states>
1407 void calSiteLhDiffNonRoot(
1408 std::vector<cmaple::RealNumType>& site_lh_diff,
1409 std::vector<cmaple::RealNumType>& site_lh_root_diff,
1410 const std::vector<cmaple::RealNumType>& site_lh_root,
1411 std::unique_ptr<SeqRegions>& parent_new_lower_lh,
1412 const cmaple::RealNumType& child_2_new_blength,
1413 PhyloNode& current_node,
1414 PhyloNode& child_1,
1415 PhyloNode& child_2,
1416 PhyloNode& sibling,
1417 PhyloNode& parent,
1418 const cmaple::Index parent_index);
1419
1425 template <const cmaple::StateType num_states>
1426 void calSiteLhDiff(std::vector<cmaple::RealNumType>& site_lh_diff,
1427 std::vector<cmaple::RealNumType>& site_lh_root_diff,
1428 const std::vector<cmaple::RealNumType>& site_lh_root,
1429 PhyloNode& current_node,
1430 PhyloNode& child_1,
1431 PhyloNode& child_2,
1432 PhyloNode& sibling,
1433 PhyloNode& parent,
1434 const cmaple::Index parent_index);
1435
1439 const char readNextChar(std::istream& in,
1440 cmaple::PositionType& in_line,
1441 cmaple::PositionType& in_column,
1442 const char& current_ch = 0) const;
1443
1450 cmaple::NumSeqsType parseFile(
1451 std::istream& infile,
1452 char& ch,
1453 cmaple::RealNumType& branch_len,
1454 cmaple::PositionType& in_line,
1455 cmaple::PositionType& in_column,
1456 const std::map<std::string, cmaple::NumSeqsType>& map_seqname_index,
1457 bool& missing_blengths);
1458
1463 void collapseAllZeroLeave();
1464
1469 void collapseOneZeroLeaf(PhyloNode& node,
1470 cmaple::Index& node_index,
1471 PhyloNode& neighbor_1,
1472 const cmaple::Index neighbor_1_index,
1473 PhyloNode& neighbor_2);
1474
1478 template <const cmaple::StateType num_states>
1479 void updatePesudoCountModel(PhyloNode& node,
1480 const cmaple::Index node_index,
1481 const cmaple::Index parent_index);
1482
1488 template <const cmaple::StateType num_states>
1489 void expandTreeByOneLessInfoSeq(PhyloNode& node,
1490 const cmaple::Index node_index,
1491 const cmaple::Index parent_index);
1492
1501 template <const cmaple::StateType num_states>
1502 void updateBlengthReplaceMLTree(std::stack<cmaple::Index>& node_stack_aLRT,
1503 cmaple::RealNumType& lh_diff,
1504 PhyloNode& node,
1505 const cmaple::Index node_index,
1506 const cmaple::RealNumType best_blength);
1507
1515 template <const cmaple::StateType num_states>
1516 void addLessInfoSeqReplacingMLTree(std::stack<cmaple::Index>& node_stack_aLRT,
1517 cmaple::RealNumType& lh_diff,
1518 PhyloNode& node,
1519 const cmaple::Index node_index,
1520 const cmaple::Index parent_index);
1521
1527 std::string exportNodeString(const bool binary,
1528 const cmaple::NumSeqsType node_vec_index,
1529 const bool show_branch_supports);
1530
1541 bool readTree(std::istream& tree_stream);
1542
1548 bool isComplete();
1549
1554 void updateModelByAln();
1555
1561 template <const cmaple::StateType num_states>
1562 void updateModelLhAfterLoading();
1563
1567 std::map<std::string, NumSeqsType> initMapSeqNameIndex();
1568
1575 void remarkExistingSeqs();
1576
1586 NumSeqsType markAnExistingSeq(
1587 const std::string& seq_name,
1588 const std::map<std::string, NumSeqsType>& map_name_index);
1589
1593 void resetSeqAdded();
1594
1601 void attachAlnModel(Alignment* aln, ModelBase* model);
1602
1608 std::string exportNewick(const bool binary, const bool show_branch_supports);
1609
1614 template <const cmaple::StateType num_states>
1615 void updateZeroBlength(const cmaple::Index index,
1616 PhyloNode& node,
1617 std::stack<cmaple::Index>& node_stack);
1618
1626 template <const cmaple::StateType num_states>
1627 void updatePartialLh(std::stack<cmaple::Index>& node_stack);
1628
1635 template <const cmaple::StateType num_states>
1636 void seekSamplePlacement(const cmaple::Index start_node_index,
1637 const cmaple::NumSeqsType seq_name_index,
1638 const std::unique_ptr<SeqRegions>& sample_regions,
1639 cmaple::Index& selected_node_index,
1640 cmaple::RealNumType& best_lh_diff,
1641 bool& is_mid_branch,
1642 cmaple::RealNumType& best_up_lh_diff,
1643 cmaple::RealNumType& best_down_lh_diff,
1644 cmaple::Index& best_child_index);
1645
1652 template <const cmaple::StateType num_states>
1653 void seekSubTreePlacement(
1654 cmaple::Index& best_node_index,
1655 cmaple::RealNumType& best_lh_diff,
1656 bool& is_mid_branch,
1657 cmaple::RealNumType& best_up_lh_diff,
1658 cmaple::RealNumType& best_down_lh_diff,
1659 cmaple::Index& best_child_index,
1660 const bool short_range_search,
1661 const cmaple::Index child_node_index,
1662 cmaple::RealNumType&
1663 removed_blength); //, bool search_subtree_placement = true,
1664 // SeqRegions* sample_regions = NULL);
1665
1671 template <const cmaple::StateType num_states>
1672 void placeNewSampleMidBranch(const cmaple::Index& selected_node_index,
1673 std::unique_ptr<SeqRegions>& sample,
1674 const cmaple::NumSeqsType seq_name_index,
1675 const cmaple::RealNumType best_lh_diff);
1676
1682 template <const cmaple::StateType num_states>
1683 void placeNewSampleAtNode(const cmaple::Index selected_node_index,
1684 std::unique_ptr<SeqRegions>& sample,
1685 const cmaple::NumSeqsType seq_name_index,
1686 const cmaple::RealNumType best_lh_diff,
1687 const cmaple::RealNumType best_up_lh_diff,
1688 const cmaple::RealNumType best_down_lh_diff,
1689 const cmaple::Index best_child_index);
1690
1697 template <const cmaple::StateType num_states>
1698 void applyOneSPR(const cmaple::Index subtree_index,
1699 PhyloNode& subtree,
1700 const cmaple::Index best_node_index,
1701 const bool is_mid_branch,
1702 const cmaple::RealNumType branch_length,
1703 const cmaple::RealNumType best_lh_diff);
1704
1711 template <const cmaple::StateType num_states>
1712 void refreshAllLhs(bool avoid_using_upper_lr_lhs = false);
1713
1719 void resetSPRFlags(const bool update_outdated,
1720 const bool n_outdated);
1721
1728 template <const cmaple::StateType num_states>
1729 cmaple::RealNumType improveEntireTree(bool short_range_search);
1730
1737 template <const cmaple::StateType num_states>
1738 cmaple::PositionType optimizeBranchIter();
1739
1744 template <const cmaple::StateType num_states>
1745 cmaple::RealNumType estimateBranchLength(
1746 const std::unique_ptr<SeqRegions>& parent_regions,
1747 const std::unique_ptr<SeqRegions>& child_regions);
1748
1753 template <const cmaple::StateType num_states>
1754 cmaple::RealNumType estimateBranchLengthWithCheck(
1755 const std::unique_ptr<SeqRegions>& upper_lr_regions,
1756 const std::unique_ptr<SeqRegions>& lower_regions,
1757 const cmaple::RealNumType current_blength);
1758
1763 template <const cmaple::StateType num_states>
1764 cmaple::RealNumType calculateSamplePlacementCost(
1765 const std::unique_ptr<SeqRegions>& parent_regions,
1766 const std::unique_ptr<SeqRegions>& child_regions,
1767 const cmaple::RealNumType blength);
1768
1773 template <const cmaple::StateType num_states>
1774 cmaple::RealNumType calculateSubTreePlacementCost(
1775 const std::unique_ptr<SeqRegions>& parent_regions,
1776 const std::unique_ptr<SeqRegions>& child_regions,
1777 const cmaple::RealNumType blength);
1778
1784 template <const cmaple::StateType num_states>
1785 void updateLowerLh(cmaple::RealNumType& total_lh,
1786 std::unique_ptr<SeqRegions>& new_lower_lh,
1787 PhyloNode& node,
1788 const std::unique_ptr<SeqRegions>& lower_lh_1,
1789 const std::unique_ptr<SeqRegions>& lower_lh_2,
1790 const cmaple::Index neighbor_1_index,
1791 PhyloNode& neighbor_1,
1792 const cmaple::Index neighbor_2_index,
1793 PhyloNode& neighbor_2,
1794 const cmaple::PositionType& seq_length);
1795
1803 template <const cmaple::StateType num_states>
1804 void updateLowerLhAvoidUsingUpperLRLh(
1805 cmaple::RealNumType& total_lh,
1806 std::unique_ptr<SeqRegions>& new_lower_lh,
1807 PhyloNode& node,
1808 const std::unique_ptr<SeqRegions>& lower_lh_1,
1809 const std::unique_ptr<SeqRegions>& lower_lh_2,
1810 const cmaple::Index neighbor_1_index,
1811 PhyloNode& neighbor_1,
1812 const cmaple::Index neighbor_2_index,
1813 PhyloNode& neighbor_2,
1814 const cmaple::PositionType& seq_length);
1815
1821 template <const cmaple::StateType num_states>
1822 void computeLhContribution(cmaple::RealNumType& total_lh,
1823 std::unique_ptr<SeqRegions>& new_lower_lh,
1824 PhyloNode& node,
1825 const std::unique_ptr<SeqRegions>& lower_lh_1,
1826 const std::unique_ptr<SeqRegions>& lower_lh_2,
1827 const cmaple::Index neighbor_1_index,
1828 PhyloNode& neighbor_1,
1829 const cmaple::Index neighbor_2_index,
1830 PhyloNode& neighbor_2,
1831 const cmaple::PositionType& seq_length);
1832
1836 template <void (Tree::*task)(cmaple::RealNumType&,
1837 std::unique_ptr<SeqRegions>&,
1838 PhyloNode&,
1839 const std::unique_ptr<SeqRegions>&,
1840 const std::unique_ptr<SeqRegions>&,
1841 const cmaple::Index,
1842 PhyloNode&,
1843 const cmaple::Index,
1844 PhyloNode&,
1845 const cmaple::PositionType&)>
1846 cmaple::RealNumType performDFS();
1847
1852 template <const cmaple::StateType num_states>
1853 void updateModelParams();
1854
1858 template <
1859 void (Tree::*task)(PhyloNode&, const cmaple::Index, const cmaple::Index)>
1860 void performDFSAtLeave();
1861
1865 bool keepTraversing(const RealNumType& best_lh_diff,
1866 const RealNumType& lh_diff_at_node,
1867 const bool& strict_stop_seeking_placement_subtree,
1868 const std::unique_ptr<UpdatingNode>& updating_node,
1869 const int& failure_limit_subtree,
1870 const RealNumType& thresh_log_lh_subtree,
1871 const bool able2traverse = true);
1872
1873 // NHANLT: Debug aLRT
1874 // void log_current(std::stack<cmaple::Index>& node_stack_aLRT);
1875};
1876
1885std::ostream& operator<<(std::ostream& out_stream, cmaple::Tree& tree);
1886
1889std::istream& operator>>(std::istream& in_stream, cmaple::Tree& tree);
1890
1894template <const StateType num_states>
1895void cmaple::Tree::refreshAllLhs(bool avoid_using_upper_lr_lhs) {
1896 assert(aln);
1897 assert(model);
1898 assert(cumulative_rate);
1899
1900 // 1. update all the lower lhs along the tree
1901 if (avoid_using_upper_lr_lhs) {
1902 performDFS<&cmaple::Tree::updateLowerLhAvoidUsingUpperLRLh<num_states>>();
1903 } else {
1904 performDFS<&cmaple::Tree::updateLowerLh<num_states>>();
1905 }
1906
1907 // 2. update all the non-lower lhs along the tree
1908 refreshAllNonLowerLhs<num_states>();
1909}
1910
1911template <const StateType num_states>
1912RealNumType cmaple::Tree::improveEntireTree(bool short_range_search) {
1913 assert(aln);
1914 assert(model);
1915 assert(cumulative_rate);
1916 assert(nodes.size() > 0);
1917
1918 // start from the root
1919 std::stack<Index> node_stack;
1920 node_stack.push(Index(root_vector_index, TOP));
1921
1922 // dummy variables
1923 RealNumType total_improvement = 0;
1924 PositionType num_nodes = 0;
1925 PositionType count_node_1K = 0;
1926
1927 // traverse downward the tree
1928 while (!node_stack.empty()) {
1929 // pick the top node from the stack
1930 Index index = node_stack.top();
1931 node_stack.pop();
1932 PhyloNode& node = nodes[index.getVectorIndex()];
1933 // MiniIndex mini_index = index.getMiniIndex();
1934
1935 // add all children of the current nodes to the stack for further traversing
1936 // later
1937 /*for (Index neighbor_index:node.getNeighborIndexes(mini_index))
1938 node_stack.push(neighbor_index);*/
1939 assert(index.getMiniIndex() == TOP);
1940 if (node.isInternal()) {
1941 node_stack.push(node.getNeighborIndex(RIGHT));
1942 node_stack.push(node.getNeighborIndex(LEFT));
1943 }
1944
1945 // only process outdated node to avoid traversing the same part of the tree
1946 // multiple times
1947 if (node.isOutdated() && node.getSPRCount() <= 5) {
1948 node.setOutdated(false);
1949
1950 // if checkEachSPR:
1951 // root=node
1952 // while root.up!=None:
1953 // root=root.up
1954 // #print("Pre-SPR tree: "+createBinaryNewick(root))
1955 // oldTreeLK=calculateTreeLikelihood(root,mutMatrix,checkCorrectness=True)
1956 // #print("Pre-SPR tree likelihood: "+str(oldTreeLK))
1957 // reCalculateAllGenomeLists(root,mutMatrix, checkExistingAreCorrect=True)
1958
1959 // do SPR moves to improve the tree
1960 RealNumType improvement =
1961 improveSubTree<num_states>(index, node, short_range_search);
1962
1963 // if checkEachSPR:
1964 // #print(" apparent improvement "+str(improvement))
1965 // root=node
1966 // while root.up!=None:
1967 // root=root.up
1968 // #print("Post-SPR tree: "+createBinaryNewick(root))
1969 // newTreeLK=calculateTreeLikelihood(root,mutMatrix)
1970 // reCalculateAllGenomeLists(root,mutMatrix,
1971 // checkExistingAreCorrect=True)
1972 // #print("Post-SPR tree likelihood: "+str(newTreeLK))
1973 // if newTreeLK-oldTreeLK < improvement-1.0:
1974 // print("In startTopologyUpdates, LK score of improvement
1975 // "+str(newTreeLK)+" - "+str(oldTreeLK)+" =
1976 // "+str(newTreeLK-oldTreeLK)+" is less than //what is
1977 // supposd to be "+str(improvement))
1978 // exit()
1979 //
1980
1981 // update total_improvement
1982 total_improvement += improvement;
1983
1984 // NHANLT: LOGS FOR DEBUGGING
1985 /*if (params->debug && improvement > 0)
1986 std::cout << num_nodes << ": " << std::setprecision(20) <<
1987 total_improvement << std::endl;*/
1988
1989 // Show log every 1000 nodes
1990 ++num_nodes;
1991 if (cmaple::verbose_mode >= cmaple::VB_MED && num_nodes - count_node_1K >= 1000) {
1992 std::cout << "Processed topology for " << convertIntToString(num_nodes)
1993 << " nodes." << std::endl;
1994 count_node_1K = num_nodes;
1995 }
1996 }
1997 }
1998
1999 return total_improvement;
2000}
2001
2002template <const StateType num_states>
2003void cmaple::Tree::updateModelParams() {
2004 assert(aln);
2005 assert(model);
2006
2007 // perform a DFS -> at each leaf, update the pesudoCount of the model based on
2008 // the sequence of that leaf
2009 performDFSAtLeave<&cmaple::Tree::updatePesudoCountModel<num_states>>();
2010
2011 // update model params based on the pseudo count
2012 if (model->updateMutationMatEmpirical()) {
2013 computeCumulativeRate();
2014 }
2015}
2016
2017template <const StateType num_states>
2018void cmaple::Tree::seekSamplePlacement(
2019 const Index start_node_index,
2020 const NumSeqsType seq_name_index,
2021 const std::unique_ptr<SeqRegions>& sample_regions,
2022 Index& selected_node_index,
2023 RealNumType& best_lh_diff,
2024 bool& is_mid_branch,
2025 RealNumType& best_up_lh_diff,
2026 RealNumType& best_down_lh_diff,
2027 Index& best_child_index) {
2028 assert(sample_regions && sample_regions->size() > 0);
2029 assert(seq_name_index >= 0);
2030 assert(aln);
2031 assert(model);
2032 assert(cumulative_rate);
2033
2034 // init variables
2035 // output variables
2036 selected_node_index = start_node_index;
2037 // dummy variables
2038 RealNumType lh_diff_mid_branch = 0;
2039 RealNumType lh_diff_at_node = 0;
2040 PositionType seq_length = static_cast<PositionType>(aln->ref_seq.size());
2041 // stack of nodes to examine positions
2042 std::stack<TraversingNode> extended_node_stack;
2043 extended_node_stack.push(TraversingNode(start_node_index, 0, MIN_NEGATIVE));
2044
2045 // recursively examine positions for placing the new sample
2046 while (!extended_node_stack.empty()) {
2047 TraversingNode current_extended_node = std::move(extended_node_stack.top());
2048 extended_node_stack.pop();
2049 const NumSeqsType current_node_vec =
2050 current_extended_node.getIndex().getVectorIndex();
2051 PhyloNode& current_node = nodes[current_node_vec];
2052 const bool& is_internal = current_node.isInternal();
2053
2054 // NHANLT: debug
2055 // if (current_node->next && ((current_node->next->neighbor &&
2056 // current_node->next->neighbor->seq_name == "25")
2057 // || (current_node->next->next->neighbor &&
2058 // current_node->next->next->neighbor->seq_name ==
2059 // "25")))
2060 // cout << "fdsfsd";
2061
2062 // if the current node is a leaf AND the new sample/sequence is strictly
2063 // less informative than the current node
2064 // -> add the new sequence into the list of minor sequences of the current
2065 // node + stop seeking the placement
2066 if ((!is_internal) &&
2067 (current_node.getPartialLh(TOP)->compareWithSample(
2068 *sample_regions, seq_length, aln) == 1)) {
2069 current_node.addLessInfoSeqs(seq_name_index);
2070 selected_node_index = Index();
2071 return;
2072 }
2073
2074 const RealNumType current_node_blength = current_node.getUpperLength();
2075
2076 // 1. try first placing as a descendant of the mid-branch point of the
2077 // branch above the current node
2078 if (root_vector_index != current_node_vec && current_node_blength > 0) {
2079 examineSamplePlacementMidBranch<num_states>(
2080 selected_node_index, current_node.getMidBranchLh(), best_lh_diff,
2081 is_mid_branch, lh_diff_mid_branch, current_extended_node,
2082 sample_regions);
2083 }
2084 // otherwise, don't consider mid-branch point
2085 else {
2086 lh_diff_mid_branch = MIN_NEGATIVE;
2087 }
2088
2089 // 2. try to place as descendant of the current node (this is skipped if the
2090 // node has top branch length 0 and so is part of a polytomy).
2091 if (root_vector_index == current_node_vec || current_node_blength > 0) {
2092 examineSamplePlacementAtNode<num_states>(
2093 selected_node_index, current_node.getTotalLh(), best_lh_diff,
2094 is_mid_branch, lh_diff_at_node, lh_diff_mid_branch, best_up_lh_diff,
2095 best_down_lh_diff, best_child_index, current_extended_node,
2096 sample_regions);
2097 } else {
2098 lh_diff_at_node = current_extended_node.getLhDiff();
2099 }
2100
2101 // keep trying to place at children nodes, unless the number of attempts has
2102 // reaches the failure limit
2103 const short int failure_count = current_extended_node.getFailureCount();
2104 if ((params->strict_stop_seeking_placement_sample &&
2105 failure_count <= params->failure_limit_sample &&
2106 lh_diff_at_node > (best_lh_diff - params->thresh_log_lh_sample)) ||
2107 (!params->strict_stop_seeking_placement_sample &&
2108 (failure_count <= params->failure_limit_sample ||
2109 lh_diff_at_node > (best_lh_diff - params->thresh_log_lh_sample)))) {
2110 /*for (Index neighbor_index:current_node.getNeighborIndexes(TOP))
2111 extended_node_stack.push(TraversingNode(neighbor_index,
2112 current_extended_node.getFailureCount(), lh_diff_at_node));*/
2113 if (is_internal) {
2114 extended_node_stack.push(
2115 TraversingNode(current_node.getNeighborIndex(RIGHT), failure_count,
2116 lh_diff_at_node));
2117 extended_node_stack.push(
2118 TraversingNode(current_node.getNeighborIndex(LEFT), failure_count,
2119 lh_diff_at_node));
2120 }
2121 }
2122 }
2123
2124 // exploration of the tree is finished, and we are left with the node found so
2125 // far with the best appending likelihood cost. Now we explore placement just
2126 // below this node for more fine-grained placement within its descendant
2127 // branches.
2128 best_down_lh_diff = MIN_NEGATIVE;
2129 best_child_index = Index();
2130
2131 // if best position so far is the descendant of a node -> explore further at
2132 // its children
2133 if (!is_mid_branch) {
2134 finetuneSamplePlacementAtNode<num_states>(
2135 nodes[selected_node_index.getVectorIndex()], best_down_lh_diff,
2136 best_child_index, sample_regions);
2137 }
2138}
2139
2140template <const StateType num_states>
2141void cmaple::Tree::placeNewSampleAtNode(const Index selected_node_index,
2142 std::unique_ptr<SeqRegions>& sample,
2143 const NumSeqsType seq_name_index,
2144 const RealNumType best_lh_diff,
2145 const RealNumType best_up_lh_diff,
2146 const RealNumType best_down_lh_diff,
2147 const Index best_child_index) {
2148 // dummy variables
2149 RealNumType best_child_lh = MIN_NEGATIVE;
2150 RealNumType best_child_blength_split = 0;
2151 RealNumType best_parent_lh;
2152 RealNumType best_parent_blength_split = 0;
2153 RealNumType best_root_blength = -1;
2154 const RealNumType threshold_prob = params->threshold_prob;
2155 std::unique_ptr<SeqRegions> best_parent_regions = nullptr;
2156 std::unique_ptr<SeqRegions> best_child_regions = nullptr;
2157
2158 assert(selected_node_index.getMiniIndex() == TOP);
2159 assert(sample && sample->size() > 0);
2160 assert(seq_name_index >= 0);
2161 assert(aln);
2162 assert(model);
2163 assert(cumulative_rate);
2164
2165 const NumSeqsType selected_node_vec_index =
2166 selected_node_index.getVectorIndex();
2167 PhyloNode& selected_node = nodes[selected_node_vec_index];
2168
2169 // place the new sample as a descendant of an existing node
2170 if (best_child_index.getMiniIndex() != UNDEFINED) {
2171 PhyloNode& best_child = nodes[best_child_index.getVectorIndex()];
2172 best_child_lh = best_down_lh_diff;
2173 assert(best_child_index.getMiniIndex() == TOP);
2174 best_child_blength_split =
2175 0.5 * best_child.getUpperLength(); // best_child->length;
2176 const std::unique_ptr<SeqRegions>& upper_left_right_regions =
2177 getPartialLhAtNode(best_child.getNeighborIndex(
2178 TOP)); // best_child->neighbor->getPartialLhAtNode(aln,
2179 // model, threshold_prob);
2180 const std::unique_ptr<SeqRegions>& lower_regions = best_child.getPartialLh(
2181 TOP); // ->getPartialLhAtNode(aln, model, threshold_prob);
2182 // best_child_regions = new SeqRegions(best_child->mid_branch_lh);
2183 // SeqRegions best_child_mid_clone =
2184 // SeqRegions(best_child.getMidBranchLh());
2185 best_child_regions =
2186 nullptr; // cmaple::make_unique<SeqRegions>(std::move(best_child_mid_clone));
2187
2188 // try a shorter split
2189 // tryShorterBranch<&cmaple::Tree::calculateSamplePlacementCost<num_states>>(best_child->length,
2190 // best_child_regions, sample, upper_left_right_regions, lower_regions,
2191 // best_child_lh, best_child_blength_split, default_blength, true);
2192 tryShorterBranch<num_states,
2193 &cmaple::Tree::calculateSamplePlacementCost<num_states>>(
2194 best_child.getUpperLength(), best_child_regions, sample,
2195 upper_left_right_regions, lower_regions, best_child_lh,
2196 best_child_blength_split, default_blength, true);
2197
2198 // Delay cloning SeqRegions
2199 if (!best_child_regions) {
2200 best_child_regions =
2201 cmaple::make_unique<SeqRegions>(SeqRegions(best_child.getMidBranchLh()));
2202 }
2203 }
2204
2205 // if node is root, try to place as sibling of the current root.
2206 RealNumType old_root_lh = MIN_NEGATIVE;
2207 if (root_vector_index == selected_node_vec_index) {
2208 /*old_root_lh = selected_node->getPartialLhAtNode(aln, model,
2209 threshold_prob)->computeAbsoluteLhAtRoot(num_states, model); SeqRegions*
2210 lower_regions = selected_node->getPartialLhAtNode(aln, model,
2211 threshold_prob);*/
2212 const std::unique_ptr<SeqRegions>& lower_regions =
2213 selected_node.getPartialLh(TOP);
2214 old_root_lh = lower_regions->computeAbsoluteLhAtRoot<num_states>(
2215 model, cumulative_base);
2216
2217 // merge 2 lower vector into one
2218 RealNumType new_root_lh = lower_regions->mergeTwoLowers<num_states>(
2219 best_parent_regions, default_blength, *sample, default_blength, aln,
2220 model, cumulative_rate, threshold_prob, true);
2221
2222 new_root_lh += best_parent_regions->computeAbsoluteLhAtRoot<num_states>(
2223 model, cumulative_base);
2224 best_parent_lh = new_root_lh;
2225
2226 // try shorter branch lengths
2227 best_root_blength = default_blength;
2228 tryShorterBranchAtRoot<num_states>(sample, lower_regions,
2229 best_parent_regions, best_root_blength,
2230 best_parent_lh, default_blength);
2231
2232 // update best_parent_lh (taking into account old_root_lh)
2233 best_parent_lh -= old_root_lh;
2234 }
2235 // selected_node is not root
2236 else {
2237 best_parent_lh = best_up_lh_diff;
2238 best_parent_blength_split =
2239 0.5 * selected_node.getUpperLength(); // selected_node->length;
2240 /*SeqRegions* upper_left_right_regions =
2241 selected_node->neighbor->getPartialLhAtNode(aln, model, threshold_prob);
2242 SeqRegions* lower_regions = selected_node->getPartialLhAtNode(aln, model,
2243 threshold_prob); best_parent_regions = new
2244 SeqRegions(selected_node->mid_branch_lh);*/
2245 const std::unique_ptr<SeqRegions>& upper_left_right_regions =
2246 getPartialLhAtNode(selected_node.getNeighborIndex(TOP));
2247 const std::unique_ptr<SeqRegions>& lower_regions =
2248 selected_node.getPartialLh(TOP);
2249 // SeqRegions seq_regions_clone =
2250 // SeqRegions(selected_node.getMidBranchLh());
2251 best_parent_regions =
2252 nullptr; // cmaple::make_unique<SeqRegions>(std::move(seq_regions_clone));
2253
2254 // try a shorter split
2255 // tryShorterBranch<&cmaple::Tree::calculateSamplePlacementCost<num_states>>(selected_node->length,
2256 // best_parent_regions, sample, upper_left_right_regions, lower_regions,
2257 // best_parent_lh, best_parent_blength_split, default_blength, false);
2258 tryShorterBranch<num_states,
2259 &cmaple::Tree::calculateSamplePlacementCost<num_states>>(
2260 selected_node.getUpperLength(), best_parent_regions, sample,
2261 upper_left_right_regions, lower_regions, best_parent_lh,
2262 best_parent_blength_split, default_blength, false);
2263
2264 // Delay cloning SeqRegions
2265 if (!best_parent_regions) {
2266 best_parent_regions = cmaple::make_unique<SeqRegions>(
2267 SeqRegions(selected_node.getMidBranchLh()));
2268 }
2269 }
2270
2271 // if the best placement is below the selected_node => add an internal node
2272 // below the selected_node
2273 if (best_child_lh >= best_parent_lh && best_child_lh >= best_lh_diff) {
2274 assert(best_child_index.getMiniIndex() == TOP);
2275 PhyloNode& best_child = nodes[best_child_index.getVectorIndex()];
2276 const std::unique_ptr<SeqRegions>& upper_left_right_regions =
2277 getPartialLhAtNode(best_child.getNeighborIndex(
2278 TOP)); // best_child->neighbor->getPartialLhAtNode(aln,
2279 // model, threshold_prob);
2280
2281 // Estimate the length for the new branch
2282 RealNumType best_length = default_blength;
2283 estimateLengthNewBranch<
2284 &cmaple::Tree::calculateSamplePlacementCost<num_states>>(
2285 best_child_lh, best_child_regions, sample, best_length, max_blength,
2286 min_blength, false);
2287
2288 // create new internal node and append child to it
2289 // connectNewSample2Branch(sample, seq_name_index, best_child,
2290 // best_child_blength_split, best_child->length - best_child_blength_split,
2291 // best_length, best_child_regions, upper_left_right_regions);
2292 connectNewSample2Branch<num_states>(
2293 sample, seq_name_index, best_child_index, best_child,
2294 best_child_blength_split,
2295 best_child.getUpperLength() - best_child_blength_split, best_length,
2296 best_child_regions, upper_left_right_regions);
2297 }
2298 // otherwise, add new parent to the selected_node
2299 else {
2300 // new parent is actually part of a polytomy since best placement is exactly
2301 // at the node
2302 if (best_lh_diff >= best_parent_lh) {
2303 best_root_blength = -1;
2304 best_parent_blength_split = -1;
2305 best_parent_lh = best_lh_diff;
2306 /*if (best_parent_regions) delete best_parent_regions;
2307 best_parent_regions = NULL;*/
2308 best_parent_regions = nullptr;
2309
2310 if (root_vector_index == selected_node_vec_index) {
2311 // selected_node->getPartialLhAtNode(aln, model,
2312 // threshold_prob)->mergeTwoLowers<num_states>(best_parent_regions,
2313 // -1, *sample, default_blength, aln, model, threshold_prob);
2314 selected_node.getPartialLh(TOP)->mergeTwoLowers<num_states>(
2315 best_parent_regions, -1, *sample, default_blength, aln, model,
2316 cumulative_rate, threshold_prob);
2317 } else {
2318 // best_parent_regions = new SeqRegions(selected_node->total_lh);
2319 SeqRegions seq_regions_clone = SeqRegions(selected_node.getTotalLh());
2320 best_parent_regions =
2321 cmaple::make_unique<SeqRegions>(std::move(seq_regions_clone));
2322 }
2323 }
2324
2325 // add parent to the root
2326 if (root_vector_index == selected_node_vec_index) {
2327 // now try different lengths for right branch
2328 best_parent_lh += old_root_lh;
2329 RealNumType best_length2 = default_blength;
2330 const std::unique_ptr<SeqRegions>& lower_regions =
2331 selected_node.getPartialLh(
2332 TOP); // ->getPartialLhAtNode(aln, model, threshold_prob);
2333
2334 estimateLengthNewBranchAtRoot<num_states>(
2335 sample, lower_regions, best_parent_regions, best_length2,
2336 best_parent_lh, best_root_blength, min_blength, false);
2337
2338 // update best_parent_lh (taking into account old_root_lh)
2339 best_parent_lh -= old_root_lh;
2340
2341 // add new sample to a new root
2342 connectNewSample2Root<num_states>(
2343 sample, seq_name_index, selected_node_index, selected_node,
2344 best_root_blength, best_length2, best_parent_regions);
2345 }
2346 // add parent to non-root node
2347 else {
2348 const std::unique_ptr<SeqRegions>& upper_left_right_regions =
2349 getPartialLhAtNode(selected_node.getNeighborIndex(
2350 TOP)); // selected_node->neighbor->getPartialLhAtNode(aln,
2351 // model, threshold_prob);
2352
2353 // now try different lengths for the new branch
2354 RealNumType best_length = default_blength;
2355 estimateLengthNewBranch<
2356 &cmaple::Tree::calculateSamplePlacementCost<num_states>>(
2357 best_parent_lh, best_parent_regions, sample, best_length, max_blength,
2358 min_blength, false);
2359
2360 // now create new internal node and append child to it
2361 RealNumType down_distance = best_parent_blength_split;
2362 RealNumType top_distance =
2363 selected_node.getUpperLength() -
2364 down_distance; // selected_node->length - down_distance;
2365 if (best_parent_blength_split < 0) {
2366 down_distance = -1;
2367 top_distance =
2368 selected_node.getUpperLength(); // selected_node->length;
2369
2370 /*if (selected_node->total_lh) delete selected_node->total_lh;
2371 selected_node->total_lh = NULL;
2372
2373 if (selected_node->mid_branch_lh) delete selected_node->mid_branch_lh;
2374 selected_node->mid_branch_lh = NULL;*/
2375 selected_node.setTotalLh(nullptr);
2376 selected_node.setMidBranchLh(nullptr);
2377
2378 // node.furtherMidNodes=None
2379 }
2380 connectNewSample2Branch<num_states>(
2381 sample, seq_name_index, selected_node_index, selected_node,
2382 top_distance, down_distance, best_length, best_parent_regions,
2383 upper_left_right_regions);
2384 }
2385 }
2386
2387 // delete best_parent_regions and best_child_regions
2388 /*if (best_parent_regions)
2389 delete best_parent_regions;
2390 if (best_child_regions)
2391 delete best_child_regions;*/
2392}
2393
2394template <const StateType num_states>
2395void cmaple::Tree::placeNewSampleMidBranch(const Index& selected_node_index,
2396 std::unique_ptr<SeqRegions>& sample,
2397 const NumSeqsType seq_name_index,
2398 const RealNumType best_lh_diff) {
2399 // dummy variables
2400 // const RealNumType threshold_prob = params->threshold_prob;
2401 std::unique_ptr<SeqRegions> best_child_regions = nullptr;
2402
2403 // selected_node->neighbor->getPartialLhAtNode(aln, model, threshold_prob);
2404 // const MiniIndex seleted_node_mini_index =
2405 // selected_node_index.getMiniIndex();
2406 assert(selected_node_index.getMiniIndex() == TOP);
2407 assert(sample && sample->size() > 0);
2408 assert(seq_name_index >= 0);
2409 assert(aln);
2410 assert(model);
2411 assert(cumulative_rate);
2412
2413 PhyloNode& selected_node = nodes[selected_node_index.getVectorIndex()];
2414 const std::unique_ptr<SeqRegions>& upper_left_right_regions =
2415 getPartialLhAtNode(selected_node.getNeighborIndex(TOP));
2416 RealNumType best_split_lh = best_lh_diff;
2417 const RealNumType selected_node_blength = selected_node.getUpperLength();
2418 RealNumType best_branch_length_split = 0.5 * selected_node_blength;
2419 best_child_regions =
2420 nullptr; // cmaple::make_unique<SeqRegions>(SeqRegions(selected_node.getMidBranchLh()));
2421 // selected_node->getPartialLhAtNode(aln, model, threshold_prob);
2422 const std::unique_ptr<SeqRegions>& lower_regions =
2423 selected_node.getPartialLh(TOP);
2424
2425 // try different positions on the existing branch
2426 bool found_new_split =
2427 tryShorterBranch<num_states,
2428 &cmaple::Tree::calculateSamplePlacementCost<num_states>>(
2429 selected_node_blength, best_child_regions, sample,
2430 upper_left_right_regions, lower_regions, best_split_lh,
2431 best_branch_length_split, default_blength, true);
2432
2433 if (!found_new_split) {
2434 // try on the second branch
2435 found_new_split = tryShorterBranch<
2436 num_states, &cmaple::Tree::calculateSamplePlacementCost<num_states>>(
2437 selected_node_blength, best_child_regions, sample,
2438 upper_left_right_regions, lower_regions, best_split_lh,
2439 best_branch_length_split, default_blength, false);
2440
2441 if (found_new_split) {
2442 best_branch_length_split =
2443 selected_node_blength - best_branch_length_split;
2444 }
2445 }
2446
2447 // Delay cloning SeqRegions
2448 if (!best_child_regions) {
2449 best_child_regions = cmaple::make_unique<SeqRegions>(
2450 SeqRegions(selected_node.getMidBranchLh()));
2451 }
2452
2453 // now try different lengths for the new branch
2454 RealNumType best_blength = default_blength;
2455 estimateLengthNewBranch<
2456 &cmaple::Tree::calculateSamplePlacementCost<num_states>>(
2457 best_split_lh, best_child_regions, sample, best_blength, max_blength,
2458 min_blength, false);
2459
2460 // create new internal node and append child to it
2461 connectNewSample2Branch<num_states>(
2462 sample, seq_name_index, selected_node_index, selected_node,
2463 best_branch_length_split,
2464 selected_node_blength - best_branch_length_split, best_blength,
2465 best_child_regions, upper_left_right_regions);
2466}
2468} // namespace cmaple
Definition alignment.h:9
Definition modelbase.h:17
Definition model.h:9
Definition tree.h:12
void doPlacement(std::ostream &out_stream=std::cout)
Do placement (using stepwise addition) to build an initial tree. Model parameters (if not fixed) will...
void load(const std::string &tree_filename, const bool fixed_blengths=false)
Load a tree from a (bifurcating or multifurcating) tree (with/without branch lengths) in NEWICK forma...
void applySPR(const TreeSearchType tree_search_type, const bool shallow_tree_search, std::ostream &out_stream=std::cout)
Apply SPR moves to optimize the tree.
TreeSearchType
Definition tree.h:17
@ UNKNOWN_TREE_SEARCH
Definition tree.h:22
@ FAST_TREE_SEARCH
Definition tree.h:18
@ NORMAL_TREE_SEARCH
Definition tree.h:19
@ ACCURATE_TREE_SEARCH
Definition tree.h:21
void computeBranchSupport(const int num_threads=1, const int num_replicates=1000, const double epsilon=0.1, const bool allow_replacing_ML_tree=true, std::ostream &out_stream=std::cout)
Compute branch supports (aLRT-SH) of the current tree, which may or may not contain all taxa in the a...
TreeType
Definition tree.h:28
@ MUL_TREE
Definition tree.h:30
@ BIN_TREE
Definition tree.h:29
@ UNKNOWN_TREE
Definition tree.h:31
std::string exportNewick(const TreeType tree_type=BIN_TREE, const bool show_branch_supports=false)
Export the phylogenetic tree to a string in NEWICK format.
RealNumType computeLh()
Compute the log likelihood of the current tree, which may or may not contain all taxa in the alignmen...
void changeModel(Model *model)
Change the substitution model.
void autoProceedMAPLE(const TreeSearchType tree_search_type=NORMAL_TREE_SEARCH, const bool shallow_tree_search=false, std::ostream &out_stream=std::cout)
Infer a phylogenetic tree by executing doPlacement(), applySPR(), and optimizeBranch()
void changeAln(Alignment *aln)
Change the alignment.
Tree(Alignment *aln, Model *model, std::istream &tree_stream, const bool fixed_blengths=false, std::unique_ptr< cmaple::Params > &&params=nullptr)
Constructor from a stream of a (bifurcating or multifurcating) tree (with/without branch lengths in N...
Tree(Alignment *aln, Model *model, const std::string &tree_filename="", const bool fixed_blengths=false, std::unique_ptr< cmaple::Params > &&params=nullptr)
Constructor from an optional (bifurcating or multifurcating) tree (with/without branch lengths in NEW...
void load(std::istream &tree_stream, const bool fixed_blengths=false)
Load a tree from a stream of a (bifurcating or multifurcating) tree (with/without branch lengths) in ...
void optimizeBranch(std::ostream &out_stream=std::cout)
Optimize the branch lengths of the tree.
std::istream & operator>>(std::istream &in_stream, cmaple::Tree &tree)
Customized >> operator to read a tree from a stream.
std::ostream & operator<<(std::ostream &out_stream, cmaple::Tree &tree)
Customized << operator to output the tree string in a (bifurcating) NEWICK format to a stream.