SeqAn3  3.0.3
The Modern C++ library for sequence analysis.
edit_distance_unbanded.hpp
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2020, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2020, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
13 #pragma once
14 
15 #include <algorithm>
16 #include <seqan3/std/algorithm>
17 #include <bitset>
18 #include <seqan3/std/ranges>
19 #include <utility>
20 
28 
29 namespace seqan3::detail
30 {
34 template <typename derived_t, typename edit_traits>
35 class edit_distance_unbanded_max_errors_policy :
37  edit_traits
39 {
40 protected:
41  static_assert(edit_traits::use_max_errors, "This policy assumes that edit_traits::use_max_errors is true.");
42 
44  friend derived_t;
45 
49  edit_distance_unbanded_max_errors_policy() noexcept = default;
50  edit_distance_unbanded_max_errors_policy(edit_distance_unbanded_max_errors_policy const &) noexcept
51  = default;
52  edit_distance_unbanded_max_errors_policy(edit_distance_unbanded_max_errors_policy &&) noexcept
53  = default;
54  edit_distance_unbanded_max_errors_policy & operator=(edit_distance_unbanded_max_errors_policy const &) noexcept
55  = default;
56  edit_distance_unbanded_max_errors_policy & operator=(edit_distance_unbanded_max_errors_policy &&) noexcept
57  = default;
58  ~edit_distance_unbanded_max_errors_policy() noexcept = default;
60 
61  using typename edit_traits::word_type;
62  using typename edit_traits::score_type;
63 
69  score_type max_errors{255};
71  size_t last_block{0u};
73  word_type last_score_mask{};
75 
81  void max_errors_init(size_t block_count) noexcept
82  {
83  derived_t * self = static_cast<derived_t *>(this);
84 
85  max_errors = -get<align_cfg::min_score>(self->config).score;
86 
87  assert(max_errors >= score_type{0});
88 
89  if (std::ranges::empty(self->query)) // [[unlikely]]
90  {
91  last_block = 0u;
92  self->score_mask = 0u;
93  last_score_mask = self->score_mask;
94  return;
95  }
96 
97  last_block = block_count - 1u;
98  last_score_mask = self->score_mask;
99 
100  // local_max_errors either stores the maximal number of _score (me.max_errors) or the needle size minus one. It
101  // is used for the mask computation and setting the initial score (the minus one is there because of the Ukkonen
102  // trick).
103  size_t const local_max_errors = std::min<size_t>(max_errors, std::ranges::size(self->query) - 1u);
104  self->score_mask = word_type{1u} << (local_max_errors % self->word_size);
105  last_block = std::min(local_max_errors / self->word_size, last_block);
106  self->_score = local_max_errors + 1u;
107  }
108 
110  bool is_last_active_cell_within_last_row() const noexcept
111  {
112  derived_t const * self = static_cast<derived_t const *>(this);
113  return (self->score_mask == this->last_score_mask) && (this->last_block == self->vp.size() - 1u);
114  }
115 
117  bool prev_last_active_cell() noexcept
118  {
119  derived_t * self = static_cast<derived_t *>(this);
120  self->score_mask >>= 1u;
121  if (self->score_mask != 0u)
122  return true;
123 
124  if constexpr (edit_traits::is_global)
125  {
126  if (last_block == 0u) // [[unlikely]]
127  return false;
128  }
129 
130  last_block--;
131 
132  self->score_mask = word_type{1u} << (edit_traits::word_size - 1u);
133  return true;
134  }
135 
137  void next_last_active_cell() noexcept
138  {
139  derived_t * self = static_cast<derived_t *>(this);
140  self->score_mask <<= 1u;
141  if (self->score_mask)
142  return;
143 
144  self->score_mask = 1u;
145  last_block++;
146  }
147 
151  bool update_last_active_cell() noexcept
152  {
153  derived_t * self = static_cast<derived_t *>(this);
154  // update the last active cell
155  while (!(self->_score <= max_errors))
156  {
157  self->advance_score(self->vn[last_block], self->vp[last_block], self->score_mask);
158  if (!prev_last_active_cell())
159  {
160  // prev_last_active_cell = false can only happen for global alignments
161  assert(edit_traits::is_global);
162  // we abort here if we don't need to compute a matrix, because the continued
163  // computation can't produce an alignment.
164  return !edit_traits::compute_matrix;
165  }
166  }
167 
168  if (is_last_active_cell_within_last_row())
169  {
170  assert(self->_score <= max_errors);
171 
172  if constexpr(edit_traits::is_semi_global)
173  self->update_best_score();
174 
175  return self->on_hit();
176  }
177  else
178  {
179  next_last_active_cell();
180  self->advance_score(self->vp[last_block], self->vn[last_block], self->score_mask);
181  }
182 
183  return false;
184  }
186 
188  static size_t max_rows(word_type const score_mask, unsigned const last_block,
189  score_type const score, score_type const max_errors) noexcept
190  {
191  using score_matrix_type = typename edit_traits::score_matrix_type;
192  return score_matrix_type::max_rows(score_mask,
193  last_block,
194  score,
195  max_errors);
196  }
197 };
198 
202 template <typename derived_t, typename edit_traits>
203 class edit_distance_unbanded_global_policy :
205  edit_traits
207 {
208 protected:
209  static_assert(edit_traits::is_global || edit_traits::is_semi_global,
210  "This policy assumes that edit_traits::is_global or edit_traits::is_semi_global is true.");
211 
213  friend derived_t;
214 
218  edit_distance_unbanded_global_policy() noexcept = default;
219  edit_distance_unbanded_global_policy(edit_distance_unbanded_global_policy const &) noexcept
220  = default;
221  edit_distance_unbanded_global_policy(edit_distance_unbanded_global_policy &&) noexcept
222  = default;
223  edit_distance_unbanded_global_policy & operator=(edit_distance_unbanded_global_policy const &) noexcept
224  = default;
225  edit_distance_unbanded_global_policy & operator=(edit_distance_unbanded_global_policy &&) noexcept
226  = default;
227  ~edit_distance_unbanded_global_policy() noexcept = default;
229 
231  using typename edit_traits::score_type;
232 
241  score_type _best_score{};
243 
249  void score_init() noexcept
250  {
251  derived_t const * self = static_cast<derived_t const *>(this);
252  _best_score = self->_score;
253  }
254 
256  bool is_valid() const noexcept
257  {
258  [[maybe_unused]] derived_t const * self = static_cast<derived_t const *>(this);
259  // This condition uses the observation that after each computation of a column, _score has either the initial
260  // value of the first row (i.e. the entire column consist of INF's), has the value _score = max_errors + 1
261  // (there exists a cell within the column that has value <= max_errors, but is not on the last row) or _score <=
262  // max_errors (the score of the last active cell is <= max_errors)
263  if constexpr(edit_traits::use_max_errors)
264  return _best_score <= self->max_errors;
265 
266  // When not using max_errors there is always a valid alignment, because the last row will always be updated and
267  // with it the score.
268  return true;
269  }
270 
272  alignment_coordinate invalid_coordinate() const noexcept
273  {
274  derived_t const * self = static_cast<derived_t const *>(this);
275  return {column_index_type{std::ranges::size(self->database)}, row_index_type{std::ranges::size(self->query)}};
276  }
277 
279  void update_best_score() noexcept
280  {
281  derived_t const * self = static_cast<derived_t const *>(this);
282  _best_score = self->_score;
283  }
284 
286  size_t end_positions_first() const noexcept
287  {
288  derived_t const * self = static_cast<derived_t const *>(this);
289  return std::ranges::size(self->database);
290  }
292 
293 public:
300  std::optional<score_type> score() const noexcept
301  {
302  derived_t const * self = static_cast<derived_t const *>(this);
303  static_assert(edit_traits::compute_score, "score() can only be computed if you specify the result type within "
304  "your alignment config.");
305  if (!self->is_valid())
306  return std::nullopt;
307 
308  return -_best_score;
309  }
310 
313  alignment_coordinate end_positions() const noexcept
314  {
315  derived_t const * self = static_cast<derived_t const *>(this);
316  static_assert(edit_traits::compute_end_positions, "end_positions() can only be computed if you specify the "
317  "result type within your alignment config.");
318  if (!self->is_valid())
319  return self->invalid_coordinate();
320 
321  column_index_type const first{self->end_positions_first()};
322  row_index_type const second{std::ranges::size(self->query)};
323  return {first, second};
324  }
326 };
327 
329 template <typename derived_t, typename edit_traits>
330 class edit_distance_unbanded_semi_global_policy :
331  public edit_distance_unbanded_global_policy<derived_t, edit_traits>
332 {
333 protected:
334  static_assert(edit_traits::is_semi_global, "This policy assumes that edit_traits::is_semi_global is true.");
335 
337  friend derived_t;
338 
342  edit_distance_unbanded_semi_global_policy() noexcept = default;
343  edit_distance_unbanded_semi_global_policy(edit_distance_unbanded_semi_global_policy const &) noexcept
344  = default;
345  edit_distance_unbanded_semi_global_policy(edit_distance_unbanded_semi_global_policy &&) noexcept
346  = default;
347  edit_distance_unbanded_semi_global_policy & operator=(edit_distance_unbanded_semi_global_policy const &) noexcept
348  = default;
349  edit_distance_unbanded_semi_global_policy & operator=(edit_distance_unbanded_semi_global_policy &&) noexcept
350  = default;
351  ~edit_distance_unbanded_semi_global_policy() noexcept = default;
353 
355  using base_t = edit_distance_unbanded_global_policy<derived_t, edit_traits>;
357  using database_iterator = typename edit_traits::database_iterator;
358  using base_t::_best_score;
360 
372  database_iterator _best_score_col{};
374 
380  void score_init() noexcept
381  {
382  derived_t const * self = static_cast<derived_t const *>(this);
383  base_t::score_init();
384  _best_score_col = self->database_it_end;
385  }
386 
388  void update_best_score() noexcept
389  {
390  derived_t const * self = static_cast<derived_t const *>(this);
391  // we have to make sure that update_best_score is only called after a score update within the last row.
392  if constexpr(edit_traits::use_max_errors)
393  {
394  assert(std::ranges::empty(self->query) || self->is_last_active_cell_within_last_row());
395  }
396 
397  _best_score_col = (self->_score <= _best_score) ? self->database_it : _best_score_col;
398  _best_score = (self->_score <= _best_score) ? self->_score : _best_score;
399  }
400 
402  size_t end_positions_first() const noexcept
403  {
404  derived_t const * self = static_cast<derived_t const *>(this);
405  // offset == 0u is a special case if database sequence is empty, because in this case the best column is zero.
406  size_t offset = std::ranges::empty(self->database) ? 0u : 1u;
407  return std::ranges::distance(std::ranges::begin(self->database), _best_score_col) + offset;
408  }
410 };
411 
415 template <typename derived_t, typename edit_traits>
416 class edit_distance_unbanded_score_matrix_policy :
418  edit_traits
420 {
421 protected:
422  static_assert(edit_traits::compute_score_matrix,
423  "This policy assumes that edit_traits::compute_score_matrix is true.");
424 
426  friend derived_t;
427 
431  edit_distance_unbanded_score_matrix_policy() noexcept = default;
432  edit_distance_unbanded_score_matrix_policy(edit_distance_unbanded_score_matrix_policy const &) noexcept
433  = default;
434  edit_distance_unbanded_score_matrix_policy(edit_distance_unbanded_score_matrix_policy &&) noexcept
435  = default;
436  edit_distance_unbanded_score_matrix_policy & operator=(edit_distance_unbanded_score_matrix_policy const &) noexcept
437  = default;
438  edit_distance_unbanded_score_matrix_policy & operator=(edit_distance_unbanded_score_matrix_policy &&) noexcept
439  = default;
440  ~edit_distance_unbanded_score_matrix_policy() noexcept = default;
442 
443  using typename edit_traits::score_matrix_type;
444 
450  score_matrix_type _score_matrix{};
452 
465  void score_matrix_init()
466  {
467  derived_t const * self = static_cast<derived_t const *>(this);
468 
469  _score_matrix = score_matrix_type{std::ranges::size(self->query) + 1u};
470  _score_matrix.reserve(std::ranges::size(self->database) + 1u);
471  }
473 
474 public:
482  score_matrix_type const & score_matrix() const noexcept
483  {
484  static_assert(edit_traits::compute_score_matrix, "score_matrix() can only be computed if you specify the "
485  "result type within your alignment config.");
486  return _score_matrix;
487  }
489 };
490 
494 template <typename derived_t, typename edit_traits>
495 class edit_distance_unbanded_trace_matrix_policy :
497  edit_traits
499 {
500 protected:
501  static_assert(edit_traits::compute_trace_matrix,
502  "This policy assumes that edit_traits::compute_trace_matrix is true.");
503 
505  friend derived_t;
506 
510  edit_distance_unbanded_trace_matrix_policy() noexcept = default;
511  edit_distance_unbanded_trace_matrix_policy(edit_distance_unbanded_trace_matrix_policy const &) noexcept
512  = default;
513  edit_distance_unbanded_trace_matrix_policy(edit_distance_unbanded_trace_matrix_policy &&) noexcept
514  = default;
515  edit_distance_unbanded_trace_matrix_policy & operator=(edit_distance_unbanded_trace_matrix_policy const &) noexcept
516  = default;
517  edit_distance_unbanded_trace_matrix_policy & operator=(edit_distance_unbanded_trace_matrix_policy &&) noexcept
518  = default;
519  ~edit_distance_unbanded_trace_matrix_policy() noexcept = default;
521 
522  using typename edit_traits::word_type;
523  using typename edit_traits::trace_matrix_type;
524  using typename edit_traits::alignment_result_type;
525 
531  std::vector<word_type> hp{};
534 
536  trace_matrix_type _trace_matrix{};
538 
551  void trace_matrix_init(size_t block_count)
552  {
553  derived_t const * self = static_cast<derived_t const *>(this);
554 
555  _trace_matrix = trace_matrix_type{std::ranges::size(self->query) + 1u};
556  _trace_matrix.reserve(std::ranges::size(self->database) + 1u);
557 
558  hp.resize(block_count, 0u);
559  db.resize(block_count, 0u);
560  }
562 
563 public:
569  trace_matrix_type const & trace_matrix() const noexcept
570  {
571  static_assert(edit_traits::compute_trace_matrix, "trace_matrix() can only be computed if you specify the "
572  "result type within your alignment config.");
573  return _trace_matrix;
574  }
575 
578  alignment_coordinate begin_positions() const noexcept
579  {
580  derived_t const * self = static_cast<derived_t const *>(this);
581  static_assert(edit_traits::compute_begin_positions, "begin_positions() can only be computed if you specify the "
582  "result type within your alignment config.");
583  if (!self->is_valid())
584  return self->invalid_coordinate();
585 
586  alignment_coordinate const back = self->end_positions();
587  auto trace_path = self->trace_matrix().trace_path(back);
588  auto trace_path_it = std::ranges::begin(trace_path);
589  std::ranges::advance(trace_path_it, std::ranges::end(trace_path));
590  matrix_coordinate coordinate = trace_path_it.coordinate();
591  return {column_index_type{coordinate.col}, row_index_type{coordinate.row}};
592  }
593 
596  auto alignment() const noexcept
597  {
598  using alignment_t = std::remove_cvref_t<decltype(std::declval<alignment_result_type &>().alignment())>;
599 
600  derived_t const * self = static_cast<derived_t const *>(this);
601  static_assert(edit_traits::compute_sequence_alignment, "alignment() can only be computed if you specify the "
602  "result type within your alignment config.");
603 
604  if (!self->is_valid())
605  return alignment_t{};
606 
607  alignment_coordinate const back = self->end_positions();
608  aligned_sequence_builder builder{self->database, self->query};
609  auto trace_path = self->trace_matrix().trace_path(back);
610  return builder(trace_path).alignment;
611  }
613 };
614 
618 template <typename value_t>
619 class proxy_reference
620 {
621 public:
625  proxy_reference() noexcept = default;
626  proxy_reference(proxy_reference const &) noexcept = default;
627  proxy_reference(proxy_reference &&) noexcept = default;
628  proxy_reference & operator=(proxy_reference const &) noexcept = default;
629  proxy_reference & operator=(proxy_reference &&) noexcept = default;
630  ~proxy_reference() = default;
631 
633  proxy_reference(value_t & t) noexcept
634  : ptr(std::addressof(t))
635  {}
636 
637  proxy_reference(value_t &&) = delete;
638 
640  template <typename other_value_t>
642  requires std::convertible_to<other_value_t, value_t>
644  proxy_reference & operator=(other_value_t && u) noexcept
645  {
646  get() = std::forward<other_value_t>(u);
647  return *this;
648  }
650 
652  value_t & get() const noexcept
653  {
654  assert(ptr != nullptr);
655  return *ptr;
656  }
657 
659  operator value_t & () const noexcept
660  {
661  return get();
662  }
663 
664 private:
666  value_t * ptr{nullptr};
667 };
668 
680 template <std::ranges::viewable_range database_t,
681  std::ranges::viewable_range query_t,
682  typename align_config_t,
683  typename edit_traits>
684 class edit_distance_unbanded :
686 // Hide this section in doxygen, because it messes up the inheritance.
687  public edit_distance_base<
688  edit_traits::use_max_errors,
689  edit_distance_unbanded_max_errors_policy,
690  edit_traits,
691  edit_distance_unbanded<database_t, query_t, align_config_t, edit_traits>>,
692  public edit_distance_base<
693  edit_traits::is_global,
694  edit_distance_unbanded_global_policy,
695  edit_traits,
696  edit_distance_unbanded<database_t, query_t, align_config_t, edit_traits>>,
697  public edit_distance_base<
698  edit_traits::is_semi_global,
699  edit_distance_unbanded_semi_global_policy,
700  edit_traits,
701  edit_distance_unbanded<database_t, query_t, align_config_t, edit_traits>>,
702  public edit_distance_base<
703  edit_traits::compute_score_matrix,
704  edit_distance_unbanded_score_matrix_policy,
705  edit_traits,
706  edit_distance_unbanded<database_t, query_t, align_config_t, edit_traits>>,
707  public edit_distance_base<
708  edit_traits::compute_trace_matrix,
709  edit_distance_unbanded_trace_matrix_policy,
710  edit_traits,
711  edit_distance_unbanded<database_t, query_t, align_config_t, edit_traits>>
713 {
714 public:
715  using typename edit_traits::word_type;
716  using typename edit_traits::score_type;
717  using typename edit_traits::database_type;
718  using typename edit_traits::query_type;
719  using typename edit_traits::align_config_type;
720  using edit_traits::word_size;
721 
722 private:
724  template <typename other_derived_t, typename other_edit_traits>
725  friend class edit_distance_unbanded_max_errors_policy;
727  template <typename other_derived_t, typename other_edit_traits>
728  friend class edit_distance_unbanded_global_policy;
730  template <typename other_derived_t, typename other_edit_traits>
731  friend class edit_distance_unbanded_semi_global_policy;
733  template <typename other_derived_t, typename other_edit_traits>
734  friend class edit_distance_unbanded_score_matrix_policy;
736  template <typename other_derived_t, typename other_edit_traits>
737  friend class edit_distance_unbanded_trace_matrix_policy;
738 
739  using typename edit_traits::database_iterator;
740  using typename edit_traits::query_alphabet_type;
741  using typename edit_traits::alignment_result_type;
742  using edit_traits::use_max_errors;
743  using edit_traits::is_semi_global;
744  using edit_traits::is_global;
745  using edit_traits::compute_score;
746  using edit_traits::compute_end_positions;
747  using edit_traits::compute_begin_positions;
748  using edit_traits::compute_sequence_alignment;
749  using edit_traits::compute_score_matrix;
750  using edit_traits::compute_trace_matrix;
751  using edit_traits::compute_matrix;
752  using typename edit_traits::score_matrix_type;
753  using typename edit_traits::trace_matrix_type;
754 
756  database_t database;
758  query_t query;
760  align_config_t config;
761 
763  static constexpr word_type hp0 = is_global ? 1u : 0u;
765  static constexpr word_type hn0 = 0u;
767  static constexpr word_type vp0 = ~word_type{0u};
769  static constexpr word_type vn0 = 0u;
770 
772  score_type _score{};
779  word_type score_mask{0u};
790  std::vector<word_type> bit_masks{};
791 
793  database_iterator database_it{};
795  database_iterator database_it_end{};
796 
798  struct compute_state_trace_matrix
799  {
801  proxy_reference<word_type> db{};
802  };
803 
805  struct compute_state : enable_state_t<compute_trace_matrix, compute_state_trace_matrix>
806  {
809 
811  word_type b{};
813  word_type d0{};
815  hp_type hp{};
817  word_type hn{};
819  proxy_reference<word_type> vp{};
821  proxy_reference<word_type> vn{};
823  word_type carry_d0{};
825  word_type carry_hp{hp0};
827  word_type carry_hn{};
828  };
829 
831  void add_state()
832  {
833  if constexpr(!use_max_errors && compute_score_matrix)
834  this->_score_matrix.add_column(vp, vn);
835 
836  if constexpr(!use_max_errors && compute_trace_matrix)
837  this->_trace_matrix.add_column(this->hp, this->db, vp);
838 
839  if constexpr(use_max_errors && compute_matrix)
840  {
841  size_t max_rows = this->max_rows(score_mask, this->last_block, _score, this->max_errors);
842  if constexpr(compute_score_matrix)
843  this->_score_matrix.add_column(vp, vn, max_rows);
844 
845  if constexpr(compute_trace_matrix)
846  this->_trace_matrix.add_column(this->hp, this->db, vp, max_rows);
847  }
848  }
849 
850 public:
855  edit_distance_unbanded() = delete;
856  edit_distance_unbanded(edit_distance_unbanded const &) = default;
857  edit_distance_unbanded(edit_distance_unbanded &&) = default;
858  edit_distance_unbanded & operator=(edit_distance_unbanded const &) = default;
859  edit_distance_unbanded & operator=(edit_distance_unbanded &&) = default;
860  ~edit_distance_unbanded() = default;
861 
868  edit_distance_unbanded(database_t _database,
869  query_t _query,
870  align_config_t _config,
871  edit_traits const & SEQAN3_DOXYGEN_ONLY(_traits)) :
872  database{std::forward<database_t>(_database)},
873  query{std::forward<query_t>(_query)},
874  config{std::forward<align_config_t>(_config)},
875  _score{static_cast<score_type>(std::ranges::size(query))},
876  database_it{ranges::begin(database)},
877  database_it_end{ranges::end(database)}
878  {
879  static constexpr size_t alphabet_size_ = alphabet_size<query_alphabet_type>;
880 
881  size_t const block_count = (std::ranges::size(query) - 1u + word_size) / word_size;
882  score_mask = word_type{1u} << ((std::ranges::size(query) - 1u + word_size) % word_size);
883 
884  this->score_init();
885  if constexpr(use_max_errors)
886  this->max_errors_init(block_count);
887 
888  if constexpr(compute_score_matrix)
889  this->score_matrix_init();
890 
891  if constexpr(compute_trace_matrix)
892  this->trace_matrix_init(block_count);
893 
894  vp.resize(block_count, vp0);
895  vn.resize(block_count, vn0);
896  bit_masks.resize((alphabet_size_ + 1u) * block_count, 0u);
897 
898  // encoding the letters as bit-vectors
899  for (size_t j = 0u; j < std::ranges::size(query); j++)
900  {
901  size_t const i = block_count * seqan3::to_rank(query[j]) + j / word_size;
902  bit_masks[i] |= word_type{1u} << (j % word_size);
903  }
904 
905  add_state();
906  }
908 
909 private:
911  template <bool with_carry>
912  static void compute_step(compute_state & state) noexcept
913  {
914  word_type x, t;
915  assert(state.carry_d0 <= 1u);
916  assert(state.carry_hp <= 1u);
917  assert(state.carry_hn <= 1u);
918 
919  x = state.b | state.vn;
920  t = state.vp + (x & state.vp) + state.carry_d0;
921 
922  state.d0 = (t ^ state.vp) | x;
923  state.hn = state.vp & state.d0;
924  state.hp = state.vn | ~(state.vp | state.d0);
925 
926  if constexpr(with_carry)
927  state.carry_d0 = (state.carry_d0 != 0u) ? t <= state.vp : t < state.vp;
928 
929  x = (state.hp << 1u) | state.carry_hp;
930  state.vn = x & state.d0;
931  state.vp = (state.hn << 1u) | ~(x | state.d0) | state.carry_hn;
932 
933  if constexpr(with_carry)
934  {
935  state.carry_hp = state.hp >> (word_size - 1u);
936  state.carry_hn = state.hn >> (word_size - 1u);
937  }
938  }
939 
941  template <bool with_carry>
942  void compute_kernel(compute_state & state, size_t const block_offset, size_t const current_block) noexcept
943  {
944  state.vp = proxy_reference<word_type>{this->vp[current_block]};
945  state.vn = proxy_reference<word_type>{this->vn[current_block]};
946  if constexpr(compute_trace_matrix)
947  {
948  state.hp = proxy_reference<word_type>{this->hp[current_block]};
949  state.db = proxy_reference<word_type>{this->db[current_block]};
950  }
951  state.b = bit_masks[block_offset + current_block];
952 
953  compute_step<with_carry>(state);
954  if constexpr(compute_trace_matrix)
955  state.db = ~(state.b ^ state.d0);
956  }
957 
959  void advance_score(word_type P, word_type N, word_type mask) noexcept
960  {
961  if ((P & mask) != word_type{0u})
962  _score++;
963  else if ((N & mask) != word_type{0u})
964  _score--;
965  }
966 
968  bool on_hit() noexcept
969  {
970  // TODO: call external on_hit functor
971  return false;
972  }
973 
975  inline bool small_patterns();
976 
978  inline bool large_patterns();
979 
981  inline void compute_empty_query_sequence()
982  {
983  assert(std::ranges::empty(query));
984 
985  bool abort_computation = false;
986 
987  for (; database_it != database_it_end; ++database_it)
988  {
989  if constexpr(is_global)
990  ++_score;
991  else // is_semi_global
992  this->update_best_score();
993 
994  // call on_hit
995  if constexpr(use_max_errors)
996  abort_computation = on_hit();
997 
998  this->add_state();
999  if (abort_computation)
1000  break;
1001  }
1002  }
1003 
1005  void compute()
1006  {
1007  // limit search width for prefix search (if no matrix needs to be computed)
1008  if constexpr(use_max_errors && is_global && !compute_matrix)
1009  {
1010  // Note: For global alignments we know that the database can only be max_length long to have a score less
1011  // than or equal max_errors in the last cell.
1012  //
1013  // The following matrix shows a minimal value for each entry (because a diagonal must be +0 or +1, each
1014  // diagonal is at least the value of the initial value in the first row)
1015  // 0 1 2 3 4 5 6...
1016  // 1 0 1 2 3 4 5...
1017  // ...
1018  // m ... 3 2 1 0 1 2 3 4 5
1019  // Thus, after |query| + max_errors entries the score will always be higher than max_errors.
1020  size_t const max_length = std::ranges::size(query) + this->max_errors + 1u;
1021  size_t const haystack_length = std::min(std::ranges::size(database), max_length);
1022  database_it_end -= std::ranges::size(database) - haystack_length;
1023  }
1024 
1025  // distinguish between the version for needles not longer than
1026  // one machine word and the version for longer needles
1027  // A special cases is if the second sequence is empty (vp.size() == 0u).
1028  if (vp.size() == 0u) // [[unlikely]]
1029  compute_empty_query_sequence();
1030  else if (vp.size() == 1u)
1031  small_patterns();
1032  else
1033  large_patterns();
1034 
1035  if constexpr(is_global)
1036  this->update_best_score();
1037  }
1038 
1039 public:
1044  template <typename callback_t>
1045  void operator()([[maybe_unused]] size_t const idx, callback_t && callback)
1046  {
1047  using traits_type = alignment_configuration_traits<align_config_t>;
1048  using result_value_type = typename alignment_result_value_type_accessor<alignment_result_type>::type;
1049 
1050  compute();
1051 
1052  // First cache the begin and end positions if enabled by the edit distance traits.
1053  // Note, that they might be activated even if the user did not configure them, but in order to
1054  // compute for example the alignment both information are required and enabled internally.
1055  auto cached_end_positions = this->invalid_coordinate();
1056  auto cached_begin_positions = this->invalid_coordinate();
1057 
1058  if constexpr (compute_end_positions)
1059  cached_end_positions = this->end_positions();
1060 
1061  if constexpr (compute_begin_positions && !compute_sequence_alignment)
1062  {
1063  static_assert(compute_end_positions, "End positions required to compute the begin positions.");
1064  cached_begin_positions = this->begin_positions();
1065  }
1066 
1067  // Fill the result value type. Note we need to ask what was enabled on the user side in order to store
1068  // the correct information in the alignment result type. This information is stored in the alignment
1069  // configuration traits and not in the edit distance traits.
1070  result_value_type res_vt{};
1071 
1072  if constexpr (traits_type::output_sequence1_id)
1073  res_vt.sequence1_id = idx;
1074 
1075  if constexpr (traits_type::output_sequence2_id)
1076  res_vt.sequence2_id = idx;
1077 
1078  if constexpr (traits_type::compute_score)
1079  res_vt.score = this->score().value_or(matrix_inf<score_type>);
1080 
1081  if constexpr (traits_type::compute_sequence_alignment)
1082  {
1083  if (this->is_valid())
1084  {
1085  aligned_sequence_builder builder{this->database, this->query};
1086  auto trace_res = builder(this->trace_matrix().trace_path(cached_end_positions));
1087  res_vt.alignment = std::move(trace_res.alignment);
1088  cached_begin_positions.first = trace_res.first_sequence_slice_positions.first;
1089  cached_begin_positions.second = trace_res.second_sequence_slice_positions.first;
1090  }
1091  }
1092 
1093  if constexpr (traits_type::compute_end_positions)
1094  res_vt.end_positions = std::move(cached_end_positions);
1095 
1096  if constexpr (traits_type::compute_begin_positions)
1097  res_vt.begin_positions = std::move(cached_begin_positions);
1098 
1099  callback(alignment_result_type{std::move(res_vt)});
1100  }
1101 };
1102 
1103 template <typename database_t, typename query_t, typename align_config_t, typename traits_t>
1104 bool edit_distance_unbanded<database_t, query_t, align_config_t, traits_t>::small_patterns()
1105 {
1106  bool abort_computation = false;
1107 
1108  // computing the blocks
1109  while (database_it != database_it_end)
1110  {
1111  compute_state state{};
1112  size_t const block_offset = seqan3::to_rank((query_alphabet_type) *database_it);
1113 
1114  compute_kernel<false>(state, block_offset, 0u);
1115  advance_score(state.hp, state.hn, score_mask);
1116 
1117  // semi-global without max_errors guarantees that the score stays within the last row
1118  if constexpr(is_semi_global && !use_max_errors)
1119  this->update_best_score();
1120 
1121  // updating the last active cell
1122  if constexpr(use_max_errors)
1123  abort_computation = this->update_last_active_cell();
1124 
1125  add_state();
1126  ++database_it;
1127  if (abort_computation)
1128  return true;
1129  }
1130 
1131  return false;
1132 }
1133 
1134 template <typename database_t, typename query_t, typename align_config_t, typename traits_t>
1135 bool edit_distance_unbanded<database_t, query_t, align_config_t, traits_t>::large_patterns()
1136 {
1137  bool abort_computation = false;
1138 
1139  while (database_it != database_it_end)
1140  {
1141  compute_state state{};
1142  size_t const block_offset = vp.size() * seqan3::to_rank((query_alphabet_type) *database_it);
1143 
1144  size_t block_count = vp.size();
1145  if constexpr(use_max_errors)
1146  block_count = this->last_block + 1;
1147 
1148  // compute each block in the current column; carries between blocks will be propagated.
1149  for (size_t current_block = 0u; current_block < block_count; current_block++)
1150  compute_kernel<true>(state, block_offset, current_block);
1151 
1152  advance_score(state.hp, state.hn, score_mask);
1153 
1154  // semi-global without max_errors guarantees that the score stays within the last row
1155  if constexpr(is_semi_global && !use_max_errors)
1156  this->update_best_score();
1157 
1158  if constexpr(use_max_errors)
1159  {
1160  // if the last active cell reached the end within the current block we have to compute the next block.
1161  bool additional_block = score_mask >> (word_size - 1u);
1162  bool reached_last_block = this->last_block + 1u == vp.size();
1163  // If there is no next block we skip the computation.
1164  if (reached_last_block)
1165  additional_block = false;
1166 
1167  if (additional_block)
1168  {
1169  size_t const current_block = this->last_block + 1u;
1170  // this might not be necessary, but carry_d0 = 1u might have an influence on the result of vn and vp.
1171  vp[current_block] = vp0;
1172  vn[current_block] = vn0;
1173  compute_kernel<false>(state, block_offset, current_block);
1174  }
1175 
1176  // updating the last active cell
1177  abort_computation = this->update_last_active_cell();
1178  }
1179 
1180  add_state();
1181  ++database_it;
1182 
1183  if (abort_computation)
1184  return true;
1185  }
1186 
1187  return false;
1188 }
1189 
1196 template <typename database_t, typename query_t, typename config_t, typename traits_t>
1197 edit_distance_unbanded(database_t && database, query_t && query, config_t config, traits_t)
1198  -> edit_distance_unbanded<database_t, query_t, config_t, traits_t>;
1200 
1201 } // namespace seqan3::detail
Provides seqan3::detail::alignment_coordinate.
Provides seqan3::alignment_result.
T begin(T... args)
Provides seqan3::configuration and utility functions.
Forwards for seqan3::edit_distance_unbanded related types.
Provides seqan3::detail::edit_distance_score_matrix_full.
Provides seqan3::detail::edit_distance_trace_matrix_full.
T end(T... args)
T forward(T... args)
constexpr auto to_rank
Return the rank representation of a (semi-)alphabet object.
Definition: concept.hpp:146
@ alignment
The (pairwise) alignment stored in an object that models seqan3::detail::pairwise_alignment.
@ offset
Sequence (seqan3::field::seq) relative start position (0-based), unsigned value.
typename decltype(detail::back(list_t{}))::type back
Return the last type from the type list.
Definition: traits.hpp:264
constexpr size_t size
The size of a type pack.
Definition: traits.hpp:150
auto const move
A view that turns lvalue-references into rvalue-references.
Definition: move.hpp:70
Provides seqan3::detail::matrix.
T min(T... args)
constexpr auto const & get(configuration< configs_t... > const &config) noexcept
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: configuration.hpp:434
SeqAn specific customisations in the standard namespace.
Adaptations of concepts from the Ranges TS.