149 template <
typename stream_type,
150 typename seq_legal_alph_type,
bool seq_qual_combined,
158 qual_type & qualities);
160 template <
typename stream_type,
168 qual_type && qualities);
170 template <
typename stream_type,
171 typename seq_legal_alph_type,
172 typename ref_seqs_type,
173 typename ref_ids_type,
176 typename offset_type,
177 typename ref_seq_type,
178 typename ref_id_type,
179 typename ref_offset_type,
186 typename tag_dict_type,
187 typename e_value_type,
188 typename bit_score_type>
191 ref_seqs_type & ref_seqs,
196 offset_type & offset,
197 ref_seq_type & SEQAN3_DOXYGEN_ONLY(ref_seq),
198 ref_id_type & ref_id,
199 ref_offset_type & ref_offset,
201 cigar_type & cigar_vector,
205 tag_dict_type & tag_dict,
206 e_value_type & SEQAN3_DOXYGEN_ONLY(e_value),
207 bit_score_type & SEQAN3_DOXYGEN_ONLY(bit_score));
209 template <
typename stream_type,
210 typename header_type,
213 typename ref_seq_type,
214 typename ref_id_type,
218 typename tag_dict_type,
219 typename e_value_type,
220 typename bit_score_type>
223 header_type && header,
227 int32_t
const offset,
228 ref_seq_type && SEQAN3_DOXYGEN_ONLY(ref_seq),
229 ref_id_type && ref_id,
236 tag_dict_type && tag_dict,
237 e_value_type && SEQAN3_DOXYGEN_ONLY(e_value),
238 bit_score_type && SEQAN3_DOXYGEN_ONLY(bit_score));
248 alignment_file_header<> default_header{};
251 bool ref_info_present_in_header{
false};
260 template <
typename t>
261 decltype(
auto) default_or(t && v) const noexcept
263 return std::forward<t>(v);
266 using format_sam_base::read_field;
268 template <
typename stream_view_type,
typename value_type>
270 stream_view_type && stream_view,
273 template <
typename stream_view_type>
274 void read_field(stream_view_type && stream_view, sam_tag_dictionary & target);
276 template <
typename stream_it_t, std::ranges::forward_range field_type>
277 void write_range_or_asterisk(stream_it_t & stream_it, field_type && field_value);
279 template <
typename stream_it_t>
280 void write_range_or_asterisk(stream_it_t & stream_it,
char const *
const field_value);
282 template <
typename stream_it_t>
283 void write_tag_fields(stream_it_t & stream, sam_tag_dictionary
const & tag_dict,
char const separator);
287 template <
typename stream_type,
288 typename seq_legal_alph_type,
bool seq_qual_combined,
296 qual_type & qualities)
300 if constexpr (seq_qual_combined)
304 std::ignore, std::ignore, std::ignore, std::ignore, std::ignore, std::ignore,
305 std::ignore, std::ignore, std::ignore, std::ignore, std::ignore, std::ignore);
307 for (
auto sit = tmp_qual.
begin(), dit = std::ranges::begin(
sequence); sit != tmp_qual.
end(); ++sit, ++dit)
308 get<1>(*dit).assign_char(*sit);
313 std::ignore, std::ignore, std::ignore, std::ignore, std::ignore, std::ignore,
314 std::ignore, std::ignore, std::ignore, std::ignore, std::ignore, std::ignore);
317 if constexpr (!detail::decays_to_ignore_v<seq_type>)
318 if (std::ranges::distance(
sequence) == 0)
319 throw parse_error{
"The sequence information must not be empty."};
320 if constexpr (!detail::decays_to_ignore_v<id_type>)
321 if (std::ranges::distance(
id) == 0)
322 throw parse_error{
"The id information must not be empty."};
329 template <
typename stream_type,
337 qual_type && qualities)
348 default_or(qualities),
365 template <
typename stream_type,
366 typename seq_legal_alph_type,
367 typename ref_seqs_type,
368 typename ref_ids_type,
371 typename offset_type,
372 typename ref_seq_type,
373 typename ref_id_type,
374 typename ref_offset_type,
381 typename tag_dict_type,
382 typename e_value_type,
383 typename bit_score_type>
386 ref_seqs_type & ref_seqs,
391 offset_type & offset,
392 ref_seq_type & SEQAN3_DOXYGEN_ONLY(ref_seq),
393 ref_id_type & ref_id,
394 ref_offset_type & ref_offset,
396 cigar_type & cigar_vector,
400 tag_dict_type & tag_dict,
401 e_value_type & SEQAN3_DOXYGEN_ONLY(e_value),
402 bit_score_type & SEQAN3_DOXYGEN_ONLY(bit_score))
404 static_assert(detail::decays_to_ignore_v<ref_offset_type> ||
405 detail::is_type_specialisation_of_v<ref_offset_type, std::optional>,
406 "The ref_offset must be a specialisation of std::optional.");
412 int32_t ref_offset_tmp{};
413 std::ranges::range_value_t<decltype(header.
ref_ids())> ref_id_tmp{};
414 [[maybe_unused]] int32_t offset_tmp{};
415 [[maybe_unused]] int32_t soft_clipping_end{};
417 [[maybe_unused]] int32_t ref_length{0}, seq_length{0};
421 if (is_char<'@'>(*std::ranges::begin(stream_view)))
423 read_header(stream_view, header, ref_seqs);
425 if (std::ranges::begin(stream_view) == std::ranges::end(stream_view))
431 read_field(field_view,
id);
433 uint16_t flag_integral{};
434 read_field(field_view, flag_integral);
437 read_field(field_view, ref_id_tmp);
438 check_and_assign_ref_id(
ref_id, ref_id_tmp, header, ref_seqs);
440 read_field(field_view, ref_offset_tmp);
443 if (ref_offset_tmp == -1)
445 else if (ref_offset_tmp > -1)
447 else if (ref_offset_tmp < -1)
448 throw format_error{
"No negative values are allowed for field::ref_offset."};
450 read_field(field_view,
mapq);
454 if constexpr (!detail::decays_to_ignore_v<align_type> || !detail::decays_to_ignore_v<cigar_type>)
456 if (!is_char<'*'>(*std::ranges::begin(stream_view)))
458 std::tie(tmp_cigar_vector, ref_length, seq_length) = parse_cigar(field_view);
459 transfer_soft_clipping_to(tmp_cigar_vector, offset_tmp, soft_clipping_end);
464 std::ranges::next(std::ranges::begin(field_view));
469 detail::consume(field_view);
476 if constexpr (!detail::decays_to_ignore_v<mate_type>)
478 std::ranges::range_value_t<decltype(header.
ref_ids())> tmp_mate_ref_id{};
479 read_field(field_view, tmp_mate_ref_id);
481 if (tmp_mate_ref_id ==
"=")
483 if constexpr (!detail::decays_to_ignore_v<ref_id_type>)
486 check_and_assign_ref_id(get<0>(
mate), ref_id_tmp, header, ref_seqs);
490 check_and_assign_ref_id(get<0>(
mate), tmp_mate_ref_id, header, ref_seqs);
494 read_field(field_view, tmp_pnext);
497 get<1>(
mate) = --tmp_pnext;
498 else if (tmp_pnext < 0)
499 throw format_error{
"No negative values are allowed at the mate mapping position."};
502 read_field(field_view, get<2>(
mate));
506 for (
size_t i = 0; i < 3u; ++i)
508 detail::consume(field_view);
514 if (!is_char<'*'>(*std::ranges::begin(stream_view)))
516 auto constexpr is_legal_alph = char_is_valid_for<seq_legal_alph_type>;
519 if (!is_legal_alph(c))
521 "char_is_valid_for<" +
522 detail::type_name_as_string<seq_legal_alph_type> +
523 "> evaluated to false on " +
524 detail::make_printable(c)};
528 if constexpr (detail::decays_to_ignore_v<seq_type>)
530 if constexpr (!detail::decays_to_ignore_v<align_type>)
533 "If you want to read ALIGNMENT but not SEQ, the alignment"
534 " object must store a sequence container at the second (query) position.");
536 if (!tmp_cigar_vector.empty())
539 auto tmp_iter = std::ranges::begin(seq_stream);
540 std::ranges::advance(tmp_iter, offset_tmp);
542 for (; seq_length > 0; --seq_length)
544 get<1>(align).push_back(std::ranges::range_value_t<decltype(get<1>(align))>{}.assign_char(*tmp_iter));
548 std::ranges::advance(tmp_iter, soft_clipping_end);
557 detail::consume(seq_stream);
562 read_field(seq_stream,
seq);
564 if constexpr (!detail::decays_to_ignore_v<align_type>)
566 if (!tmp_cigar_vector.empty())
568 assign_unaligned(get<1>(align),
577 std::ranges::next(std::ranges::begin(field_view));
582 auto const tab_or_end = is_char<'\t'> || is_char<'\r'> || is_char<'\n'>;
585 if constexpr (!detail::decays_to_ignore_v<seq_type> && !detail::decays_to_ignore_v<qual_type>)
587 if (std::ranges::distance(
seq) != 0 && std::ranges::distance(
qual) != 0 &&
588 std::ranges::distance(
seq) != std::ranges::distance(
qual))
590 throw format_error{detail::to_string(
"Sequence length (", std::ranges::distance(
seq),
591 ") and quality length (", std::ranges::distance(
qual),
592 ") must be the same.")};
598 while (is_char<'\t'>(*std::ranges::begin(stream_view)))
600 std::ranges::next(std::ranges::begin(stream_view));
604 detail::consume(stream_view |
views::take_until(!(is_char<'\r'> || is_char<'\n'>)));
610 if constexpr (!detail::decays_to_ignore_v<align_type>)
612 int32_t ref_idx{(ref_id_tmp.empty()) ? -1 : 0};
614 if constexpr (!detail::decays_to_ignore_v<ref_seqs_type>)
616 if (!ref_id_tmp.empty())
618 assert(header.
ref_dict.count(ref_id_tmp) != 0);
619 ref_idx = header.
ref_dict[ref_id_tmp];
623 construct_alignment(align, tmp_cigar_vector, ref_idx, ref_seqs, ref_offset_tmp, ref_length);
626 if constexpr (!detail::decays_to_ignore_v<cigar_type>)
627 std::swap(cigar_vector, tmp_cigar_vector);
631 template <
typename stream_type,
632 typename header_type,
635 typename ref_seq_type,
636 typename ref_id_type,
640 typename tag_dict_type,
641 typename e_value_type,
642 typename bit_score_type>
645 header_type && header,
649 int32_t
const offset,
650 ref_seq_type && SEQAN3_DOXYGEN_ONLY(ref_seq),
651 ref_id_type && ref_id,
658 tag_dict_type && tag_dict,
659 e_value_type && SEQAN3_DOXYGEN_ONLY(e_value),
660 bit_score_type && SEQAN3_DOXYGEN_ONLY(bit_score))
678 static_assert((std::ranges::forward_range<seq_type> &&
679 alphabet<std::ranges::range_reference_t<seq_type>>),
680 "The seq object must be a std::ranges::forward_range over "
681 "letters that model seqan3::alphabet.");
683 static_assert((std::ranges::forward_range<id_type> &&
684 alphabet<std::ranges::range_reference_t<id_type>>),
685 "The id object must be a std::ranges::forward_range over "
686 "letters that model seqan3::alphabet.");
688 if constexpr (!detail::decays_to_ignore_v<ref_id_type>)
690 static_assert((std::ranges::forward_range<ref_id_type> ||
693 "The ref_id object must be a std::ranges::forward_range "
694 "over letters that model seqan3::alphabet.");
698 static_assert(!detail::decays_to_ignore_v<header_type>,
699 "If you give indices as reference id information the header must also be present.");
703 "The align object must be a std::pair of two ranges whose "
704 "value_type is comparable to seqan3::gap");
707 std::equality_comparable_with<
gap, std::ranges::range_reference_t<decltype(std::get<0>(align))>> &&
708 std::equality_comparable_with<
gap, std::ranges::range_reference_t<decltype(std::get<1>(align))>>),
709 "The align object must be a std::pair of two ranges whose "
710 "value_type is comparable to seqan3::gap");
712 static_assert((std::ranges::forward_range<qual_type> &&
713 alphabet<std::ranges::range_reference_t<qual_type>>),
714 "The qual object must be a std::ranges::forward_range "
715 "over letters that model seqan3::alphabet.");
718 "The mate object must be a std::tuple of size 3 with "
719 "1) a std::ranges::forward_range with a value_type modelling seqan3::alphabet, "
720 "2) a std::integral or std::optional<std::integral>, and "
721 "3) a std::integral.");
723 static_assert(((std::ranges::forward_range<decltype(std::get<0>(
mate))> ||
729 "The mate object must be a std::tuple of size 3 with "
730 "1) a std::ranges::forward_range with a value_type modelling seqan3::alphabet, "
731 "2) a std::integral or std::optional<std::integral>, and "
732 "3) a std::integral.");
736 static_assert(!detail::decays_to_ignore_v<header_type>,
737 "If you give indices as mate reference id information the header must also be present.");
740 "The tag_dict object must be of type seqan3::sam_tag_dictionary.");
745 if constexpr (!detail::decays_to_ignore_v<header_type> &&
746 !detail::decays_to_ignore_v<ref_id_type> &&
755 if constexpr (std::ranges::contiguous_range<decltype(
ref_id)> &&
756 std::ranges::sized_range<decltype(
ref_id)> &&
757 std::ranges::borrowed_range<decltype(
ref_id)>)
766 "The ref_id type is not convertible to the reference id information stored in the "
767 "reference dictionary of the header object.");
773 throw format_error{detail::to_string(
"The ref_id '",
ref_id,
"' was not in the list of references:",
779 throw format_error{
"The ref_offset object must be an std::integral >= 0."};
784 if constexpr (!detail::decays_to_ignore_v<header_type>)
788 write_header(stream, options, header);
789 header_was_written =
true;
797 detail::fast_ostreambuf_iterator stream_it{*stream.rdbuf()};
798 constexpr
char separator{
'\t'};
800 write_range_or_asterisk(stream_it,
id);
801 *stream_it = separator;
803 stream_it.write_number(
static_cast<uint16_t
>(
flag));
804 *stream_it = separator;
806 if constexpr (!detail::decays_to_ignore_v<ref_id_type>)
810 write_range_or_asterisk(stream_it, (header.
ref_ids())[
ref_id]);
815 write_range_or_asterisk(stream_it, (header.
ref_ids())[
ref_id.value()]);
821 write_range_or_asterisk(stream_it,
ref_id);
829 *stream_it = separator;
832 stream_it.write_number(
ref_offset.value_or(-1) + 1);
833 *stream_it = separator;
835 stream_it.write_number(
static_cast<unsigned>(
mapq));
836 *stream_it = separator;
838 if (!std::ranges::empty(cigar_vector))
840 for (
auto & c : cigar_vector)
841 stream_it.write_range(c.to_string());
843 else if (!std::ranges::empty(get<0>(align)) && !std::ranges::empty(get<1>(align)))
850 for (
auto chr : get<1>(align))
858 write_range_or_asterisk(stream_it, detail::get_cigar_string(align,
offset, off_end));
865 *stream_it = separator;
869 write_range_or_asterisk(stream_it, (header.
ref_ids())[get<0>(
mate)]);
873 if (get<0>(
mate).has_value())
876 write_range_or_asterisk(stream_it, header.
ref_ids()[get<0>(
mate).value_or(0)]);
882 write_range_or_asterisk(stream_it, get<0>(
mate));
885 *stream_it = separator;
890 stream_it.write_number(get<1>(
mate).value_or(-1) + 1);
891 *stream_it = separator;
895 stream_it.write_number(get<1>(
mate));
896 *stream_it = separator;
899 stream_it.write_number(get<2>(
mate));
900 *stream_it = separator;
902 write_range_or_asterisk(stream_it,
seq);
903 *stream_it = separator;
905 write_range_or_asterisk(stream_it,
qual);
907 write_tag_fields(stream_it, tag_dict, separator);
930 template <
typename stream_view_type,
typename value_type>
932 stream_view_type && stream_view,
936 while (std::ranges::begin(stream_view) != ranges::end(stream_view))
941 if (is_char<','>(*std::ranges::begin(stream_view)))
942 std::ranges::next(std::ranges::begin(stream_view));
964 template <
typename stream_view_type>
965 inline void format_sam::read_field(stream_view_type && stream_view, sam_tag_dictionary & target)
973 std::ranges::next(std::ranges::begin(stream_view));
975 std::ranges::next(std::ranges::begin(stream_view));
976 std::ranges::next(std::ranges::begin(stream_view));
978 std::ranges::next(std::ranges::begin(stream_view));
979 std::ranges::next(std::ranges::begin(stream_view));
986 std::ranges::next(std::ranges::begin(stream_view));
992 read_field(stream_view, tmp);
999 read_field(stream_view, tmp);
1005 target[tag] = stream_view | views::to<std::string>;
1016 std::ranges::next(std::ranges::begin(stream_view));
1017 std::ranges::next(std::ranges::begin(stream_view));
1019 switch (array_value_type_id)
1022 read_sam_dict_vector(target[tag], stream_view, int8_t{});
1025 read_sam_dict_vector(target[tag], stream_view, uint8_t{});
1028 read_sam_dict_vector(target[tag], stream_view, int16_t{});
1031 read_sam_dict_vector(target[tag], stream_view, uint16_t{});
1034 read_sam_dict_vector(target[tag], stream_view, int32_t{});
1037 read_sam_dict_vector(target[tag], stream_view, uint32_t{});
1040 read_sam_dict_vector(target[tag], stream_view,
float{});
1043 throw format_error{
std::string(
"The first character in the numerical ") +
1044 "id of a SAM tag must be one of [cCsSiIf] but '" + array_value_type_id +
1050 throw format_error{
std::string(
"The second character in the numerical id of a "
1051 "SAM tag must be one of [A,i,Z,H,B,f] but '") + type_id +
"' was given."};
1062 template <
typename stream_it_t, std::ranges::forward_range field_type>
1063 inline void format_sam::write_range_or_asterisk(stream_it_t & stream_it, field_type && field_value)
1065 if (std::ranges::empty(field_value))
1071 if constexpr (std::same_as<
std::remove_cvref_t<std::ranges::range_reference_t<field_type>>,
char>)
1072 stream_it.write_range(field_value);
1074 stream_it.write_range(field_value | views::
to_char);
1084 template <typename stream_it_t>
1085 inline
void format_sam::write_range_or_asterisk(stream_it_t & stream_it,
char const * const field_value)
1097 template <
typename stream_it_t>
1098 inline void format_sam::write_tag_fields(stream_it_t & stream_it, sam_tag_dictionary
const & tag_dict,
char const separator)
1100 auto const stream_variant_fn = [&stream_it] (
auto && arg)
1104 if constexpr (std::ranges::input_range<T>)
1108 stream_it.write_range(arg);
1112 if (!std::ranges::empty(arg))
1114 stream_it.write_number(std::to_integer<uint8_t>(*std::ranges::begin(arg)));
1119 stream_it.write_number(std::to_integer<uint8_t>(elem));
1125 if (!std::ranges::empty(arg))
1127 stream_it.write_number(*std::ranges::begin(arg));
1132 stream_it.write_number(elem);
1143 stream_it.write_number(arg);
1147 for (
auto & [tag, variant] : tag_dict)
1149 *stream_it = separator;
1151 char const char0 = tag / 256;
1152 char const char1 = tag % 256;
1157 *stream_it = detail::sam_tag_type_char[variant.
index()];
1160 if (detail::sam_tag_type_char_extra[variant.
index()] !=
'\0')
1162 *stream_it = detail::sam_tag_type_char_extra[variant.
index()];
Adaptations of algorithms from the Ranges TS.
Provides seqan3::alignment_file_output_options.
Core alphabet concept and free function/type trait wrappers.
Provides seqan3::views::char_to.
The alphabet of a gap character '-'.
Definition: gap.hpp:37
The SAM tag dictionary class that stores all optional SAM fields.
Definition: sam_tag_dictionary.hpp:326
Provides various transformation traits used by the range module.
Auxiliary for pretty printing of exception messages.
Provides type traits for working with templates.
Provides concepts for core language types and relations that don't have concepts in C++20 (yet).
Provides seqan3::detail::fast_ostreambuf_iterator.
sam_flag
An enum flag that describes the properties of an aligned read (given as a SAM record).
Definition: misc.hpp:73
@ none
None of the flags below are set.
constexpr auto to_char
Return the char representation of an alphabet object.
Definition: concept.hpp:328
constexpr auto is_space
Checks whether c is a space character.
Definition: predicate.hpp:144
@ flag
The alignment flag (bit information), uint16_t value.
@ ref_offset
Sequence (seqan3::field::ref_seq) relative start position (0-based), unsigned value.
@ mapq
The mapping quality of the seqan3::field::seq alignment, usually a Phred-scaled score.
@ offset
Sequence (seqan3::field::seq) relative start position (0-based), unsigned value.
@ mate
The mate pair information given as a std::tuple of reference name, offset and template length.
@ ref_id
The identifier of the (reference) sequence that seqan3::field::seq was aligned to.
@ seq
The "sequence", usually a range of nucleotides or amino acids.
@ qual
The qualities, usually in Phred score notation.
decltype(detail::transform< trait_t >(list_t{})) transform
Apply a transformation trait to every type in the list and return a seqan3::type_list of the results.
Definition: traits.hpp:434
constexpr size_t size
The size of a type pack.
Definition: traits.hpp:150
constexpr auto drop
A view adaptor that returns all elements after n from the underlying range (or an empty range if the ...
Definition: drop.hpp:170
constexpr auto slice
A view adaptor that returns a half-open interval on the underlying range.
Definition: slice.hpp:141
constexpr auto take_until_or_throw
A view adaptor that returns elements from the underlying range until the functor evaluates to true (t...
Definition: take_until.hpp:614
constexpr auto istreambuf
A view factory that returns a view over the stream buffer of an input stream.
Definition: istreambuf.hpp:114
constexpr auto take_until
A view adaptor that returns elements from the underlying range until the functor evaluates to true (o...
Definition: take_until.hpp:600
constexpr auto take_until_or_throw_and_consume
A view adaptor that returns elements from the underlying range until the functor evaluates to true (t...
Definition: take_until.hpp:642
auto const move
A view that turns lvalue-references into rvalue-references.
Definition: move.hpp:70
constexpr auto take_until_and_consume
A view adaptor that returns elements from the underlying range until the functor evaluates to true (o...
Definition: take_until.hpp:628
Provides seqan3::detail::ignore_output_iterator for writing to null stream.
The generic alphabet concept that covers most data types used in ranges.
Resolves to std::ranges::implicitly_convertible_to<type1, type2>(). This entity i...
A more refined container concept than seqan3::container.
The generic concept for a (biological) sequence.
Whether a type behaves like a tuple.
Provides helper data structures for the seqan3::alignment_file_output.
Provides various utility functions.
Provides seqan3::views::istreambuf.
The main SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:29
Provides various utility functions.
Adaptations of concepts from the Ranges TS.
Provides the seqan3::sam_tag_dictionary class and auxiliaries.
Provides seqan3::sequence_file_output_options.
Provides seqan3::views::slice.
The options type defines various option members that influence the behavior of all or some formats.
Definition: output_options.hpp:23
bool sam_require_header
Whether to require a header for SAM files.
Definition: output_options.hpp:41
bool add_carriage_return
The default plain text line-ending is "\n", but on Windows an additional carriage return is recommend...
Definition: output_options.hpp:27
Thrown if there is a parse error, such as reading an unexpected character from an input stream.
Definition: exception.hpp:48
The options type defines various option members that influence the behaviour of all or some formats.
Definition: output_options.hpp:22
Exposes the value_type of another type.
Definition: pre.hpp:58
Provides seqan3::views::take_until and seqan3::views::take_until_or_throw.
Provides seqan3::views::to.
Provides seqan3::views::to_char.
Provides traits to inspect some information of a type, for example its name.
Provides character predicates for tokenisation.
Provides seqan3::tuple_like.