150 template <
typename stream_type,
151 typename seq_legal_alph_type,
bool seq_qual_combined,
159 qual_type & qualities);
161 template <
typename stream_type,
169 qual_type && qualities);
171 template <
typename stream_type,
172 typename seq_legal_alph_type,
173 typename ref_seqs_type,
174 typename ref_ids_type,
177 typename offset_type,
178 typename ref_seq_type,
179 typename ref_id_type,
180 typename ref_offset_type,
187 typename tag_dict_type,
188 typename e_value_type,
189 typename bit_score_type>
192 ref_seqs_type & ref_seqs,
197 offset_type & offset,
198 ref_seq_type & SEQAN3_DOXYGEN_ONLY(ref_seq),
199 ref_id_type & ref_id,
200 ref_offset_type & ref_offset,
202 cigar_type & cigar_vector,
206 tag_dict_type & tag_dict,
207 e_value_type & SEQAN3_DOXYGEN_ONLY(e_value),
208 bit_score_type & SEQAN3_DOXYGEN_ONLY(bit_score));
210 template <
typename stream_type,
211 typename header_type,
214 typename ref_seq_type,
215 typename ref_id_type,
219 typename tag_dict_type,
220 typename e_value_type,
221 typename bit_score_type>
224 header_type && header,
228 int32_t
const offset,
229 ref_seq_type && SEQAN3_DOXYGEN_ONLY(ref_seq),
230 ref_id_type && ref_id,
237 tag_dict_type && tag_dict,
238 e_value_type && SEQAN3_DOXYGEN_ONLY(e_value),
239 bit_score_type && SEQAN3_DOXYGEN_ONLY(bit_score));
261 template <
typename t>
264 return std::forward<t>(v);
267 using format_sam_base::read_field;
269 template <
typename stream_view_type,
typename value_type>
271 stream_view_type && stream_view,
274 template <
typename stream_view_type>
276 stream_view_type && stream_view);
278 template <
typename stream_view_type>
281 template <
typename stream_it_t, std::ranges::forward_range field_type>
284 template <
typename stream_it_t>
287 template <
typename stream_it_t>
292 template <
typename stream_type,
293 typename seq_legal_alph_type,
bool seq_qual_combined,
301 qual_type & qualities)
305 if constexpr (seq_qual_combined)
309 std::ignore, std::ignore, std::ignore, std::ignore, std::ignore, std::ignore,
310 std::ignore, std::ignore, std::ignore, std::ignore, std::ignore, std::ignore);
313 get<1>(*dit).assign_char(*sit);
318 std::ignore, std::ignore, std::ignore, std::ignore, std::ignore, std::ignore,
319 std::ignore, std::ignore, std::ignore, std::ignore, std::ignore, std::ignore);
322 if constexpr (!detail::decays_to_ignore_v<seq_type>)
323 if (std::ranges::distance(
sequence) == 0)
324 throw parse_error{
"The sequence information must not be empty."};
325 if constexpr (!detail::decays_to_ignore_v<id_type>)
326 if (std::ranges::distance(
id) == 0)
327 throw parse_error{
"The id information must not be empty."};
334 template <
typename stream_type,
342 qual_type && qualities)
370 template <
typename stream_type,
371 typename seq_legal_alph_type,
372 typename ref_seqs_type,
373 typename ref_ids_type,
376 typename offset_type,
377 typename ref_seq_type,
378 typename ref_id_type,
379 typename ref_offset_type,
386 typename tag_dict_type,
387 typename e_value_type,
388 typename bit_score_type>
391 ref_seqs_type & ref_seqs,
396 offset_type & offset,
397 ref_seq_type & SEQAN3_DOXYGEN_ONLY(ref_seq),
398 ref_id_type & ref_id,
399 ref_offset_type & ref_offset,
401 cigar_type & cigar_vector,
405 tag_dict_type & tag_dict,
406 e_value_type & SEQAN3_DOXYGEN_ONLY(e_value),
407 bit_score_type & SEQAN3_DOXYGEN_ONLY(bit_score))
409 static_assert(detail::decays_to_ignore_v<ref_offset_type> ||
410 detail::is_type_specialisation_of_v<ref_offset_type, std::optional>,
411 "The ref_offset must be a specialisation of std::optional.");
417 int32_t ref_offset_tmp{};
418 std::ranges::range_value_t<decltype(header.
ref_ids())> ref_id_tmp{};
419 [[maybe_unused]] int32_t offset_tmp{};
420 [[maybe_unused]] int32_t soft_clipping_end{};
422 [[maybe_unused]] int32_t ref_length{0}, seq_length{0};
426 if (is_char<'@'>(*std::ranges::begin(stream_view)))
430 if (std::ranges::begin(stream_view) == std::ranges::end(stream_view))
438 uint16_t flag_integral{};
448 if (ref_offset_tmp == -1)
450 else if (ref_offset_tmp > -1)
452 else if (ref_offset_tmp < -1)
453 throw format_error{
"No negative values are allowed for field::ref_offset."};
459 if constexpr (!detail::decays_to_ignore_v<align_type> || !detail::decays_to_ignore_v<cigar_type>)
461 if (!is_char<'*'>(*std::ranges::begin(stream_view)))
469 std::ranges::next(std::ranges::begin(field_view));
481 if constexpr (!detail::decays_to_ignore_v<mate_type>)
483 std::ranges::range_value_t<decltype(header.
ref_ids())> tmp_mate_ref_id{};
486 if (tmp_mate_ref_id ==
"=")
488 if constexpr (!detail::decays_to_ignore_v<ref_id_type>)
502 get<1>(
mate) = --tmp_pnext;
503 else if (tmp_pnext < 0)
504 throw format_error{
"No negative values are allowed at the mate mapping position."};
511 for (
size_t i = 0; i < 3u; ++i)
519 if (!is_char<'*'>(*std::ranges::begin(stream_view)))
521 auto constexpr is_legal_alph = char_is_valid_for<seq_legal_alph_type>;
524 if (!is_legal_alph(c))
526 "char_is_valid_for<" +
527 detail::type_name_as_string<seq_legal_alph_type> +
528 "> evaluated to false on " +
533 if constexpr (detail::decays_to_ignore_v<seq_type>)
535 if constexpr (!detail::decays_to_ignore_v<align_type>)
538 "If you want to read ALIGNMENT but not SEQ, the alignment"
539 " object must store a sequence container at the second (query) position.");
541 if (!tmp_cigar_vector.empty())
544 auto tmp_iter = std::ranges::begin(seq_stream);
545 std::ranges::advance(tmp_iter, offset_tmp);
547 for (; seq_length > 0; --seq_length)
549 get<1>(align).push_back(std::ranges::range_value_t<decltype(get<1>(align))>{}.assign_char(*tmp_iter));
553 std::ranges::advance(tmp_iter, soft_clipping_end);
569 if constexpr (!detail::decays_to_ignore_v<align_type>)
571 if (!tmp_cigar_vector.empty())
573 assign_unaligned(get<1>(align),
582 std::ranges::next(std::ranges::begin(field_view));
587 auto const tab_or_end = is_char<'\t'> || is_char<'\r'> || is_char<'\n'>;
590 if constexpr (!detail::decays_to_ignore_v<seq_type> && !detail::decays_to_ignore_v<qual_type>)
592 if (std::ranges::distance(
seq) != 0 && std::ranges::distance(
qual) != 0 &&
593 std::ranges::distance(
seq) != std::ranges::distance(
qual))
596 ") and quality length (", std::ranges::distance(
qual),
597 ") must be the same.")};
603 while (is_char<'\t'>(*std::ranges::begin(stream_view)))
605 std::ranges::next(std::ranges::begin(stream_view));
615 if constexpr (!detail::decays_to_ignore_v<align_type>)
617 int32_t ref_idx{(ref_id_tmp.empty()) ? -1 : 0};
619 if constexpr (!detail::decays_to_ignore_v<ref_seqs_type>)
621 if (!ref_id_tmp.empty())
623 assert(header.
ref_dict.count(ref_id_tmp) != 0);
624 ref_idx = header.
ref_dict[ref_id_tmp];
628 construct_alignment(align, tmp_cigar_vector, ref_idx, ref_seqs, ref_offset_tmp, ref_length);
631 if constexpr (!detail::decays_to_ignore_v<cigar_type>)
632 std::swap(cigar_vector, tmp_cigar_vector);
636 template <
typename stream_type,
637 typename header_type,
640 typename ref_seq_type,
641 typename ref_id_type,
645 typename tag_dict_type,
646 typename e_value_type,
647 typename bit_score_type>
650 header_type && header,
654 int32_t
const offset,
655 ref_seq_type && SEQAN3_DOXYGEN_ONLY(ref_seq),
656 ref_id_type && ref_id,
663 tag_dict_type && tag_dict,
664 e_value_type && SEQAN3_DOXYGEN_ONLY(e_value),
665 bit_score_type && SEQAN3_DOXYGEN_ONLY(bit_score))
683 static_assert((std::ranges::forward_range<seq_type> &&
684 alphabet<std::ranges::range_reference_t<seq_type>>),
685 "The seq object must be a std::ranges::forward_range over "
686 "letters that model seqan3::alphabet.");
688 static_assert((std::ranges::forward_range<id_type> &&
689 alphabet<std::ranges::range_reference_t<id_type>>),
690 "The id object must be a std::ranges::forward_range over "
691 "letters that model seqan3::alphabet.");
693 if constexpr (!detail::decays_to_ignore_v<ref_id_type>)
695 static_assert((std::ranges::forward_range<ref_id_type> ||
698 "The ref_id object must be a std::ranges::forward_range "
699 "over letters that model seqan3::alphabet.");
703 static_assert(!detail::decays_to_ignore_v<header_type>,
704 "If you give indices as reference id information the header must also be present.");
708 "The align object must be a std::pair of two ranges whose "
709 "value_type is comparable to seqan3::gap");
712 std::equality_comparable_with<
gap, std::ranges::range_reference_t<decltype(std::get<0>(align))>> &&
713 std::equality_comparable_with<
gap, std::ranges::range_reference_t<decltype(std::get<1>(align))>>),
714 "The align object must be a std::pair of two ranges whose "
715 "value_type is comparable to seqan3::gap");
717 static_assert((std::ranges::forward_range<qual_type> &&
718 alphabet<std::ranges::range_reference_t<qual_type>>),
719 "The qual object must be a std::ranges::forward_range "
720 "over letters that model seqan3::alphabet.");
723 "The mate object must be a std::tuple of size 3 with "
724 "1) a std::ranges::forward_range with a value_type modelling seqan3::alphabet, "
725 "2) a std::integral or std::optional<std::integral>, and "
726 "3) a std::integral.");
728 static_assert(((std::ranges::forward_range<decltype(std::get<0>(
mate))> ||
734 "The mate object must be a std::tuple of size 3 with "
735 "1) a std::ranges::forward_range with a value_type modelling seqan3::alphabet, "
736 "2) a std::integral or std::optional<std::integral>, and "
737 "3) a std::integral.");
741 static_assert(!detail::decays_to_ignore_v<header_type>,
742 "If you give indices as mate reference id information the header must also be present.");
745 "The tag_dict object must be of type seqan3::sam_tag_dictionary.");
750 if constexpr (!detail::decays_to_ignore_v<header_type> &&
751 !detail::decays_to_ignore_v<ref_id_type> &&
760 if constexpr (std::ranges::contiguous_range<decltype(
ref_id)> &&
761 std::ranges::sized_range<decltype(
ref_id)> &&
762 std::ranges::borrowed_range<decltype(
ref_id)>)
771 "The ref_id type is not convertible to the reference id information stored in the "
772 "reference dictionary of the header object.");
784 throw format_error{
"The ref_offset object must be an std::integral >= 0."};
789 if constexpr (!detail::decays_to_ignore_v<header_type>)
803 constexpr
char separator{
'\t'};
806 *stream_it = separator;
808 stream_it.write_number(
static_cast<uint16_t
>(
flag));
809 *stream_it = separator;
811 if constexpr (!detail::decays_to_ignore_v<ref_id_type>)
834 *stream_it = separator;
837 stream_it.write_number(
ref_offset.value_or(-1) + 1);
838 *stream_it = separator;
840 stream_it.write_number(
static_cast<unsigned>(
mapq));
841 *stream_it = separator;
843 if (!std::ranges::empty(cigar_vector))
845 for (
auto & c : cigar_vector)
846 stream_it.write_range(c.to_string());
848 else if (!std::ranges::empty(get<0>(align)) && !std::ranges::empty(get<1>(align)))
855 for (
auto chr : get<1>(align))
870 *stream_it = separator;
878 if (get<0>(
mate).has_value())
890 *stream_it = separator;
895 stream_it.write_number(get<1>(
mate).value_or(-1) + 1);
896 *stream_it = separator;
900 stream_it.write_number(get<1>(
mate));
901 *stream_it = separator;
904 stream_it.write_number(get<2>(
mate));
905 *stream_it = separator;
908 *stream_it = separator;
935 template <
typename stream_view_type,
typename value_type>
937 stream_view_type && stream_view,
941 while (std::ranges::begin(stream_view) != ranges::end(stream_view))
946 if (is_char<','>(*std::ranges::begin(stream_view)))
947 std::ranges::next(std::ranges::begin(stream_view));
965 template <
typename stream_view_type>
967 stream_view_type && stream_view)
972 while (std::ranges::begin(stream_view) != ranges::end(stream_view))
980 throw format_error{
"Hexadecimal tag has an uneven number of digits!"};
1006 template <
typename stream_view_type>
1014 uint16_t tag =
static_cast<uint16_t
>(*std::ranges::begin(stream_view)) << 8;
1015 std::ranges::next(std::ranges::begin(stream_view));
1016 tag +=
static_cast<uint16_t
>(*std::ranges::begin(stream_view));
1017 std::ranges::next(std::ranges::begin(stream_view));
1018 std::ranges::next(std::ranges::begin(stream_view));
1019 char type_id = *std::ranges::begin(stream_view);
1020 std::ranges::next(std::ranges::begin(stream_view));
1021 std::ranges::next(std::ranges::begin(stream_view));
1027 target[tag] =
static_cast<char>(*std::ranges::begin(stream_view));
1028 std::ranges::next(std::ranges::begin(stream_view));
1047 target[tag] = stream_view | views::to<std::string>;
1057 char array_value_type_id = *std::ranges::begin(stream_view);
1058 std::ranges::next(std::ranges::begin(stream_view));
1059 std::ranges::next(std::ranges::begin(stream_view));
1061 switch (array_value_type_id)
1086 "id of a SAM tag must be one of [cCsSiIf] but '" + array_value_type_id +
1093 "SAM tag must be one of [A,i,Z,H,B,f] but '") + type_id +
"' was given."};
1104 template <
typename stream_it_t, std::ranges::forward_range field_type>
1107 if (std::ranges::empty(field_value))
1113 if constexpr (std::same_as<
std::remove_cvref_t<std::ranges::range_reference_t<field_type>>,
char>)
1114 stream_it.write_range(field_value);
1126 template <
typename stream_it_t>
1139 template <
typename stream_it_t>
1142 auto const stream_variant_fn = [&stream_it] (
auto && arg)
1146 if constexpr (std::ranges::input_range<T>)
1150 stream_it.write_range(arg);
1154 if (!std::ranges::empty(arg))
1156 stream_it.write_number(std::to_integer<uint8_t>(*std::ranges::begin(arg)));
1161 stream_it.write_number(std::to_integer<uint8_t>(elem));
1167 if (!std::ranges::empty(arg))
1169 stream_it.write_number(*std::ranges::begin(arg));
1174 stream_it.write_number(elem);
1185 stream_it.write_number(arg);
1189 for (
auto & [tag, variant] : tag_dict)
1191 *stream_it = separator;
1193 char const char0 = tag / 256;
1194 char const char1 = tag % 256;
Adaptations of algorithms from the Ranges TS.
Core alphabet concept and free function/type trait wrappers.
Provides seqan3::views::char_to.
Provides seqan3::views::to_char.
Functionally the same as std::ostreambuf_iterator, but offers writing a range more efficiently.
Definition: fast_ostreambuf_iterator.hpp:39
The alphabet of a gap character '-'.
Definition: gap.hpp:39
The SAM tag dictionary class that stores all optional SAM fields.
Definition: sam_tag_dictionary.hpp:332
Provides various transformation traits used by the range module.
Auxiliary for pretty printing of exception messages.
Provides type traits for working with templates.
Provides concepts for core language types and relations that don't have concepts in C++20 (yet).
Provides seqan3::detail::fast_ostreambuf_iterator.
std::string make_printable(char const c)
Returns a printable value for the given character c.
Definition: pretty_print.hpp:48
constexpr auto is_space
Checks whether c is a space character.
Definition: predicate.hpp:144
sam_flag
An enum flag that describes the properties of an aligned read (given as a SAM record).
Definition: sam_flag.hpp:73
std::string get_cigar_string(std::vector< cigar > const &cigar_vector)
Transforms a vector of cigar elements into a string representation.
Definition: cigar.hpp:263
@ none
None of the flags below are set.
@ flag
The alignment flag (bit information), uint16_t value.
@ ref_offset
Sequence (seqan3::field::ref_seq) relative start position (0-based), unsigned value.
@ mapq
The mapping quality of the seqan3::field::seq alignment, usually a Phred-scaled score.
@ offset
Sequence (seqan3::field::seq) relative start position (0-based), unsigned value.
@ mate
The mate pair information given as a std::tuple of reference name, offset and template length.
@ ref_id
The identifier of the (reference) sequence that seqan3::field::seq was aligned to.
@ seq
The "sequence", usually a range of nucleotides or amino acids.
@ qual
The qualities, usually in Phred score notation.
constexpr void consume(rng_t &&rng)
Iterate over a range (consumes single-pass input ranges).
Definition: misc.hpp:28
typename decltype(detail::split_after< i >(list_t{}))::second_type drop
Return a seqan3::type_list of the types in the input type list, except the first n.
Definition: traits.hpp:351
decltype(detail::transform< trait_t >(list_t{})) transform
Apply a transformation trait to every type in the list and return a seqan3::type_list of the results.
Definition: traits.hpp:434
constexpr size_t size
The size of a type pack.
Definition: traits.hpp:150
auto const to_char
A view that calls seqan3::to_char() on each element in the input range.
Definition: to_char.hpp:68
constexpr auto slice
A view adaptor that returns a half-open interval on the underlying range.
Definition: slice.hpp:191
constexpr auto take_until_or_throw
A view adaptor that returns elements from the underlying range until the functor evaluates to true (t...
Definition: take_until.hpp:614
constexpr auto istreambuf
A view factory that returns a view over the stream buffer of an input stream.
Definition: istreambuf.hpp:114
constexpr auto take_until
A view adaptor that returns elements from the underlying range until the functor evaluates to true (o...
Definition: take_until.hpp:600
constexpr auto take_exactly_or_throw
A view adaptor that returns the first size elements from the underlying range and also exposes size i...
Definition: take_exactly.hpp:91
constexpr auto take_until_or_throw_and_consume
A view adaptor that returns elements from the underlying range until the functor evaluates to true (t...
Definition: take_until.hpp:642
auto const move
A view that turns lvalue-references into rvalue-references.
Definition: move.hpp:70
constexpr auto take_until_and_consume
A view adaptor that returns elements from the underlying range until the functor evaluates to true (o...
Definition: take_until.hpp:628
Provides seqan3::detail::ignore_output_iterator for writing to null stream.
The generic alphabet concept that covers most data types used in ranges.
Resolves to std::ranges::implicitly_convertible_to<type1, type2>(). This entity i...
A more refined container concept than seqan3::container.
The generic concept for a (biological) sequence.
Whether a type behaves like a tuple.
Provides various utility functions.
Auxiliary functions for the alignment IO.
Provides seqan3::views::istreambuf.
std::tuple< std::vector< cigar >, int32_t, int32_t > parse_cigar(cigar_input_type &&cigar_input)
Parses a cigar string into a vector of operation-count pairs (e.g. (M, 3)).
Definition: cigar.hpp:134
constexpr char sam_tag_type_char_extra[12]
Each type's SAM tag type extra char id. Index corresponds to the seqan3::detail::sam_tag_variant types...
Definition: sam_tag_dictionary.hpp:38
constexpr char sam_tag_type_char[12]
Each SAM tag type char identifier. Index corresponds to the seqan3::detail::sam_tag_variant types.
Definition: sam_tag_dictionary.hpp:36
std::string to_string(value_type &&...values)
Streams all parameters via the seqan3::debug_stream and returns a concatenated string.
Definition: to_string.hpp:29
The main SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:29
Provides various utility functions.
Adaptations of concepts from the Ranges TS.
Provides seqan3::sam_file_output_options.
Provides the seqan3::sam_tag_dictionary class and auxiliaries.
Provides helper data structures for the seqan3::sam_file_output.
Provides seqan3::sequence_file_output_options.
Provides seqan3::views::slice.
Thrown if there is a parse error, such as reading an unexpected character from an input stream.
Definition: exception.hpp:48
The options type defines various option members that influence the behavior of all or some formats.
Definition: output_options.hpp:23
bool add_carriage_return
The default plain text line-ending is "\n", but on Windows an additional carriage return is recommend...
Definition: output_options.hpp:27
bool sam_require_header
Whether to require a header for SAM files.
Definition: output_options.hpp:41
The options type defines various option members that influence the behaviour of all or some formats.
Definition: output_options.hpp:22
Exposes the value_type of another type.
Definition: pre.hpp:58
Provides seqan3::views::take_until and seqan3::views::take_until_or_throw.
Provides seqan3::views::to.
Provides traits to inspect some information of a type, for example its name.
Provides character predicates for tokenisation.
Provides seqan3::tuple_like.