-
Notifications
You must be signed in to change notification settings - Fork 136
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Update GFA reader to support GFA v1.2 paths
- Loading branch information
Showing
8 changed files
with
462 additions
and
192 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
//*************************************************************************** | ||
//* Copyright (c) 2023-2024 SPAdes team | ||
//* All Rights Reserved | ||
//* See file LICENSE for details. | ||
//*************************************************************************** | ||
|
||
#include <lexy/action/parse.hpp> // lexy::parse | ||
#include <lexy/input/string_input.hpp> | ||
#include <lexy_ext/report_error.hpp> | ||
|
||
#include "cigar.hpp" | ||
|
||
#include "cigar.inl" | ||
|
||
namespace cigar { | ||
/// Streams a tag as its two name characters, a ':' and then the stored value.
///
/// NOTE(review): unlike tag::print(), the one-letter type code is not written
/// here, so the output is NAME:VALUE rather than NAME:TYPE:VALUE. Confirm that
/// callers really want this shorter form.
std::ostream &operator<<(std::ostream &s, const tag &t) {
    s << t.name[0];
    s << t.name[1];
    s << ':';

    // Forward whichever alternative the variant currently holds to the stream.
    const auto emit = [&s](const auto &payload) -> std::ostream & { return s << payload; };
    return std::visit(emit, t.val);
}
|
||
std::optional<tag> parseTag(const char* line, size_t len) { | ||
lexy::visualization_options opts; | ||
opts.max_lexeme_width = 35; | ||
|
||
auto result = lexy::parse<grammar::tag>(lexy::string_input(line, len), lexy_ext::report_error.opts(opts)); | ||
if (result.has_value()) | ||
return std::make_optional(result.value()); | ||
|
||
return {}; | ||
} | ||
|
||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
//*************************************************************************** | ||
//* Copyright (c) 2023-2024 SPAdes team | ||
//* All Rights Reserved | ||
//* See file LICENSE for details. | ||
//*************************************************************************** | ||
|
||
#pragma once | ||
|
||
#include <string> | ||
#include <variant> | ||
#include <string_view> | ||
#include <optional> | ||
#include <vector> | ||
#include <algorithm> | ||
#include <ostream> | ||
|
||
#include <cstdint> | ||
#include <cstdlib> | ||
#include <cinttypes> | ||
#include <cstdio> | ||
|
||
namespace cigar { | ||
/// A single tag: a two-character name, a one-character type code and a value
/// that is either an integer, a string or a float.
struct tag {
    char name[2];
    char type;
    std::variant<std::int64_t, std::string, float> val;

    /// Builds a tag from the first two characters of `n`, the first character
    /// of `t` and the value `v`.
    ///
    /// No length checks are performed: `n` has to hold at least two characters
    /// and `t` at least one.
    template<typename T>
    tag(std::string_view n, std::string_view t, T v)
            : name{n[0], n[1]},
              type(t.front()),
              val(std::move(v)) {}

    friend std::ostream &operator<<(std::ostream &s, const tag &t);

    /// Writes the tag to stdout as NAME:TYPE:VALUE (no trailing newline).
    void print() const {
        std::fprintf(stdout, "%c%c", name[0], name[1]);
        std::fputs(":", stdout);
        // Dispatch on the alternative currently stored in the variant.
        std::visit([this](const auto &payload) { write_value(payload); }, val);
    }

  private:
    /// Writes TYPE:VALUE for an integer value.
    void write_value(std::int64_t number) const {
        std::fprintf(stdout, "%c:%" PRId64, type, number);
    }

    /// Writes TYPE:VALUE for a string value.
    void write_value(const std::string &text) const {
        std::fprintf(stdout, "%c:%s", type, text.c_str());
    }

    /// Writes TYPE:VALUE for a floating-point value.
    void write_value(float number) const {
        std::fprintf(stdout, "%c:%g", type, number);
    }
};
|
||
/// One CIGAR operation: a repeat count and an opcode character.
///
/// The count is stored in a 24-bit bit-field, so only values up to 16777215
/// can be represented; larger values do not fit.
struct cigarop {
    uint32_t count : 24;
    char op : 8;

    /// Writes the operation to stdout as COUNT followed by the opcode
    /// character (no separator, no trailing newline).
    void print() const {
        // A 24-bit bit-field undergoes integral promotion to (signed) int when
        // passed through "...", which does not match "%u". Cast explicitly so
        // the argument type agrees with the conversion specifier.
        std::fprintf(stdout, "%u%c", static_cast<unsigned>(count), op);
    }
};

/// A CIGAR string as an ordered sequence of operations.
using cigar_string = std::vector<cigarop>;
|
||
static inline std::optional<tag> | ||
getTag(const char *name, | ||
const std::vector<tag> &tags) { | ||
auto res = std::find_if(tags.begin(), tags.end(), | ||
[=](const tag &tag) { | ||
return (tag.name[0] == name[0] && | ||
tag.name[1] == name[1]); | ||
}); | ||
if (res == tags.end()) | ||
return {}; | ||
|
||
return *res; | ||
} | ||
|
||
template<class T> | ||
std::optional<T> getTag(const char *name, | ||
const std::vector<tag> &tags) { | ||
auto res = std::find_if(tags.begin(), tags.end(), | ||
[=](const tag &tag) { | ||
return (tag.name[0] == name[0] && | ||
tag.name[1] == name[1]); | ||
}); | ||
if (res == tags.end()) | ||
return {}; | ||
|
||
if (!std::holds_alternative<T>(res->val)) | ||
return {}; | ||
|
||
return std::get<T>(res->val); | ||
} | ||
|
||
std::optional<tag> parseTag(const char* line, size_t len); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,123 @@ | ||
//*************************************************************************** | ||
//* Copyright (c) 2023-2024 SPAdes team | ||
//* All Rights Reserved | ||
//* See file LICENSE for details. | ||
//*************************************************************************** | ||
|
||
#include <lexy/dsl.hpp> // lexy::dsl::* | ||
#include <lexy/callback.hpp> // lexy callbacks | ||
#include <lexy/grammar.hpp> | ||
|
||
#include <string> | ||
|
||
namespace cigar::grammar { | ||
namespace dsl = lexy::dsl; | ||
|
||
struct tag { | ||
struct tag_character : lexy::token_production { | ||
static constexpr auto rule = dsl::capture(dsl::ascii::alpha_digit); | ||
static constexpr auto value = lexy::as_string<std::string>; | ||
}; | ||
|
||
struct tag_integer : lexy::token_production { | ||
static constexpr auto rule = | ||
dsl::minus_sign + dsl::integer<std::int64_t>(dsl::digits<>.no_leading_zero()); | ||
static constexpr auto value = lexy::as_integer<std::int64_t>; | ||
}; | ||
|
||
struct tag_string : lexy::token_production { | ||
static constexpr auto rule = dsl::identifier(dsl::ascii::print); | ||
static constexpr auto value = lexy::as_string<std::string>; | ||
}; | ||
|
||
struct tag_float : lexy::token_production { | ||
static constexpr auto rule = [] { | ||
auto integer = dsl::if_(dsl::lit_c < '-' > ) + dsl::digits<>.no_leading_zero(); | ||
auto fraction = dsl::lit_c < '.' > >> dsl::digits<>; | ||
auto exp_char = dsl::lit_c < 'e' > | dsl::lit_c<'E'>; | ||
auto exponent = exp_char >> (dsl::lit_c < '+' > | dsl::lit_c < '-' > ) + dsl::digits<>; | ||
return dsl::peek(dsl::lit_c < '-' > / dsl::digit<>) >> | ||
dsl::position + | ||
integer + | ||
dsl::if_(fraction) + | ||
dsl::if_(exponent) + | ||
dsl::position; | ||
}(); | ||
|
||
static constexpr auto value = lexy::callback<float>( | ||
// std::from_chars(const char*, const char*, float) is only | ||
// available starting from libc++ from LLVM 14 :( | ||
[](const char *first, const char *) { return ::atof(first); } | ||
); | ||
}; | ||
|
||
struct tag_name : lexy::token_production { | ||
static constexpr auto name = "tag name"; | ||
|
||
static constexpr auto rule = dsl::capture(dsl::token(dsl::ascii::alpha + dsl::ascii::alpha_digit)); | ||
static constexpr auto value = lexy::as_string<std::string_view>; | ||
}; | ||
|
||
struct invalid_tag_type { | ||
static constexpr auto name = "invalid tag type"; | ||
}; | ||
|
||
static constexpr auto rule = [] { | ||
auto colon = dsl::lit_c<':'>; | ||
return dsl::p<tag_name> >> colon + | ||
( | ||
dsl::capture(LEXY_LIT("A")) >> colon + dsl::p < tag_character > | | ||
dsl::capture(LEXY_LIT("i")) >> colon + dsl::p < tag_integer > | | ||
dsl::capture(LEXY_LIT("f")) >> colon + dsl::p < tag_float > | | ||
dsl::capture(LEXY_LIT("Z")) >> colon + dsl::p < tag_string > | | ||
dsl::capture(LEXY_LIT("J")) >> colon + dsl::p < tag_string > | | ||
dsl::capture(LEXY_LIT("H")) >> colon + dsl::p < tag_string > | | ||
dsl::capture(LEXY_LIT("B")) >> colon + dsl::p < tag_string > | | ||
dsl::error<invalid_tag_type> | ||
); | ||
}(); | ||
|
||
static constexpr auto value = lexy::callback<cigar::tag>( | ||
[](std::string_view name, auto type, auto val) { | ||
return cigar::tag{name, std::string_view{type.data(), type.size()}, val}; | ||
}); | ||
}; | ||
|
||
struct cigar_string { | ||
static constexpr auto name = "CIGAR string"; | ||
|
||
static constexpr auto cigaropcode = | ||
LEXY_CHAR_CLASS("CIGAR opcode", | ||
LEXY_LIT("M") / LEXY_LIT("I") / LEXY_LIT("D") / | ||
LEXY_LIT("N") / LEXY_LIT("S") / LEXY_LIT("H") / | ||
LEXY_LIT("P") / LEXY_LIT("X") / LEXY_LIT("=")) / LEXY_LIT("J"); | ||
|
||
struct cigarop : lexy::transparent_production { | ||
static constexpr auto name = "CIGAR operation"; | ||
|
||
static constexpr auto rule = | ||
dsl::period | | ||
dsl::integer<std::uint32_t> >> dsl::capture(cigaropcode); | ||
static constexpr auto value = lexy::callback<cigar::cigarop>( | ||
[]() { return cigar::cigarop{0, 0}; }, | ||
[](std::uint32_t cnt, auto lexeme) { | ||
return cigar::cigarop{cnt, lexeme[0]}; | ||
}); | ||
}; | ||
|
||
static constexpr auto rule = dsl::list(dsl::p<cigarop>); | ||
static constexpr auto value = lexy::as_list<std::vector<cigar::cigarop>>; | ||
}; | ||
|
||
static constexpr auto tab = dsl::lit_c<'\t'>; | ||
|
||
struct opt_tags { | ||
static constexpr auto name = "tags"; | ||
|
||
static constexpr auto rule = [] { | ||
auto tags = dsl::list(dsl::p<tag>, dsl::trailing_sep(tab)); | ||
return dsl::eof | (tab >> tags + dsl::eof); | ||
}(); | ||
static constexpr auto value = lexy::as_list<std::vector<cigar::tag>>; | ||
}; | ||
} |
Oops, something went wrong.