Skip to content

Commit

Permalink
Update GFA reader to support GFA v1.2 paths
Browse files Browse the repository at this point in the history
  • Loading branch information
asl committed Nov 1, 2024
1 parent 84d47d2 commit dcba88e
Show file tree
Hide file tree
Showing 8 changed files with 462 additions and 192 deletions.
2 changes: 1 addition & 1 deletion src/common/io/graph/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,6 @@ project(graphio CXX)
include_directories(${CMAKE_CURRENT_SOURCE_DIR})

# Static library "graphio" built from the graph I/O sources listed below.
add_library(graphio STATIC
gfa.cpp gfa_reader.cpp gfa_writer.cpp
gfa.cpp cigar.cpp gfa_reader.cpp gfa_writer.cpp
fastg_writer.cpp)
target_link_libraries(graphio foonathan::lexy zlibstatic)
33 changes: 33 additions & 0 deletions src/common/io/graph/cigar.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
//***************************************************************************
//* Copyright (c) 2023-2024 SPAdes team
//* All Rights Reserved
//* See file LICENSE for details.
//***************************************************************************

#include <lexy/action/parse.hpp> // lexy::parse
#include <lexy/input/string_input.hpp>
#include <lexy_ext/report_error.hpp>

#include "cigar.hpp"

#include "cigar.inl"

namespace cigar {
// Streams a tag as its two-character name, a ':' separator and the stored
// value, and returns the stream.
// NOTE(review): the one-letter type code is not written here, whereas
// tag::print() emits it -- confirm this difference is intended.
std::ostream &operator<<(std::ostream &s, const tag &t) {
    s << t.name[0];
    s << t.name[1];
    s << ':';
    // Dispatch on whichever alternative the variant currently holds.
    std::visit([&s](const auto &payload) { s << payload; }, t.val);
    return s;
}

std::optional<tag> parseTag(const char* line, size_t len) {
lexy::visualization_options opts;
opts.max_lexeme_width = 35;

auto result = lexy::parse<grammar::tag>(lexy::string_input(line, len), lexy_ext::report_error.opts(opts));
if (result.has_value())
return std::make_optional(result.value());

return {};
}


}
97 changes: 97 additions & 0 deletions src/common/io/graph/cigar.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
//***************************************************************************
//* Copyright (c) 2023-2024 SPAdes team
//* All Rights Reserved
//* See file LICENSE for details.
//***************************************************************************

#pragma once

#include <string>
#include <variant>
#include <string_view>
#include <optional>
#include <vector>
#include <algorithm>
#include <ostream>

#include <cstdint>
#include <cstdlib>
#include <cinttypes>
#include <cstdio>

namespace cigar {
// A single tag: a two-character name, a one-character type code and a value
// that is an integer, a string or a float.
struct tag {
    char name[2];                                   // two-character tag name
    char type;                                      // one-character type code
    std::variant<int64_t, std::string, float> val;  // the stored value

    // Builds a tag from the first two characters of `n`, the first character
    // of `t` and the value `v`. `n` must hold at least two characters and `t`
    // at least one; neither is checked here.
    template<typename T>
    tag(std::string_view n, std::string_view t, T v)
            : name{n[0], n[1]}, type(t.front()), val(std::move(v)) {}

    friend std::ostream &operator<<(std::ostream &s, const tag &t);

    // Writes the tag to stdout as NAME:TYPE:VALUE (no trailing newline).
    void print() const {
        std::fprintf(stdout, "%c%c", name[0], name[1]);
        std::fputs(":", stdout);
        std::visit([this](const auto &held) { write_value(held); }, val);
    }

  private:
    // One overload per variant alternative; each writes "TYPE:VALUE".
    void write_value(float number) const {
        std::fprintf(stdout, "%c:%g", type, number);
    }

    void write_value(const std::string &text) const {
        std::fprintf(stdout, "%c:%s", type, text.c_str());
    }

    void write_value(int64_t number) const {
        std::fprintf(stdout, "%c:%" PRId64, type, number);
    }
};

// One CIGAR operation: a repeat count held in a 24-bit bit-field and an
// opcode character held in an 8-bit bit-field.
struct cigarop {
    uint32_t count : 24;
    char op : 8;

    // Writes the operation to stdout as <count><op> (no trailing newline).
    void print() const {
        const unsigned repeat = count;
        const char opcode = op;
        std::fprintf(stdout, "%u%c", repeat, opcode);
    }
};

// A CIGAR string is represented as the sequence of its operations.
using cigar_string = std::vector<cigarop>;

static inline std::optional<tag>
getTag(const char *name,
const std::vector<tag> &tags) {
auto res = std::find_if(tags.begin(), tags.end(),
[=](const tag &tag) {
return (tag.name[0] == name[0] &&
tag.name[1] == name[1]);
});
if (res == tags.end())
return {};

return *res;
}

// Typed lookup: finds the first tag in `tags` whose two-character name equals
// the first two characters of `name` and returns its value as T. Yields an
// empty optional when no tag matches or when the matching tag's value does
// not currently hold a T.
template<class T>
std::optional<T> getTag(const char *name,
                        const std::vector<tag> &tags) {
    const auto same_name = [name](const tag &candidate) {
        return candidate.name[0] == name[0] && candidate.name[1] == name[1];
    };

    const auto hit = std::find_if(tags.begin(), tags.end(), same_name);
    if (hit == tags.end())
        return std::nullopt;

    // get_if yields nullptr when the variant holds a different alternative.
    if (const T *held = std::get_if<T>(&hit->val))
        return *held;

    return std::nullopt;
}

// Parses a single tag from the first `len` characters of `line`;
// returns an empty optional when no tag could be produced.
std::optional<tag> parseTag(const char* line, size_t len);
}
123 changes: 123 additions & 0 deletions src/common/io/graph/cigar.inl
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
//***************************************************************************
//* Copyright (c) 2023-2024 SPAdes team
//* All Rights Reserved
//* See file LICENSE for details.
//***************************************************************************

#include <lexy/dsl.hpp> // lexy::dsl::*
#include <lexy/callback.hpp> // lexy callbacks
#include <lexy/grammar.hpp>

#include <string>

namespace cigar::grammar {
namespace dsl = lexy::dsl;

// Grammar for a single tag of the form NAME ':' TYPE ':' VALUE.
// The parsed result is a cigar::tag.
struct tag {
// Value of an 'A' tag: one ASCII letter or digit, returned as std::string.
struct tag_character : lexy::token_production {
static constexpr auto rule = dsl::capture(dsl::ascii::alpha_digit);
static constexpr auto value = lexy::as_string<std::string>;
};

// Value of an 'i' tag: a minus sign rule followed by digits without a
// leading zero, converted to std::int64_t.
struct tag_integer : lexy::token_production {
static constexpr auto rule =
dsl::minus_sign + dsl::integer<std::int64_t>(dsl::digits<>.no_leading_zero());
static constexpr auto value = lexy::as_integer<std::int64_t>;
};

// Value of the 'Z', 'J', 'H' and 'B' tags: a run of printable ASCII
// characters, returned as std::string.
struct tag_string : lexy::token_production {
static constexpr auto rule = dsl::identifier(dsl::ascii::print);
static constexpr auto value = lexy::as_string<std::string>;
};

// Value of an 'f' tag: optional '-', digits without a leading zero, an
// optional '.'-fraction and an optional exponent. The start and end
// positions of the match are passed to the value callback.
struct tag_float : lexy::token_production {
static constexpr auto rule = [] {
auto integer = dsl::if_(dsl::lit_c < '-' > ) + dsl::digits<>.no_leading_zero();
auto fraction = dsl::lit_c < '.' > >> dsl::digits<>;
auto exp_char = dsl::lit_c < 'e' > | dsl::lit_c<'E'>;
// NOTE(review): as written, an exponent needs an explicit '+' or '-'
// after the e/E -- confirm an unsigned exponent should be rejected.
auto exponent = exp_char >> (dsl::lit_c < '+' > | dsl::lit_c < '-' > ) + dsl::digits<>;
return dsl::peek(dsl::lit_c < '-' > / dsl::digit<>) >>
dsl::position +
integer +
dsl::if_(fraction) +
dsl::if_(exponent) +
dsl::position;
}();

// NOTE(review): only the start position is used; ::atof decides on its own
// where the number ends, so this presumably relies on the text after the
// number not being parseable as part of it -- confirm against callers.
static constexpr auto value = lexy::callback<float>(
// std::from_chars(const char*, const char*, float) is only
// available starting from libc++ from LLVM 14 :(
[](const char *first, const char *) { return ::atof(first); }
);
};

// Tag name: an ASCII letter followed by an ASCII letter or digit, produced
// as a std::string_view.
struct tag_name : lexy::token_production {
static constexpr auto name = "tag name";

static constexpr auto rule = dsl::capture(dsl::token(dsl::ascii::alpha + dsl::ascii::alpha_digit));
static constexpr auto value = lexy::as_string<std::string_view>;
};

// Error raised when the type letter is none of A, i, f, Z, J, H, B.
struct invalid_tag_type {
static constexpr auto name = "invalid tag type";
};

// NAME ':' then one branch per type letter; each branch captures the type
// letter, expects another ':' and parses the matching value production.
static constexpr auto rule = [] {
auto colon = dsl::lit_c<':'>;
return dsl::p<tag_name> >> colon +
(
dsl::capture(LEXY_LIT("A")) >> colon + dsl::p < tag_character > |
dsl::capture(LEXY_LIT("i")) >> colon + dsl::p < tag_integer > |
dsl::capture(LEXY_LIT("f")) >> colon + dsl::p < tag_float > |
dsl::capture(LEXY_LIT("Z")) >> colon + dsl::p < tag_string > |
dsl::capture(LEXY_LIT("J")) >> colon + dsl::p < tag_string > |
dsl::capture(LEXY_LIT("H")) >> colon + dsl::p < tag_string > |
dsl::capture(LEXY_LIT("B")) >> colon + dsl::p < tag_string > |
dsl::error<invalid_tag_type>
);
}();

// Assembles the cigar::tag from the name, the captured type lexeme and the
// parsed value.
static constexpr auto value = lexy::callback<cigar::tag>(
[](std::string_view name, auto type, auto val) {
return cigar::tag{name, std::string_view{type.data(), type.size()}, val};
});
};

// Grammar for a CIGAR string: a list of CIGAR operations collected into a
// std::vector<cigar::cigarop>.
struct cigar_string {
static constexpr auto name = "CIGAR string";

// Accepted opcode characters.
// NOTE(review): "J" is combined outside the LEXY_CHAR_CLASS(...) call rather
// than inside it -- confirm this placement is intended.
static constexpr auto cigaropcode =
LEXY_CHAR_CLASS("CIGAR opcode",
LEXY_LIT("M") / LEXY_LIT("I") / LEXY_LIT("D") /
LEXY_LIT("N") / LEXY_LIT("S") / LEXY_LIT("H") /
LEXY_LIT("P") / LEXY_LIT("X") / LEXY_LIT("=")) / LEXY_LIT("J");

// A single operation: either a period (yielding a zeroed cigarop) or an
// unsigned count followed by one captured opcode character.
struct cigarop : lexy::transparent_production {
static constexpr auto name = "CIGAR operation";

static constexpr auto rule =
dsl::period |
dsl::integer<std::uint32_t> >> dsl::capture(cigaropcode);
// NOTE(review): the count is parsed as std::uint32_t but cigar::cigarop
// stores it in a 24-bit field -- confirm larger counts cannot occur.
static constexpr auto value = lexy::callback<cigar::cigarop>(
[]() { return cigar::cigarop{0, 0}; },
[](std::uint32_t cnt, auto lexeme) {
return cigar::cigarop{cnt, lexeme[0]};
});
};

static constexpr auto rule = dsl::list(dsl::p<cigarop>);
static constexpr auto value = lexy::as_list<std::vector<cigar::cigarop>>;
};

// Field separator: a single tab character.
static constexpr auto tab = dsl::lit_c<'\t'>;

// Optional tag section at the end of a record: either end of input right
// away, or a tab followed by a list of tags separated by tabs (using
// dsl::trailing_sep) and then end of input. Produces std::vector<cigar::tag>.
struct opt_tags {
static constexpr auto name = "tags";

static constexpr auto rule = [] {
auto tags = dsl::list(dsl::p<tag>, dsl::trailing_sep(tab));
return dsl::eof | (tab >> tags + dsl::eof);
}();
static constexpr auto value = lexy::as_list<std::vector<cigar::tag>>;
};
}
Loading

0 comments on commit dcba88e

Please sign in to comment.