Skip to content

Commit

Permalink
fix: add FastaSequenceDataType import (#151)
Browse files (browse the repository at this point in the history)
* fix: add FastaSequenceDataType import
* test: add test, cleanup naming

---------

Co-authored-by: Trent Hauck <trent@trenthauck.com>
  • Loading branch information
ghuls and tshauck authored Jun 20, 2024
1 parent d367f2e commit 25d05de
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 11 deletions.
2 changes: 2 additions & 0 deletions python/biobear/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from biobear.compression import Compression

from .biobear import FileCompressionType
from .biobear import FastaSequenceDataType
from .biobear import FASTQReadOptions
from .biobear import FASTAReadOptions
from .biobear import VCFReadOptions
Expand Down Expand Up @@ -65,6 +66,7 @@
"compression",
"Compression",
"FileCompressionType",
"FastaSequenceDataType",
"FASTQReadOptions",
"FASTAReadOptions",
"BCFReadOptions",
Expand Down
17 changes: 16 additions & 1 deletion python/tests/test_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,18 @@
from pathlib import Path
import importlib
import tempfile
from biobear.biobear import BEDReadOptions
import polars as pl

import pytest

from biobear import (
BAMReadOptions,
connect,
FastaSequenceDataType,
FASTQReadOptions,
FASTAReadOptions,
FileCompressionType,
BEDReadOptions,
BCFReadOptions,
GFFReadOptions,
VCFReadOptions,
Expand Down Expand Up @@ -192,6 +193,20 @@ def test_fasta_sequence_type():
assert df.get_column("sequence").dtype == pl.List(pl.Int8)


def test_fasta_sequence_type_with_options():
    """Read a FASTA file with INTEGER_ENCODE_DNA options and check the sequence dtype."""
    session = connect()

    # Build the arguments up front so the read call itself stays on one line.
    fasta_path = str(DATA / "test.fasta")
    read_options = FASTAReadOptions(
        fasta_sequence_data_type=FastaSequenceDataType.INTEGER_ENCODE_DNA
    )

    frame = session.read_fasta_file(fasta_path, options=read_options).to_polars()

    # The sequence column is expected to come back as a list of 8-bit integers.
    sequence_dtype = frame.get_column("sequence").dtype
    assert sequence_dtype == pl.List(pl.Int8)


@pytest.mark.skipif(
not importlib.util.find_spec("polars"), reason="polars not installed"
)
Expand Down
23 changes: 13 additions & 10 deletions src/datasources/fasta.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,19 +21,22 @@ const DEFAULT_FASTA_FILE_EXTENSION: &str = "fasta";
// NOTE(review): this listing comes from a diff view and appears to show the
// removed CamelCase variants (`Utf8` .. `IntegerEncodeProtein`) next to the
// added upper-case ones — confirm against the actual file before relying on it.
/// Sequence data type choice for FASTA reads, exposed to Python via `#[pyclass]`.
///
/// Each variant is converted to the matching `SequenceDataType` variant by the
/// `From<FastaSequenceDataType>` implementation.
#[derive(Debug, Clone)]
#[pyclass]
pub enum FastaSequenceDataType {
// CamelCase spellings (presumably the pre-change names — TODO confirm).
Utf8,
LargeUtf8,
IntegerEncodeDNA,
IntegerEncodeProtein,
// Upper-case spellings; `UTF8` is the value used as the read-options default.
UTF8,
// The underscore makes this name non-camel-case, so the lint is silenced.
#[allow(non_camel_case_types)]
LARGE_UTF8,
// Lint silenced for the same reason; the Python test refers to this variant
// as `FastaSequenceDataType.INTEGER_ENCODE_DNA`.
#[allow(non_camel_case_types)]
INTEGER_ENCODE_DNA,
// Lint silenced because the name is upper snake case rather than camel case.
#[allow(non_camel_case_types)]
INTEGER_ENCODE_PROTEIN,
}

/// Conversion from the Python-facing `FastaSequenceDataType` into `SequenceDataType`.
// NOTE(review): the match below lists both CamelCase and upper-case source
// variants, which looks like removed and added diff lines shown together —
// confirm which set the real file contains.
impl From<FastaSequenceDataType> for SequenceDataType {
/// Maps each `FastaSequenceDataType` variant to the `SequenceDataType`
/// variant of the same meaning; the conversion consumes `data_type`.
fn from(data_type: FastaSequenceDataType) -> Self {
match data_type {
// CamelCase source variants (presumably the pre-change arms — TODO confirm).
FastaSequenceDataType::Utf8 => SequenceDataType::Utf8,
FastaSequenceDataType::LargeUtf8 => SequenceDataType::LargeUtf8,
FastaSequenceDataType::IntegerEncodeDNA => SequenceDataType::IntegerEncodeDNA,
FastaSequenceDataType::IntegerEncodeProtein => SequenceDataType::IntegerEncodeProtein,
// Upper-case source variants; the target variant names stay CamelCase.
FastaSequenceDataType::UTF8 => SequenceDataType::Utf8,
FastaSequenceDataType::LARGE_UTF8 => SequenceDataType::LargeUtf8,
FastaSequenceDataType::INTEGER_ENCODE_DNA => SequenceDataType::IntegerEncodeDNA,
FastaSequenceDataType::INTEGER_ENCODE_PROTEIN => SequenceDataType::IntegerEncodeProtein,
}
}
}
Expand Down Expand Up @@ -75,7 +78,7 @@ impl Default for FASTAReadOptions {
Self {
file_extension: String::from(DEFAULT_FASTA_FILE_EXTENSION),
file_compression_type: FileCompressionType::UNCOMPRESSED,
fasta_sequence_data_type: FastaSequenceDataType::Utf8,
fasta_sequence_data_type: FastaSequenceDataType::UTF8,
}
}
}
Expand Down Expand Up @@ -107,7 +110,7 @@ impl FASTAReadOptions {
file_compression_type.unwrap_or(FileCompressionType::UNCOMPRESSED);

let fasta_sequence_data_type =
fasta_sequence_data_type.unwrap_or(FastaSequenceDataType::Utf8);
fasta_sequence_data_type.unwrap_or(FastaSequenceDataType::UTF8);

Ok(Self {
file_compression_type,
Expand Down
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ fn biobear(_py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> {
m.add_class::<vcf_reader::VCFIndexedReader>()?;
m.add_class::<bcf_reader::BCFIndexedReader>()?;
m.add_class::<file_compression_type::FileCompressionType>()?;
m.add_class::<datasources::fasta::FastaSequenceDataType>()?;
m.add_class::<datasources::fastq::FASTQReadOptions>()?;
m.add_class::<datasources::fasta::FASTAReadOptions>()?;
m.add_class::<datasources::bcf::BCFReadOptions>()?;
Expand Down

0 comments on commit 25d05de

Please sign in to comment.