Skip to content

Commit

Permalink
Add error handling; fix performance; fix cid decoding (#4)
Browse files Browse the repository at this point in the history
  • Loading branch information
MarshalX authored Feb 24, 2024
1 parent a99bc9b commit 37a1544
Show file tree
Hide file tree
Showing 2 changed files with 130 additions and 160 deletions.
4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "libipld"
version = "1.1.0"
version = "1.1.1"
edition = "2021"
license = "MIT"
description = "Python binding to the Rust IPLD library"
Expand All @@ -13,7 +13,7 @@ name = "libipld"
crate-type = ["cdylib"]

[dependencies]
pyo3 = { version = "0.19", features = ["generate-import-lib", "anyhow"] }
pyo3 = { version = "0.20", features = ["generate-import-lib", "anyhow"] }
python3-dll-a = "0.2.7"
anyhow = "1.0.75"
futures = "0.3"
Expand Down
286 changes: 128 additions & 158 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,217 +1,187 @@
use std::borrow::Cow;
use std::collections::{HashMap};
use std::io::{BufReader, Cursor, Read, Seek};
use pyo3::prelude::*;
use pyo3::conversion::ToPyObject;
use pyo3::{PyObject, Python};
use pyo3::types::{PyBytes};

use ::libipld::{cid::Cid, Ipld};
use ::libipld::cbor::{cbor::MajorKind, DagCborCodec, decode};
use ::libipld::prelude::Codec;
use anyhow::Result;
use iroh_car::{CarHeader, CarReader};
use futures::{executor, stream::StreamExt};
use ::libipld::cbor::cbor::MajorKind;
use ::libipld::cbor::decode;
use ::libipld::{cid::Cid, Ipld};


/// Owned tree of decoded values that can be turned into Python objects.
///
/// `ipld_to_hashmap` builds this tree from an `Ipld` value, and
/// `HashMapItem::value` converts each variant with `to_object`.
#[derive(Clone, PartialEq)]
pub enum HashMapItem {
// Unit variant; converted to Python `None` by `value`.
Null,
Bool(bool),
Integer(i128),
Float(f64),
// Also used for IPLD links, which `ipld_to_hashmap` stores as the CID's string form.
String(String),
List(Vec<HashMapItem>),
Map(HashMap<String, HashMapItem>),
// `ipld_to_hashmap` always fills this with `Cow::Owned`.
Bytes(Cow<'static, [u8]>),
}

impl HashMapItem {
fn value(&self) -> PyObject {
Python::with_gil(|py| match self {
Self::Null => py.None(),
Self::Bool(b) => b.to_object(py),
Self::String(s) => s.to_object(py),
Self::Integer(i) => i.to_object(py),
Self::Float(f) => f.to_object(py),
Self::List(l) => l.to_object(py),
Self::Map(m) => m.to_object(py),
Self::Bytes(b) => b.to_object(py),
})
use iroh_car::{CarHeader, CarReader, Error};
use pyo3::{PyObject, Python};
use pyo3::conversion::ToPyObject;
use pyo3::prelude::*;
use pyo3::types::{PyBytes, PyDict, PyList};

/// Recursively converts a decoded IPLD value into the matching Python object.
///
/// Scalars go through their `ToPyObject` conversion, byte strings become
/// Python `bytes`, links are rendered as the CID's string form, and lists and
/// maps are rebuilt element by element as a Python `list` / `dict`.
///
/// # Panics
///
/// Panics if appending to the Python list or inserting into the Python dict
/// reports an error.
fn ipld_to_pyobject(py: Python<'_>, ipld: &Ipld) -> PyObject {
    match ipld {
        Ipld::Null => py.None(),
        Ipld::Bool(flag) => flag.to_object(py),
        Ipld::Integer(number) => number.to_object(py),
        Ipld::Float(number) => number.to_object(py),
        Ipld::String(text) => text.to_object(py),
        Ipld::Bytes(raw) => PyBytes::new(py, raw).into(),
        Ipld::Link(cid) => cid.to_string().to_object(py),
        Ipld::List(items) => {
            let py_list = PyList::empty(py);
            for item in items {
                py_list.append(ipld_to_pyobject(py, item)).unwrap();
            }
            py_list.into()
        }
        Ipld::Map(entries) => {
            let py_dict = PyDict::new(py);
            for (key, value) in entries {
                // Key is converted before the value, as in the closure-based form.
                py_dict
                    .set_item(key.to_object(py), ipld_to_pyobject(py, value))
                    .unwrap();
            }
            py_dict.into()
        }
    }
}

/// Borrowing conversion of a `HashMapItem` into a Python object.
impl ToPyObject for HashMapItem {
// The supplied `Python` token is ignored; the work is delegated to
// `HashMapItem::value`, which obtains its own token via `Python::with_gil`.
fn to_object(&self, _: Python<'_>) -> PyObject {
self.value().into()
}
}
fn car_header_to_pydict<'py>(py: Python<'py>, header: &CarHeader) -> &'py PyDict {
let dict_obj = PyDict::new(py);

/// Consuming conversion of a `HashMapItem` into a Python object.
impl IntoPy<Py<PyAny>> for HashMapItem {
// The supplied `Python` token is ignored; the work is delegated to
// `HashMapItem::value`, which obtains its own token via `Python::with_gil`.
fn into_py(self, _: Python<'_>) -> Py<PyAny> {
self.value().into()
}
}
dict_obj.set_item("version", header.version()).unwrap();

let roots = PyList::empty(py);
header.roots().iter().for_each(|cid| {
let cid_obj = cid.to_string().to_object(py);
roots.append(cid_obj).unwrap();
});

/// Recursively converts an owned `Ipld` value into a `HashMapItem` tree.
///
/// Lists and maps are converted element by element; map keys are kept as-is.
fn ipld_to_hashmap(x: Ipld) -> HashMapItem {
match x {
Ipld::Null => HashMapItem::Null,
Ipld::Bool(b) => HashMapItem::Bool(b),
Ipld::Integer(i) => HashMapItem::Integer(i),
Ipld::Float(f) => HashMapItem::Float(f),
Ipld::String(s) => HashMapItem::String(s),
Ipld::Bytes(b) => HashMapItem::Bytes(Cow::Owned(b)),
Ipld::List(l) => HashMapItem::List(l.into_iter().map(ipld_to_hashmap).collect()),
Ipld::Map(m) => HashMapItem::Map(
m.into_iter()
.map(|(k, v)| (k, ipld_to_hashmap(v)))
.collect(),
),
// Links have no dedicated variant; they are stored as the CID's string form.
Ipld::Link(cid) => HashMapItem::String(cid.to_string()),
}
}
dict_obj.set_item("roots", roots).unwrap();

fn car_header_to_hashmap(header: &CarHeader) -> HashMapItem {
HashMapItem::Map(
vec![
("version".to_string(), HashMapItem::Integer(header.version() as i128)),
(
"roots".to_string(),
HashMapItem::List(
header
.roots()
.iter()
.map(|cid| HashMapItem::String(cid.to_string()))
.collect(),
),
),
]
.into_iter()
.collect(),
)
dict_obj.into()
}

fn _cid_hash_to_hashmap(cid: &Cid) -> HashMapItem {
fn cid_hash_to_pydict<'py>(py: Python<'py>, cid: &Cid) -> &'py PyDict {
let hash = cid.hash();
HashMapItem::Map(
vec![
("code".to_string(), HashMapItem::Integer(hash.code() as i128)),
("size".to_string(), HashMapItem::Integer(hash.size() as i128)),
("digest".to_string(), HashMapItem::Bytes(Cow::Owned(hash.digest().to_vec()))),
]
.into_iter()
.collect(),
)
let dict_obj = PyDict::new(py);

dict_obj.set_item("code", hash.code()).unwrap();
dict_obj.set_item("size", hash.size()).unwrap();
dict_obj.set_item("digest", PyBytes::new(py, &hash.digest())).unwrap();

dict_obj.into()
}

fn cid_to_hashmap(cid: &Cid) -> HashMapItem {
HashMapItem::Map(
vec![
("version".to_string(), HashMapItem::Integer(cid.version() as i128)),
("codec".to_string(), HashMapItem::Integer(cid.codec() as i128)),
("hash".to_string(), _cid_hash_to_hashmap(cid)),
]
.into_iter()
.collect(),
)
fn cid_to_pydict<'py>(py: Python<'py>, cid: &Cid) -> &'py PyDict {
let dict_obj = PyDict::new(py);

dict_obj.set_item("version", cid.version() as u64).unwrap();
dict_obj.set_item("codec", cid.codec()).unwrap();
dict_obj.set_item("hash", cid_hash_to_pydict(py, cid)).unwrap();

dict_obj.into()
}

fn parse_dag_cbor_object<R: Read + Seek>(mut reader: &mut BufReader<R>) -> Result<Ipld> {
let major = decode::read_major(&mut reader)?;
fn parse_dag_cbor_object<R: Read + Seek>(r: &mut R) -> Result<Ipld> {
let major = decode::read_major(r)?;
Ok(match major.kind() {
MajorKind::UnsignedInt | MajorKind::NegativeInt => Ipld::Integer(major.info() as i128),
MajorKind::ByteString => Ipld::Bytes(decode::read_bytes(&mut reader, major.info() as u64)?),
MajorKind::TextString => Ipld::String(decode::read_str(&mut reader, major.info() as u64)?),
MajorKind::Array => Ipld::List(decode::read_list(&mut reader, major.info() as u64)?),
MajorKind::Map => Ipld::Map(decode::read_map(&mut reader, major.info() as u64)?),
MajorKind::ByteString => Ipld::Bytes(decode::read_bytes(r, major.info() as u64)?),
MajorKind::TextString => Ipld::String(decode::read_str(r, major.info() as u64)?),
MajorKind::Array => Ipld::List(decode::read_list(r, major.info() as u64)?),
MajorKind::Map => Ipld::Map(decode::read_map(r, major.info() as u64)?),
MajorKind::Tag => {
if major.info() != 42 {
return Err(anyhow::anyhow!("non-42 tags are not supported"));
}

parse_dag_cbor_object(reader)?
Ipld::Link(decode::read_link(r)?)
}
MajorKind::Other => Ipld::Null,
})
}

#[pyfunction]
fn decode_dag_cbor_multi(data: Vec<u8>) -> PyResult<Vec<HashMapItem>> {
fn decode_dag_cbor_multi(py: Python, data: &[u8]) -> PyResult<Vec<PyObject>> {
let mut reader = BufReader::new(Cursor::new(data));

let mut parts = Vec::new();

loop {
let cbor = parse_dag_cbor_object(&mut reader);
if let Ok(cbor) = cbor {
parts.push(_ipld_to_python(cbor));
let ipld = parse_dag_cbor_object(&mut reader);
if let Ok(cbor) = ipld {
parts.push(ipld_to_pyobject(py, &cbor));
} else {
break;
}
}

Ok(parts)
}

fn _decode_dag_cbor(data: Vec<u8>) -> Result<Ipld> {
let mut reader = BufReader::new(Cursor::new(data));
parse_dag_cbor_object(&mut reader)
}
#[pyfunction]
fn decode_car<'py>(py: Python<'py>, data: &[u8]) -> PyResult<(&'py PyDict, &'py PyDict)> {
let car_response = executor::block_on(CarReader::new(data));
if let Err(e) = car_response {
return Err(get_err("Failed to decode CAR", e.to_string()));
}

fn _ipld_to_python(ipld: Ipld) -> HashMapItem {
ipld_to_hashmap(ipld.clone())
}
let car = car_response.unwrap();

#[pyfunction]
fn decode_car(data: Vec<u8>) -> (HashMapItem, HashMap<String, HashMapItem>) {
let car = executor::block_on(CarReader::new(data.as_slice())).unwrap();
let header = car_header_to_hashmap(car.header());
let blocks = executor::block_on(car
.stream()
.filter_map(|block| async {
if let Ok((cid, bytes)) = block {
let mut reader = BufReader::new(Cursor::new(bytes));

let ipld = parse_dag_cbor_object(&mut reader);
if let Ok(ipld) = ipld {
Some((cid.to_string(), ipld))
} else {
None
}
} else {
None
}
})
.collect::<HashMap<String, Ipld>>());
let header = car_header_to_pydict(py, car.header());
let parsed_blocks = PyDict::new(py);

let mut decoded_blocks = HashMap::new();
for (cid, ipld) in &blocks {
decoded_blocks.insert(cid.to_string(), _ipld_to_python(ipld.clone()));
}
let blocks: Vec<Result<(Cid, Vec<u8>), Error>> = executor::block_on(car.stream().collect());
blocks.into_iter().for_each(|block| {
if let Ok((cid, bytes)) = block {
let ipld = DagCborCodec.decode(&bytes);
if let Ok(ipld) = ipld {
let key = cid.to_string().to_object(py);
let value = ipld_to_pyobject(py, &ipld);
parsed_blocks.set_item(key, value).unwrap();
}
}
});

(header, decoded_blocks)
Ok((header, parsed_blocks))
}

#[pyfunction]
fn decode_dag_cbor(data: Vec<u8>) -> PyResult<HashMapItem> {
Ok(_ipld_to_python(_decode_dag_cbor(data)?))
/// Decodes a single DAG-CBOR value from `data` and returns it as a Python object.
///
/// # Errors
///
/// Returns the error built by `get_err` with the prefix
/// "Failed to decode DAG-CBOR" when the codec rejects `data`.
fn decode_dag_cbor(py: Python, data: &[u8]) -> PyResult<PyObject> {
    match DagCborCodec.decode(data) {
        Ok(ipld) => Ok(ipld_to_pyobject(py, &ipld)),
        Err(cause) => Err(get_err("Failed to decode DAG-CBOR", cause.to_string())),
    }
}

#[pyfunction]
fn decode_cid(data: String) -> PyResult<HashMapItem> {
let cid = Cid::try_from(data.as_str()).unwrap();
Ok(cid_to_hashmap(&cid))
/// Parses `data` as a CID string and returns its parts as a Python dict
/// (see `cid_to_pydict` for the keys).
///
/// # Errors
///
/// Returns the error built by `get_err` with the prefix
/// "Failed to decode CID" when `data` cannot be parsed as a CID.
fn decode_cid(py: Python, data: String) -> PyResult<&PyDict> {
    match Cid::try_from(data.as_str()) {
        Ok(cid) => Ok(cid_to_pydict(py, &cid)),
        Err(cause) => Err(get_err("Failed to decode CID", cause.to_string())),
    }
}

#[pyfunction]
fn decode_multibase(py: Python, data: String) -> (char, PyObject) {
let (base, data) = multibase::decode(data).unwrap();
(base.code(), PyBytes::new(py, &data).into())
/// Decodes a multibase string, returning the base's code character together
/// with the decoded payload as Python `bytes`.
///
/// # Errors
///
/// Returns the error built by `get_err` with the prefix
/// "Failed to decode multibase" when `multibase::decode` fails.
fn decode_multibase(py: Python, data: String) -> PyResult<(char, PyObject)> {
    match multibase::decode(data) {
        Ok((base, payload)) => Ok((base.code(), PyBytes::new(py, &payload).into())),
        Err(cause) => Err(get_err("Failed to decode multibase", cause.to_string())),
    }
}

#[pyfunction]
fn encode_multibase(code: char, data: Vec<u8>) -> String {
let base = multibase::Base::from_code(code).unwrap();
let encoded = multibase::encode(base, data);
encoded
/// Encodes `data` as a multibase string using the base identified by `code`.
///
/// # Errors
///
/// Returns the error built by `get_err` with the prefix
/// "Failed to encode multibase" when `code` does not map to a known base.
fn encode_multibase(code: char, data: &[u8]) -> PyResult<String> {
    match multibase::Base::from_code(code) {
        Ok(base) => Ok(multibase::encode(base, data)),
        Err(cause) => Err(get_err("Failed to encode multibase", cause.to_string())),
    }
}

/// Builds a Python `ValueError` whose message is `msg` and `err` joined as
/// "`msg`. `err`".
fn get_err(msg: &str, err: String) -> PyErr {
    let text = format!("{}. {}", msg, err);
    pyo3::exceptions::PyValueError::new_err(text)
}

#[pymodule]
Expand Down

0 comments on commit 37a1544

Please sign in to comment.