From 37a1544cce84bc5d1d22d5f80e8c6ef9869df6c8 Mon Sep 17 00:00:00 2001 From: Ilya Siamionau Date: Sat, 24 Feb 2024 20:37:58 +0100 Subject: [PATCH] Add error handling; fix performance; fix cid decoding (#4) --- Cargo.toml | 4 +- src/lib.rs | 286 ++++++++++++++++++++++++----------------------------- 2 files changed, 130 insertions(+), 160 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 096b53e..5634c8c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "libipld" -version = "1.1.0" +version = "1.1.1" edition = "2021" license = "MIT" description = "Python binding to the Rust IPLD library" @@ -13,7 +13,7 @@ name = "libipld" crate-type = ["cdylib"] [dependencies] -pyo3 = { version = "0.19", features = ["generate-import-lib", "anyhow"] } +pyo3 = { version = "0.20", features = ["generate-import-lib", "anyhow"] } python3-dll-a = "0.2.7" anyhow = "1.0.75" futures = "0.3" diff --git a/src/lib.rs b/src/lib.rs index cd026ae..1fb4f11 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,217 +1,187 @@ -use std::borrow::Cow; -use std::collections::{HashMap}; use std::io::{BufReader, Cursor, Read, Seek}; -use pyo3::prelude::*; -use pyo3::conversion::ToPyObject; -use pyo3::{PyObject, Python}; -use pyo3::types::{PyBytes}; + +use ::libipld::{cid::Cid, Ipld}; +use ::libipld::cbor::{cbor::MajorKind, DagCborCodec, decode}; +use ::libipld::prelude::Codec; use anyhow::Result; -use iroh_car::{CarHeader, CarReader}; use futures::{executor, stream::StreamExt}; -use ::libipld::cbor::cbor::MajorKind; -use ::libipld::cbor::decode; -use ::libipld::{cid::Cid, Ipld}; - - -#[derive(Clone, PartialEq)] -pub enum HashMapItem { - Null, - Bool(bool), - Integer(i128), - Float(f64), - String(String), - List(Vec), - Map(HashMap), - Bytes(Cow<'static, [u8]>), -} - -impl HashMapItem { - fn value(&self) -> PyObject { - Python::with_gil(|py| match self { - Self::Null => py.None(), - Self::Bool(b) => b.to_object(py), - Self::String(s) => s.to_object(py), - Self::Integer(i) => i.to_object(py), - Self::Float(f) => f.to_object(py), - Self::List(l) => l.to_object(py), - Self::Map(m) => m.to_object(py), - Self::Bytes(b) => b.to_object(py), - }) +use iroh_car::{CarHeader, CarReader, Error}; +use pyo3::{PyObject, Python}; +use pyo3::conversion::ToPyObject; +use pyo3::prelude::*; +use pyo3::types::{PyBytes, PyDict, PyList}; + +fn ipld_to_pyobject(py: Python<'_>, ipld: &Ipld) -> PyObject { + match ipld { + Ipld::Null => py.None(), + Ipld::Bool(b) => b.to_object(py), + Ipld::Integer(i) => i.to_object(py), + Ipld::Float(f) => f.to_object(py), + Ipld::String(s) => s.to_object(py), + Ipld::Bytes(b) => PyBytes::new(py, b).into(), + Ipld::Link(cid) => cid.to_string().to_object(py), + Ipld::List(l) => { + let list_obj = PyList::empty(py); + l.iter().for_each(|item| { + let item_obj = ipld_to_pyobject(py, item); + list_obj.append(item_obj).unwrap(); + }); + list_obj.into() + } + Ipld::Map(m) => { + let dict_obj = PyDict::new(py); + m.iter().for_each(|(key, value)| { + let key_obj = key.to_object(py); + let value_obj = ipld_to_pyobject(py, value); + dict_obj.set_item(key_obj, value_obj).unwrap(); + }); + dict_obj.into() + } } } -impl ToPyObject for HashMapItem { - fn to_object(&self, _: Python<'_>) -> PyObject { - self.value().into() - } -} +fn car_header_to_pydict<'py>(py: Python<'py>, header: &CarHeader) -> &'py PyDict { + let dict_obj = PyDict::new(py); -impl IntoPy> for HashMapItem { - fn into_py(self, _: Python<'_>) -> Py { - self.value().into() - } -} + dict_obj.set_item("version", header.version()).unwrap(); + let roots = PyList::empty(py); + header.roots().iter().for_each(|cid| { + let cid_obj = cid.to_string().to_object(py); + roots.append(cid_obj).unwrap(); + }); -fn ipld_to_hashmap(x: Ipld) -> HashMapItem { - match x { - Ipld::Null => HashMapItem::Null, - Ipld::Bool(b) => HashMapItem::Bool(b), - Ipld::Integer(i) => HashMapItem::Integer(i), - Ipld::Float(f) => HashMapItem::Float(f), - Ipld::String(s) => HashMapItem::String(s), - Ipld::Bytes(b) => HashMapItem::Bytes(Cow::Owned(b)), - Ipld::List(l) => HashMapItem::List(l.into_iter().map(ipld_to_hashmap).collect()), - Ipld::Map(m) => HashMapItem::Map( - m.into_iter() - .map(|(k, v)| (k, ipld_to_hashmap(v))) - .collect(), - ), - Ipld::Link(cid) => HashMapItem::String(cid.to_string()), - } -} + dict_obj.set_item("roots", roots).unwrap(); -fn car_header_to_hashmap(header: &CarHeader) -> HashMapItem { - HashMapItem::Map( - vec![ - ("version".to_string(), HashMapItem::Integer(header.version() as i128)), - ( - "roots".to_string(), - HashMapItem::List( - header - .roots() - .iter() - .map(|cid| HashMapItem::String(cid.to_string())) - .collect(), - ), - ), - ] - .into_iter() - .collect(), - ) + dict_obj.into() } -fn _cid_hash_to_hashmap(cid: &Cid) -> HashMapItem { +fn cid_hash_to_pydict<'py>(py: Python<'py>, cid: &Cid) -> &'py PyDict { let hash = cid.hash(); - HashMapItem::Map( - vec![ - ("code".to_string(), HashMapItem::Integer(hash.code() as i128)), - ("size".to_string(), HashMapItem::Integer(hash.size() as i128)), - ("digest".to_string(), HashMapItem::Bytes(Cow::Owned(hash.digest().to_vec()))), - ] - .into_iter() - .collect(), - ) + let dict_obj = PyDict::new(py); + + dict_obj.set_item("code", hash.code()).unwrap(); + dict_obj.set_item("size", hash.size()).unwrap(); + dict_obj.set_item("digest", PyBytes::new(py, &hash.digest())).unwrap(); + + dict_obj.into() } -fn cid_to_hashmap(cid: &Cid) -> HashMapItem { - HashMapItem::Map( - vec![ - ("version".to_string(), HashMapItem::Integer(cid.version() as i128)), - ("codec".to_string(), HashMapItem::Integer(cid.codec() as i128)), - ("hash".to_string(), _cid_hash_to_hashmap(cid)), - ] - .into_iter() - .collect(), - ) +fn cid_to_pydict<'py>(py: Python<'py>, cid: &Cid) -> &'py PyDict { + let dict_obj = PyDict::new(py); + + dict_obj.set_item("version", cid.version() as u64).unwrap(); + dict_obj.set_item("codec", cid.codec()).unwrap(); + dict_obj.set_item("hash", cid_hash_to_pydict(py, cid)).unwrap(); + + dict_obj.into() } -fn parse_dag_cbor_object(mut reader: &mut BufReader) -> Result { - let major = decode::read_major(&mut reader)?; +fn parse_dag_cbor_object(r: &mut R) -> Result { + let major = decode::read_major(r)?; Ok(match major.kind() { MajorKind::UnsignedInt | MajorKind::NegativeInt => Ipld::Integer(major.info() as i128), - MajorKind::ByteString => Ipld::Bytes(decode::read_bytes(&mut reader, major.info() as u64)?), - MajorKind::TextString => Ipld::String(decode::read_str(&mut reader, major.info() as u64)?), - MajorKind::Array => Ipld::List(decode::read_list(&mut reader, major.info() as u64)?), - MajorKind::Map => Ipld::Map(decode::read_map(&mut reader, major.info() as u64)?), + MajorKind::ByteString => Ipld::Bytes(decode::read_bytes(r, major.info() as u64)?), + MajorKind::TextString => Ipld::String(decode::read_str(r, major.info() as u64)?), + MajorKind::Array => Ipld::List(decode::read_list(r, major.info() as u64)?), + MajorKind::Map => Ipld::Map(decode::read_map(r, major.info() as u64)?), MajorKind::Tag => { if major.info() != 42 { return Err(anyhow::anyhow!("non-42 tags are not supported")); } - parse_dag_cbor_object(reader)? + Ipld::Link(decode::read_link(r)?) } MajorKind::Other => Ipld::Null, }) } #[pyfunction] -fn decode_dag_cbor_multi(data: Vec) -> PyResult> { +fn decode_dag_cbor_multi(py: Python, data: &[u8]) -> PyResult> { let mut reader = BufReader::new(Cursor::new(data)); - let mut parts = Vec::new(); + loop { - let cbor = parse_dag_cbor_object(&mut reader); - if let Ok(cbor) = cbor { - parts.push(_ipld_to_python(cbor)); + let ipld = parse_dag_cbor_object(&mut reader); + if let Ok(cbor) = ipld { + parts.push(ipld_to_pyobject(py, &cbor)); } else { break; } } + Ok(parts) } -fn _decode_dag_cbor(data: Vec) -> Result { - let mut reader = BufReader::new(Cursor::new(data)); - parse_dag_cbor_object(&mut reader) -} +#[pyfunction] +fn decode_car<'py>(py: Python<'py>, data: &[u8]) -> PyResult<(&'py PyDict, &'py PyDict)> { + let car_response = executor::block_on(CarReader::new(data)); + if let Err(e) = car_response { + return Err(get_err("Failed to decode CAR", e.to_string())); + } -fn _ipld_to_python(ipld: Ipld) -> HashMapItem { - ipld_to_hashmap(ipld.clone()) -} + let car = car_response.unwrap(); -#[pyfunction] -fn decode_car(data: Vec) -> (HashMapItem, HashMap) { - let car = executor::block_on(CarReader::new(data.as_slice())).unwrap(); - let header = car_header_to_hashmap(car.header()); - let blocks = executor::block_on(car - .stream() - .filter_map(|block| async { - if let Ok((cid, bytes)) = block { - let mut reader = BufReader::new(Cursor::new(bytes)); - - let ipld = parse_dag_cbor_object(&mut reader); - if let Ok(ipld) = ipld { - Some((cid.to_string(), ipld)) - } else { - None - } - } else { - None - } - }) - .collect::>()); + let header = car_header_to_pydict(py, car.header()); + let parsed_blocks = PyDict::new(py); - let mut decoded_blocks = HashMap::new(); - for (cid, ipld) in &blocks { - decoded_blocks.insert(cid.to_string(), _ipld_to_python(ipld.clone())); - } + let blocks: Vec), Error>> = executor::block_on(car.stream().collect()); + blocks.into_iter().for_each(|block| { + if let Ok((cid, bytes)) = block { + let ipld = DagCborCodec.decode(&bytes); + if let Ok(ipld) = ipld { + let key = cid.to_string().to_object(py); + let value = ipld_to_pyobject(py, &ipld); + parsed_blocks.set_item(key, value).unwrap(); + } + } + }); - (header, decoded_blocks) + Ok((header, parsed_blocks)) } #[pyfunction] -fn decode_dag_cbor(data: Vec) -> PyResult { - Ok(_ipld_to_python(_decode_dag_cbor(data)?)) +fn decode_dag_cbor(py: Python, data: &[u8]) -> PyResult { + let ipld = DagCborCodec.decode(data); + if let Ok(ipld) = ipld { + Ok(ipld_to_pyobject(py, &ipld)) + } else { + Err(get_err("Failed to decode DAG-CBOR", ipld.unwrap_err().to_string())) + } } #[pyfunction] -fn decode_cid(data: String) -> PyResult { - let cid = Cid::try_from(data.as_str()).unwrap(); - Ok(cid_to_hashmap(&cid)) +fn decode_cid(py: Python, data: String) -> PyResult<&PyDict> { + let cid = Cid::try_from(data.as_str()); + if let Ok(cid) = cid { + Ok(cid_to_pydict(py, &cid)) + } else { + Err(get_err("Failed to decode CID", cid.unwrap_err().to_string())) + } } #[pyfunction] -fn decode_multibase(py: Python, data: String) -> (char, PyObject) { - let (base, data) = multibase::decode(data).unwrap(); - (base.code(), PyBytes::new(py, &data).into()) +fn decode_multibase(py: Python, data: String) -> PyResult<(char, PyObject)> { + let base = multibase::decode(data); + if let Ok((base, data)) = base { + Ok((base.code(), PyBytes::new(py, &data).into())) + } else { + Err(get_err("Failed to decode multibase", base.unwrap_err().to_string())) + } } #[pyfunction] -fn encode_multibase(code: char, data: Vec) -> String { - let base = multibase::Base::from_code(code).unwrap(); - let encoded = multibase::encode(base, data); - encoded +fn encode_multibase(code: char, data: &[u8]) -> PyResult { + let base = multibase::Base::from_code(code); + if let Ok(base) = base { + Ok(multibase::encode(base, data)) + } else { + Err(get_err("Failed to encode multibase", base.unwrap_err().to_string())) + } +} + +fn get_err(msg: &str, err: String) -> PyErr { + PyErr::new::(format!("{}. {}", msg, err)) } #[pymodule]