diff --git a/Cargo.toml b/Cargo.toml index 164cc11..9ff7784 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "myval" -version = "0.1.15" +version = "0.1.16" edition = "2021" authors = ["Serhij S. "] license = "Apache-2.0" diff --git a/src/df.rs b/src/df.rs index 3ad9795..1f0a6f5 100644 --- a/src/df.rs +++ b/src/df.rs @@ -11,6 +11,7 @@ use arrow2::io::ipc::read::{StreamReader, StreamState}; use arrow2::io::ipc::write::{StreamWriter, WriteOptions}; use arrow2::types::NativeType; use chrono::{DateTime, Local, NaiveDateTime, SecondsFormat, Utc}; +use std::fmt; use std::ops::{Add, Div, Mul, Sub}; use std::str::FromStr; @@ -379,6 +380,30 @@ impl DataFrame { df.metadata = metadata; Ok(df) } + /// Clone series by name + pub fn clone_series(&mut self, name: &str) -> Result<(Series, DataType), Error> { + if let Some((pos, _)) = self + .fields + .iter() + .enumerate() + .find(|(_, field)| field.name == name) + { + Ok((self.data[pos].clone(), self.fields[pos].data_type.clone())) + } else { + Err(Error::NotFound(name.to_owned())) + } + } + /// Clone series by index + pub fn clone_series_at(&mut self, index: usize) -> Result<(Series, DataType), Error> { + if index < self.fields.len() { + Ok(( + self.data[index].clone(), + self.fields[index].data_type.clone(), + )) + } else { + Err(Error::OutOfBounds) + } + } /// Pop series by name pub fn pop_series(&mut self, name: &str) -> Result<(Series, DataType), Error> { if let Some((pos, _)) = self @@ -446,6 +471,37 @@ impl DataFrame { Err(Error::OutOfBounds) } } + /// Convert to string + pub fn stringify(&mut self, name: &str) -> Result<(), Error> + where + T: NativeType + fmt::Display, + { + if let Some(pos) = self.get_column_index(name) { + self.stringify_at::(pos) + } else { + Err(Error::NotFound(name.to_owned())) + } + } + pub fn stringify_at(&mut self, index: usize) -> Result<(), Error> + where + T: NativeType + fmt::Display, + { + if let Some(series) = self.data.get(index) { + let values: &PrimitiveArray = + series.as_any().downcast_ref().ok_or(Error::TypeMismatch)?; + #[allow(clippy::redundant_closure_for_method_calls)] + let dt: Vec> = values + .into_iter() + .map(|v| v.map(|n| n.to_string())) + .collect(); + let arr = Utf8Array::::from(dt); + self.data[index] = arr.boxed(); + self.fields[index].data_type = DataType::LargeUtf8; + Ok(()) + } else { + Err(Error::OutOfBounds) + } + } /// apply a custom function pub fn apply(&mut self, name: &str, func: F) -> Result<(), Error> where @@ -469,7 +525,10 @@ impl DataFrame { let values: &PrimitiveArray = series.as_any().downcast_ref().ok_or(Error::TypeMismatch)?; let dt: Vec> = values.into_iter().map(|v| func(v.copied())).collect(); - self.data[index] = PrimitiveArray::::from(dt).boxed(); + let arr = PrimitiveArray::::from(dt).boxed(); + let dtype = arr.data_type().clone(); + self.data[index] = arr; + self.fields[index].data_type = dtype; Ok(()) } else { Err(Error::OutOfBounds)