-
-
Notifications
You must be signed in to change notification settings - Fork 255
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Target naming #256
base: master
Are you sure you want to change the base?
Target naming #256
Changes from 6 commits
b96d84f
a5b344a
81dfd57
9ef33d5
c0d0c4b
e3234e6
b4ed2a3
3d07413
5842ecb
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -30,6 +30,7 @@ impl<R: Records, S> DatasetBase<R, S> { | |
targets, | ||
weights: Array1::zeros(0), | ||
feature_names: Vec::new(), | ||
target_names: Vec::new(), | ||
} | ||
} | ||
|
||
|
@@ -81,13 +82,14 @@ impl<R: Records, S> DatasetBase<R, S> { | |
/// Updates the records of a dataset | ||
/// | ||
/// This function overwrites the records in a dataset. It also invalidates the weights and | ||
/// feature names. | ||
/// feature/target names. | ||
pub fn with_records<T: Records>(self, records: T) -> DatasetBase<T, S> { | ||
DatasetBase { | ||
records, | ||
targets: self.targets, | ||
weights: Array1::zeros(0), | ||
feature_names: Vec::new(), | ||
target_names: Vec::new(), | ||
} | ||
} | ||
|
||
|
@@ -100,6 +102,7 @@ impl<R: Records, S> DatasetBase<R, S> { | |
targets, | ||
weights: self.weights, | ||
feature_names: self.feature_names, | ||
target_names: self.target_names, | ||
} | ||
} | ||
|
||
|
@@ -118,6 +121,15 @@ impl<R: Records, S> DatasetBase<R, S> { | |
|
||
self | ||
} | ||
|
||
/// Updates the target names of a dataset | ||
pub fn with_target_names<I: Into<String>>(mut self, names: Vec<I>) -> DatasetBase<R, S> { | ||
let target_names = names.into_iter().map(|x| x.into()).collect(); | ||
|
||
self.target_names = target_names; | ||
|
||
self | ||
} | ||
} | ||
|
||
impl<L, R: Records, T: AsTargets<Elem = L>> DatasetBase<R, T> { | ||
|
@@ -143,6 +155,7 @@ impl<L, R: Records, T: AsTargets<Elem = L>> DatasetBase<R, T> { | |
targets, | ||
weights, | ||
feature_names, | ||
target_names, | ||
.. | ||
} = self; | ||
|
||
|
@@ -153,6 +166,20 @@ impl<L, R: Records, T: AsTargets<Elem = L>> DatasetBase<R, T> { | |
targets: targets.map(fnc), | ||
weights, | ||
feature_names, | ||
target_names, | ||
} | ||
} | ||
|
||
/// Returns target names | ||
/// | ||
/// A target name gives a human-readable string describing the purpose of a single target. | ||
pub fn target_names(&self) -> Vec<String> { | ||
if !self.target_names.is_empty() { | ||
self.target_names.clone() | ||
} else { | ||
(0..self.ntargets()) | ||
.map(|idx| format!("class-{}", idx)) | ||
.collect() | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If there are no names, this method should instead just return an empty list. The signature should be |
||
} | ||
} | ||
|
||
|
@@ -216,6 +243,7 @@ where | |
DatasetBase::new(records, targets) | ||
.with_feature_names(self.feature_names.clone()) | ||
.with_weights(self.weights.clone()) | ||
.with_target_names(self.target_names.clone()) | ||
} | ||
|
||
/// Iterate over features | ||
|
@@ -289,11 +317,13 @@ where | |
}; | ||
let dataset1 = DatasetBase::new(records_first, targets_first) | ||
.with_weights(first_weights) | ||
.with_feature_names(self.feature_names.clone()); | ||
.with_feature_names(self.feature_names.clone()) | ||
.with_target_names(self.target_names.clone()); | ||
|
||
let dataset2 = DatasetBase::new(records_second, targets_second) | ||
.with_weights(second_weights) | ||
.with_feature_names(self.feature_names.clone()); | ||
.with_feature_names(self.feature_names.clone()) | ||
.with_target_names(self.target_names.clone()); | ||
|
||
(dataset1, dataset2) | ||
} | ||
|
@@ -339,7 +369,8 @@ where | |
label, | ||
DatasetBase::new(self.records().view(), targets) | ||
.with_feature_names(self.feature_names.clone()) | ||
.with_weights(self.weights.clone()), | ||
.with_weights(self.weights.clone()) | ||
.with_target_names(self.target_names.clone()), | ||
) | ||
}) | ||
.collect()) | ||
|
@@ -395,6 +426,7 @@ impl<F, D: Data<Elem = F>, I: Dimension> From<ArrayBase<D, I>> | |
targets: empty_targets, | ||
weights: Array1::zeros(0), | ||
feature_names: Vec::new(), | ||
target_names: Vec::new(), | ||
} | ||
} | ||
} | ||
|
@@ -411,6 +443,7 @@ where | |
targets: rec_tar.1, | ||
weights: Array1::zeros(0), | ||
feature_names: Vec::new(), | ||
target_names: Vec::new(), | ||
} | ||
} | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -77,16 +77,17 @@ where | |
if self.target_or_feature && self.dataset.nfeatures() <= self.idx { | ||
return None; | ||
} | ||
|
||
let mut records = self.dataset.records.view(); | ||
let mut targets = self.dataset.targets.as_targets(); | ||
let feature_names; | ||
let mut target_names = vec!["class".to_string()]; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Just write |
||
let weights = self.dataset.weights.clone(); | ||
|
||
if !self.target_or_feature { | ||
// This branch should only run for 2D targets | ||
targets.collapse_axis(Axis(1), self.idx); | ||
feature_names = self.dataset.feature_names.clone(); | ||
target_names = self.dataset.target_names.clone(); | ||
oojo12 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} else { | ||
records.collapse_axis(Axis(1), self.idx); | ||
oojo12 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if self.dataset.feature_names.len() == records.len_of(Axis(1)) { | ||
|
@@ -103,6 +104,7 @@ where | |
targets, | ||
weights, | ||
feature_names, | ||
target_names, | ||
}; | ||
|
||
Some(dataset_view) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -164,6 +164,7 @@ impl Deref for Pr { | |
/// * `targets`: a two-/one-dimension matrix with dimensionality (nsamples, ntargets) | ||
/// * `weights`: optional weights for each sample with dimensionality (nsamples) | ||
/// * `feature_names`: optional descriptive feature names with dimensionality (nfeatures) | ||
/// * `target_names`: optional descriptive target names with dimensionality (ntargets) | |
oojo12 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
/// | ||
/// # Trait bounds | ||
/// | ||
|
@@ -180,6 +181,7 @@ where | |
|
||
pub weights: Array1<f32>, | ||
feature_names: Vec<String>, | ||
target_names: Vec<String>, | ||
} | ||
|
||
/// Targets with precomputed, counted labels | ||
|
@@ -333,6 +335,13 @@ mod tests { | |
use ndarray::{array, Array1, Array2, Axis}; | ||
use rand::{rngs::SmallRng, SeedableRng}; | ||
|
||
#[test] | ||
fn set_target_name() { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you also add target name verification to one or two tests with There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Addressed: lines 551-556 now test this. |
||
let dataset = Dataset::new(array![[1., 2.], [1., 2.]], array![0., 1.]) | ||
.with_target_names(vec!["test"]); | ||
assert_eq!(dataset.target_names, vec!["test"]); | ||
} | ||
|
||
#[test] | ||
fn dataset_implements_required_methods() { | ||
let mut rng = SmallRng::seed_from_u64(42); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This method should check the length of the input vector so that it's equal to the number of targets. The input should be a `Vec` so that it can be assigned directly. You can also try implementing this change for `feature_names`, but that might require more work to change.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The input for the function is already `Vec<I>` where `I` implements `Into<String>`. Unless I am mistaken, you want something like this?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, pretty much. I don't mind panicking here, so you can just `assert` the condition.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Gotcha