From 32e4cc74749f8883a2e896f0c2ef96da78734b04 Mon Sep 17 00:00:00 2001 From: Lance Gliser Date: Thu, 10 Oct 2024 14:51:23 -0500 Subject: [PATCH] Updated inferFeatureAttributes options to match standard engine options --- MIGRATION.md | 21 +++++++++++++-- src/features/base.ts | 32 ++++++++++------------- src/features/sources/Array.test.ts | 2 +- src/features/sources/Array.ts | 22 +++++----------- src/features/sources/Base.ts | 20 +++++--------- src/features/sources/examples/CSV.test.ts | 2 +- 6 files changed, 48 insertions(+), 51 deletions(-) diff --git a/MIGRATION.md b/MIGRATION.md index 0cfe086..08c4e8c 100644 --- a/MIGRATION.md +++ b/MIGRATION.md @@ -4,7 +4,7 @@ This major change refactors the client and types for all Howso Engine operations. -### Typing Changes +### Typing changes - Most of all the types are now autogenerated from the Engine API, and will have a different naming schema across the board. However, most of the type's properties should remain the same. @@ -13,7 +13,7 @@ This major change refactors the client and types for all Howso Engine operations - The existing `Trainee` type has been renamed to `BaseTrainee` and no longer has a `features` property. Request features via the method `getFeatureAttributes` instead. -### Client Changes +### Client changes - `BaseClient` has been renamed to `AbstractBaseClient`. - `WasmClient` has been renamed to `HowsoWorkerClient`. @@ -33,6 +33,23 @@ This major change refactors the client and types for all Howso Engine operations - The `react` method now uses `context_values` instead of `context` and `action_values` instead of `actions`. - `local_*` react features have been removed. Use their unqualified versions instead. 
+### Utilities changes + +- The options of `inferFeatureAttributes`'s `service.infer`'s `InferFeatureAttributesOptions` have been updated to + a subset of standard [engine options](https://docs.howso.com/en/release-latest/api_reference/_autosummary/howso.utilities.html#howso.utilities.infer_feature_attributes). Please remap to the following: + +```ts +type InferFeatureAttributesOptions = { + dependent_features?: Record<string, string[]>; + features?: FeatureAttributesIndex; + include_sample?: boolean; + infer_bounds?: boolean; + mode_bound_features?: string[]; + ordinal_feature_values?: Record<string, string[]>; + tight_bounds?: boolean; +}; +``` + ## 5.x The `inferFeatureAttributes` function now requires a `sourceFormat` argument, and is strongly typed through a union. diff --git a/src/features/base.ts b/src/features/base.ts index f78ae80..2411a99 100644 --- a/src/features/base.ts +++ b/src/features/base.ts @@ -2,24 +2,20 @@ import type { FeatureAttributesIndex } from "../types"; export type FeatureSourceFormat = "unknown" | "array" | "parsed_array"; -export interface InferFeatureBoundsOptions { - tightBounds?: boolean | string[]; - modeBounds?: boolean | string[]; -} - -export interface InferFeatureTimeSeriesOptions { - timeFeature: string; - idFeatureName?: string; -} - -export interface InferFeatureAttributesOptions { - defaults?: FeatureAttributesIndex; - inferBounds?: boolean | InferFeatureBoundsOptions; - timeSeries?: InferFeatureTimeSeriesOptions; - ordinalFeatureValues?: Record; - dependentFeatures?: Record; - includeSample?: boolean; -} +/** + * A subset of supported options. + * Full options are available only when using the Python direct client or Platform installation. 
+ * @see https://docs.howso.com/en/release-latest/api_reference/_autosummary/howso.utilities.html#howso.utilities.infer_feature_attributes + **/ +export type InferFeatureAttributesOptions = { + dependent_features?: Record; + features?: FeatureAttributesIndex; + include_sample?: boolean; + infer_bounds?: boolean; + mode_bound_features?: string[]; + ordinal_feature_values?: Record; + tight_bounds?: boolean; +}; export interface ArrayData { readonly columns: C[]; diff --git a/src/features/sources/Array.test.ts b/src/features/sources/Array.test.ts index e04ccc4..027860d 100644 --- a/src/features/sources/Array.test.ts +++ b/src/features/sources/Array.test.ts @@ -70,7 +70,7 @@ describe("features/sources/Array", () => { number: { type: "continuous", data_type: "number" }, date: { type: "continuous", data_type: "formatted_date_time", date_time_format: "%Y-%m-%dT%h-%m-%s" }, }; - const features = await service.infer({ defaults, includeSample: true }); + const features = await service.infer({ features: defaults, include_sample: true }); expectFeatureAttributesIndex(features); // Id diff --git a/src/features/sources/Array.ts b/src/features/sources/Array.ts index 5df94cb..9a14e87 100644 --- a/src/features/sources/Array.ts +++ b/src/features/sources/Array.ts @@ -1,12 +1,5 @@ import type { FeatureAttributes } from "../../types"; -import { - AbstractDataType, - ArrayData, - FeatureSourceFormat, - InferFeatureBoundsOptions, - InferFeatureTimeSeriesOptions, - isArrayData, -} from "../base"; +import { AbstractDataType, ArrayData, FeatureSourceFormat, InferFeatureAttributesOptions, isArrayData } from "../base"; import * as utils from "../utils"; import { FeatureSerializerBase, InferFeatureAttributeFeatureStatistics, InferFeatureAttributesBase } from "./Base"; @@ -124,7 +117,7 @@ export class InferFeatureAttributesFromArray extends InferFeatureAttributesBase public async inferBounds( attributes: Readonly, featureName: string, - options: InferFeatureBoundsOptions, + options: 
InferFeatureAttributesOptions, ): Promise { const { minimum, maximum, hasNulls, samples, uniqueValues, totalValues } = await this.getStatistics(featureName); @@ -167,15 +160,12 @@ export class InferFeatureAttributesFromArray extends InferFeatureAttributesBase const actualMax = maxValue; if (minValue !== undefined && maxValue !== undefined) { - if ( - !options.tightBounds || - (Array.isArray(options.tightBounds) && options.tightBounds.indexOf(featureName) === -1) - ) { + if (!options.tight_bounds) { // Use loose bounds [minValue, maxValue] = utils.guessLooseBounds(minValue, maxValue); - const { modeBounds = true } = options; - if (modeBounds || (Array.isArray(modeBounds) && modeBounds.indexOf(featureName) >= 0)) { + const { mode_bound_features = [] } = options; + if (mode_bound_features.includes(featureName)) { // Check for mode bounds if (uniqueValues !== totalValues) { const [modes, modeCount] = utils.allModes(column); @@ -234,7 +224,7 @@ export class InferFeatureAttributesFromArray extends InferFeatureAttributesBase /* eslint-disable-next-line @typescript-eslint/no-unused-vars*/ featureName: string, /* eslint-disable-next-line @typescript-eslint/no-unused-vars*/ - options: InferFeatureTimeSeriesOptions, + options: InferFeatureAttributesOptions, ): Promise> { // TODO - infer time series throw new Error("Method not implemented."); diff --git a/src/features/sources/Base.ts b/src/features/sources/Base.ts index 57ad586..460f02c 100644 --- a/src/features/sources/Base.ts +++ b/src/features/sources/Base.ts @@ -1,11 +1,5 @@ import { FeatureAttributes, FeatureAttributesIndex, FeatureOriginalType } from "../../types"; -import { - AbstractDataType, - FeatureSourceFormat, - InferFeatureAttributesOptions, - InferFeatureBoundsOptions, - InferFeatureTimeSeriesOptions, -} from "../base"; +import { AbstractDataType, FeatureSourceFormat, InferFeatureAttributesOptions } from "../base"; import { coerceDate } from "../utils"; export type InferFeatureAttributeFeatureStatistics = { @@ 
-37,10 +31,10 @@ export abstract class InferFeatureAttributesBase { // Loop the columns into attributes immediately to get order assigned. Probably should be a Map... const columns = await this.getFeatureNames(); const attributes: FeatureAttributesIndex = columns.reduce((attributes, column) => { - attributes[column] = (options.defaults?.[column] || {}) as FeatureAttributes; + attributes[column] = (options.features?.[column] || {}) as FeatureAttributes; return attributes; }, {} as FeatureAttributesIndex); - const { ordinalFeatureValues = {}, dependentFeatures = {} } = options; + const { ordinal_feature_values: ordinalFeatureValues = {}, dependent_features: dependentFeatures = {} } = options; const getFeatureAttributes = async (featureName: string): Promise => { const originalFeatureType = await this.getOriginalFeatureType(featureName); @@ -100,7 +94,7 @@ export abstract class InferFeatureAttributesBase { } // Infer bounds - const { inferBounds = true } = options; + const { infer_bounds: inferBounds = true } = options; if (inferBounds && !attributes[featureName].bounds) { const bounds = await this.inferBounds( attributes[featureName], @@ -117,7 +111,7 @@ export abstract class InferFeatureAttributesBase { // TODO - infer time series // } - if (options.includeSample) { + if (options.include_sample) { additions.sample = await this.getSample(featureName); } @@ -248,12 +242,12 @@ export abstract class InferFeatureAttributesBase { public abstract inferBounds( attributes: Readonly, featureName: string, - options: InferFeatureBoundsOptions, + options: InferFeatureAttributesOptions, ): Promise; public abstract inferTimeSeries( attributes: Readonly, featureName: string, - options: InferFeatureTimeSeriesOptions, + options: InferFeatureAttributesOptions, ): Promise>; protected async getSample(featureName: string): Promise { diff --git a/src/features/sources/examples/CSV.test.ts b/src/features/sources/examples/CSV.test.ts index 5f1b163..c2de658 100644 --- 
a/src/features/sources/examples/CSV.test.ts +++ b/src/features/sources/examples/CSV.test.ts @@ -60,7 +60,7 @@ describe("features/sources/CSV", () => { const service = new InferFeatureAttributesFromCSV(data, serviceOptions); expect(service.samples?.length).toBe(serviceOptions.samplesLimit); - const features = await service.infer({ includeSample: true }); + const features = await service.infer({ include_sample: true }); expect(Object.keys(features)).toStrictEqual(columns); expectFeatureAttributesIndex(features); expectAsteroids(features);