-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
21169: Infer feature attributes: Added an example CSV parser, fixed a bug that caused iterating over return values to order alphabetically, updated the unknown type defaults (#21)
- Altered the unknown feature return type to `{ type: "continuous", data_type: "number", bounds: { allow_null: true } }`
- Loading branch information
1 parent
fde04d8
commit 86dfea4
Showing
7 changed files
with
276 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
import { readFileSync } from "fs"; | ||
import type { FeatureAttributes, FeatureAttributesIndex } from "../../../types"; | ||
import type { AbstractDataType } from "../../base"; | ||
import { expectFeatureAttributesIndex } from "../../infer.test"; | ||
import { InferFeatureAttributesFromCSV, type InferFeatureAttributesFromCSVOptions } from "./CSV"; | ||
|
||
// Test suite for the example CSV source: format detection plus inference over the asteroids sample file.
describe("features/sources/CSV", () => {
  describe("InferFeatureAttributesFromCSV", () => {
    it("isAcceptedSourceFormat should accept csv only", () => {
      expect(InferFeatureAttributesFromCSV.isAcceptedSourceFormat("" as unknown as AbstractDataType)).toBe(true);
      // @ts-expect-error Invalid data type on purpose
      expect(InferFeatureAttributesFromCSV.isAcceptedSourceFormat([{}, {}])).toBe(false);
    });

    describe("infer", () => {
      // Read once when the suite is collected; every test below parses the same CSV text.
      const data = readFileSync("src/tests/assets/asteroids-sample.csv").toString();

      describe("asteroids", () => {
        // Header row of the sample file, in file order. The inferred feature keys must keep this order.
        const columns = [
          "full_name",
          "a",
          "e",
          "G",
          "i",
          "om",
          "w",
          "q",
          "ad",
          "per_y",
          "data_arc",
          "condition_code",
          "n_obs_used",
          "H",
          "diameter",
          "extent",
          "albedo",
          "rot_per",
          "GM",
          "BV",
          "UB",
          "IR",
          "spec_B",
          "spec_T",
          "neo",
          "pha",
          "moid",
        ];

        // IR is a special snowflake, there's no data for it. Testing the unknown handling
        const expectIRFeature = (attributes: FeatureAttributes) => {
          expect(attributes.type).toBe("continuous");
          expect(attributes.data_type).toBe("number");
          expect(attributes.bounds?.allow_null).toBe(true);
        };

        // Shared assertions: feature keys follow the CSV column order, and the empty IR column gets the unknown defaults.
        const expectAsteroids = (features: FeatureAttributesIndex) => {
          expect(Object.keys(features)).toStrictEqual(columns);
          expectIRFeature(features.IR);
        };

        it("should infer feature attributes from data", async () => {
          const service = new InferFeatureAttributesFromCSV(data);
          const features = await service.infer();
          expectFeatureAttributesIndex(features);
          expectAsteroids(features);
        });

        it("should infer feature attributes from data using options", async () => {
          const serviceOptions: InferFeatureAttributesFromCSVOptions = {
            limit: 5,
            samplesLimit: 2,
          };
          const service = new InferFeatureAttributesFromCSV(data, serviceOptions);
          expect(service.samples?.length).toBe(serviceOptions.samplesLimit);

          const features = await service.infer({ includeSample: true });
          expect(Object.keys(features)).toStrictEqual(columns);
          expectFeatureAttributesIndex(features);
          expectAsteroids(features);
        });
      });
    });
  });
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
/* | ||
* This class is an example implementation of a CSV parser. | ||
* It provides inspiration, and an easy means for testing our own CSV files. | ||
* Your implementation will vary, especially in options. | ||
*/ | ||
|
||
import { autoType, csvParse } from "d3-dsv"; | ||
import type { AbstractDataType, FeatureSourceFormat } from "../../base"; | ||
import { InferFeatureAttributesFromArray } from "../Array"; | ||
|
||
export type InferFeatureAttributesFromCSVOptions = {
  /** Maximum number of leading rows used for inference. When omitted, every parsed row is used. */
  limit?: number;
  /** The number of samples to be returned. Default: 5. A value of 0 skips sample collection. */
  samplesLimit?: number;
};

/**
 * Example inference source that accepts raw CSV text.
 *
 * The text is parsed with d3-dsv, the rows are type-converted with `autoType`, and the result is
 * handed to the array-based implementation as a `columns` list plus a row-major `data` matrix.
 */
export class InferFeatureAttributesFromCSV extends InferFeatureAttributesFromArray {
  public static sourceFormat: FeatureSourceFormat = "parsed_array";

  /**
   * The first `samplesLimit` rows of the parsed CSV, converted by `samplesAutoType`.
   * Taken from the start of the file regardless of `limit`; left `undefined` when `samplesLimit` is 0.
   */
  public readonly samples: ReturnType<typeof samplesAutoType>[] | undefined;

  /** Only raw CSV text (a string) is accepted by this source. */
  public static isAcceptedSourceFormat(data: AbstractDataType): boolean {
    return typeof data === "string";
  }

  /**
   * @param dataset Raw CSV text whose first line is the header row.
   * @param options Row and sample limits; this object is read but never modified.
   */
  constructor(dataset: string, options: InferFeatureAttributesFromCSVOptions = {}) {
    // Resolve the default locally instead of writing it back onto the caller's object.
    // `??` keeps an explicit 0 so callers can turn samples off.
    const samplesLimit = options.samplesLimit ?? 5;

    const raw = csvParse(dataset);
    // autoType converts the row object it is given, so hand it shallow copies and keep
    // `raw` as untouched strings for the samples below.
    const limited = raw.slice(0, options.limit).map((row) => ({ ...row }));
    // @ts-expect-error I'll assign column immediately below
    const data: typeof raw = limited.map(autoType);
    data.columns = raw.columns;

    super({
      columns: data.columns,
      // Read cells by column name: Object.values() would list integer-like headers first,
      // which misaligns the cells with `columns`.
      data: data.map((row) => data.columns.map((column) => row[column])),
    });

    if (samplesLimit > 0) {
      this.samples = raw.slice(0, samplesLimit).map(samplesAutoType);
    }
  }
}
|
||
/**
 * Converts the string cells of one CSV row, in place, for display as a sample.
 *
 * Adapted from https://github.com/d3/d3-dsv/blob/main/src/autoType.js
 * Unlike that version, "true"/"false" and date-like strings are deliberately left as strings
 * (we don't want dates to show).
 *
 * Conversion per cell, after trimming surrounding whitespace:
 * - empty text becomes `null`
 * - the text "NaN" becomes `NaN`
 * - text that parses as a number becomes that number
 * - anything else is left exactly as it was (untrimmed)
 *
 * @param object The row to convert; it is modified and returned.
 * @returns The same object, typed with its converted values.
 */
function samplesAutoType(object: Record<string, string>) {
  // Same object under a wider value type, so converted cells can be written back.
  const converted = object as Record<string, string | number | null>;

  for (const key in object) {
    const text = object[key].trim();

    if (text === "") {
      converted[key] = null;
      continue;
    }

    if (text === "NaN") {
      converted[key] = NaN;
      continue;
    }

    const parsed = Number(text);
    if (!Number.isNaN(parsed)) {
      converted[key] = parsed;
    }
    // Non-numeric text falls through untouched.
  }

  return converted;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
full_name,a,e,G,i,om,w,q,ad,per_y,data_arc,condition_code,n_obs_used,H,diameter,extent,albedo,rot_per,GM,BV,UB,IR,spec_B,spec_T,neo,pha,moid | ||
1 Ceres,2.769165155,0.076009029,0.12,10.59406704,80.30553157,73.59769412,2.5586836,2.979646709,4.608201802,8822,0,1002,3.34,939.4,964.4 x 964.2 x 891.8,0.09,9.07417,62.6284,0.713,0.426,,C,G,N,N,1.59478 | ||
2 Pallas,2.772465922,0.230336821,0.11,34.83623442,173.0800627,310.0488574,2.133864935,3.411066909,4.616443528,72318,0,8490,4.13,545,582x556x500,0.101,7.8132,14.3,0.635,0.284,,B,B,N,N,1.23324 | ||
3 Juno,2.669149517,0.25694232,0.32,12.98891913,169.8527598,248.1386262,1.983332047,3.354966987,4.360813923,72684,0,7104,5.33,246.596,,0.214,7.21,,0.824,0.433,,Sk,S,N,N,1.03454 | ||
4 Vesta,2.361417896,0.08872146,0.32,7.141770812,103.8108044,150.7285413,2.151909454,2.570926338,3.628837138,24288,0,9325,3.2,525.4,572.6 x 557.2 x 446.4,0.4228,5.34212766,17.8,0.782,0.492,,V,V,N,N,1.13948 | ||
5 Astraea,2.574248919,0.191094519,,5.366987944,141.5766042,358.6876078,2.08232406,3.066173778,4.130322954,63431,0,2861,6.85,106.699,,0.274,16.806,,0.826,0.411,,S,S,N,N,1.09589 | ||
6 Hebe,2.42515999,0.203007109,0.24,14.7379011,138.6402028,239.8074902,1.93283527,2.917484709,3.776754838,62329,0,6034,5.71,185.18,,0.2679,7.2745,,0.822,0.399,,S,S,N,N,0.973965 | ||
7 Iris,2.385333814,0.231205792,,5.523651387,259.5632307,145.2651058,1.833830821,2.936836807,3.684104574,62452,0,5206,5.51,199.83,,0.2766,7.139,,0.855,0.484,,S,S,N,N,0.8461 | ||
8 Flora,2.201764189,0.156499251,0.28,5.88695456,110.8893299,285.2874622,1.857189743,2.546338635,3.267114898,62655,0,2744,6.49,147.491,,0.226,12.865,,0.885,0.489,,,S,N,N,0.874176 | ||
9 Metis,2.385636536,0.123114272,0.17,5.576815511,68.9085767,6.417369231,2.09193063,2.679342441,3.684805919,61821,0,2649,6.28,190,,0.118,5.079,,0.858,0.496,,,S,N,N,1.10691 | ||
10 Hygiea,3.141539179,0.112460658,,3.831560034,283.2021669,312.3152062,2.788239617,3.494838741,5.56829099,62175,0,3409,5.43,407.12,,0.0717,27.63,7,0.696,0.351,,C,C,N,N,1.77839 | ||
11 Parthenope,2.453109376,0.100472272,,4.629885838,125.5465868,195.5503938,2.206639904,2.699578848,3.842231878,61699,0,5475,6.55,142.887,,0.191,13.7204,,0.837,0.417,,Sk,S,N,N,1.19322 | ||
12 Victoria,2.334315086,0.220171581,0.22,8.373074237,235.4101683,69.64182011,1.820365243,2.84826493,3.566542615,61581,0,3051,7.24,115.087,,0.163,8.6599,,0.874,0.515,,L,S,N,N,0.824953 | ||
13 Egeria,2.575981091,0.085121415,,16.53612295,43.22191706,80.54483096,2.356709935,2.795252248,4.1344925,61531,0,2359,6.74,222.792,,0.07,7.045,,0.745,0.452,,Ch,G,N,N,1.43633 | ||
14 Irene,2.585567305,0.16658231,,9.121643599,86.12266133,97.85899133,2.15485753,3.016277081,4.157593008,61450,0,2688,6.3,152,,0.159,15.028,,0.833,0.388,,S,S,N,N,1.17966 | ||
15 Eunomia,2.644100304,0.186084362,0.23,11.75242982,292.9343387,98.49868084,2.152074586,3.136126023,4.299570669,61247,0,2501,5.28,231.689,,0.248,6.083,,0.839,0.451,,S,S,N,N,1.19485 | ||
16 Psyche,2.92381368,0.133568415,0.2,3.096005209,150.0456664,228.8230714,2.533284522,3.314342839,4.999571031,12856,0,2364,5.9,226 ,279 x 232 x 189,0.1203,4.196,1.53,0.729,0.299,,X,M,N,N,1.5358 | ||
17 Thetis,2.470354085,0.133031598,,5.591204766,125.5529437,136.2082517,2.141718935,2.798989236,3.882817805,61117,0,3650,7.76,84.899,,0.193,12.27048,,0.829,0.438,,Sl,S,N,N,1.12981 | ||
18 Melpomene,2.29665351,0.217674362,0.25,10.1287313,150.3838618,227.9508469,1.796730922,2.796576098,3.480578404,60906,0,5082,6.51,139.594,,0.181,11.57,,0.854,0.425,,S,S,N,N,0.813258 | ||
19 Fortuna,2.442710697,0.158046851,0.1,1.573782207,211.1440435,182.0650176,2.056647963,2.828773431,3.817827076,60970,0,3316,7.13,200,,0.037,7.4432,,0.719,0.324,,Ch,G,N,N,1.06213 | ||
20 Massalia,2.409781792,0.142066705,0.25,0.708751203,206.1089109,256.7731964,2.067432032,2.752131551,3.740888639,59461,0,2481,6.5,135.68,,0.241,8.098,,0.854,0.463,,S,S,N,N,1.08461 | ||
21 Lutetia,2.434958098,0.163356103,0.11,3.064054861,80.86581221,249.9169213,2.037192832,2.832723363,3.799666154,55915,0,4752,7.35,95.76,,0.2212,8.1655,,0.686,0.189,,Xk,M,N,N,1.02791 | ||
22 Kalliope,2.914849074,0.097676889,0.21,13.71531205,66.05276971,356.0820817,2.630135684,3.199562465,4.976595142,60393,0,2923,6.45,167.536,,0.166,4.1483,0.491,0.715,0.234,,X,M,N,N,1.64321 | ||
23 Thalia,2.625683428,0.234983917,,10.11425811,66.84681403,60.63681441,2.008690051,3.242676805,4.254727466,59343,0,2213,6.95,107.53,,0.2536,12.312,,0.859,0.442,,S,S,N,N,1.04633 | ||
24 Themis,3.136171315,0.124742775,0.19,0.751587935,35.92589903,106.957169,2.744956601,3.527386028,5.554025502,60645,0,3662,7.08,198,,0.067,8.374,,0.684,0.336,,B,C,N,N,1.75813 | ||
25 Phocaea,2.400160565,0.254613898,,21.60484031,214.1306087,90.26321459,1.789046327,3.011274804,3.718507367,58150,0,3132,7.83,61.054,,0.35,9.9341,,0.932,0.513,,S,S,N,N,0.923495 | ||
26 Proserpina,2.654330893,0.090144874,,3.563412996,45.77798089,193.4497991,2.415056568,2.893605218,4.324548727,57907,0,2259,7.4,94.8,,0.1966,13.11,,0.891,0.525,,S,S,N,N,1.40287 | ||
27 Euterpe,2.346664413,0.173225845,,1.583713718,94.78795695,356.4498658,1.940161487,2.753167339,3.594882363,60345,0,2669,7,96,,0.215,10.4082,,0.878,0.502,,S,S,N,N,0.956809 | ||
28 Bellona,2.77576593,0.151862613,,9.429548932,144.2961697,344.1009091,2.354230863,3.197300998,4.624688265,59495,0,2738,7.09,120.9,,0.1763,15.706,,0.845,0.469,,S,S,N,N,1.37086 | ||
29 Amphitrite,2.554113563,0.072695536,0.2,6.082523344,356.3417672,63.36324048,2.368440909,2.739786216,4.081957767,59923,0,2318,5.85,189.559,,0.216,5.3921,,0.838,0.449,,S,S,N,N,1.3871 | ||
30 Urania,2.365572137,0.127581,,2.095743731,307.4686116,87.42620759,2.063770078,2.667374196,3.638417204,58087,0,3289,7.57,92.787,,0.192,13.686,,0.873,0.459,,Sl,S,N,N,1.0709 | ||
31 Euphrosyne,3.155402311,0.220857011,,26.30334546,31.11858135,61.47037758,2.458509589,3.852295033,5.605189646,58007,0,2377,6.74,267.08,,0.053,5.53,,0.687,0.317,,Cb,C,N,N,1.56992 | ||
32 Pomona,2.588507764,0.080901389,,5.523952791,220.4415627,339.5357809,2.379093891,2.797921637,4.164687411,56838,0,2133,7.56,80.76,,0.2564,9.448,,0.857,0.429,,S,S,N,N,1.37704 | ||
33 Polyhymnia,2.87345553,0.331346295,0.33,1.852843675,8.466365462,338.3718433,1.921346687,3.825564373,4.870964033,59639,0,2832,8.55,52.929,,0.24,18.608,,0.848,0.438,,Sq,S,N,N,0.913808 | ||
34 Circe,2.68738341,0.105948047,,5.496017155,184.3613092,330.6761387,2.402660386,2.972106434,4.405575516,56728,0,3115,8.51,132.992,,0.052,12.15,,0.707,0.357,,Ch,C,N,N,1.41435 | ||
35 Leukothea,2.994071835,0.225588162,,7.932713103,353.7389409,213.3967316,2.318644673,3.669498998,5.180856057,58229,0,3165,8.5,103.055,,0.066,31.9,,0.703,0.335,,C,C,N,N,1.31998 | ||
36 Atalante,2.74682401,0.304906016,,18.36926143,358.2100148,47.79654908,1.909300844,3.584347177,4.552547174,56069,0,1720,8.46,115.204,,0.036,9.93,,0.713,0.363,,,C,N,N,0.959267 |