From a4e0fe8974f1ecf39b04683636ff297fd6341338 Mon Sep 17 00:00:00 2001 From: manushak Date: Fri, 6 Dec 2024 10:24:27 +0400 Subject: [PATCH 01/12] feat(builtins): add csv-import plugin --- src/if-run/builtins/csv-import/index.ts | 186 ++++++++++++++++++++++++ src/if-run/builtins/index.ts | 1 + 2 files changed, 187 insertions(+) create mode 100644 src/if-run/builtins/csv-import/index.ts diff --git a/src/if-run/builtins/csv-import/index.ts b/src/if-run/builtins/csv-import/index.ts new file mode 100644 index 000000000..f459e4ad7 --- /dev/null +++ b/src/if-run/builtins/csv-import/index.ts @@ -0,0 +1,186 @@ +/* eslint-disable eqeqeq */ +import {readFile} from 'fs/promises'; +import axios from 'axios'; +import {z} from 'zod'; +import {parse} from 'csv-parse/sync'; + +import {ConfigParams, PluginParams} from '@grnsft/if-core/types'; +import {PluginFactory} from '@grnsft/if-core/interfaces'; +import {ERRORS, validate} from '@grnsft/if-core/utils'; + +import {STRINGS} from '../../config'; + +const { + FILE_FETCH_FAILED, + FILE_READ_FAILED, + MISSING_CSV_COLUMN, + MISSING_CONFIG, +} = STRINGS; + +const { + FetchingFileError, + ReadFileError, + MissingCSVColumnError, + ConfigError, + CSVParseError, +} = ERRORS; + +export const CSVImport = PluginFactory({ + configValidation: (config: ConfigParams) => { + if (!config || !Object.keys(config)?.length) { + throw new ConfigError(MISSING_CONFIG); + } + + const configSchema = z.object({ + filepath: z.string(), + output: z + .string() + .or(z.array(z.string())) + .or(z.array(z.array(z.string()))), + }); + + return validate>(configSchema, config); + }, + implementation: async (inputs: PluginParams[], config: ConfigParams) => { + /** + * 1. Tries to retrieve given file (with url or local path). + * 2. Parses given CSV. + * 3. Filters requested information from CSV. 
+ */ + const {filepath, output} = config; + const file = await retrieveFile(filepath); + const parsedCSV = parseCSVFile(file); + + const result = parsedCSV?.map((input: PluginParams) => + filterOutput(input, output) + ); + + return [...inputs, ...result]; + }, +}); + +/** + * Checks if given string is URL. + */ +const isURL = (filepath: string) => { + try { + new URL(filepath); + return true; + } catch (error) { + return false; + } +}; + +/** + * Checks if given `filepath` is url, then tries to fetch it. + * Otherwise tries to read file. + */ +const retrieveFile = async (filepath: string) => { + if (isURL(filepath)) { + const {data} = await axios.get(filepath).catch(error => { + throw new FetchingFileError( + FILE_FETCH_FAILED(filepath, error.response.message) + ); + }); + + return data; + } + + return readFile(filepath).catch(error => { + throw new ReadFileError(FILE_READ_FAILED(filepath, error)); + }); +}; + +/** + * Checks if value is invalid: `undefined`, `null` or an empty string, then sets `nan` instead. + */ +const setNanValue = (value: any) => + value == null || value === '' ? 'nan' : value; + +/** + * Converts empty values to `nan`. + */ +const nanifyEmptyValues = (object: any) => { + if (typeof object === 'object') { + const keys = Object.keys(object); + + keys.forEach(key => { + const value = object[key]; + object[key] = setNanValue(value); + }); + + return object; + } + + return setNanValue(object); +}; + +/** + * If `field` is missing from `object`, then reject with error. + * Otherwise nanify empty values and return data. + */ +const fieldAccessor = (field: string, object: any) => { + if (!(`${field}` in object)) { + throw new MissingCSVColumnError(MISSING_CSV_COLUMN(field)); + } + + return nanifyEmptyValues(object[field]); +}; + +/** + * 1. If output is `*`, then returns the whole csv record with empty values converted to `nan`. + * 2. Otherwise checks if it's a multidimensional array, then grabs multiple fields (renaming each one if a second name is given). + * 3. 
If not, then returns single field. + * 4. In case if it's a plain string, then returns that single field under its own name. + */ +const filterOutput = ( + dataFromCSV: any, + output: string | string[] | string[][] +) => { + if (output === '*') { + return nanifyEmptyValues(dataFromCSV); + } + + if (Array.isArray(output)) { + /** Check if it's a multidimensional array. */ + if (Array.isArray(output[0])) { + const result: any = {}; + + output.forEach(outputField => { + /** Check if there is no renaming request, then export as is */ + const outputTitle = outputField[1] || outputField[0]; + result[outputTitle] = fieldAccessor(outputField[0], dataFromCSV); + }); + + return result; + } + + const outputTitle = output[1] || output[0]; + + return { + [outputTitle as string]: fieldAccessor(output[0], dataFromCSV), + }; + } + + return { + [output]: fieldAccessor(output, dataFromCSV), + }; +}; + +/** + * Parses CSV file. + */ +const parseCSVFile = (file: string | Buffer) => { + try { + const parsedCSV: any[] = parse(file, { + columns: true, + skip_empty_lines: true, + cast: true, + }); + + return parsedCSV; + } catch (error: any) { + console.error(error); + throw new CSVParseError(error); + } +}; diff --git a/src/if-run/builtins/index.ts b/src/if-run/builtins/index.ts index 774533baf..8037aaf90 100644 --- a/src/if-run/builtins/index.ts +++ b/src/if-run/builtins/index.ts @@ -9,6 +9,7 @@ export {SciEmbodied} from './sci-embodied'; export {Sci} from './sci'; export {Exponent} from './exponent'; export {CSVLookup} from './csv-lookup'; +export {CSVImport} from './csv-import'; export {Shell} from './shell'; export {Regex} from './regex'; export {Copy} from './copy-param'; From 88d616cfe5c9de01f140849bbfb3c1f6de6557c7 Mon Sep 17 00:00:00 2001 From: manushak Date: Fri, 6 Dec 2024 10:25:30 +0400 Subject: [PATCH 02/12] test(builtins): add unit test for csv-import plugin --- .../if-run/builtins/csv-import.test.ts | 386 ++++++++++++++++++ 1 file changed, 386 insertions(+) create mode 100644 src/__tests__/if-run/builtins/csv-import.test.ts 
diff --git a/src/__tests__/if-run/builtins/csv-import.test.ts b/src/__tests__/if-run/builtins/csv-import.test.ts new file mode 100644 index 000000000..5f708045d --- /dev/null +++ b/src/__tests__/if-run/builtins/csv-import.test.ts @@ -0,0 +1,386 @@ +jest.mock('fs/promises', () => require('../../../__mocks__/fs')); + +import axios from 'axios'; +import AxiosMockAdapter from 'axios-mock-adapter'; +import {ERRORS} from '@grnsft/if-core/utils'; + +import {CSVImport} from '../../../if-run/builtins'; + +import {STRINGS} from '../../../if-run/config'; + +const { + ConfigError, + ReadFileError, + FetchingFileError, + MissingCSVColumnError, + CSVParseError, +} = ERRORS; +const {MISSING_CONFIG, MISSING_CSV_COLUMN} = STRINGS; + +describe('builtins/CSVImport: ', () => { + const mock = new AxiosMockAdapter(axios); + + describe('CSVImport: ', () => { + const parametersMetadata = { + inputs: {}, + outputs: {}, + }; + afterEach(() => { + mock.reset(); + }); + + describe('init: ', () => { + it('successfully initalized.', () => { + const config = { + filepath: '', + output: ['cpu-tdp', 'tdp'], + }; + + const csvImport = CSVImport(config, parametersMetadata, {}); + + expect(csvImport).toHaveProperty('metadata'); + expect(csvImport).toHaveProperty('execute'); + }); + }); + + describe('execute(): ', () => { + it('successfully applies CSVImport `url` strategy to given input.', async () => { + const config = { + filepath: + 'https://raw.githubusercontent.com/Green-Software-Foundation/if-data/main/cloud-metdata-aws-instances.csv', + output: [['cpu-cores-utilized'], ['cpu-tdp']], + }; + + const csvImport = CSVImport(config, parametersMetadata, {}); + const responseData = `cpu-cores-available,cpu-cores-utilized,cpu-manufacturer,cpu-model-name,cpu-tdp,gpu-count,gpu-model-name,Hardware Information on AWS Documentation & Comments,instance-class,instance-storage,memory-available,platform-memory,release-date,storage-drives +16,8,AWS,AWS Graviton,150.00,N/A,N/A,AWS Graviton 
(ARM),a1.2xlarge,EBS-Only,16,32,November 2018,0 +16,16,AWS,AWS Graviton,150.00,N/A,N/A,AWS Graviton (ARM),a1.4xlarge,EBS-Only,32,32,November 2018,0`; + mock.onGet(config.filepath).reply(200, responseData); + + const result = await csvImport.execute([]); + const expectedResult = [ + { + 'cpu-cores-utilized': 8, + 'cpu-tdp': 150, + }, + { + 'cpu-cores-utilized': 16, + 'cpu-tdp': 150, + }, + ]; + + expect.assertions(1); + + expect(result).toStrictEqual(expectedResult); + }); + + it('successfully applies CSVImport `local file` strategy to given input.', async () => { + expect.assertions(1); + const config = { + filepath: './file.csv', + output: [ + ['cpu-cores-available'], + ['cpu-cores-utilized'], + ['cpu-manufacturer'], + ['cpu-tdp', 'tdp'], + ], + }; + + const csvImport = CSVImport(config, parametersMetadata, {}); + const result = await csvImport.execute([]); + const expectedResult = [ + { + 'cpu-cores-available': 16, + 'cpu-cores-utilized': 8, + 'cpu-manufacturer': 'AWS', + tdp: 150, + }, + { + 'cpu-cores-available': 16, + 'cpu-cores-utilized': 16, + 'cpu-manufacturer': 'AWS', + tdp: 150, + }, + ]; + + expect(result).toStrictEqual(expectedResult); + }); + + it('successfully executes when the file is empty.', async () => { + const config = { + filepath: + 'https://raw.githubusercontent.com/Green-Software-Foundation/if-data/main/cloud-metdata-aws-instances.csv', + output: [['cpu-cores-utilized'], ['cpu-tdp']], + }; + + const csvImport = CSVImport(config, parametersMetadata, {}); + const responseData = ''; + mock.onGet(config.filepath).reply(200, responseData); + + const result = await csvImport.execute([]); + + expect.assertions(1); + + expect(result).toStrictEqual([]); + }); + + it('successfully executes when `mapping` has valid data.', async () => { + expect.assertions(1); + const config = { + filepath: './file.csv', + output: [ + ['cpu-cores-utilized', 'cpu/utilized'], + ['cpu-tdp', 'tdp'], + ], + }; + const parameterMetadata = {inputs: {}, outputs: {}}; + const 
mapping = { + 'cpu/utilized': 'cpu/util', + }; + const csvImport = CSVImport(config, parameterMetadata, mapping); + + const result = await csvImport.execute([]); + const expectedResult = [ + { + 'cpu/util': 8, + tdp: 150, + }, + { + 'cpu/util': 16, + tdp: 150, + }, + ]; + + expect(result).toStrictEqual(expectedResult); + }); + + it('rejects with file not found error.', async () => { + const config = { + filepath: './file-fail.csv', + output: ['cpu-tdp', 'tdp'], + }; + + const csvImport = CSVImport(config, parametersMetadata, {}); + + try { + await csvImport.execute([]); + } catch (error) { + if (error instanceof Error) { + expect(error).toBeInstanceOf(ReadFileError); + } + } + }); + + it('rejects with axios error.', async () => { + const config = { + filepath: + 'https://raw.githubusercontent.com/Green-Software-Foundation/if-data/main/cloud-metdata-aws-instances.csv', + output: ['cpu-tdp', 'tdp'], + }; + mock.onGet(config.filepath).reply(404); + + const csvImport = CSVImport(config, parametersMetadata, {}); + + try { + await csvImport.execute([]); + } catch (error) { + if (error instanceof Error) { + expect(error).toBeInstanceOf(FetchingFileError); + } + } + }); + + it('successfully applies CSVImport if output is `*`.', async () => { + expect.assertions(1); + const config = { + filepath: './file.csv', + output: '*', + }; + const csvImport = CSVImport(config, parametersMetadata, {}); + const result = await csvImport.execute([ + { + timestamp: '2024-03-01', + 'cpu/available': 16, + 'cpu/utilized': 16, + 'cpu/manufacturer': 'AWS', + }, + ]); + const expectedResult = [ + { + 'cpu/available': 16, + 'cpu/manufacturer': 'AWS', + 'cpu/utilized': 16, + timestamp: '2024-03-01', + }, + { + 'Hardware Information on AWS Documentation & Comments': + 'AWS Graviton (ARM)', + 'cpu-cores-available': 16, + 'cpu-cores-utilized': 8, + 'cpu-manufacturer': 'AWS', + 'cpu-model-name': 'AWS Graviton', + 'cpu-tdp': 150, + + 'gpu-count': 'N/A', + 'gpu-model-name': 'N/A', + 'instance-class': 
'a1.2xlarge', + 'instance-storage': 'EBS-Only', + 'memory-available': 16, + 'platform-memory': 32, + 'release-date': 'November 2018', + 'storage-drives': 'nan', + }, + { + 'Hardware Information on AWS Documentation & Comments': + 'AWS Graviton (ARM)', + 'cpu-cores-available': 16, + 'cpu-cores-utilized': 16, + 'cpu-manufacturer': 'AWS', + 'cpu-model-name': 'AWS Graviton', + 'cpu-tdp': 150, + 'gpu-count': 'N/A', + 'gpu-model-name': 'N/A', + 'instance-class': 'a1.4xlarge', + 'instance-storage': 'EBS-Only', + 'memory-available': 32, + 'platform-memory': 32, + 'release-date': 'November 2018', + 'storage-drives': 'nan', + }, + ]; + + expect(result).toStrictEqual(expectedResult); + }); + + it('successfully applies CSVImport if output is exact string.', async () => { + expect.assertions(1); + const config = { + filepath: './file.csv', + output: 'cpu-tdp', + }; + + const csvImport = CSVImport(config, parametersMetadata, {}); + const result = await csvImport.execute([]); + const expectedResult = [ + { + 'cpu-tdp': 150, + }, + { + 'cpu-tdp': 150, + }, + ]; + + expect(result).toStrictEqual(expectedResult); + }); + + it('rejects with config not found error.', async () => { + // eslint-disable-next-line @typescript-eslint/ban-ts-comment + // @ts-ignore + const csvImport = CSVImport(); + expect.assertions(2); + + try { + await csvImport.execute([]); + } catch (error) { + if (error instanceof Error) { + expect(error).toBeInstanceOf(ConfigError); + expect(error.message).toEqual(MISSING_CONFIG); + } + } + }); + + it('rejects with no such column in csv error.', async () => { + expect.assertions(2); + + const config = { + filepath: './file.csv', + output: 'mock', + }; + const csvImport = CSVImport(config, parametersMetadata, {}); + const input = [ + { + timestamp: '2024-03-01', + 'cpu/available': 16, + 'cpu/utilized': 16, + 'cpu/manufacturer': 'AWS', + }, + ]; + + try { + await csvImport.execute(input); + } catch (error) { + if (error instanceof Error) { + 
expect(error).toBeInstanceOf(MissingCSVColumnError); + expect(error.message).toEqual(MISSING_CSV_COLUMN(config.output)); + } + } + }); + + it('successfully applies CSVImport if output is array with string.', async () => { + expect.assertions(1); + const config = { + filepath: './file.csv', + output: ['cpu-cores-utilized'], + }; + + const csvImport = CSVImport(config, parametersMetadata, {}); + const result = await csvImport.execute([]); + const expectedResult = [ + { + 'cpu-cores-utilized': 8, + }, + { + 'cpu-cores-utilized': 16, + }, + ]; + + expect(result).toStrictEqual(expectedResult); + }); + + it('successfully applies CSVImport if output is matrix with strings.', async () => { + expect.assertions(1); + const config = { + filepath: './file.csv', + output: [['cpu-cores-utilized']], + }; + + const csvImport = CSVImport(config, parametersMetadata, {}); + const result = await csvImport.execute([]); + const expectedResult = [ + { + 'cpu-cores-utilized': 8, + }, + { + 'cpu-cores-utilized': 16, + }, + ]; + + expect(result).toStrictEqual(expectedResult); + }); + + it('rejects with CSV parse error.', async () => { + process.env.csv = 'fail'; + expect.assertions(1); + const config = { + filepath: './fail-csv-reader.csv', + output: [['gpu-count']], + }; + + const csvImport = CSVImport(config, parametersMetadata, {}); + + try { + await csvImport.execute([ + { + timestamp: '2024-03-01', + 'cpu/available': 16, + 'cpu/utilized': 16, + 'cpu/manufacturer': 'AWS', + }, + ]); + } catch (error) { + expect(error).toBeInstanceOf(CSVParseError); + } + }); + }); + }); +}); From 0ce93d090bd625b30aef00e1f07b04b008793005 Mon Sep 17 00:00:00 2001 From: manushak Date: Fri, 6 Dec 2024 10:26:43 +0400 Subject: [PATCH 03/12] docs(builtins): add README file --- src/if-run/builtins/csv-import/README.md | 190 +++++++++++++++++++++++ 1 file changed, 190 insertions(+) create mode 100644 src/if-run/builtins/csv-import/README.md diff --git a/src/if-run/builtins/csv-import/README.md 
b/src/if-run/builtins/csv-import/README.md new file mode 100644 index 000000000..06e304282 --- /dev/null +++ b/src/if-run/builtins/csv-import/README.md @@ -0,0 +1,190 @@ +# CSV Import Plugin + +`csv-import` is a versatile plugin that allows you to extract specific data from a CSV file and seamlessly integrate it into the `input` data of your manifest file. + +You provide the path to the target CSV file. The file path can reference either a local file on your system or a URL pointing to an online resource. + +For example, for the following CSV: + +| | | | | | | | | | | | | | | | | | +| ---- | -------------- | --------------- | ---------- | ---------- | ------------ | --------- | ---------------- | ---------- | ---------- | ---------------------- | ---------- | -------------------------- | --------------------------------- | ------------------------------ | -------------------------------- | --------------------- | +| year | cloud-provider | cloud-region | cfe-region | em-zone-id | wt-region-id | location | geolocation | cfe-hourly | cfe-annual | power-usage-efficiency | net-carbon | grid-carbon-intensity-24x7 | grid-carbon-intensity-consumption | grid-carbon-intensity-marginal | grid-carbon-intensity-production | grid-carbon-intensity | +| 2022 | Google Cloud | asia-east1 | Taiwan | TW | TW | Taiwan | 25.0375,121.5625 | 0.18 | | | 0 | 453 | | | | 453 | +| 2022 | Google Cloud | asia-east2 | Hong Kong | HK | HK | Hong Kong | 22.3,114.2 | 0.28 | | | 0 | 453 | | | | 360 | +| 2022 | Google Cloud | asia-northeast1 | Tokyo | JP-TK | JP-TK | Tokyo | 35.6897,139.692 | 0.28 | | | 0 | 463 | | | | 463 | + +You could import every column of every row (each CSV row is appended to your data as a separate entry) using the following configuration: + +```yaml +filepath: https://raw.githubusercontent.com/Green-Software-Foundation/if-data/main/region-metadata.csv +output: '*' +``` + +## Parameters + +### Plugin config + +- `filepath` - path to a csv file, either on the local filesystem or on the 
internet +- `output` - the columns to grab data from and add to output data - supports a wildcard (`"*"`) or multiple values. + +The plugin also supports data renaming. This means you can grab data from a named column but push it into your manifest file data under another name. For example, you may want to grab data from the `processor-name` column in the target csv and add it to the manifest file data as `processor-id` because this is the name expected by some other plugin in your pipeline. You can do this by passing comma separated values in arrays. + +```yaml +output: ['processor-name', 'processor-id'] +``` + +You can nest arrays to do this renaming for multiple columns. + +```yaml +output: + [['processor-name', 'processor-model-id'], ['tdp', 'thermal-design-power']] +``` + +- `"*"` - indicating all columns should be selected +- `"tdp"` - indicating that only column `tdp` should be selected +- `["processor-name", "processor-model-id"]` - indicating that only column `processor-name` should be selected and output as `processor-model-id` +- `[["processor-name", "processor-model-id"],["tdp", "thermal-design-power"]]` - indicating that the `processor-name` and `tdp` columns should be selected with `processor-name` output as `processor-model-id` and `tdp` as `thermal-design-power` + +### Plugin parameter metadata + +The `parameter-metadata` section contains information about `description`, `unit` and `aggregation-method` of the parameters of the inputs and outputs + +- `inputs`: describe the parameters of the `inputs`. Each parameter has: + + - `description`: description of the parameter + - `unit`: unit of the parameter + - `aggregation-method`: aggregation method object of the parameter + - `time`: this value is used for `horizontal` aggregation. It can be of the following values: `sum`, `avg`, `copy`, or `none`. + - `component`: this value is used for `vertical` aggregation. It can be of the following values: `sum`, `avg`, `copy`, or `none`. 
+ +- `outputs`: describe the parameters in the `output` of the config block. The parameter has the following attributes: + - `description`: description of the parameter + - `unit`: unit of the parameter + - `aggregation-method`: aggregation method object of the parameter + - `time`: this value is used for `horizontal` aggregation. It can be of the following values: `sum`, `avg`, `copy`, or `none`. + - `component`: this value is used for `vertical` aggregation. It can be of the following values: `sum`, `avg`, `copy`, or `none`. + +### Mapping + +The `mapping` block is an optional block. It is added in the plugin section and allows the plugin to receive a parameter from the input with a different name than the one the plugin uses for data manipulation. The parameter with the mapped name will not appear in the outputs. It also maps the output parameter of the plugin. The structure of the `mapping` block is: + +```yaml +cloud-metadata: + method: CSVImport + path: 'builtin' + mapping: + 'parameter-name-in-the-plugin': 'parameter-name-in-the-input' +``` + +### Inputs + +There are no strict requirements on input for this plugin because they depend upon the contents of the target CSV and your input data at the time the CSV import is invoked. Please make sure you are requesting data from columns that exist in the target csv file and that your query values are available in your `input` data. + +## Returns + +The input data with the requested csv content appended to it. + +## Plugin logic + +1. Validates config which contains `filepath` and `output`. +2. Tries to retrieve given file (with url or local path). +3. Parses given CSV. +4. Filters requested information from CSV. +5. Returns enriched input data + +## Implementation + +To run the plugin, you must first create an instance of `CSVImport`. Then, you can call `execute()`. 
+ +```typescript +const config = { + filepath: 'https://raw.githubusercontent.com/Green-Software-Foundation/if-data/main/cloud-metdata-aws-instances.csv', + output: ['cpu-tdp', 'tdp'], +}; +const parametersMetadata = {inputs: {}, outputs: {}}; +const mapping = {}; +const csvImport = CSVImport(config, parametersMetadata, mapping); + +const result = await csvImport.execute([ + { + timestamp: '2023-08-06T00:00', + duration: 3600, + 'cpu/energy': 0.001, + 'cloud/provider': 'gcp', + 'cloud/region': 'asia-east', + }, +]); +``` + +## Example manifest + +IF users will typically call the plugin as part of a pipeline defined in a manifest file. In this case, instantiating the plugin is handled by `if` and does not have to be done explicitly by the user. The following is an example manifest that calls `csv-import`: + +```yaml +name: csv-import-demo +description: +tags: +initialize: + plugins: + cloud-metadata: + method: CSVImport + path: 'builtin' + config: + filepath: https://raw.githubusercontent.com/Green-Software-Foundation/if-data/main/region-metadata.csv + output: '*' + mapping: + cloud/region: cloud/area +tree: + children: + child: + pipeline: + compute: + - cloud-metadata + inputs: + - timestamp: 2023-08-06T00:00 + duration: 3600 + cloud/provider: Google Cloud + cloud/area: europe-north1 +``` + +You can run this example by saving it as `./manifests/plugins/csv-import.yml` and executing the following command from the project root: + +```sh +npm i -g @grnsft/if +if-run --manifest manifests/plugins/csv-import.yml --output manifests/outputs/csv-import +``` + +The results will be saved to a new `yaml` file in `manifests/outputs`. + +## Errors + +`CSVImport` exposes six of the IF error classes. + +### FetchingFileError + +This error is caused by problems finding the file at the path provided in the `filepath`. If the file is on your local filesystem, you can check that the file is definitely there. For a remote file, check your internet connection. 
You can check your connection to the server using a tool such as `ping` or `curl`. if you still experience problems, you could retrieve the remote file onto your local filesystem using a tool such as `wget`. + +### ReadFileError, + +This error is caused by problems reading the CSV file provided in the `filepath`. To fix it, check that the file contains valid CSV data. The file should have a `.csv` file extension and the data inside should be formatted correctly. + +### MissingCSVColumnError, + +This error is caused by `CsvImport` failing to find a column in the CSV file whose name matches what was provided in `query`. To debug, check that you do not have any typos in your `query` and confirm that the requested column name definitely exists in the target file. + +### CSVParseError, + +This error arises due to problems parsing CSV data into IF. This can occur when the CSV data is incorrectly formatted or contains unexpected characters that IF does not recognize. These errors are expected to be unusual edge cases as incorrectly formatted data will usually be identified during file loading and cause a `ReadFileError`. To debug, check your CSV file for any unexpected formatting or unusual characters. + +### ConfigError + +You will receive an error starting `ConfigError: ` if you have not provided the expected configuration data in the plugin's `initialize` block. + +The required parameters are: + +- `filepath`: this must be a string containing a local path or a URL to a csv file +- `output`: this must be a column name, a wildcard character (`"*"`), an array of column names, or an array of such arrays + +You can fix this error by checking you are providing valid values for each parameter in the config. 
+ +For more information on our error classes, please visit [our docs](https://if.greensoftware.foundation/reference/errors) From 32b78abd7af11b72c3e50d3d81226d7a0f80760a Mon Sep 17 00:00:00 2001 From: manushak Date: Fri, 6 Dec 2024 10:29:29 +0400 Subject: [PATCH 04/12] docs(builtins): update csv-lookup and csv-import Readme files --- src/if-run/builtins/csv-import/README.md | 8 ++++---- src/if-run/builtins/csv-lookup/README.md | 14 +++++++------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/if-run/builtins/csv-import/README.md b/src/if-run/builtins/csv-import/README.md index 06e304282..ec5a83bde 100644 --- a/src/if-run/builtins/csv-import/README.md +++ b/src/if-run/builtins/csv-import/README.md @@ -158,21 +158,21 @@ The results will be saved to a new `yaml` file in `manifests/outputs`. ## Errors -`CSVImport` exposes six of the IF error classes. +`CSVImport` exposes five of the IF error classes. ### FetchingFileError This error is caused by problems finding the file at the path provided in the `filepath`. If the file is on your local filesystem, you can check that the file is definitely there. For a remote file, check your internet connection. You can check your connection to the server using a tool such as `ping` or `curl`. if you still experience problems, you could retrieve the remote file onto your local filesystem using a tool such as `wget`. -### ReadFileError, +### ReadFileError This error is caused by problems reading the CSV file provided in the `filepath`. To fix it, check that the file contains valid CSV data. The file should have a `.csv` file extension and the data inside should be formatted correctly. -### MissingCSVColumnError, +### MissingCSVColumnError This error is caused by `CsvImport` failing to find a column in the CSV file whose name matches what was provided in `query`. To debug, check that you do not have any typos in your `query` and confirm that the requested column name definitely exists in the target file. 
-### CSVParseError, +### CSVParseError This error arises due to problems parsing CSV data into IF. This can occur when the CSV data is incorrectly formatted or contains unexpected characters that IF does not recognize. These errors are expected to be unusual edge cases as incorrectly formatted data will usually be identified during file loading and cause a `ReadFileError`. To debug, check your CSV file for any unexpected formatting or unusual characters. diff --git a/src/if-run/builtins/csv-lookup/README.md b/src/if-run/builtins/csv-lookup/README.md index d9d8c3903..52004314d 100644 --- a/src/if-run/builtins/csv-lookup/README.md +++ b/src/if-run/builtins/csv-lookup/README.md @@ -2,7 +2,7 @@ `csv-lookup` is a generic plugin that enables you to select arbitrary data from a given csv file and add it to your manifest file's `input` data. -You provide path to the target csv file pus some query parameters. The filepath can point to a location on the local filesystem or it can be a URL for an online resource. The query parameters include the column names for the target data you want to return (can be one column name, multiple column names or all column names, indicated using `"*"`), plus the column names and values you want to use as selectors. +You provide path to the target csv file plus some query parameters. The filepath can point to a location on the local filesystem or it can be a URL for an online resource. The query parameters include the column names for the target data you want to return (can be one column name, multiple column names or all column names, indicated using `"*"`), plus the column names and values you want to use as selectors. 
For example, for the following CSV: @@ -33,7 +33,7 @@ Notice that the query parameters are key/value pairs where the key is the column - `query` - an array of key/value pairs where the key is a column name in the target csv and the value is a parameter from inputs - `output` - the columns to grab data from and add to output data - should support wildcard or multiple values. -The plugin also supports data renaming. This means you can grab data from a named column but push it into your manifest file data under another name, for example, maybe we want to grab data from the `processor-name` column int he target csv and add it to the manifest file data as `processor-id` because this is the name expected by some other plugin in your piepline. You can do this by passing comma separated values in arrays. +The plugin also supports data renaming. This means you can grab data from a named column but push it into your manifest file data under another name, for example, maybe we want to grab data from the `processor-name` column in the target csv and add it to the manifest file data as `processor-id` because this is the name expected by some other plugin in your piepline. You can do this by passing comma separated values in arrays. ```yaml output: ['processor-name': 'processor-id'] @@ -175,25 +175,25 @@ The results will be saved to a new `yaml` file in `manifests/outputs`. ## Errors -Coefficient exposes six of the IF error classes. +`CSVLookup` exposes six of the IF error classes. ### FetchingFileError This error is caused by problems finding the file at the path provided in the `filepath`. If the file is on your local filesystem, you can check that the file is definitely there. For a remote file, check your internet connection. You can check your connection to the server using a tool such as `ping` or `curl`. if you still experience problems, you could retrieve the remote file onto your local filesystem using a tool such as `wget`. 
-### ReadFileError, +### ReadFileError This error is caused by problems reading the CSV file provided in the `filepath`. To fix it, check that the file contains valid CSV data. The file should have a `.csv` file extension and the data inside should be formatted correctly. -### MissingCSVColumnError, +### MissingCSVColumnError This error is caused by `CsvLookup` failing to find a column in the CSV file whose name matches what was provided in `query`. To debug, check that you do not have any typos in your `query` and confirm that the requested column name definitely exists in the target file. -### QueryDataNotFoundError, +### QueryDataNotFoundError This error is caused by the `CsvLookup` plugin failing to find data that matches your query. Try revising your query parameters. -### CSVParseError, +### CSVParseError This error arises due to problems parsing CSV data into IF. This can occur when the CSV data is incorrectly formatted or contains unexpected characters that IF does not recognize. These errors are expected to be unusual edge cases as incorrectly formatted data will usually be identified during file loading and cause a `ReadFileError`. To debug, check your CSV file for any unexpected formatting or unusual characters. 
From 0a44a497ddc9ac4fa3bb819bdfb6dc8883ebd48f Mon Sep 17 00:00:00 2001 From: manushak Date: Fri, 6 Dec 2024 10:47:04 +0400 Subject: [PATCH 05/12] feat(manifests): add csv-import manifests --- .../examples/builtins/csv-import/success.yaml | 22 +++++ .../examples/builtins/csv-import/test.csv | 4 + .../outputs/builtins/csv-import/success.yaml | 84 +++++++++++++++++++ 3 files changed, 110 insertions(+) create mode 100644 manifests/examples/builtins/csv-import/success.yaml create mode 100644 manifests/examples/builtins/csv-import/test.csv create mode 100644 manifests/outputs/builtins/csv-import/success.yaml diff --git a/manifests/examples/builtins/csv-import/success.yaml b/manifests/examples/builtins/csv-import/success.yaml new file mode 100644 index 000000000..92168d8f4 --- /dev/null +++ b/manifests/examples/builtins/csv-import/success.yaml @@ -0,0 +1,22 @@ +name: csv-import +description: successful path +tags: +initialize: + plugins: + data-import: + method: CSVImport + path: 'builtin' + config: + filepath: manifests/examples/builtins/csv-import/test.csv + # filepath: https://raw.githubusercontent.com/Green-Software-Foundation/if-data/main/cloud-metdata-aws-instances.csv + output: '*' +tree: + children: + child: + pipeline: + compute: + - data-import + inputs: + - timestamp: 2023-07-06T00:00 + duration: 300 + physical-processor: AMD 3020e diff --git a/manifests/examples/builtins/csv-import/test.csv b/manifests/examples/builtins/csv-import/test.csv new file mode 100644 index 000000000..6a506edb6 --- /dev/null +++ b/manifests/examples/builtins/csv-import/test.csv @@ -0,0 +1,4 @@ +timestamp,duration,cpu-util,energy +2023-07-06T00:00,1,20,5 +2023-07-06T00:01,1,30,10 +2023-07-06T00:02,1,40,15 \ No newline at end of file diff --git a/manifests/outputs/builtins/csv-import/success.yaml b/manifests/outputs/builtins/csv-import/success.yaml new file mode 100644 index 000000000..1a0ac6fba --- /dev/null +++ b/manifests/outputs/builtins/csv-import/success.yaml @@ -0,0 +1,84 @@ 
+name: csv-import +description: successful path +tags: null +initialize: + plugins: + data-import: + path: builtin + method: CSVImport + config: + filepath: manifests/examples/builtins/csv-import/test.csv + output: '*' +execution: + command: >- + /Users/manushak/.npm/_npx/1bf7c3c15bf47d04/node_modules/.bin/ts-node + /Users/manushak/Documents/Projects/Green-Software/if/src/if-run/index.ts -m + manifests/examples/builtins/csv-import/success.yaml -o + manifests/outputs/builtins/csv-import/success + environment: + if-version: 0.7.2 + os: macOS + os-version: 14.6.1 + node-version: 18.20.4 + date-time: 2024-12-06T06:45:15.935Z (UTC) + dependencies: + - '@babel/core@7.22.10' + - '@babel/preset-typescript@7.23.3' + - '@commitlint/cli@18.6.0' + - '@commitlint/config-conventional@18.6.0' + - '@grnsft/if-core@0.0.28' + - >- + @grnsft/if-eco-ci-plugin@0.0.1 extraneous -> + file:../../../if-eco-ci-plugin + - '@jest/globals@29.7.0' + - '@types/jest@29.5.8' + - '@types/js-yaml@4.0.9' + - '@types/luxon@3.4.2' + - '@types/node@20.9.0' + - axios-mock-adapter@1.22.0 + - axios@1.7.7 + - cross-env@7.0.3 + - csv-parse@5.5.6 + - csv-stringify@6.4.6 + - fixpack@4.0.0 + - gts@5.2.0 + - husky@8.0.3 + - jest@29.7.0 + - js-yaml@4.1.0 + - lint-staged@15.2.10 + - luxon@3.4.4 + - release-it@16.3.0 + - rimraf@5.0.5 + - ts-command-line-args@2.5.1 + - ts-jest@29.1.1 + - typescript-cubic-spline@1.0.1 + - typescript@5.2.2 + - winston@3.11.0 + - zod@3.23.8 + status: success +tree: + children: + child: + pipeline: + compute: + - data-import + inputs: + - timestamp: 2023-07-06T00:00 + duration: 300 + physical-processor: AMD 3020e + outputs: + - timestamp: 2023-07-06T00:00 + duration: 300 + physical-processor: AMD 3020e + - timestamp: 2023-07-06T00:00 + duration: 1 + cpu-util: 20 + energy: 5 + - timestamp: 2023-07-06T00:01 + duration: 1 + cpu-util: 30 + energy: 10 + - timestamp: 2023-07-06T00:02 + duration: 1 + cpu-util: 40 + energy: 15 From 76d2db035fbd0800411ec4124a3c540d606b6547 Mon Sep 17 00:00:00 
2001 From: Manushak Keramyan Date: Mon, 9 Dec 2024 15:29:59 +0400 Subject: [PATCH 06/12] fix(doc): update README.md Signed-off-by: Manushak Keramyan --- src/if-run/builtins/csv-import/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/if-run/builtins/csv-import/README.md b/src/if-run/builtins/csv-import/README.md index ec5a83bde..2612a8e51 100644 --- a/src/if-run/builtins/csv-import/README.md +++ b/src/if-run/builtins/csv-import/README.md @@ -13,7 +13,7 @@ For example, for the following CSV: | 2022 | Google Cloud | asia-east2 | Hong Kong | HK | HK | Hong Kong | 22.3,114.2 | 0.28 | | | 0 | 453 | | | | 360 | | 2022 | Google Cloud | asia-northeast1 | Tokyo | JP-TK | JP-TK | Tokyo | 35.6897,139.692 | 0.28 | | | 0 | 463 | | | | 463 | -You could select all the data for the cloud provider `Google Cloud` in the region `asia-east2` using the following configuration: +You could select all the data in the `csv` file using the following configuration: ```yaml filepath: https://raw.githubusercontent.com/Green-Software-Foundation/if-data/main/region-metadata.csv From 1c0bd2163f2393cbf48a8f732064e0209a788b0c Mon Sep 17 00:00:00 2001 From: Manushak Keramyan Date: Mon, 9 Dec 2024 19:06:37 +0400 Subject: [PATCH 07/12] Update manifests/examples/builtins/csv-import/test.csv Co-authored-by: Narek Hovhannisyan Signed-off-by: Manushak Keramyan --- manifests/examples/builtins/csv-import/test.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/manifests/examples/builtins/csv-import/test.csv b/manifests/examples/builtins/csv-import/test.csv index 6a506edb6..112f01d3e 100644 --- a/manifests/examples/builtins/csv-import/test.csv +++ b/manifests/examples/builtins/csv-import/test.csv @@ -1,4 +1,4 @@ timestamp,duration,cpu-util,energy 2023-07-06T00:00,1,20,5 2023-07-06T00:01,1,30,10 -2023-07-06T00:02,1,40,15 \ No newline at end of file +2023-07-06T00:02,1,40,15 From a993de0540be33d120fed3d643695b62b43c5eb2 Mon Sep 17 00:00:00 2001 From: manushak 
Date: Thu, 12 Dec 2024 12:11:23 +0400 Subject: [PATCH 08/12] feat(builtins): create utils for builtins --- src/if-run/builtins/util/helpers.ts | 97 +++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 src/if-run/builtins/util/helpers.ts diff --git a/src/if-run/builtins/util/helpers.ts b/src/if-run/builtins/util/helpers.ts new file mode 100644 index 000000000..a71fb8ed3 --- /dev/null +++ b/src/if-run/builtins/util/helpers.ts @@ -0,0 +1,97 @@ +import {readFile} from 'fs/promises'; +import axios from 'axios'; +import {parse} from 'csv-parse/sync'; +import {ERRORS} from '@grnsft/if-core/utils'; + +import {STRINGS} from '../../config'; + +const {FILE_FETCH_FAILED, FILE_READ_FAILED, MISSING_CSV_COLUMN} = STRINGS; + +const {FetchingFileError, ReadFileError, MissingCSVColumnError, CSVParseError} = + ERRORS; + +/** + * Checks if given string is URL. + */ +export const isURL = (filepath: string) => { + try { + new URL(filepath); + return true; + } catch (error) { + return false; + } +}; + +/** + * Checks if given `filepath` is url, then tries to fetch it. + * Otherwise tries to read file. + */ +export const retrieveFile = async (filepath: string) => { + if (isURL(filepath)) { + const {data} = await axios.get(filepath).catch(error => { + throw new FetchingFileError( + FILE_FETCH_FAILED(filepath, error.response.message) + ); + }); + + return data; + } + + return readFile(filepath).catch(error => { + throw new ReadFileError(FILE_READ_FAILED(filepath, error)); + }); +}; + +/** + * Checks if value is invalid: `null` or an empty string, then sets `nan` instead (`undefined` is returned as is). + */ +export const setNanValue = (value: any) => + value === null || value === '' ? 'nan' : value; + +/** + * Converts empty values to `nan`. 
+ */ +export const nanifyEmptyValues = (object: any) => { + if (typeof object === 'object') { + const keys = Object.keys(object); + + keys.forEach(key => { + const value = object[key]; + object[key] = setNanValue(value); + }); + + return object; + } + + return setNanValue(object); +}; + +/** + * If `field` is missing from `object`, then reject with error. + * Otherwise nanify empty values and return data. + */ +export const fieldAccessor = (field: string, object: any) => { + if (!(`${field}` in object)) { + throw new MissingCSVColumnError(MISSING_CSV_COLUMN(field)); + } + + return nanifyEmptyValues(object[field]); +}; + +/** + * Parses CSV file. + */ +export const parseCSVFile = (file: string | Buffer) => { + try { + const parsedCSV: any[] = parse(file, { + columns: true, + skip_empty_lines: true, + cast: true, + }); + + return parsedCSV; + } catch (error: any) { + console.error(error); + throw new CSVParseError(error); + } +}; From 10212e6244a0c2bb88f0287733c605a87f89a3b4 Mon Sep 17 00:00:00 2001 From: manushak Date: Thu, 12 Dec 2024 13:18:47 +0400 Subject: [PATCH 09/12] feat(builtins): move common functions into helpers --- src/if-run/builtins/csv-import/index.ts | 113 +++-------------------- src/if-run/builtins/csv-lookup/index.ts | 114 ++---------------------- 2 files changed, 19 insertions(+), 208 deletions(-) diff --git a/src/if-run/builtins/csv-import/index.ts b/src/if-run/builtins/csv-import/index.ts index f459e4ad7..7b2aa5e0d 100644 --- a/src/if-run/builtins/csv-import/index.ts +++ b/src/if-run/builtins/csv-import/index.ts @@ -1,8 +1,5 @@ /* eslint-disable eqeqeq */ -import {readFile} from 'fs/promises'; -import axios from 'axios'; import {z} from 'zod'; -import {parse} from 'csv-parse/sync'; import {ConfigParams, PluginParams} from '@grnsft/if-core/types'; import {PluginFactory} from '@grnsft/if-core/interfaces'; @@ -10,20 +7,16 @@ import {ERRORS, validate} from '@grnsft/if-core/utils'; import {STRINGS} from '../../config'; -const { - FILE_FETCH_FAILED, - 
FILE_READ_FAILED, - MISSING_CSV_COLUMN, - MISSING_CONFIG, -} = STRINGS; - -const { - FetchingFileError, - ReadFileError, - MissingCSVColumnError, - ConfigError, - CSVParseError, -} = ERRORS; +import { + fieldAccessor, + nanifyEmptyValues, + parseCSVFile, + retrieveFile, +} from '../util/helpers'; + +const {MISSING_CONFIG} = STRINGS; + +const {ConfigError} = ERRORS; export const CSVImport = PluginFactory({ configValidation: (config: ConfigParams) => { @@ -59,74 +52,6 @@ export const CSVImport = PluginFactory({ }, }); -/** - * Checks if given string is URL. - */ -const isURL = (filepath: string) => { - try { - new URL(filepath); - return true; - } catch (error) { - return false; - } -}; - -/** - * Checks if given `filepath` is url, then tries to fetch it. - * Otherwise tries to read file. - */ -const retrieveFile = async (filepath: string) => { - if (isURL(filepath)) { - const {data} = await axios.get(filepath).catch(error => { - throw new FetchingFileError( - FILE_FETCH_FAILED(filepath, error.response.message) - ); - }); - - return data; - } - - return readFile(filepath).catch(error => { - throw new ReadFileError(FILE_READ_FAILED(filepath, error)); - }); -}; - -/** - * Checks if value is invalid: `undefined`, `null` or an empty string, then sets `nan` instead. - */ -const setNanValue = (value: any) => - value == null || value === '' ? 'nan' : value; - -/** - * Converts empty values to `nan`. - */ -const nanifyEmptyValues = (object: any) => { - if (typeof object === 'object') { - const keys = Object.keys(object); - - keys.forEach(key => { - const value = object[key]; - object[key] = setNanValue(value); - }); - - return object; - } - - return setNanValue(object); -}; - -/** - * If `field` is missing from `object`, then reject with error. - * Otherwise nanify empty values and return data. 
- */ -const fieldAccessor = (field: string, object: any) => { - if (!(`${field}` in object)) { - throw new MissingCSVColumnError(MISSING_CSV_COLUMN(field)); - } - - return nanifyEmptyValues(object[field]); -}; - /** * 1. If output is anything, then removes query data from csv record to escape duplicates. * 2. Otherwise checks if it's a miltidimensional array, then grabs multiple fields (). @@ -166,21 +91,3 @@ const filterOutput = ( [output]: fieldAccessor(output, dataFromCSV), }; }; - -/** - * Parses CSV file. - */ -const parseCSVFile = (file: string | Buffer) => { - try { - const parsedCSV: any[] = parse(file, { - columns: true, - skip_empty_lines: true, - cast: true, - }); - - return parsedCSV; - } catch (error: any) { - console.error(error); - throw new CSVParseError(error); - } -}; diff --git a/src/if-run/builtins/csv-lookup/index.ts b/src/if-run/builtins/csv-lookup/index.ts index 08f7760c6..171a77af9 100644 --- a/src/if-run/builtins/csv-lookup/index.ts +++ b/src/if-run/builtins/csv-lookup/index.ts @@ -1,31 +1,21 @@ /* eslint-disable eqeqeq */ -import {readFile} from 'fs/promises'; -import axios from 'axios'; import {z} from 'zod'; -import {parse} from 'csv-parse/sync'; import {ConfigParams, PluginParams} from '@grnsft/if-core/types'; import {PluginFactory} from '@grnsft/if-core/interfaces'; import {ERRORS, validate} from '@grnsft/if-core/utils'; import {STRINGS} from '../../config'; +import { + fieldAccessor, + nanifyEmptyValues, + parseCSVFile, + retrieveFile, +} from '../util/helpers'; -const { - FILE_FETCH_FAILED, - FILE_READ_FAILED, - MISSING_CSV_COLUMN, - MISSING_CONFIG, - NO_QUERY_DATA, -} = STRINGS; - -const { - FetchingFileError, - ReadFileError, - MissingCSVColumnError, - QueryDataNotFoundError, - ConfigError, - CSVParseError, -} = ERRORS; +const {MISSING_CONFIG, NO_QUERY_DATA} = STRINGS; + +const {QueryDataNotFoundError, ConfigError} = ERRORS; export const CSVLookup = PluginFactory({ configValidation: (config: ConfigParams) => { @@ -78,74 +68,6 @@ 
export const CSVLookup = PluginFactory({ }, }); -/** - * Checks if given string is URL. - */ -const isURL = (filepath: string) => { - try { - new URL(filepath); - return true; - } catch (error) { - return false; - } -}; - -/** - * Checks if given `filepath` is url, then tries to fetch it. - * Otherwise tries to read file. - */ -const retrieveFile = async (filepath: string) => { - if (isURL(filepath)) { - const {data} = await axios.get(filepath).catch(error => { - throw new FetchingFileError( - FILE_FETCH_FAILED(filepath, error.response.message) - ); - }); - - return data; - } - - return readFile(filepath).catch(error => { - throw new ReadFileError(FILE_READ_FAILED(filepath, error)); - }); -}; - -/** - * Checks if value is invalid: `undefined`, `null` or an empty string, then sets `nan` instead. - */ -const setNanValue = (value: any) => - value == null || value === '' ? 'nan' : value; - -/** - * Converts empty values to `nan`. - */ -const nanifyEmptyValues = (object: any) => { - if (typeof object === 'object') { - const keys = Object.keys(object); - - keys.forEach(key => { - const value = object[key]; - object[key] = setNanValue(value); - }); - - return object; - } - - return setNanValue(object); -}; - -/** - * If `field` is missing from `object`, then reject with error. - * Otherwise nanify empty values and return data. - */ -const fieldAccessor = (field: string, object: any) => { - if (!(`${field}` in object)) { - throw new MissingCSVColumnError(MISSING_CSV_COLUMN(field)); - } - - return nanifyEmptyValues(object[field]); -}; - /** * 1. If output is anything, then removes query data from csv record to escape duplicates. * 2. Otherwise checks if it's a miltidimensional array, then grabs multiple fields (). @@ -207,21 +129,3 @@ const withCriteria = (queryData: Record) => (csvRecord: any) => { return ifMatchesCriteria.every(value => value === true); }; - -/** - * Parses CSV file. 
- */ -const parseCSVFile = (file: string | Buffer) => { - try { - const parsedCSV: any[] = parse(file, { - columns: true, - skip_empty_lines: true, - cast: true, - }); - - return parsedCSV; - } catch (error: any) { - console.error(error); - throw new CSVParseError(error); - } -}; From a3e4e96b863889e5a36f2794ac3f4b97274a06d3 Mon Sep 17 00:00:00 2001 From: manushak Date: Thu, 12 Dec 2024 13:19:28 +0400 Subject: [PATCH 10/12] test(util): add test for builtin util functions --- .../if-run/builtins/util/helpers.test.ts | 281 ++++++++++++++++++ 1 file changed, 281 insertions(+) create mode 100644 src/__tests__/if-run/builtins/util/helpers.test.ts diff --git a/src/__tests__/if-run/builtins/util/helpers.test.ts b/src/__tests__/if-run/builtins/util/helpers.test.ts new file mode 100644 index 000000000..56fae9682 --- /dev/null +++ b/src/__tests__/if-run/builtins/util/helpers.test.ts @@ -0,0 +1,281 @@ +import axios from 'axios'; +import {parse} from 'csv-parse/sync'; + +import { + isURL, + retrieveFile, + setNanValue, + nanifyEmptyValues, + fieldAccessor, + parseCSVFile, +} from '../../../../if-run/builtins/util/helpers'; + +import {ERRORS} from '@grnsft/if-core/utils'; +import {STRINGS} from '../../../../if-run/config'; + +jest.mock('csv-parse/sync', () => ({ + parse: jest.fn(), +})); + +const mockParse = parse as jest.Mock; + +const {FILE_FETCH_FAILED, MISSING_CSV_COLUMN} = STRINGS; + +const {FetchingFileError, MissingCSVColumnError, CSVParseError} = ERRORS; + +jest.mock('axios'); +jest.mock('fs/promises'); + +describe('util/helpers: ', () => { + describe('isURL(): ', () => { + it('returns true for valid URLs.', () => { + const validUrls = [ + 'https://www.example.com', + 'http://example.com', + 'ftp://example.com/file.csv', + 'https://subdomain.example.com/path?query=value#hash', + ]; + + validUrls.forEach(url => { + expect(isURL(url)).toBe(true); + }); + }); + + it('returns false for invalid URLs.', () => { + const invalidUrls = [ + 'just-a-string', + 
'https//example.com', + '://missing-protocol.com', + 'www.example.com', // missing protocol + 'example', // no domain or protocol + 'https://', + ]; + + invalidUrls.forEach(url => { + expect(isURL(url)).toBe(false); + }); + }); + + it('returns false for empty or non-string inputs.', () => { + expect(isURL('')).toBe(false); + // @ts-expect-error Testing non-string input + expect(isURL(null)).toBe(false); + // @ts-expect-error Testing non-string input + expect(isURL(undefined)).toBe(false); + // @ts-expect-error Testing non-string input + expect(isURL(123)).toBe(false); + }); + }); + + describe('retrieveFile(): ', () => { + const mockedAxiosGet = axios.get as jest.MockedFunction; + + afterEach(() => { + jest.clearAllMocks(); + }); + + it('fetches a file from a URL and return its data.', async () => { + const filepath = 'https://example.com/file.txt'; + const mockData = 'file content'; + + mockedAxiosGet.mockResolvedValue({data: mockData}); + + const result = await retrieveFile(filepath); + + expect.assertions(2); + expect(mockedAxiosGet).toHaveBeenCalledWith(filepath); + expect(result).toBe(mockData); + }); + + it('throws an error when fetching a file from a URL fails.', async () => { + const filepath = 'https://example.com/file.txt'; + const mockError = {response: {message: 'Network error'}}; + + mockedAxiosGet.mockRejectedValue(mockError); + + expect.assertions(3); + + await expect(retrieveFile(filepath)).rejects.toThrow(FetchingFileError); + await expect(retrieveFile(filepath)).rejects.toThrow( + FILE_FETCH_FAILED(filepath, mockError.response.message) + ); + expect(mockedAxiosGet).toHaveBeenCalledWith(filepath); + }); + }); + + describe('setNanValue(): ', () => { + it('returns "nan" for falsy value.', () => { + expect(setNanValue(null)).toBe('nan'); + expect(setNanValue(undefined)).toBe(undefined); + expect(setNanValue('')).toBe('nan'); + }); + + it('returns the original value for non-empty string.', () => { + expect(setNanValue('mock')).toBe('mock'); + }); + + 
it('returns the original value for a number.', () => { + expect(setNanValue(42)).toBe(42); + }); + + it('returns the original value for a boolean.', () => { + expect(setNanValue(true)).toBe(true); + expect(setNanValue(false)).toBe(false); + }); + + it('returns the original value for an object.', () => { + const obj = {key: 'value'}; + expect(setNanValue(obj)).toBe(obj); + }); + + it('returns the original value for an array.', () => { + const arr = [1, 2, 3]; + expect(setNanValue(arr)).toBe(arr); + }); + }); + + describe('nanifyEmptyValues(): ', () => { + it('converts empty values to NaN for a flat object.', () => { + const input = {a: '', b: null, c: 5, d: 'text'}; + const expected = {a: 'nan', b: 'nan', c: 5, d: 'text'}; + + expect(nanifyEmptyValues(input)).toEqual(expected); + }); + + it('handles nested objects.', () => { + const input = {a: '', b: {c: null, d: 'text'}}; + const expected = {a: 'nan', b: {c: null, d: 'text'}}; + + expect(nanifyEmptyValues(input)).toEqual(expected); + }); + + it('handles non-object input values.', () => { + expect(nanifyEmptyValues('')).toBe('nan'); + expect(nanifyEmptyValues(42)).toBe(42); + expect(nanifyEmptyValues('text')).toBe('text'); + }); + + it('return NaN for empty objects.', () => { + const input = {}; + const expected = {}; + + expect(nanifyEmptyValues(input)).toEqual(expected); + }); + }); + + describe('fieldAccessor(): ', () => { + const mockObject = { + timestamp: '2023-08-06T00:00', + duration: 3600, + 'cpu/utilization': 80, + }; + + it('returns the value of the specified field.', () => { + const field = 'cpu/utilization'; + const expectedValue = 80; + + const result = fieldAccessor(field, mockObject); + + expect(result).toBe(expectedValue); + }); + + it('throws an error if the field does not exist in the object.', () => { + const field = 'nonExistentField'; + + expect(() => fieldAccessor(field, mockObject)).toThrow( + MissingCSVColumnError + ); + expect(() => fieldAccessor(field, mockObject)).toThrow( + 
MISSING_CSV_COLUMN(field) + ); + }); + + it('throws an error for invalid input types.', () => { + const invalidObject = null; + const field = 'name'; + + expect(() => fieldAccessor(field, invalidObject)).toThrow(TypeError); + }); + }); + + describe('parseCSVFile(): ', () => { + beforeEach(() => { + jest.clearAllMocks(); + }); + + it('parses a valid CSV file string and return the parsed result.', () => { + const csvInput = + 'timestamp,duration,cpu/utilization\n2023-08-06T00:00,3600,80\n2023-08-06T00:00,3600,90'; + const parsedResult = [ + {timestamp: '2023-08-06T00:00', duration: 3600, 'cpu/utilization': 80}, + {timestamp: '2023-08-06T00:00', duration: 3600, 'cpu/utilization': 90}, + ]; + + mockParse.mockReturnValue(parsedResult); + + const result = parseCSVFile(csvInput); + + expect(result).toEqual(parsedResult); + expect(mockParse).toHaveBeenCalledWith(csvInput, { + columns: true, + skip_empty_lines: true, + cast: true, + }); + expect(mockParse).toHaveBeenCalledTimes(1); + }); + + it('parses a valid CSV file buffer and return the parsed result.', () => { + const csvInput = Buffer.from( + 'timestamp,duration,cpu/utilization\n2023-08-06T00:00,3600,80\n2023-08-06T00:00,3600,90' + ); + const parsedResult = [ + {timestamp: '2023-08-06T00:00', duration: 3600, 'cpu/utilization': 80}, + {timestamp: '2023-08-06T00:00', duration: 3600, 'cpu/utilization': 90}, + ]; + + mockParse.mockReturnValue(parsedResult); + + const result = parseCSVFile(csvInput); + + expect(result).toEqual(parsedResult); + expect(mockParse).toHaveBeenCalledWith(csvInput, { + columns: true, + skip_empty_lines: true, + cast: true, + }); + expect(mockParse).toHaveBeenCalledTimes(1); + }); + + it('throws an error if parsing fails.', () => { + const csvInput = 'invalid,csv,data'; + const mockError = new Error('Invalid CSV format'); + + mockParse.mockImplementation(() => { + throw mockError; + }); + + expect(() => parseCSVFile(csvInput)).toThrow(CSVParseError); + 
expect(mockParse).toHaveBeenCalledWith(csvInput, { + columns: true, + skip_empty_lines: true, + cast: true, + }); + expect(mockParse).toHaveBeenCalledTimes(1); + }); + + it('logs the error when parsing fails.', () => { + const consoleErrorSpy = jest.spyOn(console, 'error').mockImplementation(); + const csvInput = 'invalid,csv,data'; + const mockError = new Error('Invalid CSV format'); + + mockParse.mockImplementation(() => { + throw mockError; + }); + + expect(() => parseCSVFile(csvInput)).toThrow(CSVParseError); + expect(consoleErrorSpy).toHaveBeenCalledWith(mockError); + + consoleErrorSpy.mockRestore(); + }); + }); +}); From b25d48988ed469baf878dc923249ca0a0b62be3f Mon Sep 17 00:00:00 2001 From: manushak Date: Thu, 12 Dec 2024 15:50:38 +0400 Subject: [PATCH 11/12] fix(builtins): rename helpers to csv-helpers --- src/if-run/builtins/csv-import/index.ts | 2 +- src/if-run/builtins/csv-lookup/index.ts | 2 +- src/if-run/builtins/util/{helpers.ts => csv-helpers.ts} | 0 3 files changed, 2 insertions(+), 2 deletions(-) rename src/if-run/builtins/util/{helpers.ts => csv-helpers.ts} (100%) diff --git a/src/if-run/builtins/csv-import/index.ts b/src/if-run/builtins/csv-import/index.ts index 7b2aa5e0d..37161c0fa 100644 --- a/src/if-run/builtins/csv-import/index.ts +++ b/src/if-run/builtins/csv-import/index.ts @@ -12,7 +12,7 @@ import { nanifyEmptyValues, parseCSVFile, retrieveFile, -} from '../util/helpers'; +} from '../util/csv-helpers'; const {MISSING_CONFIG} = STRINGS; diff --git a/src/if-run/builtins/csv-lookup/index.ts b/src/if-run/builtins/csv-lookup/index.ts index 171a77af9..141d73ab0 100644 --- a/src/if-run/builtins/csv-lookup/index.ts +++ b/src/if-run/builtins/csv-lookup/index.ts @@ -11,7 +11,7 @@ import { nanifyEmptyValues, parseCSVFile, retrieveFile, -} from '../util/helpers'; +} from '../util/csv-helpers'; const {MISSING_CONFIG, NO_QUERY_DATA} = STRINGS; diff --git a/src/if-run/builtins/util/helpers.ts b/src/if-run/builtins/util/csv-helpers.ts similarity index 
100% rename from src/if-run/builtins/util/helpers.ts rename to src/if-run/builtins/util/csv-helpers.ts From 265438fff3ce23a2424cccf680c87173dd891bc0 Mon Sep 17 00:00:00 2001 From: manushak Date: Thu, 12 Dec 2024 15:51:49 +0400 Subject: [PATCH 12/12] test(builtins): rename helpers test --- .../builtins/util/{helpers.test.ts => csv-helpers.test.ts} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename src/__tests__/if-run/builtins/util/{helpers.test.ts => csv-helpers.test.ts} (99%) diff --git a/src/__tests__/if-run/builtins/util/helpers.test.ts b/src/__tests__/if-run/builtins/util/csv-helpers.test.ts similarity index 99% rename from src/__tests__/if-run/builtins/util/helpers.test.ts rename to src/__tests__/if-run/builtins/util/csv-helpers.test.ts index 56fae9682..17aa55f39 100644 --- a/src/__tests__/if-run/builtins/util/helpers.test.ts +++ b/src/__tests__/if-run/builtins/util/csv-helpers.test.ts @@ -8,7 +8,7 @@ import { nanifyEmptyValues, fieldAccessor, parseCSVFile, -} from '../../../../if-run/builtins/util/helpers'; +} from '../../../../if-run/builtins/util/csv-helpers'; import {ERRORS} from '@grnsft/if-core/utils'; import {STRINGS} from '../../../../if-run/config';