Skip to content

Commit

Permalink
Merge pull request #243 from Canner/feature/huggingface-table-questio…
Browse files Browse the repository at this point in the history
…n-answering

Feature: HuggingFace TableQuestionAnswering filter
  • Loading branch information
kokokuo authored Jul 21, 2023
2 parents 9025811 + 9375bfe commit d8865d6
Show file tree
Hide file tree
Showing 24 changed files with 870 additions and 8 deletions.
20 changes: 12 additions & 8 deletions packages/cli/test/cli.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -47,14 +47,18 @@ afterEach(async () => {
await runShutdownJobs();
});

it('Init command should create new folder with default config', async () => {
// Action
const config: any = jsYAML.load(
await fs.readFile(path.resolve(projectRoot, 'vulcan.yaml'), 'utf8')
);
// Assert
expect(config.name).toBe(projectName);
});
// Reads vulcan.yaml from the project root and checks that its `name`
// field equals the project name used by this test suite.
it(
'Init command should create new folder with default config',
async () => {
// Action
const config: any = jsYAML.load(
await fs.readFile(path.resolve(projectRoot, 'vulcan.yaml'), 'utf8')
);
// Assert
expect(config.name).toBe(projectName);
},
// Per-test timeout in milliseconds (10 seconds)
10 * 1000
);

it('Build command should make result.json', async () => {
// Action
Expand Down
3 changes: 3 additions & 0 deletions packages/core/src/lib/functional-extensions/filter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ export interface FunctionalFilterOptions {
value: any;
args: Record<string, any>;
metadata: NunjucksExecutionMetadata;
// The options from configuration for the filter extension
options: Record<string, any> | Array<Record<string, any>>;
}

export type FunctionalFilter = (
Expand All @@ -32,6 +34,7 @@ export const createFilterExtension = (
value: options.value,
args: options.args[0],
metadata: options.metadata,
options: this.getConfig(),
});
}
}
Expand Down
3 changes: 3 additions & 0 deletions packages/core/src/lib/functional-extensions/tag.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ export interface FunctionalTagOptions {
sql: string;
args: Record<string, any>;
metadata: NunjucksExecutionMetadata;
// The options from configuration for the tag extension
options: Record<string, any> | Array<Record<string, any>>;
}

export type FunctionalTag = (options: FunctionalTagOptions) => Promise<string>;
Expand Down Expand Up @@ -57,6 +59,7 @@ export const createTagExtension = (
sql,
args: args[0] as any,
metadata,
options: this.getConfig(),
});
return new nunjucks.runtime.SafeString(result);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
# Hugging Face

## Installation

1. Install the package:

**If you are developing with the binary, the package is already bundled in it, so you can skip this step.**

```bash
npm i @vulcan-sql/extension-huggingface
```

2. Update your `vulcan.yaml` file to enable the extension:

```yaml
extensions:
...
// highlight-next-line
hf: '@vulcan-sql/extension-huggingface'

// highlight-next-line
hf:
// highlight-next-line
# Required: Hugging Face access token, see: https://huggingface.co/docs/hub/security-tokens
// highlight-next-line
accessToken: 'your-huggingface-access-token'
```
## Using Hugging Face
VulcanSQL supports using Hugging Face tasks through [VulcanSQL Filters](https://vulcansql.com/docs/develop/advance#filters) statements.
:::caution
Hugging Face has a [rate limit](https://huggingface.co/docs/api-inference/faq#rate-limits), so it does not allow sending large datasets to the Hugging Face library for processing.
In addition, using a different Hugging Face model may yield different results or even result in failure.
:::
### Table Question Answering
[Table Question Answering](https://huggingface.co/docs/api-inference/detailed_parameters#table-question-answering-task) is one of the Natural Language Processing tasks supported by Hugging Face.
To run this task, use the `huggingface_table_question_answering` filter.

Sample 1:

```sql
{% set data = [
{
"repository": "vulcan-sql",
"topic": ["analytics", "data-lake", "data-warehouse", "api-builder"],
"description":"Create and share Data APIs fast! Data API framework for DuckDB, ClickHouse, Snowflake, BigQuery, PostgreSQL"
},
{
"repository": "accio",
"topic": ["data-analytics", "data-lake", "data-warehouse", "bussiness-intelligence"],
"description": "Query Your Data Warehouse Like Exploring One Big View."
},
{
"repository": "hell-word",
"topic": [],
"description": "Sample repository for testing"
}
] %}
-- The source data for "huggingface_table_question_answering" needs to be an array of objects.
SELECT {{ data | huggingface_table_question_answering(query="How many repositories related to data-lake topic?") }}
```

Sample 2:

```sql
{% req products %}
SELECT * FROM products
{% endreq %}
SELECT {{ products.value() | huggingface_table_question_answering(query="How many products related to 3C type?", model="microsoft/tapex-base-finetuned-wtq", wait_for_model=true, use_cache=true) }}
```

### Arguments

Please check [Table Question Answering](https://huggingface.co/docs/api-inference/detailed_parameters#table-question-answering-task) for further information.

| Name | Required | Default | Description |
| -------------- | -------- | ------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- |
| query | Y | | The query in plain text that you want to ask the table. |
| model | N | google/tapas-base-finetuned-wtq | The model id of a pretrained model hosted inside a model repo on huggingface.co. See: https://huggingface.co/models?pipeline_tag=table-question-answering |
| use_cache | N | true | There is a cache layer on the inference API to speed up requests we have already seen |
| wait_for_model | N | false | If the model is not ready, wait for it instead of receiving 503. It limits the number of requests required to get your inference done |
18 changes: 18 additions & 0 deletions packages/extension-huggingface/.eslintrc.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
{
"extends": ["../../.eslintrc.json"],
"ignorePatterns": ["!**/*"],
"overrides": [
{
"files": ["*.ts", "*.tsx", "*.js", "*.jsx"],
"rules": {}
},
{
"files": ["*.ts", "*.tsx"],
"rules": {}
},
{
"files": ["*.js", "*.jsx"],
"rules": {}
}
]
}
2 changes: 2 additions & 0 deletions packages/extension-huggingface/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# .env file used for running test cases
.env
72 changes: 72 additions & 0 deletions packages/extension-huggingface/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
# extension-huggingface

Supports Hugging Face Inference API tasks in VulcanSQL. Provided by [Canner](https://canner.io/).

## Installation

1. Install the package:

```bash
npm i @vulcan-sql/extension-huggingface
```

2. Update your `vulcan.yaml` file to enable the extension:

```yaml
extensions:
hf: '@vulcan-sql/extension-huggingface'

hf:
# Required: Hugging Face access token, see: https://huggingface.co/docs/hub/security-tokens
accessToken: 'your-huggingface-access-token'
```
## Using Hugging Face
VulcanSQL supports using Hugging Face tasks through [VulcanSQL Filters](https://vulcansql.com/docs/develop/advance#filters) statements.
**⚠️ Caution**: Hugging Face has a [rate limit](https://huggingface.co/docs/api-inference/faq#rate-limits), so it does not allow sending large datasets to the Hugging Face library for processing. In addition, using a different Hugging Face model may yield different results or even result in failure.
### Table Question Answering
[Table Question Answering](https://huggingface.co/docs/api-inference/detailed_parameters#table-question-answering-task) is one of the Natural Language Processing tasks supported by Hugging Face.
To run this task, use the `huggingface_table_question_answering` filter.

Sample 1:

```sql
{% set data = [
{
"repository": "vulcan-sql",
"topic": ["analytics", "data-lake", "data-warehouse", "api-builder"],
"description":"Create and share Data APIs fast! Data API framework for DuckDB, ClickHouse, Snowflake, BigQuery, PostgreSQL"
},
{
"repository": "accio",
"topic": ["data-analytics", "data-lake", "data-warehouse", "bussiness-intelligence"],
"description": "Query Your Data Warehouse Like Exploring One Big View."
},
{
"repository": "hell-word",
"topic": [],
"description": "Sample repository for testing"
}
] %}
-- The source data for "huggingface_table_question_answering" needs to be an array of objects.
SELECT {{ data | huggingface_table_question_answering(query="How many repositories related to data-lake topic?") }}
```

Sample 2:

```sql
{% req products %}
SELECT * FROM products
{% endreq %}
-- The "model" keyword argument is optional. If not provided, the default value is 'google/tapas-base-finetuned-wtq'.
-- The "wait_for_model" keyword argument is optional. If not provided, the default value is false.
-- The "use_cache" keyword argument is optional. If not provided, the default value is true.
SELECT {{ products.value() | huggingface_table_question_answering(query="How many products related to 3C type?", model="microsoft/tapex-base-finetuned-wtq", wait_for_model=true, use_cache=true) }}
```
15 changes: 15 additions & 0 deletions packages/extension-huggingface/jest.config.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
// Jest configuration for the extension-huggingface package.
module.exports = {
displayName: 'extension-huggingface',
// Preset file located two directories above this package
preset: '../../jest.preset.ts',
testEnvironment: 'node',
globals: {
'ts-jest': {
// TypeScript config that ts-jest uses when compiling test files
tsconfig: '<rootDir>/tsconfig.spec.json',
},
},
transform: {
// Files ending in .ts or .js are transformed by ts-jest
'^.+\\.[tj]s$': 'ts-jest',
},
moduleFileExtensions: ['ts', 'js', 'html'],
coverageDirectory: '../../coverage/packages/extension-huggingface',
};
28 changes: 28 additions & 0 deletions packages/extension-huggingface/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
{
"name": "@vulcan-sql/extension-huggingface",
"description": "Hugging Face feature for VulcanSQL",
"version": "0.6.0",
"type": "commonjs",
"publishConfig": {
"access": "public"
},
"keywords": [
"vulcan",
"vulcan-sql",
"data",
"sql",
"database",
"data-warehouse",
"data-lake",
"api-builder",
"huggingface"
],
"repository": {
"type": "git",
"url": "https://github.com/Canner/vulcan.git"
},
"license": "MIT",
"peerDependencies": {
"@vulcan-sql/core": "~0.6.0-0"
}
}
85 changes: 85 additions & 0 deletions packages/extension-huggingface/project.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
{
"root": "packages/extension-huggingface",
"sourceRoot": "packages/extension-huggingface/src",
"targets": {
"build": {
"executor": "@nrwl/workspace:run-commands",
"options": {
"command": "yarn ts-node ./tools/scripts/replaceAlias.ts extension-huggingface"
},
"dependsOn": [
{
"projects": "self",
"target": "tsc"
},
{
"projects": "self",
"target": "install-dependencies"
}
]
},
"tsc": {
"executor": "@nrwl/js:tsc",
"outputs": ["{options.outputPath}"],
"options": {
"outputPath": "dist/packages/extension-huggingface",
"main": "packages/extension-huggingface/src/index.ts",
"tsConfig": "packages/extension-huggingface/tsconfig.lib.json",
"assets": ["packages/extension-huggingface/*.md"],
"buildableProjectDepsInPackageJsonType": "dependencies"
},
"dependsOn": [
{
"projects": "dependencies",
"target": "build"
},
{
"projects": "self",
"target": "install-dependencies"
}
]
},
"lint": {
"executor": "@nrwl/linter:eslint",
"outputs": ["{options.outputFile}"],
"options": {
"lintFilePatterns": ["packages/extension-huggingface/**/*.ts"]
}
},
"test": {
"executor": "@nrwl/jest:jest",
"outputs": ["coverage/packages/extension-huggingface"],
"options": {
"jestConfig": "packages/extension-huggingface/jest.config.ts",
"passWithNoTests": true
},
"dependsOn": [
{
"projects": "self",
"target": "install-dependencies"
}
]
},
"publish": {
"executor": "@nrwl/workspace:run-commands",
"options": {
"command": "node ../../../tools/scripts/publish.mjs {args.tag} {args.version}",
"cwd": "dist/packages/extension-huggingface"
},
"dependsOn": [
{
"projects": "self",
"target": "build"
}
]
},
"install-dependencies": {
"executor": "@nrwl/workspace:run-commands",
"options": {
"command": "yarn",
"cwd": "packages/extension-huggingface"
}
}
},
"tags": []
}
10 changes: 10 additions & 0 deletions packages/extension-huggingface/src/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
// Import the Builder and Runner under filter-specific aliases and export the
// aliased classes. This prevents the extension loader from loading the same
// Builder and Runner class when multiple functional filters or tags are created.
import {
Builder as HuggingFaceTableQuestionAnsweringFilterBuilder,
Runner as HuggingFaceTableQuestionAnsweringFilterRunner,
} from './lib/filters/tableQuestionAnswering';

export default [
HuggingFaceTableQuestionAnsweringFilterBuilder,
HuggingFaceTableQuestionAnsweringFilterRunner,
];
1 change: 1 addition & 0 deletions packages/extension-huggingface/src/lib/filters/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
export * from './tableQuestionAnswering';
Loading

0 comments on commit d8865d6

Please sign in to comment.