Skip to content

Commit

Permalink
Specify Format type (#59)
Browse files Browse the repository at this point in the history
* commit

* fix tests
  • Loading branch information
friendlymatthew authored Jan 23, 2024
1 parent eb28e99 commit 7f06d69
Show file tree
Hide file tree
Showing 5 changed files with 47 additions and 22 deletions.
3 changes: 2 additions & 1 deletion examples/client/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@
<script>
Appendable.init(
"green_tripdata_2023-01.jsonl",
"green_tripdata_2023-01.jsonl.index"
"green_tripdata_2023-01.jsonl.index",
Appendable.FormatType.Jsonl
).then(async (db) => {
let dbFields = new Set();
let fieldTypes = {};
Expand Down
1 change: 0 additions & 1 deletion pkg/appendable/csv_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,6 @@ func fieldRankCsvField(fieldValue any) int {
func inferCSVField(fieldValue string) (interface{}, protocol.FieldType) {

if fieldValue == "" {
fmt.Printf("sir this is empty")
return nil, protocol.FieldTypeNull
}

Expand Down
49 changes: 32 additions & 17 deletions src/database.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import { FormatType } from ".";
import { DataFile } from "./data-file";
import { VersionedIndexFile } from "./index-file";

Expand Down Expand Up @@ -33,18 +34,27 @@ export function containsType(fieldType: bigint, desiredType: FieldType) {
return (fieldType & BigInt(desiredType)) !== BigInt(0);
}

function parseIgnoringSuffix(x: string) {
// TODO: implement a proper parser.
try {
return JSON.parse(x);
} catch (error) {
const e = error as Error;
let m = e.message.match(/position\s+(\d+)/);
if (m) {
x = x.slice(0, Number(m[1]));
}
/**
 * Parses the leading value out of a raw slice of the data file, tolerating
 * trailing bytes that belong to whatever follows the value.
 *
 * For JSONL the slice is first parsed as-is. If `JSON.parse` fails and its
 * error message reports a character position, everything from that position
 * onward is dropped and the remaining prefix is parsed instead.
 *
 * @param x - Raw text read from the data file.
 * @param format - Format of the data file the text was read from.
 * @returns The value produced by `JSON.parse`.
 * @throws The `JSON.parse` error when the text cannot be parsed and no
 *   position could be recovered from the error message, or when the trimmed
 *   prefix is itself invalid.
 */
function parseIgnoringSuffix(x: string, format: FormatType) {
  switch (format) {
    case FormatType.Jsonl:
      // TODO: implement a proper parser.
      try {
        return JSON.parse(x);
      } catch (error) {
        // The engine's message names the offset of the first offending
        // character; the text before that offset is the candidate value.
        const m =
          error instanceof Error
            ? error.message.match(/position\s+(\d+)/)
            : null;
        if (!m) {
          // No offset to trim at: re-parsing the same text would only fail
          // again, so surface the original error.
          throw error;
        }
        return JSON.parse(x.slice(0, Number(m[1])));
      }

    case FormatType.Csv:
    default:
      // NOTE(review): there is no CSV-specific parsing yet, so CSV values
      // (and any unrecognised format) are handed to JSON.parse unchanged.
      // Text that is not a valid JSON literal will throw here.
      return JSON.parse(x);
  }
}

function fieldRank(token: any) {
Expand Down Expand Up @@ -86,14 +96,16 @@ function cmp(a: any, b: any) {
export class Database<T extends Schema> {
private constructor(
private dataFile: DataFile,
private indexFile: VersionedIndexFile<T>
private indexFile: VersionedIndexFile<T>,
private formatType: FormatType
) {}

static forDataFileAndIndexFile<T extends Schema>(
dataFile: DataFile,
indexFile: VersionedIndexFile<T>
indexFile: VersionedIndexFile<T>,
format: FormatType
) {
return new Database(dataFile, indexFile);
return new Database(dataFile, indexFile, format);
}

async fields() {
Expand Down Expand Up @@ -125,7 +137,8 @@ export class Database<T extends Schema> {
indexRecord.fieldStartByteOffset,
indexRecord.fieldStartByteOffset + indexRecord.fieldLength
);
const dataFieldValue = parseIgnoringSuffix(data);
console.log("data looks like: ", data);
const dataFieldValue = parseIgnoringSuffix(data, this.formatType);
console.log(mid, dataFieldValue);
if (cmp(value, dataFieldValue) < 0) {
end = mid;
Expand All @@ -149,7 +162,8 @@ export class Database<T extends Schema> {
await this.dataFile.get(
indexRecord.fieldStartByteOffset,
indexRecord.fieldStartByteOffset + indexRecord.fieldLength
)
),
this.formatType
);
if (cmp(value, dataFieldValue) < 0) {
end = mid;
Expand Down Expand Up @@ -209,7 +223,8 @@ export class Database<T extends Schema> {
await this.dataFile.get(
dataRecord.startByteOffset,
dataRecord.endByteOffset
)
),
this.formatType
);
yield dataFieldValue;
}
Expand Down
13 changes: 11 additions & 2 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,24 @@ import { Database, FieldType, containsType } from "./database";
import { IndexFile } from "./index-file";
import { RangeResolver } from "./resolver";

/**
 * Format of the data file a database is opened over.
 *
 * A value of this enum is passed as the `format` argument of `init`, which
 * forwards it to `Database.forDataFileAndIndexFile`. The enum is also exposed
 * on `globalThis.Appendable` so that script-tag consumers can reference it
 * (e.g. `Appendable.FormatType.Jsonl`).
 */
export enum FormatType {
// Comma-separated values data file.
Csv = "csv",
// JSON Lines data file.
Jsonl = "jsonl",
}

export async function init(
dataUrl: string | RangeResolver,
indexUrl: string | RangeResolver
indexUrl: string | RangeResolver,
format: FormatType
) {
return Database.forDataFileAndIndexFile(
typeof dataUrl === "string"
? DataFile.forUrl(dataUrl)
: DataFile.forResolver(dataUrl),
typeof indexUrl === "string"
? await IndexFile.forUrl(indexUrl)
: await IndexFile.forResolver(indexUrl)
: await IndexFile.forResolver(indexUrl),
format
);
}

Expand All @@ -22,6 +29,7 @@ interface GlobalMap {
init: Function;
FieldType: typeof FieldType;
containsType: typeof containsType;
FormatType: typeof FormatType;
};
}

Expand All @@ -33,4 +41,5 @@ globalThis.Appendable = {
init,
FieldType,
containsType,
FormatType,
};
3 changes: 2 additions & 1 deletion src/tests/database.test.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { Database, FieldType, Query, containsType } from "../database";
import { DataFile } from "../data-file";
import { IndexFile, VersionedIndexFile } from "../index-file";
import { FormatType } from "..";

jest.mock("../data-file");
jest.mock("../index-file");
Expand All @@ -25,7 +26,7 @@ describe("test query relation", () => {
} as jest.Mocked<VersionedIndexFile<any>>;

// instantiate a Database object with given mocked data file and index file
database = Database.forDataFileAndIndexFile(mockDataFile, mockIndexFile);
database = Database.forDataFileAndIndexFile(mockDataFile, mockIndexFile, FormatType.Jsonl);
});

/*
Expand Down

0 comments on commit 7f06d69

Please sign in to comment.