Skip to content

Commit

Permalink
feat: include format
Browse files Browse the repository at this point in the history
  • Loading branch information
friendlymatthew committed Feb 16, 2024
1 parent 5ad5baa commit 6d9d745
Show file tree
Hide file tree
Showing 7 changed files with 107 additions and 8 deletions.
4 changes: 2 additions & 2 deletions cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,8 @@ func main() {
switch {
case jsonlFlag:
dataHandler = handlers.JSONLHandler{}
// case csvFlag:
// dataHandler = handlers.CSVHandler{}
case csvFlag:
dataHandler = handlers.CSVHandler{}
default:
logger.Error("Please specify the file type with -jsonl or -csv.")
os.Exit(1)
Expand Down
21 changes: 17 additions & 4 deletions pkg/appendable/appendable.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,18 +93,31 @@ type FileMeta struct {
}

// MarshalBinary serializes the file metadata into a fixed-size byte slice:
// the version byte, the format byte, then ReadOffset as a big-endian uint64.
// It always returns a nil error.
//
// NOTE(review): this span is a rendered diff, so removed and added lines
// appear back to back. Presumably the 9-byte buffer and the write at buf[1:]
// are the removed lines, while the 10-byte buffer, the format byte at buf[1]
// and the write at buf[2:] are the added ones — confirm against the commit.
func (m *FileMeta) MarshalBinary() ([]byte, error) {
buf := make([]byte, 9)
buf := make([]byte, 10)
buf[0] = byte(m.Version)
binary.BigEndian.PutUint64(buf[1:], m.ReadOffset)
// Byte 1 carries the file format.
buf[1] = byte(m.Format)
// ReadOffset occupies the eight bytes starting at offset 2.
binary.BigEndian.PutUint64(buf[2:], m.ReadOffset)
return buf, nil
}

// UnmarshalBinary decodes file metadata from buf into m: the version from
// byte 0, the format from byte 1, and ReadOffset as a big-endian uint64
// starting at byte 2. It returns an error when buf is too short or when the
// format byte is neither 0 (JSONL) nor 1 (CSV).
//
// NOTE(review): this span is a rendered diff, so removed and added lines
// appear back to back. Presumably the `< 9` check and the read from buf[1:]
// are the removed lines, while the `< 10` check and the read from buf[2:]
// are the added ones — confirm against the commit.
func (m *FileMeta) UnmarshalBinary(buf []byte) error {
if len(buf) < 9 {
if len(buf) < 10 {
return fmt.Errorf("invalid metadata size: %d", len(buf))
}
m.Version = Version(buf[0])
m.ReadOffset = binary.BigEndian.Uint64(buf[1:])

fileFormat := buf[1]

// Map the raw format byte onto a known format; anything else is rejected.
switch fileFormat {
case byte(0):
m.Format = FormatJSONL
case byte(1):
m.Format = FormatCSV
default:
return fmt.Errorf("unrecognized file format: %v", buf[1])
}

m.ReadOffset = binary.BigEndian.Uint64(buf[2:])
return nil
}

Expand Down
2 changes: 1 addition & 1 deletion src/index-file/index-file.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import {LinkedMetaPage, ReadMultiBPTree} from "../btree/multi";
import { LengthIntegrityError, RangeResolver } from "../resolver";
import { IndexMeta, unmarshalBinaryForIndexMeta } from "./index-meta";
import {PageFile} from "../btree/pagefile";
import { IndexMeta, unmarshalBinaryForIndexMeta } from "./meta";



Expand Down
37 changes: 37 additions & 0 deletions src/index-file/index-meta.ts → src/index-file/meta.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,42 @@
import { RangeResolver } from "../resolver";


/**
 * Data formats recognised in the file metadata. The numeric values are the
 * raw format byte that `readFileMeta` reads at offset 1 of the metadata.
 */
export enum FileFormat {
  JSONL = 0,
  CSV = 1,
}

/** Decoded contents of the 10-byte file metadata. */
export type FileMeta = {
  /** Unsigned byte read from offset 0. */
  version: number;
  /** Format byte read from offset 1, validated against `FileFormat`. */
  format: FileFormat;
  /** Unsigned 64-bit big-endian integer read from offset 2. */
  readOffset: bigint;
};

/**
 * Decodes the file metadata from its binary form.
 *
 * Layout: byte 0 is the version, byte 1 is the format, bytes 2-9 hold the
 * read offset as a big-endian unsigned 64-bit integer.
 *
 * @param buffer - The raw metadata; must be exactly 10 bytes long.
 * @returns The decoded version, format and read offset.
 * @throws Error if `buffer` is not exactly 10 bytes long, or if the format
 *   byte is not a known `FileFormat`. Because the function is `async`, both
 *   failures surface as a rejected promise.
 */
export async function readFileMeta(buffer: ArrayBuffer): Promise<FileMeta> {
  const { byteLength } = buffer;
  if (byteLength !== 10) {
    throw new Error(`incorrect byte length! Want: 10, got ${byteLength}`);
  }

  const view = new DataView(buffer);
  const rawFormat = view.getUint8(1);

  // Only the known format bytes are accepted; everything else is rejected.
  switch (rawFormat) {
    case FileFormat.JSONL:
    case FileFormat.CSV:
      break;
    default:
      throw new Error(`unexpected file format. Got: ${rawFormat}`);
  }

  return {
    version: view.getUint8(0),
    format: rawFormat,
    // DataView reads are big-endian unless told otherwise.
    readOffset: view.getBigUint64(2),
  };
}



export type IndexMeta = {
fieldName: string;
fieldType: bigint;
Expand Down
Binary file added src/tests/green_tripdata_2023-01.csv.index
Binary file not shown.
50 changes: 50 additions & 0 deletions src/tests/index-file.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import { ReadMultiBPTree } from "../btree/multi";
import { PageFile } from "../btree/pagefile";
import { readFileMeta } from "../index-file/meta";
import { RangeResolver } from "../resolver";
import { readBinaryFile } from "./test-util";

describe("test index-file parsing", () => {
  // Reassigned before every test so each case gets a fresh resolver.
  let mockRangeResolver: RangeResolver;

  beforeEach(() => {
    mockRangeResolver = async ({ start, end }) => {
      // The fixture is re-read on every range request. The slice goes up to
      // `end + 1`, i.e. `end` is treated as an inclusive bound.
      const fixture = await readBinaryFile("green_tripdata_2023-01.csv.index");
      const chunk = fixture.slice(start, end + 1);

      // Copy out exactly the bytes the sliced view covers, in case the view
      // sits at a non-zero offset inside a larger backing buffer.
      const from = chunk.byteOffset;
      const arrayBuffer = chunk.buffer.slice(from, from + chunk.byteLength);

      console.log("indexFile", start, end, arrayBuffer.byteLength);

      return {
        data: arrayBuffer,
        totalLength: arrayBuffer.byteLength,
      };
    };
  });

  it("should read the file meta", async () => {
    const tree = ReadMultiBPTree(mockRangeResolver, new PageFile(mockRangeResolver));

    // The tree's metadata bytes are what readFileMeta decodes.
    const fileMeta = await readFileMeta(await tree.metadata());

    console.log(fileMeta);

    // The fixture is a CSV index, so the format byte is expected to be 1.
    expect(fileMeta.format).toEqual(1);
    expect(fileMeta.version).toEqual(1);

    console.log(fileMeta.readOffset)
  });
});
1 change: 0 additions & 1 deletion src/tests/multi.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ import { LengthIntegrityError, RangeResolver } from "../resolver";
import { PageFile } from "../btree/pagefile";
import { ReadMultiBPTree } from "../btree/multi";
import { arrayBufferToString, readBinaryFile } from "./test-util";
import mock = jest.mock;

describe("test multi", () => {
it("storing metadata works", async () => {
Expand Down

0 comments on commit 6d9d745

Please sign in to comment.