Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update Field Type + traverse index headers #110

Merged
merged 3 commits into the base branch from the head branch
Feb 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion examples/client/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
Appendable.init(
"green_tripdata_2023-01.csv",
"green_tripdata_2023-01.csv.index",
Appendable.FormatType.Csv
friendlymatthew marked this conversation as resolved.
Show resolved Hide resolved
).then(async (db) => {
let dbFields = [];

Expand Down
18 changes: 9 additions & 9 deletions pkg/appendable/appendable.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,22 +127,22 @@ type IndexMeta struct {
}

func (m *IndexMeta) MarshalBinary() ([]byte, error) {
buf := make([]byte, 8+len(m.FieldName)+2)
binary.BigEndian.PutUint64(buf[0:], uint64(m.FieldType))
binary.BigEndian.PutUint16(buf[8:], uint16(len(m.FieldName)))
copy(buf[10:], m.FieldName)
buf := make([]byte, 2+len(m.FieldName)+2)
binary.BigEndian.PutUint16(buf[0:], uint16(m.FieldType))
binary.BigEndian.PutUint16(buf[2:], uint16(len(m.FieldName)))
copy(buf[4:], m.FieldName)
return buf, nil
}

func (m *IndexMeta) UnmarshalBinary(buf []byte) error {
if len(buf) < 10 {
if len(buf) < 4 {
return fmt.Errorf("invalid metadata size: %d", len(buf))
}
m.FieldType = FieldType(binary.BigEndian.Uint64(buf[0:]))
nameLength := binary.BigEndian.Uint16(buf[8:])
if len(buf) < 10+int(nameLength) {
m.FieldType = FieldType(binary.BigEndian.Uint16(buf[0:]))
nameLength := binary.BigEndian.Uint16(buf[2:])
if len(buf) < 4+int(nameLength) {
return fmt.Errorf("invalid metadata size: %d", len(buf))
}
m.FieldName = string(buf[10 : 10+nameLength])
m.FieldName = string(buf[4 : 4+nameLength])
return nil
}
182 changes: 96 additions & 86 deletions src/btree/multi.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,96 +3,106 @@ import { MemoryPointer } from "./node";
import { PageFile } from "./pagefile";

const PAGE_SIZE_BYTES = 4096;
export const maxUint64 = 2n ** 64n - 1n;

export class LinkedMetaPage {
private resolver: RangeResolver;
private offset: bigint;
private metaPageData: ArrayBuffer | null;

constructor(resolver: RangeResolver, offset: bigint) {
this.resolver = resolver;
this.offset = offset;
this.metaPageData = null;
}

async root(): Promise<MemoryPointer | null> {
const pageData = await this.getMetaPage();

// the root pointer occupies the first 12 bytes: an 8-byte offset followed by a 4-byte length
const data = pageData.slice(0, 12);
const view = new DataView(data);

const pointerOffset = view.getBigUint64(0);
const lengthOffset = view.getUint32(8);

return {
offset: pointerOffset,
length: lengthOffset,
};
}

/**
* `metadata()` gets the metadata stored in the page. It does the following:
* (1) reads the four-byte length field at page offset 24
* (2) slices [28, 28 + metadataLength), which contains the metadata itself
*/
async metadata(): Promise<ArrayBuffer> {
const pageData = await this.getMetaPage();

const lengthView = new DataView(pageData, 24);

// read the first four bytes, which hold the metadata length
const metadataLength = lengthView.getUint32(0);

return pageData.slice(28, 28 + metadataLength);
}

/**
* `getMetaPage()` fetches this page from the index file using absolute bounds.
* The result is cached in memory, so the resolver is hit at most once per page.
* Note: all other methods that call this should slice with page-relative bounds.
*/
private async getMetaPage(): Promise<ArrayBuffer> {
if (this.metaPageData) {
return this.metaPageData;
}

const { data } = await this.resolver({
start: Number(this.offset),
end: Number(this.offset) + PAGE_SIZE_BYTES - 1,
});

this.metaPageData = data;

return data;
}

/**
* `next()` - returns a new LinkedMetaPage
*/
async next(): Promise<LinkedMetaPage | null> {
const pageData = await this.getMetaPage();

const view = new DataView(pageData, 12, 8);
const nextOffset = view.getBigUint64(0);
const maxUint64 = 2n ** 64n - 1n;
console.log("next offset: ", nextOffset);
if (nextOffset === maxUint64) {
return null;
}

return new LinkedMetaPage(this.resolver, nextOffset);
}

getOffset(): bigint {
return this.offset;
}
private resolver: RangeResolver;
private offset: bigint;
private metaPageData: ArrayBuffer | null;
private metaPagePromise: Promise<ArrayBuffer> | null = null;

constructor(resolver: RangeResolver, offset: bigint) {
this.resolver = resolver;
this.offset = offset;
this.metaPageData = null;
}

async root(): Promise<MemoryPointer | null> {
const pageData = await this.getMetaPage();

// the root pointer occupies the first 12 bytes: an 8-byte offset followed by a 4-byte length
const data = pageData.slice(0, 12);
const view = new DataView(data);

const pointerOffset = view.getBigUint64(0);
const lengthOffset = view.getUint32(8);

return {
offset: pointerOffset,
length: lengthOffset,
};
}

/**
* `metadata()` gets the metadata stored in the page. It does the following:
* (1) reads the four-byte length field at page offset 24
* (2) slices [28, 28 + metadataLength), which contains the metadata itself
*/
async metadata(): Promise<ArrayBuffer> {
const pageData = await this.getMetaPage();

const lengthView = new DataView(pageData, 24);

// read the first four bytes, which hold the metadata length
const metadataLength = lengthView.getUint32(0);

return pageData.slice(28, 28 + metadataLength);
}

/**
* `getMetaPage()` fetches this page from the index file using absolute bounds.
* The result is cached in memory, so the resolver is hit at most once per page.
* Note: all other methods that call this should slice with page-relative bounds.
*/
private async getMetaPage(): Promise<ArrayBuffer> {
if (this.metaPageData) {
friendlymatthew marked this conversation as resolved.
Show resolved Hide resolved
return this.metaPageData;
}

if (!this.metaPagePromise) {
this.metaPagePromise = this.resolver({
start: Number(this.offset),
end: Number(this.offset) + PAGE_SIZE_BYTES - 1,
})
.then(({ data }) => {
this.metaPageData = data;
this.metaPagePromise = null;
friendlymatthew marked this conversation as resolved.
Show resolved Hide resolved
return data;
})
.catch((error) => {
this.metaPagePromise = null;
throw error;
});
}

return this.metaPagePromise;
}

/**
* `next()` - returns a new LinkedMetaPage
*/
async next(): Promise<LinkedMetaPage | null> {
const pageData = await this.getMetaPage();

const view = new DataView(pageData, 12, 8);
const nextOffset = view.getBigUint64(0);

if (nextOffset === maxUint64) {
return null;
}

return new LinkedMetaPage(this.resolver, nextOffset);
}

getOffset(): bigint {
return this.offset;
}
}

export function ReadMultiBPTree(
resolver: RangeResolver,
pageFile: PageFile,
resolver: RangeResolver,
pageFile: PageFile
): LinkedMetaPage {
const offset = pageFile.page(0);
return new LinkedMetaPage(resolver, offset);
const offset = pageFile.page(0);
return new LinkedMetaPage(resolver, offset);
}
31 changes: 16 additions & 15 deletions src/db/database.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
import { FormatType } from "..";
import { DataFile } from "../data-file";
import { IndexFile, VersionedIndexFile } from "../index-file/index-file";
import { VersionedIndexFile } from "../index-file/index-file";
import { FileFormat } from "../index-file/meta";
import { QueryBuilder } from "./query-builder";
import { validateQuery } from "./query-validation";

export type Schema = {
[key: string]: {};
Expand All @@ -29,19 +28,23 @@ export type Query<T extends Schema> = {
};

export enum FieldType {
String = 1 << 0,
Number = 1 << 1,
Boolean = 1 << 4,
Null = 1 << 5,
String = 0,
Int64 = 1,
Uint64 = 2,
Float64 = 3,
Object = 4,
Array = 5,
Boolean = 6,
Null = 7,
}

function parseIgnoringSuffix(
x: string,
format: FormatType,
format: FileFormat,
headerFields: string[]
) {
switch (format) {
case FormatType.Jsonl:
case FileFormat.JSONL:
try {
return JSON.parse(x);
} catch (error) {
Expand All @@ -55,7 +58,7 @@ function parseIgnoringSuffix(
console.log(JSON.parse(x));
return JSON.parse(x);

case FormatType.Csv:
case FileFormat.CSV:
const fields = x.split(",");

if (fields.length === 2) {
Expand Down Expand Up @@ -125,16 +128,14 @@ function cmp(a: any, b: any) {
export class Database<T extends Schema> {
private constructor(
private dataFile: DataFile,
private indexFile: VersionedIndexFile<T>,
private formatType: FormatType
private indexFile: VersionedIndexFile<T>
) {}

static forDataFileAndIndexFile<T extends Schema>(
dataFile: DataFile,
indexFile: VersionedIndexFile<T>,
format: FormatType
indexFile: VersionedIndexFile<T>
) {
return new Database(dataFile, indexFile, format);
return new Database(dataFile, indexFile);
}

async fields() {
Expand Down
22 changes: 11 additions & 11 deletions src/db/query-validation.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { IndexMeta } from "../index-file/meta";
import { IndexHeader, IndexMeta } from "../index-file/meta";
import {
FieldType,
OrderBy,
Expand All @@ -16,8 +16,8 @@ import {
* @param {FieldType} singleType - The specific type to check for within the compositeType.
* @returns {boolean} - Returns true if singleType is included in compositeType, false otherwise.
*/
function containsType(compositeType: bigint, singleType: FieldType): boolean {
return (compositeType & BigInt(singleType)) !== BigInt(0);
function containsType(compositeType: number[], singleType: number): boolean {
return compositeType.includes(singleType);
}

/**
Expand All @@ -29,7 +29,7 @@ function containsType(compositeType: bigint, singleType: FieldType): boolean {
*/
function validateWhere<T extends Schema>(
where: WhereNode<T>[] | undefined,
headers: IndexMeta[]
headers: IndexHeader[]
): void {
if (!where || !Array.isArray(where) || where.length === 0) {
throw new Error("Missing 'where' clause.");
Expand Down Expand Up @@ -57,27 +57,27 @@ function validateWhere<T extends Schema>(
throw new Error("'value' in 'where' clause is missing.");
}

const headerType = header.fieldType;
const headerType = header.fieldTypes;

if (whereNode.value === null) {
if (!containsType(headerType, FieldType.Null)) {
if (!containsType(headerType, 7)) {
throw new Error(`'key: ${whereNode.key} does not have type: null.`);
}
} else {
function fieldTypeError(
key: string,
actual: FieldType,
expected: bigint
expected: number[]
): string {
return `key: ${key} does not have type: ${actual}. Expected: ${expected}`;
}

switch (typeof whereNode.value) {
case "bigint":
case "number":
if (!containsType(headerType, FieldType.Number)) {
if (!containsType(headerType, FieldType.Int64)) {
throw new Error(
fieldTypeError(whereNode.key, FieldType.Number, headerType)
fieldTypeError(whereNode.key, FieldType.Int64, headerType)
);
}
break;
Expand Down Expand Up @@ -141,7 +141,7 @@ function validateOrderBy<T extends Schema>(
*/
function validateSelect<T extends Schema>(
select: SelectField<T>[] | undefined,
headers: IndexMeta[]
headers: IndexHeader[]
): void {
if (select) {
if (!Array.isArray(select) || select.length === 0) {
Expand Down Expand Up @@ -171,7 +171,7 @@ function validateSelect<T extends Schema>(
*/
export async function validateQuery<T extends Schema>(
query: Query<T>,
headers: IndexMeta[]
headers: IndexHeader[]
): Promise<void> {
validateWhere(query.where, headers);
validateOrderBy(query.orderBy, query.where![0].key as string);
Expand Down
Loading
Loading