From ede5f9450b1fae8b007bd2edcb1c0983a08ac898 Mon Sep 17 00:00:00 2001 From: Evert Pot Date: Sat, 27 Jan 2024 19:22:57 -0500 Subject: [PATCH 1/4] Displaystring work. Currently very incomplete, but tests are running. --- src/displaystring.ts | 16 ++++++++++++++++ src/index.ts | 1 + src/parser.ts | 30 ++++++++++++++++++++++++++++++ src/serializer.ts | 8 ++++++++ src/types.ts | 3 ++- test/httpwg-tests.js | 11 ++++++++++- 6 files changed, 67 insertions(+), 2 deletions(-) create mode 100644 src/displaystring.ts diff --git a/src/displaystring.ts b/src/displaystring.ts new file mode 100644 index 0000000..fac2c58 --- /dev/null +++ b/src/displaystring.ts @@ -0,0 +1,16 @@ +export class DisplayString { + + private value: string; + constructor(value: string) { + + this.value = value; + + } + + toString(): string { + + return this.value; + + } + +} diff --git a/src/index.ts b/src/index.ts index 1bed8df..fa4a463 100644 --- a/src/index.ts +++ b/src/index.ts @@ -3,3 +3,4 @@ export * from './parser'; export * from './types'; export * from './util'; export { Token } from './token'; +export { DisplayString } from './displaystring'; diff --git a/src/parser.ts b/src/parser.ts index 9e454f5..d34e99c 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -11,6 +11,7 @@ import { import { Token } from './token'; import { isAscii } from './util'; +import { DisplayString } from './displaystring'; export function parseDictionary(input: string): Dictionary { @@ -185,6 +186,9 @@ export default class Parser { if (char === '@') { return this.parseDate(); } + if (char === '%') { + return this.parseDisplayString(); + } throw new ParseError(this.pos, 'Unexpected input'); } @@ -299,6 +303,32 @@ export default class Parser { } + private parseDisplayString(): DisplayString { + + this.expectChar('%'); + this.pos++; + this.expectChar('"'); + this.pos++; + + let result = new Uint8Array(); + + while (!this.eof()) { + + const char = this.getChar(); + if (char.charCodeAt(0) <= 0x1F || (char.charCodeAt(0) >= 0x7F 
&& char.charCodeAt(0) <= 0xFF)) { + throw new ParseError('Invalid byte found at offset: ' + this.pos); + } + + if (char==='%') { + const hexChars = this.input.substr(this.pos,2); + if (/^[0-9a-f]{2}$/.test(hexChars)) { + + } + + return new DisplayString(result); + + } + private parseToken(): Token { // The specification wants this check, but it's an unreachable code block. diff --git a/src/serializer.ts b/src/serializer.ts index 4c6ac77..da31764 100644 --- a/src/serializer.ts +++ b/src/serializer.ts @@ -11,6 +11,7 @@ import { import { Token } from './token'; import { isAscii, isInnerList, isValidKeyStr } from './util'; +import { DisplayString } from './displaystring'; export class SerializeError extends Error {} @@ -80,6 +81,9 @@ export function serializeBareItem(input: BareItem): string { if (input instanceof ByteSequence) { return serializeByteSequence(input); } + if (input instanceof DisplayString) { + return serializeDisplayString(input); + } if (input instanceof Date) { return serializeDate(input); } @@ -114,6 +118,10 @@ export function serializeString(input: string): string { return `"${input.replace(/("|\\)/g, (v) => '\\' + v)}"`; } +export function serializeDisplayString(input: DisplayString): string { + return '%' + serializeString(input.toString()); +} + export function serializeBoolean(input: boolean): string { return input ? 
'?1' : '?0'; } diff --git a/src/types.ts b/src/types.ts index 7f293a5..cba19a9 100644 --- a/src/types.ts +++ b/src/types.ts @@ -1,4 +1,5 @@ import { Token } from './token'; +import { DisplayString } from './displaystring'; /** * Lists are arrays of zero or more members, each of which can be an Item @@ -47,6 +48,6 @@ export class ByteSequence { } -export type BareItem = number | string | Token | ByteSequence | Date | boolean; +export type BareItem = number | string | Token | ByteSequence | Date | boolean | DisplayString; export type Item = [BareItem, Parameters]; diff --git a/test/httpwg-tests.js b/test/httpwg-tests.js index b925a23..dbaea4c 100644 --- a/test/httpwg-tests.js +++ b/test/httpwg-tests.js @@ -10,7 +10,7 @@ const { ParseError, } = require('../dist'); -const { Token, ByteSequence } = require('../dist'); +const { Token, ByteSequence, DisplayString } = require('../dist'); const base32Encode = require('base32-encode'); const base32Decode = require('base32-decode'); const fs = require('fs'); @@ -24,6 +24,7 @@ describe('HTTP-WG tests', () => { 'string', 'token', 'date', + 'display-string', 'item', @@ -284,6 +285,12 @@ function packTestValue(input) { value: input.toString() } } + if(input instanceof DisplayString) { + return { + __type: 'displaystring', + value: input.toString() + } + } if (input instanceof ByteSequence) { return { __type: 'binary', @@ -340,6 +347,8 @@ function unpackTestValue(input) { return new ByteSequence(Buffer.from(base32Decode(input.value, 'RFC4648')).toString('base64')); case 'date' : return new Date(input.value * 1000); + case 'displaystring' : + return new DisplayString(input.value); default: throw new Error('Unknown input __type: ' + input.__type); } From 1789906ea4f53c7ccc1434590e35019d7d5e505e Mon Sep 17 00:00:00 2001 From: Evert Pot Date: Sat, 27 Jan 2024 22:49:38 -0500 Subject: [PATCH 2/4] Display String support --- changelog.md | 1 + src/parser.ts | 47 +++++++++++++++++++++++++++++++++++++---------- src/serializer.ts | 16 
+++++++++++++++- 3 files changed, 53 insertions(+), 11 deletions(-) diff --git a/changelog.md b/changelog.md index e068d11..a5accb5 100644 --- a/changelog.md +++ b/changelog.md @@ -5,6 +5,7 @@ ChangeLog ------------------ * Support for a new 'Date' type, from draft [draft-ietf-httpbis-sfbis-02][7]. +* Support for the "Display String" type. 1.0.1 (2023-08-03) diff --git a/src/parser.ts b/src/parser.ts index d34e99c..901ec13 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -305,27 +305,44 @@ export default class Parser { private parseDisplayString(): DisplayString { - this.expectChar('%'); - this.pos++; - this.expectChar('"'); - this.pos++; + const chars = this.getChars(2); + if (chars !== '%"') { + throw new ParseError(this.pos, 'Unexpected character. Display strings should start with %='); + } - let result = new Uint8Array(); + const result:number[] = []; while (!this.eof()) { const char = this.getChar(); if (char.charCodeAt(0) <= 0x1F || (char.charCodeAt(0) >= 0x7F && char.charCodeAt(0) <= 0xFF)) { - throw new ParseError('Invalid byte found at offset: ' + this.pos); + throw new ParseError(this.pos, 'Invalid char found in DisplayString. Did you forget to escape?'); } if (char==='%') { - const hexChars = this.input.substr(this.pos,2); + const hexChars = this.getChars(2); if (/^[0-9a-f]{2}$/.test(hexChars)) { - + result.push(parseInt(hexChars, 16)); + } else { + throw new ParseError(this.pos, `Unexpected sequence after % in DispalyString: "${hexChars}". 
Note that hexidecimals must be lowercase`); + } + continue; + } + if (char==='"') { + const textDecoder = new TextDecoder('utf-8', { + fatal: true + }); + try { + return new DisplayString( + textDecoder.decode(new Uint8Array(result)) + ); + } catch (err) { + throw new ParseError(this.pos, 'Fatal error decoding UTF-8 sequence in Display String'); + } + } + result.push(char.charCodeAt(0)); } - - return new DisplayString(result); + throw new ParseError(this.pos, 'Unexpected end of input'); } @@ -459,6 +476,16 @@ export default class Parser { return this.input[this.pos++]; + } + private getChars(count: number): string { + + const result = this.input.substr( + this.pos, + count + ); + this.pos += count; + return result; + } private eof():boolean { diff --git a/src/serializer.ts b/src/serializer.ts index da31764..5c2f19f 100644 --- a/src/serializer.ts +++ b/src/serializer.ts @@ -119,7 +119,21 @@ export function serializeString(input: string): string { } export function serializeDisplayString(input: DisplayString): string { - return '%' + serializeString(input.toString()); + let out = '%"'; + const textEncoder = new TextEncoder(); + for (const char of textEncoder.encode(input.toString())) { + if ( + char === 0x25 // % + || char === 0x22 // " + || char <= 0x1f + || char >= 0x7f + ) { + out += '%' + char.toString(16); + } else { + out += String.fromCharCode(char); + } + } + return out + '"'; } export function serializeBoolean(input: boolean): string { From 114d7f16bfd955036c20511cf6042419d1791ffd Mon Sep 17 00:00:00 2001 From: Evert Pot Date: Sat, 27 Jan 2024 22:54:48 -0500 Subject: [PATCH 3/4] LINT --- src/parser.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/parser.ts b/src/parser.ts index 901ec13..d3d1cb1 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -333,9 +333,9 @@ export default class Parser { fatal: true }); try { - return new DisplayString( - textDecoder.decode(new Uint8Array(result)) - ); + return new DisplayString( + 
textDecoder.decode(new Uint8Array(result)) + ); } catch (err) { throw new ParseError(this.pos, 'Fatal error decoding UTF-8 sequence in Display String'); } From efe24c76e4ce2b1f96a17499c57625ecb0a53968 Mon Sep 17 00:00:00 2001 From: Evert Pot Date: Sat, 27 Jan 2024 23:03:45 -0500 Subject: [PATCH 4/4] Update docs --- readme.md | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/readme.md b/readme.md index 06d81a5..6ac2dc8 100644 --- a/readme.md +++ b/readme.md @@ -59,12 +59,17 @@ The following are examples of `item` headers: Parsed as string ``` -# Parsed as string +# Parsed as an ASCII string Header: "foo" # A simple string, called a 'Token' in the spec Header: foo +# A Unicode string, called a 'Display String' in the spec. They use +# percent encoding, but encode a different set of characters than +# URLs. +Header: %"Frysl%C3%A2n" + # Parsed as number Header: 5 Header: -10 @@ -84,7 +89,6 @@ Header: "Hello world"; a="5" Header: @1686634251 ``` - To parse these header values, use the `parseItem`: ```typescript @@ -217,6 +221,9 @@ serializeItem([5.5, new Map()]); // Returns "hello world" serializeItem(["hello world", new Map()]); +// Returns %"Frysl%C3%A2n" +serializeItem([new DisplayString("Fryslân"), new Map()]); + // Returns ?1 serializeItem([true, new Map()]);