/**
 * Wrapper marking a string as a "Display String" bare item, so it can be
 * told apart from a plain string with `instanceof`.
 */
export class DisplayString {

  /**
   * @param value The text this instance wraps; it is stored as given.
   */
  constructor(private readonly value: string) {}

  /**
   * @returns The exact string that was passed to the constructor.
   */
  toString(): string {
    return this.value;
  }

}
/**
 * Parses a Display String starting at the current position.
 *
 * The input must begin with `%"`. Characters up to the closing `"` are
 * collected as bytes: a `%` must be followed by exactly two lowercase hex
 * digits, which contribute one byte; any other accepted character
 * contributes its own code. The collected bytes are then decoded as strict
 * UTF-8.
 *
 * @returns A DisplayString wrapping the decoded text.
 * @throws ParseError if the `%"` prefix is missing, a character outside
 *   0x20-0x7E is found, a `%` escape is malformed, the bytes are not valid
 *   UTF-8, or the input ends before the closing quote.
 */
private parseDisplayString(): DisplayString {

  const prefix = this.getChars(2);
  if (prefix !== '%"') {
    throw new ParseError(this.pos, 'Unexpected character. Display strings should start with %"');
  }

  const bytes: number[] = [];

  while (!this.eof()) {

    const char = this.getChar();
    const code = char.charCodeAt(0);
    // Reject everything outside printable ASCII. Code units above 0xFF must
    // be rejected as well: they would otherwise be truncated modulo 256 when
    // stored in the Uint8Array below and decode to an unrelated character.
    if (code <= 0x1F || code >= 0x7F) {
      throw new ParseError(this.pos, 'Invalid char found in DisplayString. Did you forget to escape?');
    }

    if (char === '%') {
      const hexChars = this.getChars(2);
      if (!/^[0-9a-f]{2}$/.test(hexChars)) {
        throw new ParseError(this.pos, `Unexpected sequence after % in DisplayString: "${hexChars}". Note that hexadecimals must be lowercase`);
      }
      bytes.push(parseInt(hexChars, 16));
      continue;
    }

    if (char === '"') {
      // fatal: true makes decode() throw on malformed UTF-8 instead of
      // silently substituting U+FFFD.
      const textDecoder = new TextDecoder('utf-8', {
        fatal: true
      });
      try {
        return new DisplayString(
          textDecoder.decode(new Uint8Array(bytes))
        );
      } catch {
        throw new ParseError(this.pos, 'Fatal error decoding UTF-8 sequence in Display String');
      }
    }

    bytes.push(code);
  }
  throw new ParseError(this.pos, 'Unexpected end of input');

}
/**
 * Serializes a DisplayString to its textual form: `%"`, followed by the
 * UTF-8 bytes of the string, followed by `"`.
 *
 * Bytes for `%` (0x25), `"` (0x22), control characters (<= 0x1f) and
 * everything >= 0x7f are written as `%` plus two lowercase hex digits;
 * all other bytes are written as the ASCII character they represent.
 *
 * @param input The display string to serialize.
 * @returns The serialized display string, including the `%"` prefix and
 *   closing quote.
 */
export function serializeDisplayString(input: DisplayString): string {
  let out = '%"';
  const bytes = new TextEncoder().encode(input.toString());
  for (const byte of bytes) {
    if (
      byte === 0x25 // %
      || byte === 0x22 // "
      || byte <= 0x1f
      || byte >= 0x7f
    ) {
      // Always emit two hex digits: bytes below 0x10 would otherwise be
      // written as a single digit (e.g. "%a"), which is not a valid escape.
      out += '%' + byte.toString(16).padStart(2, '0');
    } else {
      out += String.fromCharCode(byte);
    }
  }
  return out + '"';
}
'?1' : '?0'; } diff --git a/src/types.ts b/src/types.ts index 7f293a5..cba19a9 100644 --- a/src/types.ts +++ b/src/types.ts @@ -1,4 +1,5 @@ import { Token } from './token'; +import { DisplayString } from './displaystring'; /** * Lists are arrays of zero or more members, each of which can be an Item @@ -47,6 +48,6 @@ export class ByteSequence { } -export type BareItem = number | string | Token | ByteSequence | Date | boolean; +export type BareItem = number | string | Token | ByteSequence | Date | boolean | DisplayString; export type Item = [BareItem, Parameters]; diff --git a/test/httpwg-tests.js b/test/httpwg-tests.js index b925a23..dbaea4c 100644 --- a/test/httpwg-tests.js +++ b/test/httpwg-tests.js @@ -10,7 +10,7 @@ const { ParseError, } = require('../dist'); -const { Token, ByteSequence } = require('../dist'); +const { Token, ByteSequence, DisplayString } = require('../dist'); const base32Encode = require('base32-encode'); const base32Decode = require('base32-decode'); const fs = require('fs'); @@ -24,6 +24,7 @@ describe('HTTP-WG tests', () => { 'string', 'token', 'date', + 'display-string', 'item', @@ -284,6 +285,12 @@ function packTestValue(input) { value: input.toString() } } + if(input instanceof DisplayString) { + return { + __type: 'displaystring', + value: input.toString() + } + } if (input instanceof ByteSequence) { return { __type: 'binary', @@ -340,6 +347,8 @@ function unpackTestValue(input) { return new ByteSequence(Buffer.from(base32Decode(input.value, 'RFC4648')).toString('base64')); case 'date' : return new Date(input.value * 1000); + case 'displaystring' : + return new DisplayString(input.value); default: throw new Error('Unknown input __type: ' + input.__type); }