forked from w3ctag/packaging-on-the-web
-
Notifications
You must be signed in to change notification settings - Fork 0
/
index.html
921 lines (867 loc) · 50.5 KB
/
index.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<meta content="width=device-width,initial-scale=1" name="viewport" />
<title>Packaging on the Web</title>
<script class="remove" src="https://www.w3.org/Tools/respec/respec-w3c-common">
</script>
<script class="remove">
// Global configuration object for this document. Presumably consumed by the
// ReSpec script loaded by the <script src> element above (ReSpec's documented
// convention is to read a global named `respecConfig`) — confirm against the
// ReSpec version in use.
var respecConfig = {
// Locally defined bibliography entry, keyed "OCF", for the EPUB Open Container Format 3.0.
localBiblio: {
"OCF": "James Pritchett; Markus Gylling. <a href=\"http://www.idpf.org/epub/30/spec/epub30-ocf-20111011.html\"><cite>EPUB Open Container Format (OCF) 3.0</cite></a> 11 October 2011. International Digital Publishing Forum Recommended Specification. URL: <a href=\"http://www.idpf.org/epub/30/spec/epub30-ocf-20111011.html\">http://www.idpf.org/epub/30/spec/epub30-ocf-20111011.html</a>"
},
// "ED" is presumably the Editor's Draft status — verify against ReSpec's specStatus values.
specStatus: "ED",
shortName: "web-packaging",
// The publication-history options below are all disabled (commented out).
// NOTE(review): the disabled previousURI points at the tabular-data-model draft,
// not at this document — looks like a copy-paste leftover; fix before enabling.
//publishDate: "2014-03-27",
// previousPublishDate: "2014-03-27",
// previousMaturity: "FPWD",
// previousURI: "http://www.w3.org/TR/2014/WD-tabular-data-model-20140327/",
edDraftURI: "http://w3ctag.github.io/packaging-on-the-web/",
// lcEnd: "3000-01-01",
// crEnd: "3000-01-01",
// A single editor entry: name, affiliation and affiliation URL.
editors: [{
name: "Jeni Tennison",
company: "Open Data Institute",
companyURL: "http://theodi.org/"
}],
// Details of the group responsible for the document (the W3C TAG): name, home page,
// public mailing list name and patent disclosure page.
wg: "Technical Architecture Group",
wgURI: "http://www.w3.org/2001/tag/",
wgPublicList: "www-tag",
wgPatentURI: "http://www.w3.org/2001/tag/disclosures",
// One extra link group, "Repository", with three links: the GitHub repository,
// its issue tracker, and the commit history of the gh-pages branch.
otherLinks: [{
key: "Repository",
data: [{
value: "We are on Github.",
href: "https://github.com/w3ctag/packaging-on-the-web"
}, {
value: "File a bug.",
href: "https://github.com/w3ctag/packaging-on-the-web/issues"
}, {
value: "Commit history.",
href: "https://github.com/w3ctag/packaging-on-the-web/commits/gh-pages"
}
]
}
],
// NOTE(review): the exact effect of the three flags below is not evident from this
// file; check the ReSpec documentation for the loaded version before changing them.
inlineCSS: true,
noIDLIn: true,
noLegacyStyle: false
};
</script>
</head>
<body>
<section id="abstract">
<p>
This document describes an approach for creating packages of files for use on the web. The approach is to package them using a new <code>application/package</code> media type. To access packages related to other files on the web, clients that understand packages of files look for a <code>Link</code> header or (in HTML documents) a <code>&lt;link&gt;</code> element with a new link relation of <code>package</code>. Other formats may define format-specific mechanisms for locating related packages.
</p>
</section>
<section id="sotd">
<p>
The Technical Architecture Group has put together this draft for comment and to indicate future direction. It is likely this draft will be handed on to a task force or working group to go through the rest of the process.
</p>
<p>
The technical proposals within this draft (namely new media type and link relation) will eventually require review and registration at IETF. That process has not started yet.
</p>
</section>
<section id="intro">
<h2>Introduction</h2>
<p>
Today's websites rely on multiple files. If a user visits <a href="http://www.bbc.co.uk/">http://www.bbc.co.uk/</a> they will need to download about 160 files to view the page in its entirety. The HTML page they download at <a href="http://www.bbc.co.uk/">http://www.bbc.co.uk/</a> contains references to stylesheets, scripts, images and other files, each of which may contain references to further files themselves. Many of these resources will themselves have been originally developed as separate resources and merged to avoid the client having to make multiple requests: CSS and Javascript files are concatenated together; images are merged and used with CSS spriting. These concatenation and spriting techniques are rudimentary forms of packaging for performance.
</p>
<p>
The web is becoming more modular. <a href="http://www.w3.org/TR/components-intro/">Web components</a> [[components-intro]] provide developers with the means to create reusable components, such as date pickers, carousels or tab layouts, that can be reused in other web pages. These components are defined through HTML, Javascript and CSS but may also require other resources such as images and data to operate. Meanwhile <a href="http://people.mozilla.org/~jorendorff/es6-draft.html#sec-modules">ES6 Modules</a> [[ECMASCRIPT]] will result in smaller, self-contained and cross-referencing Javascript files. This modularity requires packaging for ease of distribution.
</p>
<p>
Many websites are no longer simple providers of content: they are applications that use web technologies to deliver their user interface. Initiatives such as <a href="http://www.mozilla.org/en-US/firefox/os/">Firefox OS</a> and <a href="http://www.chromium.org/chromium-os">Chrome OS</a> demonstrate the potential of trusted, installable applications built with web technologies. To be used in this way, applications must be self-contained packages of resources that can be tested and signed.
</p>
<p>
Packages of content and data are not only useful for web applications. <a href="http://www.w3.org/dpub/">Digital publishing</a> requires the packaging of content, stylesheets, fonts and media. Data publication typically requires data and metadata to be packaged together. Archiving requires the packaging of whole websites into dated bundles.
</p>
<p>
There are existing and upcoming solutions to these problems. On the performance side, <a href="http://tools.ietf.org/html/draft-ietf-httpbis-http2/">HTTP/2</a> includes the facility to push multiple representations to clients as the result of a single request. Providing packages is orthogonal to the use of HTTP/2. Developers who cannot yet use HTTP/2 may find that using packages can provide performance benefits through reducing numbers of requests. All developers will benefit from being able to package modules and components in ways that make them easy to deliver and reuse.
</p>
<p>
There is also a plethora of packaging formats, such as:
</p>
<ul>
<li><a href="http://www.w3.org/TR/widgets/">Packaged Web Apps (Widgets)</a></li>
<li><a href="http://www.idpf.org/epub3/latest/ocf">Open Container Format</a></li>
<li><a href="https://developers.google.com/public-data/">Dataset Publishing Language</a></li>
<li><a href="http://dataprotocols.org/tabular-data-package/">Tabular Data Package</a></li>
<li><a href="http://maf.mozdev.org/maff-specification.html">Mozilla Archive Format</a></li>
<li><a href="http://tools.ietf.org/html/rfc2557">MHTML (RFC 2557)</a></li>
<li><a href="http://en.wikipedia.org/wiki/Webarchive">Webarchive format</a></li>
<li><a href="http://www.digitalpreservation.gov/formats/fdd/fdd000236.shtml">WARC</a></li>
</ul>
<p>
Most of these formats are based on zips, which have three main drawbacks:
</p>
<ul>
<li>
<strong>Zips are streamable, but it's a bit of a hack.</strong> The <em>central directory record</em>, which lists the valid files within the zip archive, appears at the <em>end</em> of the zip. It's possible to use per-entry file names, but conflicts with the central directory record are possible and the central directory record is authoritative. Making the streaming of zip files commonplace might exacerbate the potential for mismatched file entries causing different visible behavior between consumers of packages.
</li>
<li>
<strong>Zips are hard to create well.</strong> Ordering of files within packages can be important. But the typical method to create a zip is simply to use the built-in operating system facility to do so, which gives no control over this ordering.
</li>
<li>
<strong>Zips have restricted metadata.</strong> While there are mechanisms for supplying additional information about individual files within the package (through <em>extra fields</em>), they are not sufficient for extended metadata. Each extra field is a 2-byte ID code with a 2-byte value. The list of valid core and extended ID codes is provided within sections 4.5 and 4.6 of the <a href="http://www.pkware.com/documents/casestudies/APPNOTE.TXT">zip definition</a>. The file header within the zip, which includes these extra fields, must not exceed 64k in size.
</li>
</ul>
<p>
This document specifies a new format for packages which does not have these drawbacks: the Streamable Package Format defined in <a class="sectionRef" href="#streamable-package-format"></a>. It defines a <code>package</code> link relation to point to packages from other resources in <a class="sectionRef" href="#package-link-relation"></a>. But first, in <a class="sectionRef" href="#using-packages"></a>, this document describes patterns of use for packages that address the requirements described in this introduction.
</p>
</section>
<section class="informative">
<h2>Using Packages</h2>
<p>
This section demonstrates how package-aware applications could use packages.
</p>
<section>
<h3>Populating Caches</h3>
<p>
Packages can be used to populate caches associated with multiple URLs without making multiple requests. This may be beneficial in some cases to avoid the latency involved in making multiple connections, particularly for servers and clients that don't support SPDY or HTTP/2.
</p>
<p class="note">
Packaging can also damage performance: packages may be bloated by more resources than are actually necessary for the requested page, or may be badly ordered such that resources that are required early in the display of a web page are provided late in a package. Packaging is not a silver bullet for performance issues.
</p>
<section>
<h4>Example Scenario</h4>
<p>
In this scenario, a browser is pointed at the page <code>http://example.org/shop</code> which returns the HTML:
</p>
<pre class="example highlight">
&lt;html&gt;
&lt;head&gt;
&lt;link rel="package" href="/lib/brand.pack" scope="/lib/brand/"&gt;
&lt;link rel="stylesheet" href="/lib/brand/main.css"&gt;
...
&lt;/head&gt;
...
&lt;/html&gt;
</pre>
<p>
As it starts to receive the page, the browser starts to send off requests for other resources. It recognises the <code>rel="package"</code> links (described in <a href="#package-link-relation" class="sectionRef"></a>) as meaning that the files referenced by those links contain useful resources for the display of the page. It starts to download them, taking note of their scope (described in <a href="#package-scope" class="sectionRef"></a>).
</p>
<p>
When the browser comes to download the resource <code>http://example.org/lib/brand/main.css</code>, it checks to see if that resource is included in the scope of a package. In this case, the URL starts with <code>http://example.org/lib/brand/</code> so the browser can guess that it is included in the <code>http://example.org/lib/brand.pack</code> package and not make a separate request for the stylesheet.
</p>
<p>
The <code>http://example.org/lib/brand.pack</code> package contains the main CSS stylesheet, but also font files and images that are used as background images within the CSS:
</p>
<pre class="example">
Content-Location: http://example.org/lib/brand.pack
Expires: Mon, 07 Apr 2014 00:00:00 GMT
--mqt84qpfq8xf3qpmdicdma
Content-Location: brand/main.css
Content-Type: text/css
@font-face {
font-family: 'Shop';
src: url('shop.woff') format('woff');
}
body {
background: url('images/bg.jpg') no-repeat center center fixed;
...
}
--mqt84qpfq8xf3qpmdicdma
Content-Location: brand/shop.woff
Content-Type: application/font-woff
Expires: Mon, 06 Apr 2015 00:00:00 GMT
...
--mqt84qpfq8xf3qpmdicdma
Content-Location: brand/images/bg.jpg
Content-Type: image/jpeg
Expires: Mon, 06 Apr 2015 00:00:00 GMT
...
--mqt84qpfq8xf3qpmdicdma--
</pre>
<p>
As the package is loaded, the browser cache is populated with the content of <code>http://example.org/lib/brand/main.css</code>, <code>http://example.org/lib/brand/shop.woff</code> and <code>http://example.org/lib/brand/images/bg.jpg</code>. Because the package is streamed, the browser can use the CSS straight away, but the supporting resources are loaded rapidly after.
</p>
<p>
A couple of days later, on revisiting the page, the browser notices that its cached copy of <code>http://example.org/lib/brand/main.css</code> has expired. It requests that CSS file separately to update its cache. There is no need to request the font or the background image as these haven't expired.
</p>
</section>
<p class="issue">
If we progress further with the <code>2NN Contents of Related</code> status code, it would be good to add a scenario that shows that in action.
</p>
<section>
<h4>Guidelines</h4>
<p>
Package-aware caches SHOULD populate their caches using packages that are indicated through:
</p>
<ul>
<li>a <code>rel=package</code> <code>Link</code> HTTP header in the response to a request</li>
<li>a <code>rel=package</code> <code>&lt;link&gt;</code> tag in an HTML document</li>
</ul>
<p>
The <code>rel=package</code> link relation is described in <a href="#package-link-relation" class="sectionRef"></a>.
</p>
<p>
Package-aware caches SHOULD delay requesting resources within the <a title="package scope">scope</a> of a package that they are downloading, until the package has been processed.
</p>
<p>
If a package contains another package that is recognised by the package-aware cache, that package SHOULD be used to populate the cache in the same way.
</p>
<p>
If a package is in the Streamable Package Format (described in <a href="#streamable-package-format" class="sectionRef"></a>), package-aware caches SHOULD determine the cachability and other characteristics of each <a>part</a> of the package by examining (in order of priority):
</p>
<ol>
<li>the <a>header</a> of the part</li>
<li>the <a>package header</a></li>
<li>the HTTP headers in the response for the package</li>
</ol>
<p>
If a package is in the Streamable Package Format, package-aware caches should be aware that there may be multiple <a title="part">parts</a> with the same <a>part URL</a> but with different types or languages. Caches should be populated with attention paid to the <code>Vary</code> header to determine which headers are significant. Package-aware caches SHOULD only use the first <a>part</a> with the same content-negotiated characteristics.
</p>
</section>
</section>
<section>
<h3>Installing Web Applications</h3>
<p>
Packages can be used to provide installable web applications, both within marketplaces and simply on the web.
</p>
<section>
<h4>Example Scenario</h4>
<p>
In this scenario, a browser is pointed at <code>https://editor.example.com/</code>. It fetches the home page of the site, and sees:
</p>
<pre class="example highlight">
&lt;html&gt;
&lt;head&gt;
&lt;link rel="package" href="/editor.zip" scope="/" type="application/widget"&gt;
...
&lt;/head&gt;
...
&lt;/html&gt;
</pre>
<p>
The browser recognises that there is a <code>rel=package</code> link in the HTML page whose scope is the entirety of the website, and whose type is a package format that it recognises. It pops up a dialog to ask the user whether they would like to install the application locally, and downloads and installs it if the user agrees.
</p>
</section>
<section>
<h4>Guidelines</h4>
<p>
Package-aware installers SHOULD prompt for the installation of packages that are indicated through:
</p>
<ul>
<li>a <code>rel=package</code> <code>Link</code> HTTP header in the response to a request</li>
<li>a <code>rel=package</code> <code>&lt;link&gt;</code> tag in an HTML document</li>
</ul>
<p>
where:
</p>
<ol>
<li>the <a title="package scope">package scope</a> is the same as the request URL</li>
<li>the media type of the package is recognised by the installer</li>
</ol>
<p>
The <code>rel=package</code> link relation is described in <a href="#package-link-relation" class="sectionRef"></a>.
</p>
<p>
If a package is in the Streamable Package Format (described in <a href="#streamable-package-format" class="sectionRef"></a>), it SHOULD include a <code>Link</code> header in the <a>package header</a> with <code>rel=describedby</code> to explicitly point to the resource within the package that describes the package. Alternatively, it SHOULD use a <code>rel=start</code> or <code>rel=first</code> to point to the resource which is the starting point for the application (i.e. the initial page to load). Otherwise, the first <a>part</a> of the package should be a suitable starting point.
</p>
</section>
</section>
<section>
<h3>Distributing Code Libraries</h3>
<p>
Packages can be used to distribute code libraries that are made up of multiple components (modules). For this to work smoothly, they need to be recognised by package management systems such as <a href="http://bower.io/">Bower</a>.
</p>
<section>
<h4>Example Scenario</h4>
<p>
A developer wants to use a time-handling Javascript package within their own code. They point their package management system at the location of the Javascript package, <code>https://example.org/time.pack</code>. The package management system retrieves the package:
</p>
<pre class="example">
Content-Type: application/package
Link: &lt;component.json&gt;; rel=describedby
--p993fj39qp3aj8pjacda
Content-Location: component.json
Content-Type: application/json
... <em>package definition</em> ...
--p993fj39qp3aj8pjacda
Content-Location: time.js
Content-Type: text/javascript
Link: &lt;locale.pack&gt;; rel=package; scope=locale/
... <em>main Javascript</em> ...
--p993fj39qp3aj8pjacda
Content-Location: locale.pack
Content-Type: application/package
... <em>package of locale-specific data</em> ...
--p993fj39qp3aj8pjacda--
</pre>
<p>
It unpacks the package into the relevant directory on the developer's machine, but also includes the package itself in the developer's file system so that the package can be referenced by the developer in the website itself:
</p>
<pre class="example highlight">
&lt;html&gt;
&lt;head&gt;
&lt;link rel="package" href="/scripts/utils/time.pack" scope="/scripts/utils/time" type="application/package"&gt;
&lt;script src="/scripts/utils/time.js"&gt;&lt;/script&gt;
...
&lt;/head&gt;
...
&lt;/html&gt;
</pre>
</section>
<section>
<h4>Guidelines</h4>
<p>
Package managers that handle Streamable Package Format packages SHOULD unpack nested packages; these indicate implicit dependencies within the package.
</p>
<p>
Creators of Streamable Package Format packages for distribution SHOULD NOT include a <code>Content-Location</code> header in the <a>package header</a> as this prevents them from being reused in other locations.
</p>
</section>
</section>
<section>
<h3>Downloading Data For Local Processing</h3>
<p>
Data on the web often gains context from other, related, data and documentation. Packages can enable data reusers to quickly get hold of all the relevant data and documentation they need in a single, discoverable, bundle, while simultaneously making that data available directly on the web.
</p>
<p class="note">
This pattern is of particular relevance to the <a href="https://www.w3.org/2013/csvw/wiki/Main_Page">CSV on the Web Working Group</a>.
</p>
<section>
<h4>Example Scenario</h4>
<p>
An online news report references some recent demographic statistics, pointing to <code>http://example.org/stats/imd.csv</code>. When a data scientist happens on the reference, they fire up their data analysis application and point it at the URL. The HTTP response looks like:
</p>
<pre class="example highlight">
HTTP/1.1 200 OK
Content-Location: /stats/imd.csv
Content-Type: text/csv
Link: &lt;imd.pack&gt;; rel=package
... <em>CSV file</em> ...
</pre>
<p>
The CSV file is standard CSV: it contains no metadata that describes the types or meaning of any of the columns that it contains, or about the codes that have been used to identify the geographies that the data covers. Fortunately, the data analysis application is package aware. It recognises the <code>rel=package</code> link and automatically downloads the package, which looks like:
</p>
<pre class="example">
Content-Location: http://example.org/stats/imd.pack
Content-Type: application/package
Link: &lt;datapackage.json&gt;; rel=describedby
--nu0hv0ag70eahoea0e0a
Content-Location: datapackage.json
Content-Type: application/json
... <em>machine-readable description of the package</em> ...
--nu0hv0ag70eahoea0e0a
Content-Location: imd.csv
Content-Type: text/csv
... <em>statistical data</em> ...
--nu0hv0ag70eahoea0e0a
Content-Location: geographies.csv
Content-Type: text/csv
... <em>data about the geographies covered by the statistics</em> ...
--nu0hv0ag70eahoea0e0a
Content-Location: imd.pdf
Content-Type: application/pdf
... <em>human-readable documentation of the data</em> ...
--nu0hv0ag70eahoea0e0a--
</pre>
<p>
The data analysis application uses the information in <code>datapackage.json</code> to handle the types of the values in the original data correctly, and to display them with nice labels. It provides easy access to the linked information from <code>geographies.csv</code> and shows the data scientist the human-readable documentation that has been made available.
</p>
</section>
<section>
<h4>Guidelines</h4>
<p>
Package-aware data analysis software SHOULD follow <code>rel=package</code> links in HTTP headers to access additional information about data that it retrieves by HTTP.
</p>
<p>
Data publishers SHOULD use the <code>Link</code> header to provide packages of information when individual (context-free) data files are requested.
</p>
</section>
</section>
</section>
<section>
<h2>Streamable Package Format</h2>
<p>
This section defines the Streamable Package Format (SPF) which has the media type <code>application/package</code>.
</p>
<p class="note">
The term <dfn>streamable</dfn> is used to denote that processing of a package does not mandate keeping all individual parts in memory, both for creation and consumption of the package content.
</p>
<p class="note">
SPF is designed to be consistent with <a href="http://tools.ietf.org/html/rfc2046#section-5.1">multipart media types</a>. However it is not defined as a <code>multipart/*</code> subtype because these types require the inclusion of a <code>boundary</code> parameter in their media type. Setting this parameter is burdensome in situations when the publisher is not able to configure the server. It is also unnecessary as the boundary can be ascertained from the content of the file.
</p>
<p>
An example SPF file is as follows:
</p>
<pre class="example">
--gc0pJq0M:08jU534c0p
Content-Location: /index.html
Content-Type: text/html
&lt;html&gt;
&lt;head&gt;
&lt;script src="/scripts/app.js"&gt;&lt;/script&gt;
...
&lt;/head&gt;
...
&lt;/html&gt;
--gc0pJq0M:08jU534c0p
Content-Location: /scripts/app.js
Content-Type: text/javascript
module Math from '/scripts/helpers/math.js';
...
--gc0pJq0M:08jU534c0p
Content-Location: /scripts/helpers/math.js
Content-Type: text/javascript
export function sum(nums) { ... }
...
--gc0pJq0M:08jU534c0p--
</pre>
<p>
This SPF file includes three <a title="part">parts</a> — <code>/index.html</code>, <code>/scripts/app.js</code> and <code>/scripts/helpers/math.js</code> — and uses the <a>boundary</a> <code>gc0pJq0M:08jU534c0p</code>.
</p>
<p>
The general structure of an SPF file is described by the following [[!ABNF]]:
</p>
<pre class="highlight">
spf = [ header CRLF ]
1*( part CRLF )
"--" boundary "--"
header = *( message-header CRLF )
part = "--" boundary CRLF
*( message-header CRLF )
CRLF
[ message-body ]
</pre>
<section>
<h3>Package Header</h3>
<p class="issue">
Should a package include a header for itself? Should it be in the same <code>message-header</code> format as is used elsewhere? This is flexible if somewhat verbose, and enables us to lean on well-known existing methods of expressing metadata rather than inventing a custom format.
</p>
<p>
An SPF file MAY start with an optional <dfn>package header</dfn> that provides metadata about the package itself. The syntax of this header is the same as that of the header within each <a>part</a> of the package. See <a class="sectionRef" href="#parts"></a> for details.
</p>
<p>
An example package that includes a package header is shown below.
</p>
<pre class="example">
Content-Type: application/package
Content-Location: http://example.org/exampleWidget.pack
Link: &lt;config.xml&gt;; rel=describedby
--j38n02qryf9n0eqny8cq0
Content-Location: config.xml
Content-Type: application/xml
&lt;?xml version="1.0" encoding="UTF-8"?&gt;
&lt;widget xmlns = "http://www.w3.org/ns/widgets"
id = "http://example.org/exampleWidget"
version = "2.0 Beta"
height = "200"
width = "200"
viewmodes = "fullscreen"&gt;
...
&lt;/widget&gt;
... <em>other parts</em> ...
--j38n02qryf9n0eqny8cq0--
</pre>
<p>
The package metadata explicitly:
</p>
<ul>
<li>states that the file is a Streamable Package Format file through the <code>Content-Type</code> header</li>
<li>indicates that the file originally came from <code>http://example.org/exampleWidget.pack</code> through the <code>Content-Location</code> header</li>
<li>points to the part within the package that describes the package (the configuration document for the widget) through the <code>Link</code> header</li>
</ul>
</section>
<section>
<h3>Parts</h3>
<p>
A package file is comprised of a number of parts, separated by boundaries. Each <dfn>part</dfn> comprises a <a>header</a> and a <a>body</a>.
</p>
<p>
In ABNF, the definition of <code>message-header</code> and of <code>message-body</code> are taken from [[RFC2616]] and are:
</p>
<pre class="highlight">
message-header = field-name ":" [ field-value ]
field-name = token
field-value = *( field-content | LWS )
field-content = &lt;the OCTETs making up the field-value
and consisting of either *TEXT or combinations
of token, separators, and quoted-string&gt;
message-body = entity-body
| &lt;entity-body encoded as per Transfer-Encoding&gt;
entity-body = *OCTET
</pre>
<p>
The <dfn>body</dfn> of a part can be any binary data. It MUST NOT include the <a>boundary</a> used in the package.
</p>
<p>
The <dfn>header</dfn> of a part follows the same syntax as the header for an HTTP response, and can reuse any HTTP header. This header MUST include a <code>Content-Location</code> header which provides the <dfn>part URL</dfn>. The URL provided by the <code>Content-Location</code> header MUST be a <a href="http://url.spec.whatwg.org/#concept-relative-url">relative URL</a> and MUST NOT be a <a href="http://url.spec.whatwg.org/#concept-scheme-relative-url">scheme-relative URL</a>. The URL is resolved using a <a href="http://url.spec.whatwg.org/#concept-base-url">base URL</a> that is the location of the package. Other URLs used within the part header (for example in <code>Link</code> headers) or in the <a>body</a> of the part (for example within <code>src</code> attributes if the part is an HTML document) MUST be resolved using the part URL as the base URL.
</p>
<p>
Applications that process packages SHOULD ignore parts which do not have a <code>Content-Location</code> header, or whose <code>Content-Location</code> header is not a relative URL or is a scheme-relative URL. Such parts contain information about resources from a different origin than the package itself, which might not be trustworthy.
</p>
<p>
It is possible for multiple parts within a package to have the same <a>part URL</a>, either because they have exactly the same <code>Content-Location</code> header or because their <code>Content-Location</code> headers resolve to the same URL. Parts with the same part URL SHOULD be distinguishable by having different values for other headers commonly used for content negotiation, such as <code>Accept</code> or <code>Accept-Language</code>. When these headers are used to distinguish between parts, they SHOULD be listed within the <code>Vary</code> header for the parts that share the same part URL. For example:
</p>
<pre class="example">
--r 87q0ewahn8o9fqrt0cadkm
Content-Location: /events/extensible-web-summit-2014
Content-Type: text/html
Vary: Accept
... <em>HTML page about the Extensible Web Summit 2014</em> ...
--r 87q0ewahn8o9fqrt0cadkm
Content-Location: /events/extensible-web-summit-2014
Content-Type: text/calendar
Vary: Accept
... <em>iCalendar event for the Extensible Web Summit 2014</em> ...
--r 87q0ewahn8o9fqrt0cadkm--
</pre>
<p>
When a <a>part</a> is itself a Streamable Package Format package, its <a>package header</a> should become the <a title="header">part header</a>. For example:
</p>
<pre class="example">
Content-Location: http://example.org/brand.pack
--uf8eanfueq8yr8x03qyx8fq
Content-Location: brand/main.css
Content-Type: text/css
Link: &lt;brand/font.pack&gt;; rel=package; scope=brand/font/
@font-face {
font-family: 'BrandFont';
src: url('font/brand.woff') format('woff'),
url('font/brand.ttf') format('truetype');
}
... <em>rest of the CSS</em> ...
--uf8eanfueq8yr8x03qyx8fq
Content-Location: brand/font.pack
Content-Type: application/package
--j808fd8apfhacfe9a
Content-Location: font/brand.woff
Content-Type: application/font-woff
... <em>font in WOFF format</em> ...
--j808fd8apfhacfe9a
Content-Location: font/brand.ttf
Content-Type: application/font-sfnt
... <em>font in TrueType format</em> ...
--j808fd8apfhacfe9a--
--uf8eanfueq8yr8x03qyx8fq--
</pre>
</section>
<section>
<h3>Boundaries</h3>
<p>
A <dfn>boundary</dfn> is used to separate the <a title="part">parts</a> within the package. The same boundary is used throughout the package. The boundary used within an SPF file MUST NOT appear within the <a>body</a> of any part in the package. The boundary can be identified by an application by taking the string after the initial <code>"--"</code> of the first line that starts with a <code>"--"</code>.
</p>
<p>
The definition of <code>boundary</code> is taken from [[RFC2046]] and is:
</p>
<pre class="highlight">
boundary = 0*69bchars bcharsnospace
bchars = bcharsnospace / " "
bcharsnospace = DIGIT / ALPHA / "'" / "(" / ")" /
"+" / "_" / "," / "-" / "." /
"/" / ":" / "=" / "?"
</pre>
<p>
Note that this places the restriction on the boundary that it must not be more than 70 characters long and it cannot end in a space character.
</p>
</section>
<section>
<h3>Fragment Identifiers</h3>
<p>
The fragment identifier scheme for the <code>application/package</code> media type can be used to identify a <a>part</a> of the package or a fragment of a part.
</p>
<p class="note">
In general, links should be made directly to resources on the web rather than to parts of packages. The particular package(s) that a resource appears in may be an ephemeral phenomenon.
</p>
<p>
The general syntax of a fragment identifier for Streamable Package Format documents is one or more <code><var>param</var>=<var>value</var></code> pairs, separated by semi-colons. In ABNF:
</p>
<pre class="highlight">
fragment = parameter *( ";" parameter )
parameter = ( "url" "=" value )
/ ( "rel" "=" relation-type )
/ ( "type" "=" ( media-type / quoted-mt ) )
/ ( "lang" "=" Language-Tag )
/ ( "fragment" "=" value )
value = quoted-string / string-no-sc
quoted-string = &lt; as defined in [RFC2616] &gt;
string-no-sc = &lt; any string that does not contain a semicolon &gt;
relation-type = &lt; as defined in [RFC5988] &gt;
media-type = &lt; as defined in [RFC5988] &gt;
quoted-mt = &lt; as defined in [RFC5988] &gt;
Language-Tag = &lt; as defined in [RFC5646] &gt;
</pre>
<p>
Note that when used within a URL, some characters within the fragment identifier, such as <code>#</code>, should be escaped.
</p>
<p>
The fragment identifier MUST contain either a <code>url</code> parameter or a <code>rel</code> parameter and MUST NOT contain both of these parameters. These parameters are used to construct an initial list of <var>candidate parts</var> as follows:
</p>
<ol>
<li>
If the <code>url</code> parameter is specified, this is interpreted as a (possibly quoted) URL that is resolved relative to the base URL of the package to create a <var>candidate parts URL</var>. The <var>candidate parts</var> are those parts whose <a>part URL</a> is equal to the <var>candidate parts URL</var>.
</li>
<li>
If the <code>rel</code> parameter is specified, the <a>package header</a> is examined for <code>Link</code> headers whose <code>rel</code> parameter is equal to the <code>rel</code> parameter of the fragment identifier. There may be several such <code>Link</code> headers, which are used to create a list of <var>candidate part URLs</var> by resolving the target URL of each link against the base URL of the package. The <var>candidate parts</var> are those parts whose <a>part URL</a> is equal to any of the <var>candidate part URLs</var>.
</li>
</ol>
<p>
If the <code>type</code> or <code>lang</code> parameters are specified in the fragment identifier, these are used to further narrow down the <var>candidate parts</var>:
</p>
<ol>
<li>
If the <code>type</code> parameter is specified, the <var>candidate parts</var> are filtered down to only those that have a <code>Content-Type</code> header whose media type matches the (possibly quoted) media type provided by the <code>type</code> parameter.
</li>
<li>
If the <code>lang</code> parameter is specified, the <var>candidate parts</var> are further filtered down to only those that have a <code>Content-Language</code> header whose language tag matches the language tag provided by the <code>lang</code> parameter.
</li>
</ol>
<p>
If there are no parts in the list of <var>candidate parts</var> then the fragment identifier does not identify any fragment of the package. Otherwise, the identified part is the first part within the list of <var>candidate parts</var>.
</p>
<p>
If the fragment identifier specifies a <code>fragment</code> parameter, the value of that parameter is used to identify a fragment within the identified part, according to the media type for that part (as given by its <code>Content-Type</code> header).
</p>
<p>
For example, the URL:
</p>
<pre class="example">
http://example.org/downloads/editor.pack#url=/root.html;fragment=colophon
</pre>
<p>
refers to an element whose id is <code>colophon</code> within the part of the package whose <a>part URL</a> is <code>http://example.org/root.html</code>. This should be the same as:
</p>
<pre class="example">
http://example.org/root.html#colophon
</pre>
<p>
As an example of using the <code>rel</code> and <code>type</code> parameters, imagine a package like:
</p>
<pre class="example">
Content-Location: http://example.org/downloads/spending.pack
Link: &lt;/metadata/spending&gt;; rel=describedby
--ux3pqy9qcpfmqfexptbv43q8p
Content-Location: /metadata/spending
Content-Type: text/turtle
Vary: Accept
... <var>Metadata about the package in Turtle format</var> ...
--ux3pqy9qcpfmqfexptbv43q8p
Content-Location: /metadata/spending
Content-Type: application/ld+json
Vary: Accept
... <var>Metadata about the package in JSON-LD format</var> ...
--ux3pqy9qcpfmqfexptbv43q8p--
</pre>
<p>
The URL:
</p>
<pre class="example">
http://example.org/downloads/spending.pack#rel=describedby;type=application/ld+json
</pre>
<p>
would identify the second of the parts in the package. The <code>rel=describedby</code> in the fragment identifier indicates that the target URL of the <code>Link</code> header should be used to locate the relevant part. Since there are two parts whose <a>part URL</a> is <code>http://example.org/metadata/spending</code>, the <code>type=application/ld+json</code> parameter is used to narrow the selection down to the second of the parts, whose <code>Content-Type</code> header matches.
</p>
</section>
<section>
<h3>Security Considerations</h3>
<p>
As they contain other files, Streamable Package Format files may contain active content (such as scripts) which, if run, may have devastating effects. Applications should treat all files contained within a package with the same care as they would if they had been received individually.
</p>
<p>
Packages in the Streamable Package Format contain assertions about the content of resources at other locations on the web (indicated through the <a>part URL</a>). Applications that process files in the Streamable Package Format should be aware that the content might not match that at the part URL. <a href="#populating-caches" class="sectionRef"></a> discusses the implications of this when packages are used to populate caches.
</p>
</section>
<section class="informative">
<h3>Creating and Publishing Packages</h3>
<p>
This section contains some recommendations and guidelines for the creation of packages in the Streamable Package Format.
</p>
<section>
<h4>Compression</h4>
<p>
Packages may be compressed in their entirety or the individual parts of the package may be compressed independently. In the latter case, the part headers should indicate the compression algorithm that has been used on the part during the packaging process using a <a href="http://tools.ietf.org/html/draft-ietf-httpbis-p1-messaging-26#section-3.3.1"><code>Transfer-Encoding</code> header</a>.
</p>
<p class="issue">
Not sure what to recommend here. Presumably compressing the entire file undermines its streamability so the recommendation should be to compress individual parts?
</p>
</section>
<section>
<h4>Part Ordering</h4>
<p>
The first part in a package should usually provide a starting point for understanding or making use of the other resources in the package. For example, it might be:
</p>
<ul>
<li>a manifest file</li>
<li>the root page for a web application</li>
<li>the JavaScript file into which the others are imported</li>
<li>the table of contents of a digital publication</li>
<li>the page that was the initial page crawled when creating a web archive</li>
</ul>
<p>
The remaining parts should be ordered based on the priority with which they need to be loaded to enable the contents of the package to be used (with high priority parts earlier in the package), and based on the size of the part (with larger parts later in the package). For a web application, a suitable order might be:
</p>
<ol>
<li>the root page of the web application</li>
<li>set-up scripts & data</li>
<li>stylesheets</li>
<li>fonts</li>
<li>logo & background images</li>
<li>deferred scripts</li>
<li>content images</li>
<li>secondary HTML pages</li>
<li>other resources used only on those HTML pages</li>
</ol>
</section>
<section>
<h4>Part Headers</h4>
<p>
As described in <a href="#parts" class="sectionRef"></a>, you can provide any HTTP header for the parts within a package and must provide a <code>Content-Location</code> header. Some HTTP headers are inappropriate because the part is not, in fact, an HTTP response. Other HTTP headers that may be useful for applications are:
</p>
<ul>
<li>
<a href="http://tools.ietf.org/html/draft-ietf-httpbis-p1-messaging-26#section-3.3.1"><code>Transfer-Encoding</code></a>, if the part has been individually compressed
</li>
<li>
<a href="http://tools.ietf.org/html/draft-ietf-httpbis-p2-semantics-26#section-7.1.4"><code>Vary</code></a>, if there are multiple parts with the same <a>part URL</a>
</li>
<li>
<a href="http://tools.ietf.org/html/draft-ietf-httpbis-p2-semantics-26#section-3.1.1.5"><code>Content-Type</code></a>, <a href="http://tools.ietf.org/html/draft-ietf-httpbis-p2-semantics-26#section-3.1.2.2"><code>Content-Encoding</code></a> and/or <a href="http://tools.ietf.org/html/draft-ietf-httpbis-p2-semantics-26#section-3.1.3.2"><code>Content-Language</code></a> to provide different metadata about the representation
</li>
<li>
<a href="http://tools.ietf.org/html/draft-ietf-httpbis-p6-cache-26#section-5.2"><code>Cache-Control</code></a> and/or <a href="http://tools.ietf.org/html/draft-ietf-httpbis-p6-cache-26#section-5.3"><code>Expires</code></a> to control the caching of the individual parts
</li>
<li>
<a href="http://tools.ietf.org/html/draft-ietf-httpbis-p4-conditional-26#section-2.3"><code>ETag</code></a> and/or <a href="http://tools.ietf.org/html/draft-ietf-httpbis-p4-conditional-26#section-2.2"><code>Last-Modified</code></a> to support subsequent conditional fetching of the resource
</li>
<li>
<a href="http://tools.ietf.org/html/rfc5988#section-5"><code>Link</code></a> to indicate relationships between the file and other files in the package or on the web
</li>
</ul>
</section>
</section>
</section>
<section>
<h2><code>package</code> Link Relation</h2>
<p>
The <code>package</code> link relation refers to a package in which a representation of the link's context URL or related resources can be found. There are no constraints on the format of the package (the package may be a zip, or any of the other package formats listed in <a href="#intro" class="sectionRef"></a>, for example).
</p>
<p class="issue">
Would it be helpful to distinguish between a package in which the link's context resource itself can be found, and one which only contains related resources?
</p>
<p>
The <code>package</code> link relation can be used wherever link relations can be used, such as in links within web pages:
</p>
<pre class="example highlight">
&lt;a href="editor.gzip" rel="package"&gt;download this application&lt;/a&gt;
</pre>
<p>
or links within the header of an HTML page:
</p>
<pre class="example highlight">
&lt;link rel="package" href="http://example.org/exampleWidget.zip" type="application/widget"&gt;
</pre>
<p>
or links within an HTTP header:
</p>
<pre class="example highlight">
Link: &lt;/downloads/spending.pack&gt;; rel=package; type=application/package
</pre>
<section>
<h3>Package Scope</h3>
<p>
When referring to a package, it is often useful to provide an indication of what the package contains so that an application retrieving the package can anticipate which resources it can get from the package rather than directly. This can be indicated by specifying a <dfn>package scope</dfn>, which is a URL. Applications can use this as a hint that indicates that any resources whose URL starts with the package scope can be found within the package. If no package scope is indicated in a link, applications MUST NOT make any assumptions about the contents of the package.
</p>
<p>
In an HTML document, the <a>package scope</a> is indicated through a <code>scope</code> attribute on the <code>&lt;a&gt;</code> or <code>&lt;link&gt;</code> element that references the package. This attribute contains a URL that is resolved against the base URL of the HTML document. For example:
</p>
<pre class="example highlight">
&lt;a href="editor.gzip" rel="package" <strong>scope="/"</strong>&gt;download this application&lt;/a&gt;
</pre>
<p>
indicates that the entire website that includes the page in which this link is found is contained within the package, while:
</p>
<pre class="example highlight">
&lt;link rel="package" href="http://example.org/exampleWidget.zip" type="application/widget" <strong>scope="http://example.org/exampleWidget/"</strong>&gt;
</pre>
<p>
indicates that any resources whose URLs start with <code>http://example.org/exampleWidget/</code> can be found within <code>http://example.org/exampleWidget.zip</code>.
</p>
<p>
In an HTTP <code>Link</code> header, the <a>package scope</a> is indicated through a <code>scope</code> link extension. The value of this link extension has the same syntax and is resolved in the same way as the <code>anchor</code> parameter as defined in [[!RFC5988]]. (In summary, it is a URL wrapped in double quotes, which is resolved against the requested URL.)
</p>
<p>
For example if the response to a request to <code>http://example.org/data/spending-201405.csv</code> includes the header:
</p>
<pre class="example highlight">
Link: &lt;/downloads/spending.pack&gt;; rel=package; type=application/package; <strong>scope="spending-"</strong>
</pre>
<p>
this implies that the package at <code>http://example.org/downloads/spending.pack</code> contains all resources whose URL begins with <code>http://example.org/data/spending-</code> (including the requested <code>http://example.org/data/spending-201405.csv</code>).
</p>
</section>
</section>
<section>
<h2>IANA Considerations</h2>
<section>
<h3><code>application/package</code> Media Type Registration</h3>
<p>
This registration is for community review and will be submitted to the IESG for review, approval, and registration with IANA within the <a href="http://www.iana.org/assignments/media-types/">media type registry</a> in accordance with [[RFC6838]].
</p>
<dl>
<dt>Type name:</dt>
<dd><code>application</code></dd>
<dt>Subtype name:</dt>
<dd><code>package</code></dd>
<dt>Required parameters:</dt>
<dd>none</dd>
<dt>Optional parameters:</dt>
<dd>none</dd>
<dt>Encoding considerations:</dt>
<dd>binary</dd>
<dt>Security considerations:</dt>
<dd>See <a href="#security-considerations" class="sectionRef"></a></dd>
<dt>Interoperability considerations:</dt>
<dd>none</dd>
<dt>Published specification:</dt>
<dd><a href="">this document</a></dd>
<dt>Applications that use this media type:</dt>
<dd>no specific applications</dd>
<dt>Fragment identifier considerations:</dt>
<dd>See <a href="#fragment-identifiers" class="sectionRef"></a></dd>
<dt>Additional information:</dt>
<dd>
<dl>
<dt>Deprecated alias names for this type:</dt>
<dd>none</dd>
<dt>Magic number(s):</dt>
<dd>none</dd>
<dt>File extension(s):</dt>
<dd><code>pack</code></dd>
<dt>Macintosh file type code(s):</dt>
<dd>none</dd>
</dl>
</dd>
<dt>Person & email address to contact for further information:</dt>
<dd>Jeni Tennison &lt;jeni@theodi.org&gt;</dd>
<dt>Intended usage:</dt>
<dd>COMMON</dd>
<dt>Restrictions on usage:</dt>
<dd>none</dd>
<dt>Author:</dt>
<dd>Jeni Tennison</dd>
<dt>Change controller:</dt>
<dd>W3C</dd>
</dl>
</section>
<section>
<h3><code>package</code> Link Relation Registration</h3>
<p>
This registration is for community review and will be submitted to the IESG for review, approval, and registration with IANA within the <a href="https://www.iana.org/assignments/link-relations/">Registry of Link Relations</a> in accordance with [[RFC5988]].
</p>
<dl>
<dt>Relation name:</dt>
<dd><code>package</code></dd>
<dt>Description:</dt>
<dd>Refers to a package in which a representation of the link's context or related resources can be found.</dd>
<dt>Reference:</dt>
<dd><a href="">this document</a></dd>
</dl>
</section>
</section>
</body>
</html>