-
Notifications
You must be signed in to change notification settings - Fork 59
/
videostreamer.c
474 lines (377 loc) · 12.1 KB
/
videostreamer.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
//
// This library provides remuxing from a video stream (such as an RTSP URL) to
// an MP4 container. It writes a fragmented MP4 so that it can be streamed to a
// pipe.
//
// There is no re-encoding. The stream is copied as is.
//
// The logic here is heavily based on remuxing.c by Stefano Sabatini.
//
#include <errno.h>
#include <libavdevice/avdevice.h>
#include <libavutil/timestamp.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "videostreamer.h"
// Forward declaration of the file-local packet logger defined at the bottom
// of this file. It prints a packet's pts, dts, duration, and stream index,
// prefixed with the given tag string.
static void
__vs_log_packet(const AVFormatContext * const,
const AVPacket * const, const char * const);
// Perform the library-wide FFmpeg initialisation this file relies on.
//
// NOTE(review): av_register_all() is reported as deprecated in newer FFmpeg
// releases -- confirm against the FFmpeg version this project targets.
void
vs_setup(void)
{
	// Muxers, demuxers, and protocols.
	av_register_all();

	// Device formats.
	avdevice_register_all();

	// Networking support (the result is not checked).
	avformat_network_init();
}
// Open an input and locate its first video stream.
//
// Parameters:
//   input_format_name  name passed to av_find_input_format(); must be
//                      non-NULL and non-empty
//   input_url          URL passed to avformat_open_input(); must be non-NULL
//                      and non-empty
//   verbose            when true, dump the input format and report skipped
//                      non-video streams on stdout
//
// Returns a newly allocated VSInput (release it with vs_destroy_input()), or
// NULL after printing a message if anything fails.
struct VSInput *
vs_open_input(const char * const input_format_name,
		const char * const input_url, const bool verbose)
{
	const bool have_format = input_format_name != NULL &&
		input_format_name[0] != '\0';
	const bool have_url = input_url != NULL && input_url[0] != '\0';
	if (!have_format || !have_url) {
		printf("%s\n", strerror(EINVAL));
		return NULL;
	}

	struct VSInput * const in = calloc(1, sizeof *in);
	if (in == NULL) {
		printf("%s\n", strerror(errno));
		return NULL;
	}

	AVInputFormat * const fmt = av_find_input_format(input_format_name);
	if (fmt == NULL) {
		printf("input format not found\n");
		goto fail;
	}

	int const rc = avformat_open_input(&in->format_ctx, input_url, fmt, NULL);
	if (rc != 0) {
		printf("unable to open input: %s\n", av_err2str(rc));
		goto fail;
	}

	if (avformat_find_stream_info(in->format_ctx, NULL) < 0) {
		printf("failed to find stream info\n");
		goto fail;
	}

	if (verbose) {
		av_dump_format(in->format_ctx, 0, input_url, 0);
	}

	// Remember the index of the first stream whose codec type is video.
	in->video_stream_index = -1;
	for (unsigned int idx = 0; idx < in->format_ctx->nb_streams; idx++) {
		if (in->format_ctx->streams[idx]->codecpar->codec_type ==
				AVMEDIA_TYPE_VIDEO) {
			in->video_stream_index = (int) idx;
			break;
		}

		if (verbose) {
			printf("skip non-video stream %u\n", idx);
		}
	}

	if (in->video_stream_index == -1) {
		printf("no video stream found\n");
		goto fail;
	}

	return in;

fail:
	// Single cleanup path for every failure after the allocation.
	vs_destroy_input(in);
	return NULL;
}
// Close the input's format context (if one was opened) and free the VSInput.
// Passing NULL is a no-op.
void
vs_destroy_input(struct VSInput * const input)
{
	if (input != NULL) {
		if (input->format_ctx != NULL) {
			avformat_close_input(&input->format_ctx);
		}

		free(input);
	}
}
struct VSOutput *
vs_open_output(const char * const output_format_name,
const char * const output_url, const struct VSInput * const input,
const bool verbose)
{
if (!output_format_name || strlen(output_format_name) == 0 ||
!output_url || strlen(output_url) == 0 ||
!input) {
printf("%s\n", strerror(EINVAL));
return NULL;
}
struct VSOutput * const output = calloc(1, sizeof(struct VSOutput));
if (!output) {
printf("%s\n", strerror(errno));
return NULL;
}
AVOutputFormat * const output_format = av_guess_format(output_format_name,
NULL, NULL);
if (!output_format) {
printf("output format not found\n");
vs_destroy_output(output);
return NULL;
}
if (avformat_alloc_output_context2(&output->format_ctx, output_format,
NULL, NULL) < 0) {
printf("unable to create output context\n");
vs_destroy_output(output);
return NULL;
}
// Copy the video stream.
AVStream * const out_stream = avformat_new_stream(output->format_ctx, NULL);
if (!out_stream) {
printf("unable to add stream\n");
vs_destroy_output(output);
return NULL;
}
AVStream * const in_stream = input->format_ctx->streams[
input->video_stream_index];
if (avcodec_parameters_copy(out_stream->codecpar,
in_stream->codecpar) < 0) {
printf("unable to copy codec parameters\n");
vs_destroy_output(output);
return NULL;
}
if (verbose) {
av_dump_format(output->format_ctx, 0, output_url, 1);
}
// Open output file.
if (avio_open(&output->format_ctx->pb, output_url, AVIO_FLAG_WRITE) < 0) {
printf("unable to open output file\n");
vs_destroy_output(output);
return NULL;
}
// Write file header.
AVDictionary * opts = NULL;
// -movflags frag_keyframe tells the mp4 muxer to fragment at each video
// keyframe. This is necessary for it to support output to a non-seekable
// file (e.g., pipe).
//
// -movflags isml+frag_keyframe is the same, except isml appears to be to
// make the output a live smooth streaming feed (as opposed to not live). I'm
// not sure the difference, but isml appears to be a microsoft
// format/protocol.
//
// To specify both, use isml+frag_keyframe as the value.
//
// I found that while Chrome had no trouble displaying the resulting mp4 with
// just frag_keyframe, Firefox would not until I also added empty_moov.
// empty_moov apparently writes some info at the start of the file.
if (av_dict_set(&opts, "movflags", "frag_keyframe+empty_moov", 0) < 0) {
printf("unable to set movflags opt\n");
vs_destroy_output(output);
return NULL;
}
if (av_dict_set_int(&opts, "flush_packets", 1, 0) < 0) {
printf("unable to set flush_packets opt\n");
vs_destroy_output(output);
av_dict_free(&opts);
return NULL;
}
if (avformat_write_header(output->format_ctx, &opts) < 0) {
printf("unable to write header\n");
vs_destroy_output(output);
av_dict_free(&opts);
return NULL;
}
// Check any options that were not set. Because I'm not sure if all are
// appropriate to set through the avformat_write_header().
if (av_dict_count(opts) != 0) {
printf("some options not set\n");
vs_destroy_output(output);
av_dict_free(&opts);
return NULL;
}
av_dict_free(&opts);
output->last_dts = AV_NOPTS_VALUE;
return output;
}
// Finish and release an output. Passing NULL is a no-op.
//
// If the output has a format context, this writes the trailer, closes the
// I/O context, and frees the format context; failures of the first two are
// reported on stdout but do not stop the teardown.
//
// NOTE(review): av_write_trailer() is called whenever format_ctx is set; this
// function has no way to tell whether the header was ever written. FFmpeg
// documents av_write_trailer() as valid only after a successful
// avformat_write_header() -- confirm callers respect that.
void
vs_destroy_output(struct VSOutput * const output)
{
	if (output == NULL) {
		return;
	}

	if (output->format_ctx != NULL) {
		const int trailer_rc = av_write_trailer(output->format_ctx);
		if (trailer_rc != 0) {
			printf("unable to write trailer\n");
		}

		const int close_rc = avio_closep(&output->format_ctx->pb);
		if (close_rc != 0) {
			printf("avio_closep failed\n");
		}

		avformat_free_context(output->format_ctx);
	}

	free(output);
}
// Read a compressed and encoded frame as a packet.
//
// Parameters:
//   input    input to read from; must be non-NULL
//   pkt      packet to fill; must be non-NULL. It is zeroed before reading.
//   verbose  when true, log the packet (or the reason it was skipped) on
//            stdout
//
// When 1 is returned the packet is left referenced for the caller. When 0 is
// returned the packet has already been unreferenced here.
//
// Returns:
// -1 if error
// 0 if nothing useful read (e.g., non-video packet)
// 1 if read a packet
int
vs_read_packet(const struct VSInput * input, AVPacket * const pkt,
		const bool verbose)
{
	if (!input || !pkt) {
		// Nothing has set errno at this point, so report the invalid argument
		// explicitly.
		printf("%s\n", strerror(EINVAL));
		return -1;
	}

	memset(pkt, 0, sizeof(AVPacket));

	// Read encoded frame (as a packet).
	if (av_read_frame(input->format_ctx, pkt) != 0) {
		printf("unable to read frame\n");
		return -1;
	}

	// Ignore it if it's not our video stream.
	if (pkt->stream_index != input->video_stream_index) {
		if (verbose) {
			printf("skipping packet from input stream %d, our video is from stream %d\n",
					pkt->stream_index, input->video_stream_index);
		}

		av_packet_unref(pkt);
		return 0;
	}

	if (verbose) {
		__vs_log_packet(input->format_ctx, pkt, "in");
	}

	return 1;
}
// Write a packet (as read by vs_read_packet()) to the output.
//
// We change the packet's stream_index, pts, dts, duration, and pos:
//   - stream_index becomes 0, since the output only has one stream
//   - pts, dts, and duration are rescaled from the input stream's time base
//     to the output stream's time base
//   - dts (and pts where needed) is rewritten if it would not be strictly
//     greater than the last dts we wrote
//   - unset pts/dts become 0
//   - pos becomes -1
//
// We do not unref it.
//
// Parameters:
//   input    input the packet came from; must be non-NULL
//   output   output to write to; must be non-NULL. Its last_dts is updated.
//   pkt      packet to write; must be non-NULL
//   verbose  when true, log adjustments and the outgoing packet on stdout
//
// Returns:
// -1 if error
// 1 if we wrote the packet
int
vs_write_packet(const struct VSInput * const input,
		struct VSOutput * const output, AVPacket * const pkt, const bool verbose)
{
	if (!input || !output || !pkt) {
		printf("%s\n", strerror(EINVAL));
		return -1;
	}

	// Look up the input stream, making sure the index is in range before we
	// use it to index the streams array.
	AVStream * in_stream = NULL;
	if (pkt->stream_index >= 0 &&
			(unsigned int) pkt->stream_index < input->format_ctx->nb_streams) {
		in_stream = input->format_ctx->streams[pkt->stream_index];
	}
	if (!in_stream) {
		printf("input stream not found with stream index %d\n", pkt->stream_index);
		return -1;
	}

	// If there are multiple input streams, then the stream index on the packet
	// may not match the stream index in our output. We need to ensure the index
	// matches. Note by this point we have checked that it is indeed a packet
	// from the stream we want (we do this when reading the packet).
	//
	// As we only ever have a single output stream (one, video), the index will
	// be 0.
	if (pkt->stream_index != 0) {
		if (verbose) {
			printf("updating packet stream index to 0 (from %d)\n",
					pkt->stream_index);
		}

		pkt->stream_index = 0;
	}

	AVStream * out_stream = NULL;
	if (output->format_ctx->nb_streams > 0) {
		out_stream = output->format_ctx->streams[pkt->stream_index];
	}
	if (!out_stream) {
		printf("output stream not found with stream index %d\n", pkt->stream_index);
		return -1;
	}

	// Convert the timestamps to the output stream's time base first.
	// output->last_dts is recorded in the output time base (see the bottom of
	// this function), so the monotonicity check below must compare values in
	// that same time base. Unset timestamps stay unset for now.
	if (pkt->pts != AV_NOPTS_VALUE) {
		pkt->pts = av_rescale_q_rnd(pkt->pts, in_stream->time_base,
				out_stream->time_base, AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX);
	}

	if (pkt->dts != AV_NOPTS_VALUE) {
		pkt->dts = av_rescale_q_rnd(pkt->dts, in_stream->time_base,
				out_stream->time_base, AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX);
	}

	pkt->duration = av_rescale_q(pkt->duration, in_stream->time_base,
			out_stream->time_base);
	pkt->pos = -1;

	// It is possible that the input is not well formed. Its dts (decompression
	// timestamp) may fluctuate. av_write_frame() says that the dts must be
	// strictly increasing.
	//
	// Packets from such inputs might look like:
	//
	// in: pts:18750 pts_time:0.208333 dts:18750 dts_time:0.208333 duration:3750 duration_time:0.0416667 stream_index:1
	// in: pts:0 pts_time:0 dts:0 dts_time:0 duration:3750 duration_time:0.0416667 stream_index:1
	//
	// dts here is 18750 and then 0.
	//
	// If we try to write the second packet as is, we'll see this error:
	// [mp4 @ 0x10f1ae0] Application provided invalid, non monotonically increasing dts to muxer in stream 1: 18750 >= 0
	//
	// This is apparently a fairly common problem. In ffmpeg.c (as of ffmpeg
	// 3.2.4 at least) there is logic to rewrite the dts and warn if it happens.
	// Let's do the same. Note my logic is a little different here.
	//
	// It is also possible for input streams to include a packet with
	// dts/pts=NOPTS after packets with dts/pts set. If we wrote those as is we
	// would hit the same error (non monotonically increasing DTS), since the
	// output packet would end up with dts/pts=0. So an unset dts after we have
	// already written one is fixed up too.
	const bool fix_dts = output->last_dts != AV_NOPTS_VALUE &&
		(pkt->dts == AV_NOPTS_VALUE || pkt->dts <= output->last_dts);

	if (fix_dts) {
		int64_t const next_dts = output->last_dts+1;

		if (verbose) {
			printf("Warning: Non-monotonous DTS in input stream. Previous: %" PRId64 " current: %" PRId64 ". changing to %" PRId64 ".\n",
					output->last_dts, pkt->dts, next_dts);
		}

		// We also apparently (ffmpeg.c does this too) need to update the pts.
		// Otherwise we see an error like:
		//
		// [mp4 @ 0x555e6825ea60] pts (3780) < dts (22531) in stream 0
		if (pkt->pts != AV_NOPTS_VALUE && pkt->pts >= pkt->dts) {
			pkt->pts = FFMAX(pkt->pts, next_dts);
		}

		// In the case where pkt->dts was AV_NOPTS_VALUE, pkt->pts can be
		// AV_NOPTS_VALUE too which we fix as well.
		if (pkt->pts == AV_NOPTS_VALUE) {
			pkt->pts = next_dts;
		}

		pkt->dts = next_dts;
	}

	// Set pts/dts if still not set. Otherwise we will receive warnings like
	//
	// [mp4 @ 0x55688397bc40] Timestamps are unset in a packet for stream 0. This
	// is deprecated and will stop working in the future. Fix your code to set
	// the timestamps properly
	//
	// [mp4 @ 0x55688397bc40] Encoder did not produce proper pts, making some up.
	if (pkt->pts == AV_NOPTS_VALUE) {
		pkt->pts = 0;
	}

	if (pkt->dts == AV_NOPTS_VALUE) {
		pkt->dts = 0;
	}

	if (verbose) {
		__vs_log_packet(output->format_ctx, pkt, "out");
	}

	// Track last dts we see (see where we use it for why). This is in the
	// output stream's time base.
	output->last_dts = pkt->dts;

	// Write encoded frame (as a packet).
	// av_interleaved_write_frame() works too, but I don't think it is needed.
	// Using av_write_frame() skips buffering.
	const int write_res = av_write_frame(output->format_ctx, pkt);
	if (write_res != 0) {
		printf("unable to write frame: %s\n", av_err2str(write_res));
		return -1;
	}

	return 1;
}
// Print one line on stdout describing a packet: its pts, dts, and duration
// (each both as a raw value and as a time in the time base of the packet's
// stream within format_ctx), followed by its stream index. The line is
// prefixed with tag.
static void
__vs_log_packet(const AVFormatContext * const format_ctx,
		const AVPacket * const pkt, const char * const tag)
{
	const int idx = pkt->stream_index;
	AVRational * const tb = &format_ctx->streams[idx]->time_base;

	printf("%s: pts:%s pts_time:%s dts:%s dts_time:%s duration:%s duration_time:%s stream_index:%d\n",
			tag,
			av_ts2str(pkt->pts), av_ts2timestr(pkt->pts, tb),
			av_ts2str(pkt->dts), av_ts2timestr(pkt->dts, tb),
			av_ts2str(pkt->duration), av_ts2timestr(pkt->duration, tb),
			idx);
}