Skip to content

Commit

Permalink
ttml to vtt (#129)
Browse files Browse the repository at this point in the history
* ttml to vtt

* update TtmlToVttConverter util class, add vtt header

* update

* save update

* update
  • Loading branch information
melindawangmsft authored Aug 14, 2023
1 parent 2aa641c commit 96221d9
Show file tree
Hide file tree
Showing 3 changed files with 224 additions and 10 deletions.
9 changes: 4 additions & 5 deletions Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -61,12 +61,11 @@ amsmigrate assets -s <subscription id> -g <resource group> -n <ams account name>

var cleanupOptionsBinder = new CleanupOptionsBinder();
var cleanupCommand = cleanupOptionsBinder.GetCommand("cleanup", @"Do the cleanup of AMS account or Storage account
Examples to cleanup account:
cleanup -s <subscriptionid> -g <resourcegroup> -n <account> -ax true
This command forcefully removes the Azure Media Services (AMS) account.
Examples to cleanup asset:
Examples:
cleanup -s <subscriptionid> -g <resourcegroup> -n <account> -x true
This command forcefully removes all assets in the given account.");
This command forcefully removes all assets in the given account.
cleanup -s <subscriptionid> -g <resourcegroup> -n <account> -ax true
This command forcefully removes the Azure Media Services (AMS) account.");
rootCommand.Add(cleanupCommand);
cleanupCommand.SetHandler(
async context =>
Expand Down
48 changes: 43 additions & 5 deletions pipes/MultiFileStream.cs
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
using AMSMigrate.Contracts;
using AMSMigrate.Decryption;
using AMSMigrate.Fmp4;
using AMSMigrate.Transform;
using Azure.ResourceManager.Media.Models;
using Azure.Storage.Blobs;
using Azure.Storage.Blobs.Specialized;
using Microsoft.Extensions.Logging;
using System.IO;
using System.IO.Pipes;
using System.Text;
using System.Xml;

namespace AMSMigrate.Pipes
{
Expand Down Expand Up @@ -51,6 +55,14 @@ public async Task DownloadAsync(Stream stream, CancellationToken cancellationTok
blob = _container.GetBlockBlobClient(chunkName);
await DownloadClearBlobContent(blob, stream, cancellationToken);
}
else
{
byte[] webvttBytes = Encoding.UTF8.GetBytes("WEBVTT");
using (MemoryStream headerStream = new MemoryStream(webvttBytes))
{
headerStream.CopyTo(stream);
}
}

// Report progress every 10%.
var i = 0;
Expand Down Expand Up @@ -164,15 +176,41 @@ private void GenerateVttContent(IList<Box> inputBoxes, MP4Writer mp4Writer)
}

var ttmlText = mdatBox.SampleData;
try
{
// Call API to convert ttmlText to VTT text.
var vttText = TtmlToVttConverter.Convert(ttmlText);

byte[] vttText = { 0 };
if (vttText != null)
{
byte[] contentBytes = Encoding.UTF8.GetBytes(RemoveXmlControlCharacters(vttText));
mp4Writer.Write(contentBytes);
}
else
{
_logger.LogInformation("vttText is null");
}
}
catch (Exception ex)
{
_logger.LogError(ex, "Error converting TTML to VTT.");
}
}

// Call API to convert ttmlText to VTT text.
/// <summary>
/// Returns a copy of <paramref name="input"/> with low control characters removed.
/// </summary>
/// <remarks>
/// A character is kept when its code point is 0x20 or above, or when
/// <see cref="char.IsWhiteSpace(char)"/> reports it as whitespace (so tab,
/// line feed and carriage return survive). Every other character below 0x20
/// is dropped.
/// </remarks>
/// <param name="input">The text to filter.</param>
/// <returns>The filtered text; an empty string when nothing is kept.</returns>
private static string RemoveXmlControlCharacters(string input)
{
    // The result can never be longer than the input, so size the buffer once.
    var output = new StringBuilder(input.Length);

    foreach (char c in input)
    {
        // Exclude control characters (0x00 to 0x1F) unless they count as whitespace.
        if (c >= 0x20 || char.IsWhiteSpace(c))
        {
            output.Append(c);
        }
    }

    return output.ToString();
}

private async Task DownloadClearBlobContent(BlockBlobClient sourceBlob, Stream outputStream, CancellationToken cancellationToken)
Expand Down
177 changes: 177 additions & 0 deletions transform/CTtml2WebVttConv.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
using AMSMigrate.Contracts;
using System.IO;
using System.Text;
using System.Text.RegularExpressions;
using System.Xml;
using System.Xml.Serialization;

/// <summary>
/// Converts the paragraphs of a TTML document into WebVTT cue text.
/// </summary>
/// <remarks>
/// Only the cue blocks are produced; no "WEBVTT" header line is emitted here.
/// The <c>begin</c> and <c>end</c> attribute values are copied into the cue
/// timing line verbatim, without any reformatting.
/// </remarks>
public static class TtmlToVttConverter
{
    /// <summary>
    /// Reads a TTML document and returns one WebVTT cue per non-empty <c>p</c> element.
    /// </summary>
    /// <param name="ttmlText">The raw bytes of the TTML (XML) document.</param>
    /// <returns>
    /// The concatenated cues, each preceded by a blank line, or <c>null</c>
    /// when the document yields no cue text at all.
    /// </returns>
    /// <exception cref="InvalidOperationException">
    /// <paramref name="ttmlText"/> is <c>null</c>, or a <c>p</c> element lacks a
    /// <c>begin</c> or <c>end</c> attribute.
    /// </exception>
    public static string? Convert(byte[]? ttmlText)
    {
        if (ttmlText == null)
        {
            throw new InvalidOperationException("TtmlToVttConverter: Ttml is null");
        }

        // DTDs are rejected outright so the reader never resolves them.
        var settings = new XmlReaderSettings { DtdProcessing = DtdProcessing.Prohibit };
        var webVttContent = new StringBuilder();

        // Layout hints from the most recently seen <region>; they apply to every later cue.
        string strStartCue = string.Empty, strCueSize = string.Empty, strAlign = string.Empty;

        using (var mdatStream = new MemoryStream(ttmlText))
        using (var reader = XmlReader.Create(mdatStream, settings))
        {
            while (reader.Read())
            {
                if (reader.NodeType != XmlNodeType.Element)
                {
                    continue;
                }

                if (reader.Name.Equals("region", StringComparison.OrdinalIgnoreCase))
                {
                    ParseRegionAttributes(reader, out strStartCue, out strCueSize, out strAlign);
                    continue;
                }

                if (!reader.Name.Equals("p", StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }

                // Must be captured while still positioned on the element itself,
                // before the reader is moved onto its attributes.
                bool isEmptyParagraph = reader.IsEmptyElement;

                ReadTimingAttributes(reader, out string? strStartTime, out string? strEndTime);
                if (string.IsNullOrEmpty(strStartTime) || string.IsNullOrEmpty(strEndTime))
                {
                    throw new InvalidOperationException("Missing begin or end attributes.");
                }

                // A self-closing <p/> has no content and no matching end tag. Scanning
                // for "</p>" would swallow the following paragraph or never terminate.
                if (isEmptyParagraph)
                {
                    continue;
                }

                string cueText = ReadParagraphText(reader);
                if (string.IsNullOrEmpty(cueText))
                {
                    continue;
                }

                bool hasRegionSettings = !string.IsNullOrEmpty(strAlign)
                    && !string.IsNullOrEmpty(strStartCue)
                    && !string.IsNullOrEmpty(strCueSize);

                webVttContent.Append(hasRegionSettings
                    ? $"\n\n{strStartTime} --> {strEndTime} position:{strStartCue} align:{strAlign} size:{strCueSize}\n{cueText}"
                    : $"\n\n{strStartTime} --> {strEndTime}\n{cueText}");
            }
        }

        return webVttContent.Length > 0 ? webVttContent.ToString() : null;
    }

    /// <summary>
    /// Collects the <c>begin</c> and <c>end</c> attribute values of the element the reader is on.
    /// </summary>
    private static void ReadTimingAttributes(XmlReader reader, out string? begin, out string? end)
    {
        begin = null;
        end = null;

        bool hasAttribute = reader.MoveToFirstAttribute();
        while (hasAttribute)
        {
            if (reader.Name.Equals("begin", StringComparison.OrdinalIgnoreCase))
            {
                begin = reader.Value;
            }
            else if (reader.Name.Equals("end", StringComparison.OrdinalIgnoreCase))
            {
                end = reader.Value;
            }

            hasAttribute = reader.MoveToNextAttribute();
        }
    }

    /// <summary>
    /// Accumulates the text of the current paragraph up to its closing <c>p</c> tag,
    /// turning each <c>br</c> element into a line break.
    /// </summary>
    private static string ReadParagraphText(XmlReader reader)
    {
        var text = new StringBuilder();

        // Looping on Read() guarantees termination even if the end tag is never seen.
        while (reader.Read())
        {
            switch (reader.NodeType)
            {
                case XmlNodeType.Text:
                    text.Append(reader.Value);
                    break;

                case XmlNodeType.Element when reader.Name.Equals("br", StringComparison.OrdinalIgnoreCase):
                    text.AppendLine();
                    break;

                case XmlNodeType.EndElement when reader.Name.Equals("p", StringComparison.OrdinalIgnoreCase):
                    return text.ToString();

                default:
                    break;
            }
        }

        return text.ToString();
    }

    /// <summary>
    /// Extracts the cue position, size and alignment from the attributes of a <c>region</c> element.
    /// </summary>
    /// <param name="pReader">A reader positioned on the <c>region</c> element.</param>
    /// <param name="strStartCue">The leading percentage of <c>tts:origin</c>, or empty.</param>
    /// <param name="strCueSize">The leading percentage of <c>tts:extent</c>, or empty.</param>
    /// <param name="strAlign">The value of <c>tts:textAlign</c>, or empty.</param>
    private static void ParseRegionAttributes(XmlReader pReader, out string strStartCue, out string strCueSize, out string strAlign)
    {
        strStartCue = strCueSize = strAlign = string.Empty;

        while (pReader.MoveToNextAttribute())
        {
            string attributeName = pReader.Name;
            string attributeValue = pReader.Value;

            if (attributeName.Equals("tts:origin", StringComparison.OrdinalIgnoreCase))
            {
                // Values without a '%' leave the previous (empty) setting untouched.
                strStartCue = LeadingPercentage(attributeValue) ?? strStartCue;
            }
            else if (attributeName.Equals("tts:extent", StringComparison.OrdinalIgnoreCase))
            {
                strCueSize = LeadingPercentage(attributeValue) ?? strCueSize;
            }
            else if (attributeName.Equals("tts:textAlign", StringComparison.OrdinalIgnoreCase))
            {
                strAlign = attributeValue;
            }
        }
    }

    /// <summary>
    /// Returns the part of <paramref name="value"/> up to and including its first '%',
    /// or <c>null</c> when it contains none.
    /// </summary>
    private static string? LeadingPercentage(string value)
    {
        int index = value.IndexOf('%');
        return index >= 0 ? value.Substring(0, index + 1) : null;
    }
}

0 comments on commit 96221d9

Please sign in to comment.