Skip to content

Commit

Permalink
Fix LT-22008: Do not crash importing flextext with empty word content (
Browse files Browse the repository at this point in the history
  • Loading branch information
jasonleenaylor authored Dec 13, 2024
1 parent 60bafa5 commit d8c3326
Show file tree
Hide file tree
Showing 2 changed files with 75 additions and 1 deletion.
5 changes: 4 additions & 1 deletion Src/LexText/Interlinear/BIRDInterlinearImporter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -728,6 +728,8 @@ private static IAnalysis CreateWordAnalysisStack(LcmCache cache, Word word)

foreach (var wordItem in word.Items)
{
if (wordItem.Value == null)
continue;
ITsString wordForm = null;
switch (wordItem.type)
{
Expand Down Expand Up @@ -758,7 +760,8 @@ private static IAnalysis CreateWordAnalysisStack(LcmCache cache, Word word)
}
else
{
Debug.Assert(analysis != null, "What else could this do?");
// There was an invalid analysis in the file. We can't do anything with it.
return null;
}

// Fill in morphemes, lex. entries, lex. gloss, and lex.gram.info
Expand Down
71 changes: 71 additions & 0 deletions Src/LexText/Interlinear/ITextDllTests/BIRDFormatImportTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -449,6 +449,77 @@ public void UglyEmptyDataShouldNotCrash()
}
}

/// <summary>
/// Regression test for LT-22008: a flextext file whose word element contains an
/// empty txt item (no text content) must import without throwing, and the
/// well-formed words in the following phrase must still be imported.
/// </summary>
[Test]
public void EmptyTxtItemUnderWordShouldNotCrash()
{
	// An interlinear text whose first phrase has a word with an empty txt item,
	// followed by a second, well-formed phrase (four words and one punctuation item).
	const string xml =
		"<?xml version=\"1.0\" encoding=\"utf-8\"?>" +
		"<document version=\"2\">" +
		"<interlinear-text guid=\"6a424526-aa64-4912-b8c7-9e5ae0ab4aae\">" +
		"<item type=\"title\" lang=\"en\">Test</item>" +
		"<paragraphs>" +
		"<paragraph guid=\"ccd62b33-c9f6-4e52-917f-00c67c3638c8\">" +
		"<phrases>" +
		"<phrase begin-time-offset=\"0\" end-time-offset=\"3750\" guid=\"b57a2665-402a-4ab1-af16-655c70df062f\">" +
		"<item lang=\"fr\" type=\"txt\">testing paragraph without words</item>" +
		"<words>" +
		"<word guid=\"093fe3c6-d467-4c28-a03e-d511b94185da\">" +
		"<item lang=\"fr\" type=\"txt\"/>" + // empty txt item
		"</word>" +
		"</words>" +
		"<item lang=\"en\" type=\"gls\">In the country of a Mongol king lived three sisters.</item>" +
		"</phrase>" +
		"<phrase guid=\"441c1171-78a1-481b-9732-b9c558948ce5\">" +
		"<item type=\"txt\" lang=\"fr\">This is a test.</item>" +
		"<item type=\"segnum\" lang=\"en\">1</item>" +
		"<words>" +
		"<word guid=\"d161bf25-b6df-418d-b9c1-a396ff3ec5b1\">" +
		"<item type=\"txt\" lang=\"fr\">This</item>" +
		"</word>" +
		"<word guid=\"ab9e81c7-3157-4011-a8a1-68eb1afc0be1\">" +
		"<item type=\"txt\" lang=\"fr\">is</item>" +
		"</word>" +
		"<word guid=\"0ef6172b-07c3-4c91-b0b2-afbebca5fca0\">" +
		"<item type=\"txt\" lang=\"fr\">a</item>" +
		"</word>" +
		"<word guid=\"48949d94-869c-45d5-9251-b68ce7d66cee\">" +
		"<item type=\"txt\" lang=\"fr\">test</item>" +
		"</word>" +
		"<word>" +
		"<item type=\"punct\" lang=\"fr\">.</item>" +
		"</word>" +
		"</words>" +
		"<item type=\"gls\" lang=\"en\"></item>" +
		"</phrase>" +
		"</phrases>" +
		"</paragraph>" +
		"</paragraphs>" +
		"<languages>" +
		"<language lang=\"en\" font=\"Charis SIL\" />" +
		"<language lang=\"fr\" font=\"Times New Roman\" vernacular=\"true\" />" +
		"</languages>" +
		"</interlinear-text>" +
		"</document>";

	var li = new LinguaLinksImport(Cache, null, null);
	LCModel.IText text = null;
	// Encode with UTF-8 so the bytes agree with the encoding named in the XML declaration;
	// ASCII would silently replace any non-ASCII character with '?'.
	using (var stream = new MemoryStream(Encoding.UTF8.GetBytes(xml)))
	{
		// SUT - Verify that no crash occurs importing this data: see LT-22008
		Assert.DoesNotThrow(() => li.ImportInterlinear(new DummyProgressDlg(), stream, 0, ref text));
		using (var firstEntry = Cache.LanguageProject.Texts.GetEnumerator())
		{
			// Fail with a clear message rather than dereferencing an undefined Current
			// when the import produced no text at all.
			Assert.That(firstEntry.MoveNext(), Is.True, "The import should have created a text.");
			var imported = firstEntry.Current;
			Assert.That(imported.ContentsOA.ParagraphsOS.Count, Is.EqualTo(1));
			var para = (IStTxtPara)imported.ContentsOA.ParagraphsOS[0];
			Assert.That(para.SegmentsOS.Count, Is.EqualTo(2));
			// Verify that the words with non-empty txt were imported
			Assert.That(para.SegmentsOS[1].AnalysesRS.Count, Is.EqualTo(5));
		}
	}
}

[Test]
public void TestImportMergeFlexTextWithSegnumItem()
{
Expand Down

0 comments on commit d8c3326

Please sign in to comment.