diff --git a/CHANGELOG.md b/CHANGELOG.md index f091916..7b4ebd3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ +# 1.1.3.14 - 20/04/2024 +- We now scan pre-existing destination files for apparent source log data, and refuse to overwrite them if it exists. + # 1.1.3.13 - 17/04/2024 - File and folder selection dialogs now open by default to the standard F-Chat 3.0 log location (%appdata%/fchat/data) when selecting source logs, and to the user's desktop when selecting a destination. - Minor fix: HTML output files containing a second, empty HTML body. diff --git a/Common.cs b/Common.cs index 9730602..3675a87 100644 --- a/Common.cs +++ b/Common.cs @@ -65,24 +65,24 @@ static class Common "; public static bool plaintext = true; public static string lastException = string.Empty; - public static uint lastTimestamp; + public static uint lastTimestamp = 0U; public readonly static Dictionary tagClosings = new() { { "b", "" }, + { "big", "" }, + { "color", "" }, + { "eicon", ".gif\" />" }, { "i", "" }, + { "icon", ".png\" />" }, + { "noparse", "" }, { "s", "" }, - { "u", "" }, + { "session", "" }, + { "spoiler", "" }, { "sub", "" }, { "sup", "" }, - { "big", "" }, - { "noparse", "" }, + { "u", "" }, { "url", "" }, - { "icon", ".png\" />" }, - { "eicon", ".gif\" />" }, { "user", "" }, - { "spoiler", "" }, - { "session", "" }, - { "color", "" }, }; public static DateTime timeBegin; @@ -123,7 +123,7 @@ public static bool IsValidPattern(string? pattern = null) return true; } - public static bool IsValidTimestamp(uint timestamp) + public static bool IsValidTimestamp(uint timestamp, bool TSTestOverride = false) { if (timestamp < 1) // If it came before Jan. 1, 1970, there's a problem. return false; @@ -131,7 +131,7 @@ public static bool IsValidTimestamp(uint timestamp) return false; if ((DTFromStamp(timestamp).ToString(dateFormat) ?? string.Empty).Equals(string.Empty)) // If it can't be translated to a date, also a problem. 
/// <summary>
/// Heuristically tests whether an existing file begins with what looks like the header of a
/// binary source log message, so that callers can refuse to overwrite it.
/// </summary>
/// <param name="targetFile">Path of the file to inspect. It is opened read-only.</param>
/// <returns>
/// <c>true</c> if every header field could be read and passed its check;
/// <c>false</c> if the file ends early or a field fails validation.
/// </returns>
/// <remarks>
/// Exceptions raised while opening or reading the file are not caught here and propagate to the caller.
/// </remarks>
public static bool LogTest(string targetFile)
{
    byte[] idBuffer = new byte[4];
    using FileStream srcFS = new FileInfo(targetFile).OpenRead();

    // Field 1: four bytes that must decode to a plausible timestamp.
    // Passing true for TSTestOverride skips the comparison against lastTimestamp, which only
    // makes sense while translating a log sequentially - not when probing an unrelated file.
    if (!ReadFully(srcFS, idBuffer, 4))
        return false;
    if (!IsValidTimestamp(BEInt(idBuffer), true))
        return false;

    // Field 2: one byte that may not exceed 6 (presumably the message type ID - TODO confirm).
    // End of file (-1) is rejected here rather than being left for the next read to catch.
    int typeByte = srcFS.ReadByte();
    if (typeByte < 0 || typeByte > 6)
        return false;

    // Field 3: a one-byte length, followed by that many bytes (presumably the profile name - TODO confirm).
    int profLen = srcFS.ReadByte();
    if (profLen == -1)
        return false;

    if (profLen > 0)
    {
        // The content is not inspected; we only confirm that the announced number of bytes exists.
        byte[] srcBuffer = new byte[profLen];
        if (!ReadFully(srcFS, srcBuffer, profLen))
            return false;
    }

    // Field 4: two more bytes must be present to complete the header.
    if (!ReadFully(srcFS, idBuffer, 2))
        return false;

    // We assume a valid log file starting from here, as the header format of the message is now confirmed -
    // and we can't assume the very first message in a file *didn't* happen to just be truncated or empty.

    return true;

    // Stream.Read may return fewer bytes than requested without having reached the end of the stream,
    // so keep reading until 'count' bytes have arrived. Returns false only on a genuine end of file.
    static bool ReadFully(Stream stream, byte[] buffer, int count)
    {
        int total = 0;
        while (total < count)
        {
            int read = stream.Read(buffer, total, count - total);
            if (read <= 0)
                return false;
            total += read;
        }
        return true;
    }
}
# Todo -- Preliminary scan of selected destination files, if they exist. If they appear at a glance to be un-translated user logs, refuse to overwrite them. (i.e. if it appears the user accidentally used a source log as the destination) - Android version for mobile users of F-Chat. \ No newline at end of file diff --git a/MainWindow.xaml.cs b/MainWindow.xaml.cs index 7062edb..3bd1dc3 100644 --- a/MainWindow.xaml.cs +++ b/MainWindow.xaml.cs @@ -51,7 +51,7 @@ private enum FLogS_ERROR DEST_NOT_DIRECTORY, DEST_NOT_FILE, DEST_NOT_FOUND, - DEST_SENSITIVE, // Todo. + DEST_SENSITIVE, NO_DEST, NO_DEST_DIR, NO_REGEX, @@ -253,7 +253,7 @@ private void FormatOverride(object? sender, RoutedEventArgs e) (FLogS_ERROR.DEST_NOT_DIRECTORY, _) => "Destination is not a directory.", (FLogS_ERROR.DEST_NOT_FILE, _) => "Destination is not a file.", (FLogS_ERROR.DEST_NOT_FOUND, _) => "Destination directory does not exist.", - (FLogS_ERROR.DEST_SENSITIVE, _) => "Destination appears to contain source log data.", // Todo. + (FLogS_ERROR.DEST_SENSITIVE, _) => "Destination appears to contain source log data.", (FLogS_ERROR.NO_DEST, _) => "No destination file selected.", (FLogS_ERROR.NO_DEST_DIR, _) => "No destination directory selected.", (FLogS_ERROR.NO_REGEX, _) => "No search text entered.", @@ -497,8 +497,17 @@ private void TextboxUpdated(object? sender, EventArgs e) directoryError = FLogS_ERROR.SOURCES_NOT_FOUND; else if (file.Equals(outFile)) directoryError = FLogS_ERROR.SOURCE_CONFLICT; - else if (directoryError == FLogS_ERROR.NONE && File.Exists(outFile)) + + if (File.Exists(outFile)) + { + if (Common.LogTest(outFile)) + { + directoryError = FLogS_ERROR.DEST_SENSITIVE; + break; + } + directoryWarning = FLogS_WARNING.MULTI_OVERWRITE; + } } } @@ -515,7 +524,12 @@ private void TextboxUpdated(object? 
sender, EventArgs e) else if (FileSource.Text.Equals(FileOutput.Text)) fileError = FLogS_ERROR.SOURCE_EQUALS_DEST; else if (File.Exists(FileOutput.Text)) + { + if (Common.LogTest(FileOutput.Text)) + fileError = FLogS_ERROR.DEST_SENSITIVE; + fileWarning = FLogS_WARNING.SINGLE_OVERWRITE; + } if (PhraseSource.Text.Length == 0) phraseError = FLogS_ERROR.NO_SOURCES; @@ -534,16 +548,27 @@ private void TextboxUpdated(object? sender, EventArgs e) foreach (string file in PhraseSource.Text.Split(';')) { string outFile = Path.Join(PhraseOutput.Text, Path.GetFileNameWithoutExtension(file)); + if (!Common.plaintext) outFile += ".html"; else outFile += ".txt"; + if (!File.Exists(file)) phraseError = FLogS_ERROR.SOURCES_NOT_FOUND; else if (file.Equals(outFile)) phraseError = FLogS_ERROR.SOURCE_CONFLICT; - else if (phraseError == FLogS_ERROR.NONE && File.Exists(outFile)) + + if (File.Exists(outFile)) + { + if (Common.LogTest(outFile)) + { + phraseError = FLogS_ERROR.DEST_SENSITIVE; + break; + } + phraseWarning = FLogS_WARNING.MULTI_OVERWRITE; + } } } } diff --git a/MessagePool.cs b/MessagePool.cs index 58f3065..6aca541 100644 --- a/MessagePool.cs +++ b/MessagePool.cs @@ -41,20 +41,20 @@ internal partial class MessagePool private readonly Dictionary tagCounts = new() { { "b", 0 }, + { "big", 0 }, + { "color", 0 }, + { "eicon", 0 }, { "i", 0 }, + { "icon", 0 }, + { "noparse", 0 }, { "s", 0 }, - { "u", 0 }, + { "session", 0 }, + { "spoiler", 0 }, { "sub", 0 }, { "sup", 0 }, - { "big", 0 }, - { "noparse", 0 }, + { "u", 0 }, { "url", 0 }, - { "icon", 0 }, - { "eicon", 0 }, { "user", 0 }, - { "spoiler", 0 }, - { "session", 0 }, - { "color", 0 }, }; private Stack? tagHistory; private uint thisDate = 1U; @@ -272,7 +272,7 @@ private bool TranslateIDX(FileStream srcFS) /* * I have not reverse-engineered the IDX format beyond reading channel/profile names from it. 
* It appears to contain 8-byte blocks of numerical data in ascending value; there are more such blocks in IDX files paired with older logs. - * I don't know what the blocks represent, but they are almost certainly not timestamps. + * I don't know what the blocks represent, but they are almost certainly not timestamps - they're several powers of ten too large. */ string? fileName = Path.GetFileNameWithoutExtension(srcFile); @@ -587,8 +587,8 @@ private bool TranslateMessage(FileStream srcFS, StreamWriter dstFS) } lastPosition = (uint)srcFS.Position; - bool nextTimestamp = false; - while (!nextTimestamp) // Search for the next message by locating its timestamp and delimiter. It's the latter we're *really* looking for; the timestamp just helps us identify it. + bool nextID = false; + while (!nextID) // Search for the next message by locating its delimiter. { srcFS.ReadByte(); srcFS.Read(idBuffer, 0, 4); @@ -607,7 +607,7 @@ private bool TranslateMessage(FileStream srcFS, StreamWriter dstFS) discrepancy = (int)srcFS.Position - (int)lastPosition; lastDiscrepancy += discrepancy; lastPosition = (uint)srcFS.Position; - nextTimestamp = true; + nextID = true; unreadBytes += discrepancy; srcFS.ReadByte(); } @@ -626,7 +626,7 @@ private bool TranslateMessage(FileStream srcFS, StreamWriter dstFS) discrepancy = (int)srcFS.Position - (int)lastPosition - 2; lastDiscrepancy += discrepancy; lastPosition = (uint)srcFS.Position; - nextTimestamp = true; + nextID = true; unreadBytes += discrepancy; } }