Skip to content

Commit

Permalink
Decode HTML entities when reading PuzzleMe text.
Browse files Browse the repository at this point in the history
A recent indie puzzle has instances of entities like `&#34;` and
`&#39;`. These are valid HTML, but unnecessary. So the simplest way to
handle these is to decode any valid HTML entities before re-encoding the
essential ones, & and <.
  • Loading branch information
jpd236 committed Dec 8, 2023
1 parent 2ae6be2 commit d72f05f
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -335,12 +335,12 @@ class PuzzleMe(val json: String) : DelegatingPuzzleable() {
/**
* Convert a PuzzleMe JSON string to HTML.
*
* PuzzleMe mixes unescaped special XML characters (&, <) with HTML tags. This method escapes the special
* characters while leaving supported HTML tags untouched. <br> tags are replaced with newlines. Attributes
* are stripped out.
* PuzzleMe mixes unescaped special XML characters (&, <) with HTML tags. Sometimes there are real HTML escape
* sequences as well. This method escapes the special characters while leaving supported HTML tags untouched.
* <br> tags are replaced with newlines. Attributes are stripped out.
*/
internal fun toHtml(clue: String): String {
return clue
return Encodings.decodeHtmlEntities(clue)
.replace("&", "&amp;")
.replace("\\s*<br/?>\\s*".toRegex(RegexOption.IGNORE_CASE), "\n")
// Strip other unsupported tags.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import kotlin.test.assertTrue
import kotlin.test.fail

class PuzzleMeTest {

@Test
fun extractPuzzleJson() = runTest {
assertEquals(
Expand Down Expand Up @@ -169,4 +170,11 @@ class PuzzleMeTest {
"test link and <b>test bold link</b>",
PuzzleMe.toHtml("<a href=\"abc\">test link</a> and <a href=\"def\"><b>test bold link</b></a>")
)

/**
 * Checks how [PuzzleMe.toHtml] treats HTML entities: the numeric entities `&#34;` and `&#39;` are expected to
 * come out as literal quote characters, bare `<` and `&` are expected to come out escaped, and an input
 * `&amp;` is expected to appear as a single `&amp;` in the output.
 */
@Test
fun toHtml_htmlEntities() {
    val input = "Test with &#34;Quotes&#34; &#39; < & &amp;"
    val expected = "Test with \"Quotes\" ' &lt; &amp; &amp;"
    assertEquals(expected, PuzzleMe.toHtml(input))
}
}

0 comments on commit d72f05f

Please sign in to comment.