From 1c5bdd1d4ae8a288e7a2e68a5d110996ef6cf325 Mon Sep 17 00:00:00 2001 From: Yetrina Battad Date: Tue, 11 Jun 2024 14:51:35 +1000 Subject: [PATCH] fix: normalize note paragraphs and HTML wrapping --- app/models/solr_document.rb | 40 ++++++++++++++++++--- spec/models/solr_document_spec.rb | 58 ++++++++++++++++++++++++++++--- 2 files changed, 89 insertions(+), 9 deletions(-) diff --git a/app/models/solr_document.rb b/app/models/solr_document.rb index a1c258c2..32a896cf 100644 --- a/app/models/solr_document.rb +++ b/app/models/solr_document.rb @@ -51,12 +51,42 @@ def extents_information end def extract_notes_by_header(header) + filtered_notes = filter_notes_by_header(header) + if filtered_notes.present? + wrap_paragraphs_in_html(parse_note_paragraphs(filtered_notes)) + end + end + + private + + def filter_notes_by_header(header) # compares against the parameterized value of the header to ignore case and punctuation - # rubocop:disable Rails/OutputSafety notes.select { |note| JSON.parse(note)["head"].parameterize == I18n.t("ead_notes.#{header}").parameterize } - .map { |note| JSON.parse(note)["p"] } - .flatten - .map { |para| "

#{para}

".html_safe } - # rubocop:enable Rails/OutputSafety + end + + def parse_note_paragraphs(notes) + # Sometimes note paragraphs are returned as a simple JSON string, and other times, as an array. + # Ensure we always have an array by wrapping the paragraphs in an array, then flatten the array, + # so we only return a single dimensional array of paragraphs. + notes.map! { |note| [JSON.parse(note)["p"]] } + .flatten! + end + + def wrap_paragraphs_in_html(paragraphs) + # Wraps note paragraphs in HTML `

` tags, then scrubs the HTML to remove any unknown or + # unsafe tags. + paragraphs.map! do |para| + Loofah.xml_fragment(wrap_in_paragraph(para)) + .scrub!(:strip) + .to_html + end + end + + def wrap_in_paragraph(value) + if value.start_with?("<") + value + else + ActionController::Base.helpers.content_tag(:p, value) + end end end diff --git a/spec/models/solr_document_spec.rb b/spec/models/solr_document_spec.rb index a2baf5fe..4e5ef44c 100644 --- a/spec/models/solr_document_spec.rb +++ b/spec/models/solr_document_spec.rb @@ -141,20 +141,70 @@ end it "returns an empty array" do - expect(notes_value).to eq [] + expect(notes_value).to be_nil end end context "when header literal contains punctuation" do subject(:notes_value) do - document = described_class.new( - note_json_ssm: ["{\"head\":\"Biographical / Historical\",\"p\":\"Testing\",\"audience\":\"internal\"}"] + document.extract_notes_by_header("biog_hist") + end + + let(:document) { + described_class.new( + note_json_ssm: ["{\"head\":\"Biographical / Historical\",\"p\":[\"Testing\",\"Testing 2\"],\"audience\":\"internal\"}"] ) + } - document.extract_notes_by_header("biog_hist") + it "returns the notes with the matching header" do + expect(notes_value).to eq ["

Testing

", "

Testing 2

"] + end + end + + context "when there is a single note paragraph" do + subject(:notes_value) do + document.extract_notes_by_header("references") + end + + let(:document) { + described_class.new( + note_json_ssm: ["{\"head\":\"References\",\"p\":\"Testing\",\"audience\":\"internal\"}"] + ) + } + + it "returns the notes with the matching header" do + expect(notes_value).to eq ["

Testing

"] + end + end + + context "when there are multiple note paragraphs" do + subject(:notes_value) do + document.extract_notes_by_header("references") end + let(:document) { + described_class.new( + note_json_ssm: ["{\"head\":\"References\",\"p\":[\"Testing\", \"Testing 2\"],\"audience\":\"internal\"}"] + ) + } + it "returns the notes with the matching header" do + expect(notes_value).to eq ["

Testing

", "

Testing 2

"] + end + end + + context "when note already contains HTML tags" do + subject(:notes_value) do + document.extract_notes_by_header("references") + end + + let(:document) { + described_class.new( + note_json_ssm: ["{\"head\":\"References\",\"p\":\"

Testing

\",\"audience\":\"internal\"}"] + ) + } + + it "doesn't wrap the content again" do expect(notes_value).to eq ["

Testing

"] end end