Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Strip HTML tags from DublinCore Creator tags #2058

Merged
merged 1 commit into from
Sep 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,16 +1,30 @@
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package rdf // import "miniflux.app/v2/internal/reader/rdf"
package dublincore // import "miniflux.app/v2/internal/reader/dublincore"

import (
"strings"

"miniflux.app/v2/internal/reader/sanitizer"
)

// DublinCoreFeedElement represents Dublin Core feed XML elements.
// It carries only xml struct tags, so it can be embedded in a feed-level
// struct to pick up the channel's Dublin Core metadata.
type DublinCoreFeedElement struct {
// DublinCoreCreator is bound to the "creator" element (Dublin Core 1.1
// namespace) nested under <channel>. The value is stored as decoded and may
// still contain markup; GetSanitizedCreator returns a tag-free version.
DublinCoreCreator string `xml:"http://purl.org/dc/elements/1.1/ channel>creator"`
}

// DublinCoreEntryElement represents Dublin Core entry XML elements.
type DublinCoreEntryElement struct {
// GetSanitizedCreator returns the feed-level Dublin Core creator with tags
// removed by sanitizer.StripTags and surrounding whitespace trimmed.
func (feed *DublinCoreFeedElement) GetSanitizedCreator() string {
	withoutTags := sanitizer.StripTags(feed.DublinCoreCreator)
	return strings.TrimSpace(withoutTags)
}

// DublinCoreItemElement represents Dublin Core item (entry) XML elements.
// It carries only xml struct tags, so it can be embedded in an item-level
// struct to pick up the item's Dublin Core metadata.
type DublinCoreItemElement struct {
// DublinCoreDate is bound to the "date" element in the Dublin Core 1.1
// namespace; it is kept as the raw string found in the document.
DublinCoreDate string `xml:"http://purl.org/dc/elements/1.1/ date"`
// DublinCoreCreator is bound to the "creator" element in the Dublin Core 1.1
// namespace. The value may still contain markup; GetSanitizedCreator returns
// a tag-free version.
DublinCoreCreator string `xml:"http://purl.org/dc/elements/1.1/ creator"`
// DublinCoreContent is bound to the "encoded" element of the RSS 1.0 content
// module namespace (not the Dublin Core namespace, despite the field name).
DublinCoreContent string `xml:"http://purl.org/rss/1.0/modules/content/ encoded"`
}

// GetSanitizedCreator returns the item-level Dublin Core creator with tags
// removed by sanitizer.StripTags and surrounding whitespace trimmed.
func (item *DublinCoreItemElement) GetSanitizedCreator() string {
	withoutTags := sanitizer.StripTags(item.DublinCoreCreator)
	return strings.TrimSpace(withoutTags)
}
28 changes: 28 additions & 0 deletions internal/reader/rdf/parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,34 @@ func TestParseItemWithDublicCoreDate(t *testing.T) {
}
}

// TestParseItemWithEncodedHTMLInDCCreatorField verifies that entity-encoded
// HTML markup inside an item's dc:creator element is stripped, so the entry
// author contains only the plain text of the creator.
func TestParseItemWithEncodedHTMLInDCCreatorField(t *testing.T) {
	data := `<?xml version="1.0" encoding="utf-8"?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:slash="http://purl.org/rss/1.0/modules/slash/">
<channel>
<title>Example</title>
<link>http://example.org</link>
</channel>

<item>
<title>Title</title>
<description>Test</description>
<link>http://example.org/test.html</link>
<dc:creator>&lt;a href=&quot;http://example.org/author1&quot;>Author 1&lt;/a&gt; (University 1), &lt;a href=&quot;http://example.org/author2&quot;>Author 2&lt;/a&gt; (University 2)</dc:creator>
<dc:date>2018-04-10T05:00:00+00:00</dc:date>
</item>
</rdf:RDF>`

	feed, err := Parse("http://example.org", bytes.NewBufferString(data))
	if err != nil {
		t.Fatal(err)
	}

	// Guard the index below: if the parser ever yields no entries, report a
	// clear test failure instead of panicking on an out-of-range access.
	if len(feed.Entries) == 0 {
		t.Fatalf("Incorrect number of entries, got: %d, want at least 1", len(feed.Entries))
	}

	expectedAuthor := "Author 1 (University 1), Author 2 (University 2)"
	if feed.Entries[0].Author != expectedAuthor {
		t.Errorf("Incorrect entry author, got: %s, want: %s", feed.Entries[0].Author, expectedAuthor)
	}
}

func TestParseItemWithoutDate(t *testing.T) {
data := `<?xml version="1.0" encoding="utf-8"?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/">
Expand Down
9 changes: 5 additions & 4 deletions internal/reader/rdf/rdf.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"miniflux.app/v2/internal/logger"
"miniflux.app/v2/internal/model"
"miniflux.app/v2/internal/reader/date"
"miniflux.app/v2/internal/reader/dublincore"
"miniflux.app/v2/internal/reader/sanitizer"
"miniflux.app/v2/internal/urllib"
)
Expand All @@ -22,7 +23,7 @@ type rdfFeed struct {
Title string `xml:"channel>title"`
Link string `xml:"channel>link"`
Items []rdfItem `xml:"item"`
DublinCoreFeedElement
dublincore.DublinCoreFeedElement
}

func (r *rdfFeed) Transform(baseURL string) *model.Feed {
Expand All @@ -38,7 +39,7 @@ func (r *rdfFeed) Transform(baseURL string) *model.Feed {
for _, item := range r.Items {
entry := item.Transform()
if entry.Author == "" && r.DublinCoreCreator != "" {
entry.Author = strings.TrimSpace(r.DublinCoreCreator)
entry.Author = r.GetSanitizedCreator()
}

if entry.URL == "" {
Expand All @@ -60,7 +61,7 @@ type rdfItem struct {
Title string `xml:"title"`
Link string `xml:"link"`
Description string `xml:"description"`
DublinCoreEntryElement
dublincore.DublinCoreItemElement
}

func (r *rdfItem) Transform() *model.Entry {
Expand Down Expand Up @@ -88,7 +89,7 @@ func (r *rdfItem) entryContent() string {
}

func (r *rdfItem) entryAuthor() string {
return strings.TrimSpace(r.DublinCoreCreator)
return r.GetSanitizedCreator()
}

func (r *rdfItem) entryURL() string {
Expand Down
11 changes: 0 additions & 11 deletions internal/reader/rss/dublincore.go

This file was deleted.

5 changes: 3 additions & 2 deletions internal/reader/rss/rss.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (
"miniflux.app/v2/internal/logger"
"miniflux.app/v2/internal/model"
"miniflux.app/v2/internal/reader/date"
"miniflux.app/v2/internal/reader/dublincore"
"miniflux.app/v2/internal/reader/media"
"miniflux.app/v2/internal/reader/sanitizer"
"miniflux.app/v2/internal/urllib"
Expand Down Expand Up @@ -182,7 +183,7 @@ type rssItem struct {
CommentLinks []rssCommentLink `xml:"comments"`
EnclosureLinks []rssEnclosure `xml:"enclosure"`
Categories []rssCategory `xml:"category"`
DublinCoreElement
dublincore.DublinCoreItemElement
FeedBurnerElement
PodcastEntryElement
media.Element
Expand Down Expand Up @@ -250,7 +251,7 @@ func (r *rssItem) entryAuthor() string {
}

if author == "" {
author = r.DublinCoreCreator
author = r.GetSanitizedCreator()
}

return sanitizer.StripTags(strings.TrimSpace(author))
Expand Down
Loading