Commit a93cf57
squash keep rss & web content
Daniel Yrovas committed Aug 15, 2023
1 parent e5d9f2f commit a93cf57
Showing 13 changed files with 151 additions and 60 deletions.
2 changes: 1 addition & 1 deletion .gitignore
@@ -3,4 +3,4 @@ miniflux
*.rpm
*.deb
.idea
.vscode
.vscode
6 changes: 5 additions & 1 deletion internal/api/entry.go
@@ -279,7 +279,11 @@ func (h *handler) fetchContent(w http.ResponseWriter, r *http.Request) {
return
}

json.OK(w, r, map[string]string{"content": entry.Content})
if err := h.store.UpdateEntryContent(entry); err != nil {
	json.ServerError(w, r, err)
	return
}

json.OK(w, r, map[string]string{"content": entry.Content, "web_content": entry.WebContent})
}

func configureFilters(builder *storage.EntryQueryBuilder, r *http.Request) {
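The handler now persists the scraped content via UpdateEntryContent before responding, and the response carries both fields. A minimal client-side sketch of consuming it (the endpoint path and auth header are assumptions based on Miniflux API conventions, not part of this commit):

package main

import (
	"encoding/json"
	"fmt"
	"net/http"
)

func main() {
	// Assumed endpoint shape: GET /v1/entries/{entryID}/fetch-content
	req, err := http.NewRequest("GET", "http://localhost:8080/v1/entries/123/fetch-content", nil)
	if err != nil {
		panic(err)
	}
	req.Header.Set("X-Auth-Token", "api-token-placeholder")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	var payload struct {
		Content    string `json:"content"`     // feed-provided content
		WebContent string `json:"web_content"` // scraped page content
	}
	if err := json.NewDecoder(resp.Body).Decode(&payload); err != nil {
		panic(err)
	}
	fmt.Printf("rss: %d bytes, web: %d bytes\n", len(payload.Content), len(payload.WebContent))
}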
7 changes: 7 additions & 0 deletions internal/database/migrations.go
@@ -752,4 +752,11 @@ var migrations = []func(tx *sql.Tx) error{
_, err = tx.Exec(sql)
return err
},
func(tx *sql.Tx) (err error) {
sql := `
ALTER TABLE entries ADD COLUMN web_content text default '';
`
_, err = tx.Exec(sql)
return err
},
}
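The new migration appends a text column with an empty-string default, so existing rows need no backfill. A quick way to confirm it applied (the DSN is a placeholder; lib/pq is already a dependency of this codebase):

package main

import (
	"database/sql"
	"fmt"

	_ "github.com/lib/pq"
)

func main() {
	db, err := sql.Open("postgres", "postgres://miniflux:secret@localhost/miniflux?sslmode=disable")
	if err != nil {
		panic(err)
	}
	defer db.Close()

	var count int
	err = db.QueryRow(`
		SELECT count(*)
		FROM information_schema.columns
		WHERE table_name = 'entries' AND column_name = 'web_content'
	`).Scan(&count)
	if err != nil {
		panic(err)
	}
	fmt.Println("web_content column present:", count == 1)
}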
2 changes: 2 additions & 0 deletions internal/locale/translations/en_US.json
@@ -67,6 +67,8 @@
"entry.save.title": "Save this entry",
"entry.save.completed": "Done!",
"entry.save.toast.completed": "Entry saved",
"entry.scraper.label.rss": "Show RSS Content",
"entry.scraper.title.rss": "Fetch RSS content",
"entry.scraper.label": "Download",
"entry.scraper.title": "Fetch original content",
"entry.scraper.completed": "Done!",
1 change: 1 addition & 0 deletions internal/model/entry.go
@@ -30,6 +30,7 @@ type Entry struct {
CreatedAt time.Time `json:"created_at"`
ChangedAt time.Time `json:"changed_at"`
Content string `json:"content"`
WebContent string `json:"web_content,omitempty"`
Author string `json:"author"`
ShareCode string `json:"share_code"`
Starred bool `json:"starred"`
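Because the new field is tagged omitempty, entries that were never scraped serialize exactly as before, which keeps existing API consumers unaffected. A small sketch of that behavior (struct trimmed to the two relevant fields):

package main

import (
	"encoding/json"
	"fmt"
)

type Entry struct {
	Content    string `json:"content"`
	WebContent string `json:"web_content,omitempty"`
}

func main() {
	noScrape, _ := json.Marshal(Entry{Content: "from the feed"})
	fmt.Println(string(noScrape)) // {"content":"from the feed"}

	scraped, _ := json.Marshal(Entry{Content: "from the feed", WebContent: "from the page"})
	fmt.Println(string(scraped)) // {"content":"from the feed","web_content":"from the page"}
}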
11 changes: 6 additions & 5 deletions internal/reader/processor/processor.go
@@ -81,14 +81,15 @@ func ProcessFeedEntries(store *storage.Storage, feed *model.Feed, user *model.Us
logger.Error(`[Processor] Unable to crawl this entry: %q => %v`, entry.URL, scraperErr)
} else if content != "" {
// We replace the entry content only if the scraper doesn't return any error.
entry.Content = content
// TODO: document change
entry.WebContent = content
}
}

rewrite.Rewriter(url, entry, feed.RewriteRules)

// The sanitizer should always run at the end of the process to make sure unsafe HTML is filtered.
entry.Content = sanitizer.Sanitize(url, entry.Content)
entry.WebContent = sanitizer.Sanitize(url, entry.WebContent)

if entryIsNew {
intg, err := store.Integration(feed.UserID)
@@ -169,18 +170,18 @@ func ProcessEntryWebPage(feed *model.Feed, entry *model.Entry, user *model.User)
}

if content != "" {
entry.Content = content
entry.WebContent = content
entry.ReadingTime = calculateReadingTime(content, user)
}

rewrite.Rewriter(url, entry, entry.Feed.RewriteRules)
entry.Content = sanitizer.Sanitize(url, entry.Content)
entry.WebContent = sanitizer.Sanitize(url, entry.WebContent)

return nil
}

func getUrlFromEntry(feed *model.Feed, entry *model.Entry) string {
var url = entry.URL
url := entry.URL
if feed.UrlRewriteRules != "" {
parts := customReplaceRuleRegex.FindStringSubmatch(feed.UrlRewriteRules)

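This processor change is the heart of the commit: the crawler's output no longer overwrites the feed-provided Content but lands in WebContent, which is then sanitized in its place. A condensed sketch of the pattern (the helper name and import paths are illustrative, not part of this commit):

package processor

import (
	"miniflux.app/v2/internal/model"
	"miniflux.app/v2/internal/reader/sanitizer"
)

// applyScrapedContent mirrors the new flow: the scraped HTML is stored
// alongside, not instead of, the original feed content.
func applyScrapedContent(pageURL string, entry *model.Entry, scraped string) {
	if scraped != "" {
		// entry.Content keeps whatever the feed shipped.
		entry.WebContent = scraped
	}
	// Sanitization now targets the scraped side.
	entry.WebContent = sanitizer.Sanitize(pageURL, entry.WebContent)
}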
38 changes: 19 additions & 19 deletions internal/reader/rewrite/rewriter.go
@@ -61,55 +61,55 @@ func parseRules(rulesText string) (rules []rule) {
func applyRule(entryURL string, entry *model.Entry, rule rule) {
switch rule.name {
case "add_image_title":
entry.Content = addImageTitle(entryURL, entry.Content)
entry.WebContent = addImageTitle(entryURL, entry.WebContent)
case "add_mailto_subject":
entry.Content = addMailtoSubject(entryURL, entry.Content)
entry.WebContent = addMailtoSubject(entryURL, entry.WebContent)
case "add_dynamic_image":
entry.Content = addDynamicImage(entryURL, entry.Content)
entry.WebContent = addDynamicImage(entryURL, entry.WebContent)
case "add_youtube_video":
entry.Content = addYoutubeVideo(entryURL, entry.Content)
entry.WebContent = addYoutubeVideo(entryURL, entry.WebContent)
case "add_invidious_video":
entry.Content = addInvidiousVideo(entryURL, entry.Content)
entry.WebContent = addInvidiousVideo(entryURL, entry.WebContent)
case "add_youtube_video_using_invidious_player":
entry.Content = addYoutubeVideoUsingInvidiousPlayer(entryURL, entry.Content)
entry.WebContent = addYoutubeVideoUsingInvidiousPlayer(entryURL, entry.WebContent)
case "add_youtube_video_from_id":
entry.Content = addYoutubeVideoFromId(entry.Content)
entry.WebContent = addYoutubeVideoFromId(entry.WebContent)
case "add_pdf_download_link":
entry.Content = addPDFLink(entryURL, entry.Content)
entry.WebContent = addPDFLink(entryURL, entry.WebContent)
case "nl2br":
entry.Content = replaceLineFeeds(entry.Content)
entry.WebContent = replaceLineFeeds(entry.WebContent)
case "convert_text_link", "convert_text_links":
entry.Content = replaceTextLinks(entry.Content)
entry.WebContent = replaceTextLinks(entry.WebContent)
case "fix_medium_images":
entry.Content = fixMediumImages(entryURL, entry.Content)
entry.WebContent = fixMediumImages(entryURL, entry.WebContent)
case "use_noscript_figure_images":
entry.Content = useNoScriptImages(entryURL, entry.Content)
entry.WebContent = useNoScriptImages(entryURL, entry.WebContent)
case "replace":
// Format: replace("search-term"|"replace-term")
if len(rule.args) >= 2 {
entry.Content = replaceCustom(entry.Content, rule.args[0], rule.args[1])
entry.WebContent = replaceCustom(entry.WebContent, rule.args[0], rule.args[1])
} else {
logger.Debug("[Rewrite] Cannot find search and replace terms for replace rule %s", rule)
}
case "remove":
// Format: remove("#selector > .element, .another")
if len(rule.args) >= 1 {
entry.Content = removeCustom(entry.Content, rule.args[0])
entry.WebContent = removeCustom(entry.WebContent, rule.args[0])
} else {
logger.Debug("[Rewrite] Cannot find selector for remove rule %s", rule)
}
case "add_castopod_episode":
entry.Content = addCastopodEpisode(entryURL, entry.Content)
entry.WebContent = addCastopodEpisode(entryURL, entry.WebContent)
case "base64_decode":
if len(rule.args) >= 1 {
entry.Content = applyFuncOnTextContent(entry.Content, rule.args[0], decodeBase64Content)
entry.WebContent = applyFuncOnTextContent(entry.WebContent, rule.args[0], decodeBase64Content)
} else {
entry.Content = applyFuncOnTextContent(entry.Content, "body", decodeBase64Content)
entry.WebContent = applyFuncOnTextContent(entry.WebContent, "body", decodeBase64Content)
}
case "parse_markdown":
entry.Content = parseMarkdown(entry.Content)
entry.WebContent = parseMarkdown(entry.WebContent)
case "remove_tables":
entry.Content = removeTables(entry.Content)
entry.WebContent = removeTables(entry.WebContent)
case "remove_clickbait":
entry.Title = removeClickbait(entry.Title)
}
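Every rewrite rule above now transforms WebContent, so user-defined rules apply to the scraped copy rather than the feed copy. Hypothetical usage of the existing entry point (import paths assumed; the rule string follows the replace("search-term"|"replace-term") format documented in the comments above):

package main

import (
	"fmt"

	"miniflux.app/v2/internal/model"
	"miniflux.app/v2/internal/reader/rewrite"
)

func main() {
	entry := &model.Entry{WebContent: "line one\nline two"}
	// nl2br inserts <br> tags, replace swaps a literal term.
	rewrite.Rewriter("https://example.org/post", entry, `nl2br,replace("two"|"2")`)
	fmt.Println(entry.WebContent)
}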
30 changes: 16 additions & 14 deletions internal/storage/entry.go
@@ -75,11 +75,11 @@ func (s *Storage) UpdateEntryContent(entry *model.Entry) error {
UPDATE
entries
SET
content=$1, reading_time=$2
content=$1, web_content=$2, reading_time=$3
WHERE
id=$3 AND user_id=$4
id=$4 AND user_id=$5
`
_, err = tx.Exec(query, entry.Content, entry.ReadingTime, entry.ID, entry.UserID)
_, err = tx.Exec(query, entry.Content, entry.WebContent, entry.ReadingTime, entry.ID, entry.UserID)
if err != nil {
tx.Rollback()
return fmt.Errorf(`store: unable to update content of entry #%d: %v`, entry.ID, err)
@@ -89,7 +89,7 @@ func (s *Storage) UpdateEntryContent(entry *model.Entry) error {
UPDATE
entries
SET
document_vectors = setweight(to_tsvector(left(coalesce(title, ''), 500000)), 'A') || setweight(to_tsvector(left(coalesce(content, ''), 500000)), 'B')
document_vectors = setweight(to_tsvector(left(coalesce(title, ''), 500000)), 'A') || setweight(to_tsvector(left(coalesce(content, ''), 500000)), 'B') || setweight(to_tsvector(left(coalesce(web_content, ''), 500000)), 'C')
WHERE
id=$1 AND user_id=$2
`
@@ -98,7 +98,6 @@ func (s *Storage) UpdateEntryContent(entry *model.Entry) error {
tx.Rollback()
return fmt.Errorf(`store: unable to update content of entry #%d: %v`, entry.ID, err)
}

return tx.Commit()
}

@@ -113,6 +112,7 @@ func (s *Storage) createEntry(tx *sql.Tx, entry *model.Entry) error {
comments_url,
published_at,
content,
web_content,
author,
user_id,
feed_id,
@@ -133,9 +133,10 @@ func (s *Storage) createEntry(tx *sql.Tx, entry *model.Entry) error {
$8,
$9,
$10,
$11,
now(),
setweight(to_tsvector(left(coalesce($1, ''), 500000)), 'A') || setweight(to_tsvector(left(coalesce($6, ''), 500000)), 'B'),
$11
setweight(to_tsvector(left(coalesce($1, ''), 500000)), 'A') || setweight(to_tsvector(left(coalesce($6, ''), 500000)), 'B') || setweight(to_tsvector(left(coalesce($7, ''), 500000)), 'C'),
$12
)
RETURNING
id, status
@@ -148,13 +149,13 @@ func (s *Storage) createEntry(tx *sql.Tx, entry *model.Entry) error {
entry.CommentsURL,
entry.Date,
entry.Content,
entry.WebContent,
entry.Author,
entry.UserID,
entry.FeedID,
entry.ReadingTime,
pq.Array(removeDuplicates(entry.Tags)),
).Scan(&entry.ID, &entry.Status)

if err != nil {
return fmt.Errorf(`store: unable to create entry %q (feed #%d): %v`, entry.URL, entry.FeedID, err)
}
@@ -183,12 +184,13 @@ func (s *Storage) updateEntry(tx *sql.Tx, entry *model.Entry) error {
url=$2,
comments_url=$3,
content=$4,
author=$5,
reading_time=$6,
document_vectors = setweight(to_tsvector(left(coalesce($1, ''), 500000)), 'A') || setweight(to_tsvector(left(coalesce($4, ''), 500000)), 'B'),
tags=$10
web_content=$5,
author=$6,
reading_time=$7,
document_vectors = setweight(to_tsvector(left(coalesce($1, ''), 500000)), 'A') || setweight(to_tsvector(left(coalesce($4, ''), 500000)), 'B') || setweight(to_tsvector(left(coalesce($5, ''), 500000)), 'C'),
tags=$11
WHERE
user_id=$7 AND feed_id=$8 AND hash=$9
user_id=$8 AND feed_id=$9 AND hash=$10
RETURNING
id
`
@@ -198,14 +200,14 @@ func (s *Storage) updateEntry(tx *sql.Tx, entry *model.Entry) error {
entry.URL,
entry.CommentsURL,
entry.Content,
entry.WebContent,
entry.Author,
entry.ReadingTime,
entry.UserID,
entry.FeedID,
entry.Hash,
pq.Array(removeDuplicates(entry.Tags)),
).Scan(&entry.ID)

if err != nil {
return fmt.Errorf(`store: unable to update entry %q: %v`, entry.URL, err)
}
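With the extra setweight term, full-text search now indexes all three sources at different priorities: title ('A'), feed content ('B'), and scraped web content ('C'). A minimal ranking query against the updated document_vectors (the DSN and search term are placeholders):

package main

import (
	"database/sql"
	"fmt"

	_ "github.com/lib/pq"
)

func main() {
	db, err := sql.Open("postgres", "postgres://miniflux:secret@localhost/miniflux?sslmode=disable")
	if err != nil {
		panic(err)
	}
	defer db.Close()

	// ts_rank weighs 'A' matches above 'B', and 'B' above 'C', so a hit in
	// the title outranks the same hit in scraped page text.
	rows, err := db.Query(`
		SELECT id, title, ts_rank(document_vectors, plainto_tsquery($1)) AS rank
		FROM entries
		WHERE document_vectors @@ plainto_tsquery($1)
		ORDER BY rank DESC
		LIMIT 10
	`, "search term placeholder")
	if err != nil {
		panic(err)
	}
	defer rows.Close()

	for rows.Next() {
		var (
			id    int64
			title string
			rank  float64
		)
		if err := rows.Scan(&id, &title, &rank); err != nil {
			panic(err)
		}
		fmt.Printf("%d\t%.4f\t%s\n", id, rank, title)
	}
}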
3 changes: 2 additions & 1 deletion internal/storage/entry_query_builder.go
@@ -250,6 +250,7 @@ func (e *EntryQueryBuilder) GetEntries() (model.Entries, error) {
e.author,
e.share_code,
e.content,
e.web_content,
e.status,
e.starred,
e.reading_time,
@@ -314,6 +315,7 @@ func (e *EntryQueryBuilder) GetEntries() (model.Entries, error) {
&entry.Author,
&entry.ShareCode,
&entry.Content,
&entry.WebContent,
&entry.Status,
&entry.Starred,
&entry.ReadingTime,
@@ -335,7 +337,6 @@ func (e *EntryQueryBuilder) GetEntries() (model.Entries, error) {
&iconID,
&tz,
)

if err != nil {
return nil, fmt.Errorf("unable to fetch entry row: %v", err)
}
44 changes: 29 additions & 15 deletions internal/template/templates/views/entry.html
@@ -78,11 +78,17 @@ <h1 dir="auto">
</li>
<li>
<a href="#"
title="{{ t "entry.scraper.title" }}"
title="{{ if .entry.WebContent }}{{ t "entry.scraper.title.rss" }}{{ else }}{{ t "entry.scraper.title" }}{{ end }}"
data-title="{{ t "entry.scraper.title" }}"
data-title-rss="{{ t "entry.scraper.title.rss" }}"
data-label="{{ t "entry.scraper.label" }}"
data-label-rss="{{ t "entry.scraper.label.rss" }}"
data-label-loading="{{ t "entry.state.loading" }}"
data-fetch-content-entry="true"
data-current-content="{{ if .entry.WebContent }}web{{ else }}rss{{ end }}"
data-fetch-content-url="{{ route "fetchContent" "entryID" .entry.ID }}"
data-label-loading="{{ t "entry.state.loading" }}"
>{{ icon "scraper" }}<span class="icon-label">{{ t "entry.scraper.label" }}</span></a>
data-fetch-original-content-url="{{ route "fetchOriginal" "entryID" .entry.ID }}"
>{{ icon "scraper" }}<span class="icon-label">{{ if .entry.WebContent }}{{ t "entry.scraper.label.rss" }}{{ else }}{{ t "entry.scraper.label" }}{{ end }}</span></a>
</li>
{{ if .entry.CommentsURL }}
<li>
@@ -186,9 +192,17 @@ <h1 dir="auto">
{{ end }}
{{end}}
{{ if .user }}
{{ noescape (proxyFilter .entry.Content) }}
{{ if .entry.WebContent }}
{{ noescape (proxyFilter .entry.WebContent) }}
{{ else }}
{{ noescape (proxyFilter .entry.Content) }}
{{ end }}
{{ else }}
{{ noescape .entry.Content }}
{{ if .entry.WebContent }}
{{ noescape .entry.WebContent }}
{{ else }}
{{ noescape .entry.Content }}
{{ end }}
{{ end }}
</article>
{{ if .entry.Enclosures }}
@@ -203,11 +217,11 @@ <h1 dir="auto">
data-last-position="{{ .MediaProgression }}"
data-save-url="{{ route "saveEnclosureProgression" "enclosureID" .ID }}"
>
{{ if (and $.user (mustBeProxyfied "audio")) }}
<source src="{{ proxyURL .URL }}" type="{{ .Html5MimeType }}">
{{ else }}
<source src="{{ .URL | safeURL }}" type="{{ .Html5MimeType }}">
{{ end }}
{{ if (and $.user (mustBeProxyfied "audio")) }}
<source src="{{ proxyURL .URL }}" type="{{ .Html5MimeType }}">
{{ else }}
<source src="{{ .URL | safeURL }}" type="{{ .Html5MimeType }}">
{{ end }}
</audio>
</div>
{{ else if hasPrefix .MimeType "video/" }}
@@ -216,11 +230,11 @@ <h1 dir="auto">
data-last-position="{{ .MediaProgression }}"
data-save-url="{{ route "saveEnclosureProgression" "enclosureID" .ID }}"
>
{{ if (and $.user (mustBeProxyfied "video")) }}
<source src="{{ proxyURL .URL }}" type="{{ .Html5MimeType }}">
{{ else }}
<source src="{{ .URL | safeURL }}" type="{{ .Html5MimeType }}">
{{ end }}
{{ if (and $.user (mustBeProxyfied "video")) }}
<source src="{{ proxyURL .URL }}" type="{{ .Html5MimeType }}">
{{ else }}
<source src="{{ .URL | safeURL }}" type="{{ .Html5MimeType }}">
{{ end }}
</video>
</div>
{{ else if hasPrefix .MimeType "image/" }}
(3 more changed files not shown.)
