Skip to content

Commit

Permalink
Merge pull request #17314 from brave/feed-item-parse-cleanup
Browse files Browse the repository at this point in the history
[brave-today] Feed item parsing clean up
  • Loading branch information
cdesouza-chromium authored Mar 2, 2023
2 parents 0169e0a + 180be60 commit b31c48f
Show file tree
Hide file tree
Showing 11 changed files with 234 additions and 171 deletions.
12 changes: 12 additions & 0 deletions components/brave_news/api/BUILD.gn
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Copyright (c) 2023 The Brave Authors. All rights reserved.
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at https://mozilla.org/MPL/2.0/.

import("//tools/json_schema_compiler/json_schema_api.gni")

generated_types("api") {
sources = [ "combined_feed.idl" ]

root_namespace = "brave_news::api::%(namespace)s"
}
50 changes: 50 additions & 0 deletions components/brave_news/api/combined_feed.idl
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// Copyright (c) 2023 The Brave Authors. All rights reserved.
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this file,
// You can obtain one at https://mozilla.org/MPL/2.0/.

// Schema types used by brave today feed.

[generate_error_messages]
namespace combined_feed {

// A dictionary with fields representing a feed item.
dictionary Item {
// A url for the item.
DOMString url;

// A string with a url for an image used for padding.
DOMString padded_img;

// The id code for the publisher.
DOMString publisher_id;

// The category the feed belongs to.
DOMString category;

// The title of the feed.
DOMString title;

// A description field for the feed item to be shown.
DOMString description;

// The name of the publisher of the item.
DOMString publisher_name;

// The score of the item. Should be logged when not present.
double? score;

// A publish date/time string in the format YYYY-MM-DD HH:MM:SS.
DOMString publish_time;

// The type of content of this feed.
DOMString content_type;

// An id for the creative instance.
DOMString? creative_instance_id;

// An offers category field.
DOMString? offers_category;
};

};
5 changes: 3 additions & 2 deletions components/brave_news/browser/BUILD.gn
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,14 @@ static_library("browser") {
"brave_news_p3a.h",
"channels_controller.cc",
"channels_controller.h",
"combined_feed_parsing.cc",
"combined_feed_parsing.h",
"direct_feed_controller.cc",
"direct_feed_controller.h",
"feed_building.cc",
"feed_building.h",
"feed_controller.cc",
"feed_controller.h",
"feed_parsing.cc",
"feed_parsing.h",
"html_parsing.cc",
"html_parsing.h",
"locales_helper.cc",
Expand All @@ -44,6 +44,7 @@ static_library("browser") {
"//base",
"//brave/components/api_request_helper",
"//brave/components/brave_ads/browser",
"//brave/components/brave_news/api",
"//brave/components/brave_news/common",
"//brave/components/brave_news/common:mojom",
"//brave/components/brave_private_cdn",
Expand Down
148 changes: 148 additions & 0 deletions components/brave_news/browser/combined_feed_parsing.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
// Copyright (c) 2021 The Brave Authors. All rights reserved.
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this file,
// You can obtain one at https://mozilla.org/MPL/2.0/.

#include "brave/components/brave_news/browser/combined_feed_parsing.h"

#include <string>
#include <utility>
#include <vector>

#include "base/logging.h"
#include "base/notreached.h"
#include "base/strings/strcat.h"
#include "base/strings/utf_string_conversions.h"
#include "base/time/time.h"
#include "base/types/expected.h"
#include "brave/components/brave_news/api/combined_feed.h"
#include "brave/components/brave_news/common/brave_news.mojom-forward.h"
#include "brave/components/brave_news/common/brave_news.mojom-shared.h"
#include "brave/components/brave_news/common/brave_news.mojom.h"
#include "third_party/abseil-cpp/absl/types/optional.h"
#include "ui/base/l10n/time_format.h"
#include "url/gurl.h"

namespace brave_news {

namespace {

base::expected<mojom::FeedItemPtr, std::string> ParseFeedItem(
const base::Value& value) {
std::u16string error;
auto parsed_feed_item = api::combined_feed::Item::FromValue(value, &error);
if (!parsed_feed_item) {
return base::unexpected(base::StrCat(
{"Failed to parse feed item. ", base::UTF16ToASCII(error)}));
}
api::combined_feed::Item& feed_item = *parsed_feed_item;

auto url = GURL(feed_item.url);
if (url.is_empty() || !url.has_host()) {
return base::unexpected(base::StrCat(
{"Found feed item with an invalid url value. title=", feed_item.title,
", url=", feed_item.url}));
}

if (!url.SchemeIsHTTPOrHTTPS()) {
return base::unexpected(
base::StrCat({"Item url was not HTTP or HTTPS: url=", url.spec()}));
}

if (feed_item.padded_img.empty()) {
return base::unexpected(base::StrCat(
{"Found feed item with missing image. url=", feed_item.url}));
}

if (feed_item.publisher_id.empty()) {
return base::unexpected(base::StrCat(
{"Found feed item with missing publisher id. url=", feed_item.url}));
}

if (feed_item.title.empty()) {
return base::unexpected(base::StrCat(
{"Found feed item with missing title. url=", feed_item.url}));
}

if (!feed_item.score.has_value()) {
// We just warn, as this is an optional field.
VLOG(1) << "Item was missing score: " << feed_item.url;
}

auto metadata = mojom::FeedItemMetadata::New();
metadata->category_name = feed_item.category;
metadata->title = feed_item.title;
metadata->description = feed_item.description;
metadata->publisher_id = feed_item.publisher_id;
metadata->publisher_name = feed_item.publisher_name;
metadata->image = mojom::Image::NewPaddedImageUrl(GURL(feed_item.padded_img));
metadata->url = std::move(url);

// Further weight according to history
metadata->score = feed_item.score.value_or(20.0);

// Extract time
if (!base::Time::FromUTCString(feed_item.publish_time.c_str(),
&metadata->publish_time)) {
VLOG(1) << "Bad time string for feed item: " << feed_item.publish_time;
} else {
// Successful, get language-specific relative time
base::TimeDelta relative_time_delta =
base::Time::Now() - metadata->publish_time;
metadata->relative_time_description =
base::UTF16ToUTF8(ui::TimeFormat::Simple(
ui::TimeFormat::Format::FORMAT_ELAPSED,
ui::TimeFormat::Length::LENGTH_LONG, relative_time_delta));
}
// Detect type
if (feed_item.content_type == "brave_partner") {
if (!feed_item.creative_instance_id ||
feed_item.creative_instance_id->empty()) {
return base::unexpected(
base::StrCat({"Promoted item has empty creative_instance_id. url=",
feed_item.url}));
}

auto item = mojom::PromotedArticle::New();
item->creative_instance_id = *feed_item.creative_instance_id;
item->data = std::move(metadata);
return mojom::FeedItem::NewPromotedArticle(std::move(item));
} else if (feed_item.content_type == "product") {
auto item = mojom::Deal::New();
if (feed_item.offers_category) {
item->offers_category = *feed_item.offers_category;
}
item->data = std::move(metadata);
return mojom::FeedItem::NewDeal(std::move(item));
} else if (feed_item.content_type == "article") {
auto item = mojom::Article::New();
item->data = std::move(metadata);
return mojom::FeedItem::NewArticle(std::move(item));
}

// An unknown content_type could be something introduced for future use.
return base::unexpected(
base::StrCat({"Feed item of unknown content type. content_type=",
feed_item.content_type}));
}

} // namespace

std::vector<mojom::FeedItemPtr> ParseFeedItems(const base::Value& value) {
std::vector<mojom::FeedItemPtr> items;
if (!value.is_list()) {
NOTREACHED();
return items;
}
for (const base::Value& feed_item : value.GetList()) {
auto item = ParseFeedItem(feed_item);
if (item.has_value()) {
items.push_back(std::move(*item));
} else {
VLOG(1) << item.error();
}
}
return items;
}

} // namespace brave_news
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
// License, v. 2.0. If a copy of the MPL was not distributed with this file,
// You can obtain one at https://mozilla.org/MPL/2.0/.

#ifndef BRAVE_COMPONENTS_BRAVE_NEWS_BROWSER_FEED_PARSING_H_
#define BRAVE_COMPONENTS_BRAVE_NEWS_BROWSER_FEED_PARSING_H_
#ifndef BRAVE_COMPONENTS_BRAVE_NEWS_BROWSER_COMBINED_FEED_PARSING_H_
#define BRAVE_COMPONENTS_BRAVE_NEWS_BROWSER_COMBINED_FEED_PARSING_H_

#include <string>
#include <vector>
Expand All @@ -15,12 +15,8 @@
namespace brave_news {

// Convert from the "combined feed" hosted remotely to Brave News mojom items.
// TODO(petemill): Rename this file to combined_feed_parsing.h or similar,
// in order to differentiate the "Combined Feed" from
// a "Direct Feed" (a.k.a RSS).
bool ParseFeedItems(const base::Value& json_value,
std::vector<mojom::FeedItemPtr>* feed_items);
std::vector<mojom::FeedItemPtr> ParseFeedItems(const base::Value& value);

} // namespace brave_news

#endif // BRAVE_COMPONENTS_BRAVE_NEWS_BROWSER_FEED_PARSING_H_
#endif // BRAVE_COMPONENTS_BRAVE_NEWS_BROWSER_COMBINED_FEED_PARSING_H_
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
#include <vector>

#include "base/values.h"
#include "brave/components/brave_news/browser/feed_parsing.h"
#include "brave/components/brave_news/browser/combined_feed_parsing.h"
#include "brave/components/brave_news/common/brave_news.mojom-shared.h"
#include "brave/components/brave_news/common/brave_news.mojom.h"
#include "testing/gtest/include/gtest/gtest.h"
Expand All @@ -34,31 +34,29 @@ base::Value GetItem(std::string url) {

} // namespace

TEST(BraveNewsFeedParsing, Success) {
TEST(BraveNewCombinedFeedParsing, Success) {
// Create an entry which should be valid as a Brave News item
auto item = GetItem("https://www.hello.com");
base::Value::List list;
list.Append(std::move(item));
base::Value json_value = base::Value(std::move(list));

std::vector<mojom::FeedItemPtr> feed_items;

EXPECT_TRUE(ParseFeedItems(std::move(json_value), &feed_items));
std::vector<mojom::FeedItemPtr> feed_items =
ParseFeedItems(std::move(json_value));
// The single item should be successfully parsed to a FeedItem
EXPECT_EQ(feed_items.size(), 1u);
}

TEST(BraveNewsFeedParsing, FailBadProtocol) {
TEST(BraveNewCombinedFeedParsing, FailBadProtocol) {
// Create an entry which should be invalid as a Brave News item
// A chrome: protocol should not be allowed
auto item = GetItem("chrome://settings");
base::Value::List list;
list.Append(std::move(item));
base::Value json_value = base::Value(std::move(list));

std::vector<mojom::FeedItemPtr> feed_items;

EXPECT_TRUE(ParseFeedItems(std::move(json_value), &feed_items));
std::vector<mojom::FeedItemPtr> feed_items =
ParseFeedItems(std::move(json_value));
// The single item should not be parsed to a FeedItem
EXPECT_EQ(feed_items.size(), 0u);
}
Expand Down
1 change: 0 additions & 1 deletion components/brave_news/browser/feed_building.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
#include "base/strings/stringprintf.h"
#include "base/time/time.h"
#include "brave/components/brave_news/browser/channels_controller.h"
#include "brave/components/brave_news/browser/feed_parsing.h"
#include "brave/components/brave_news/browser/urls.h"
#include "brave/components/brave_news/common/brave_news.mojom-forward.h"
#include "brave/components/brave_news/common/brave_news.mojom-shared.h"
Expand Down
15 changes: 6 additions & 9 deletions components/brave_news/browser/feed_building_unittest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,15 @@
#include <utility>
#include <vector>

#include "base/containers/extend.h"
#include "base/containers/flat_map.h"
#include "base/test/scoped_feature_list.h"
#include "base/test/values_test_util.h"
#include "base/time/time.h"
#include "base/values.h"
#include "brave/components/brave_news/browser/channels_controller.h"
#include "brave/components/brave_news/browser/combined_feed_parsing.h"
#include "brave/components/brave_news/browser/feed_building.h"
#include "brave/components/brave_news/browser/feed_parsing.h"
#include "brave/components/brave_news/common/brave_news.mojom-shared.h"
#include "brave/components/brave_news/common/brave_news.mojom.h"
#include "brave/components/brave_news/common/features.h"
Expand Down Expand Up @@ -189,8 +190,7 @@ TEST_F(BraveNewsFeedBuildingTest, BuildFeedV1) {

std::unordered_set<std::string> history_hosts = {"www.espn.com"};

std::vector<mojom::FeedItemPtr> feed_items;
ParseFeedItems(GetFeedJson(), &feed_items);
std::vector<mojom::FeedItemPtr> feed_items = ParseFeedItems(GetFeedJson());

mojom::Feed feed;

Expand Down Expand Up @@ -233,8 +233,7 @@ TEST_F(BraveNewsFeedBuildingTest, BuildFeedV2) {

std::unordered_set<std::string> history_hosts = {"www.espn.com"};

std::vector<mojom::FeedItemPtr> feed_items;
ParseFeedItems(GetFeedJson(), &feed_items);
std::vector<mojom::FeedItemPtr> feed_items = ParseFeedItems(GetFeedJson());

mojom::Feed feed;

Expand Down Expand Up @@ -465,11 +464,9 @@ TEST_F(BraveNewsFeedBuildingTest, DuplicateItemsAreNotIncluded) {

std::unordered_set<std::string> history_hosts = {"www.espn.com"};

std::vector<mojom::FeedItemPtr> feed_items;

// Parse the feed items twice so we get two copies of everything.
ParseFeedItems(GetFeedJson(), &feed_items);
ParseFeedItems(GetFeedJson(), &feed_items);
std::vector<mojom::FeedItemPtr> feed_items = ParseFeedItems(GetFeedJson());
base::Extend(feed_items, ParseFeedItems(GetFeedJson()));

mojom::Feed feed;

Expand Down
7 changes: 3 additions & 4 deletions components/brave_news/browser/feed_controller.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@
#include "base/strings/string_util.h"
#include "brave/components/api_request_helper/api_request_helper.h"
#include "brave/components/brave_news/browser/channels_controller.h"
#include "brave/components/brave_news/browser/combined_feed_parsing.h"
#include "brave/components/brave_news/browser/direct_feed_controller.h"
#include "brave/components/brave_news/browser/feed_building.h"
#include "brave/components/brave_news/browser/feed_parsing.h"
#include "brave/components/brave_news/browser/locales_helper.h"
#include "brave/components/brave_news/browser/publishers_controller.h"
#include "brave/components/brave_news/browser/urls.h"
Expand Down Expand Up @@ -355,9 +355,8 @@ void FeedController::FetchCombinedFeed(GetFeedItemsCallback callback) {
// Only mark cache time of remote request if
// parsing was successful
controller->locale_feed_etags_[locale] = etag;
FeedItems feed_items;
ParseFeedItems(api_request_result.value_body(), &feed_items);
std::move(callback).Run(std::move(feed_items));
std::move(callback).Run(
ParseFeedItems(api_request_result.value_body()));
},
base::Unretained(controller), locale, locales_fetched_callback);
// Send the request
Expand Down
Loading

0 comments on commit b31c48f

Please sign in to comment.