From 9ed101db33292bc801767134f783fca9a06c7862 Mon Sep 17 00:00:00 2001 From: Darnell Andries Date: Thu, 1 Aug 2024 13:40:55 -0700 Subject: [PATCH] Add Web Discovery content scraper, payload generator and privacy guard --- browser/brave_tab_helpers.cc | 19 + browser/sources.gni | 1 + .../chrome_content_renderer_client.cc | 13 + components/web_discovery/browser/BUILD.gn | 47 + components/web_discovery/browser/DEPS | 2 + .../web_discovery/browser/content_scraper.cc | 381 +++++ .../web_discovery/browser/content_scraper.h | 119 ++ .../browser/content_scraper_browsertest.cc | 283 ++++ .../document_extractor/rust/.gitignore | 2 + .../browser/document_extractor/rust/BUILD.gn | 24 + .../document_extractor/rust/Cargo.lock | 767 +++++++++ .../document_extractor/rust/Cargo.toml | 15 + .../document_extractor/rust/src/lib.rs | 124 ++ .../web_discovery/browser/hash_detection.cc | 1489 +++++++++++++++++ .../web_discovery/browser/hash_detection.h | 24 + .../browser/hash_detection_unittest.cc | 29 + .../browser/payload_generator.cc | 208 +++ .../web_discovery/browser/payload_generator.h | 38 + .../browser/payload_generator_unittest.cc | 283 ++++ .../web_discovery/browser/privacy_guard.cc | 259 +++ .../web_discovery/browser/privacy_guard.h | 44 + .../browser/privacy_guard_unittest.cc | 169 ++ .../web_discovery/browser/regex_util.cc | 121 ++ components/web_discovery/browser/regex_util.h | 50 + .../browser/web_discovery_service.cc | 61 +- .../browser/web_discovery_service.h | 18 + .../browser/web_discovery_tab_helper.cc | 45 + .../browser/web_discovery_tab_helper.h | 47 + components/web_discovery/common/BUILD.gn | 8 + .../web_discovery/common/web_discovery.mojom | 36 + components/web_discovery/renderer/BUILD.gn | 24 + components/web_discovery/renderer/DEPS | 5 + .../renderer/blink_document_extractor.cc | 94 ++ .../renderer/blink_document_extractor.h | 47 + renderer/sources.gni | 8 + script/brave_license_helper.py | 2 +- test/BUILD.gn | 4 + .../rust/chromium_crates_io/Cargo.lock | 10 + 
.../rust/chromium_crates_io/Cargo.toml | 5 + .../document_extractor/v0_1/README.chromium | 8 + 40 files changed, 4931 insertions(+), 2 deletions(-) create mode 100644 components/web_discovery/browser/content_scraper.cc create mode 100644 components/web_discovery/browser/content_scraper.h create mode 100644 components/web_discovery/browser/content_scraper_browsertest.cc create mode 100644 components/web_discovery/browser/document_extractor/rust/.gitignore create mode 100644 components/web_discovery/browser/document_extractor/rust/BUILD.gn create mode 100644 components/web_discovery/browser/document_extractor/rust/Cargo.lock create mode 100644 components/web_discovery/browser/document_extractor/rust/Cargo.toml create mode 100644 components/web_discovery/browser/document_extractor/rust/src/lib.rs create mode 100644 components/web_discovery/browser/hash_detection.cc create mode 100644 components/web_discovery/browser/hash_detection.h create mode 100644 components/web_discovery/browser/hash_detection_unittest.cc create mode 100644 components/web_discovery/browser/payload_generator.cc create mode 100644 components/web_discovery/browser/payload_generator.h create mode 100644 components/web_discovery/browser/payload_generator_unittest.cc create mode 100644 components/web_discovery/browser/privacy_guard.cc create mode 100644 components/web_discovery/browser/privacy_guard.h create mode 100644 components/web_discovery/browser/privacy_guard_unittest.cc create mode 100644 components/web_discovery/browser/regex_util.cc create mode 100644 components/web_discovery/browser/regex_util.h create mode 100644 components/web_discovery/browser/web_discovery_tab_helper.cc create mode 100644 components/web_discovery/browser/web_discovery_tab_helper.h create mode 100644 components/web_discovery/common/web_discovery.mojom create mode 100644 components/web_discovery/renderer/BUILD.gn create mode 100644 components/web_discovery/renderer/DEPS create mode 100644 
components/web_discovery/renderer/blink_document_extractor.cc create mode 100644 components/web_discovery/renderer/blink_document_extractor.h create mode 100644 third_party/rust/document_extractor/v0_1/README.chromium diff --git a/browser/brave_tab_helpers.cc b/browser/brave_tab_helpers.cc index f205805a2a57..9229089568e8 100644 --- a/browser/brave_tab_helpers.cc +++ b/browser/brave_tab_helpers.cc @@ -71,6 +71,12 @@ #include "brave/components/ai_chat/content/browser/ai_chat_tab_helper.h" #endif // BUILDFLAG(ENABLE_AI_CHAT) +#if BUILDFLAG(ENABLE_WEB_DISCOVERY_NATIVE) +#include "brave/browser/web_discovery/web_discovery_service_factory.h" +#include "brave/components/web_discovery/browser/web_discovery_tab_helper.h" +#include "brave/components/web_discovery/common/features.h" +#endif + #if BUILDFLAG(ENABLE_WIDEVINE) #include "brave/browser/brave_drm_tab_helper.h" #endif @@ -213,6 +219,19 @@ void AttachTabHelpers(content::WebContents* web_contents) { } } #endif // BUILDFLAG(ENABLE_PLAYLIST) + +#if BUILDFLAG(ENABLE_WEB_DISCOVERY_NATIVE) + if (base::FeatureList::IsEnabled( + web_discovery::features::kBraveWebDiscoveryNative)) { + auto* web_discovery_service = + web_discovery::WebDiscoveryServiceFactory::GetForBrowserContext( + web_contents->GetBrowserContext()); + if (web_discovery_service) { + web_discovery::WebDiscoveryTabHelper::CreateForWebContents( + web_contents, web_discovery_service); + } + } +#endif } } // namespace brave diff --git a/browser/sources.gni b/browser/sources.gni index b7fbdf0d1b77..35e3f801d0df 100644 --- a/browser/sources.gni +++ b/browser/sources.gni @@ -364,6 +364,7 @@ if (enable_web_discovery_native) { brave_chrome_browser_deps += [ "//brave/browser/web_discovery", "//brave/components/web_discovery/browser", + "//brave/components/web_discovery/browser:tab_helper", "//brave/components/web_discovery/common", ] } diff --git a/chromium_src/chrome/renderer/chrome_content_renderer_client.cc 
b/chromium_src/chrome/renderer/chrome_content_renderer_client.cc index 33e73297a60f..74156b707978 100644 --- a/chromium_src/chrome/renderer/chrome_content_renderer_client.cc +++ b/chromium_src/chrome/renderer/chrome_content_renderer_client.cc @@ -7,6 +7,7 @@ #include "brave/components/ai_chat/core/common/buildflags/buildflags.h" #include "brave/components/ai_rewriter/common/buildflags/buildflags.h" #include "brave/components/content_settings/renderer/brave_content_settings_agent_impl.h" +#include "brave/components/web_discovery/common/buildflags/buildflags.h" #include "chrome/common/chrome_isolated_world_ids.h" #include "chrome/renderer/process_state.h" #include "components/dom_distiller/content/renderer/distillability_agent.h" @@ -23,6 +24,11 @@ #include "brave/components/ai_rewriter/renderer/ai_rewriter_agent.h" #endif +#if BUILDFLAG(ENABLE_WEB_DISCOVERY_NATIVE) +#include "brave/components/web_discovery/common/features.h" +#include "brave/components/web_discovery/renderer/blink_document_extractor.h" +#endif + namespace { void RenderFrameWithBinderRegistryCreated( @@ -42,6 +48,13 @@ void RenderFrameWithBinderRegistryCreated( new ai_rewriter::AIRewriterAgent(render_frame, registry); } #endif + +#if BUILDFLAG(ENABLE_WEB_DISCOVERY_NATIVE) + if (base::FeatureList::IsEnabled( + web_discovery::features::kBraveWebDiscoveryNative)) { + new web_discovery::BlinkDocumentExtractor(render_frame, registry); + } +#endif } } // namespace diff --git a/components/web_discovery/browser/BUILD.gn b/components/web_discovery/browser/BUILD.gn index e315bcd16884..406029f346ac 100644 --- a/components/web_discovery/browser/BUILD.gn +++ b/components/web_discovery/browser/BUILD.gn @@ -12,12 +12,22 @@ component("browser") { sources = [ "background_credential_helper.cc", "background_credential_helper.h", + "content_scraper.cc", + "content_scraper.h", "credential_manager.cc", "credential_manager.h", "credential_signer.h", + "hash_detection.cc", + "hash_detection.h", "patterns.cc", "patterns.h", 
+ "payload_generator.cc", + "payload_generator.h", "pref_names.h", + "privacy_guard.cc", + "privacy_guard.h", + "regex_util.cc", + "regex_util.h", "rsa.cc", "rsa.h", "server_config_loader.cc", @@ -29,10 +39,12 @@ component("browser") { ] deps = [ "anonymous_credentials/rust:rust_lib", + "document_extractor/rust:rust_lib", "//base", "//brave/brave_domains", "//brave/components/constants", "//brave/components/web_discovery/common", + "//brave/components/web_discovery/common:mojom", "//components/keyed_service/core", "//components/prefs", "//crypto", @@ -46,11 +58,28 @@ component("browser") { ] } +component("tab_helper") { + output_name = "web_discovery_tab_helper" + sources = [ + "web_discovery_tab_helper.cc", + "web_discovery_tab_helper.h", + ] + deps = [ + "//base", + "//brave/components/web_discovery/browser", + "//content/public/browser", + "//services/service_manager/public/cpp", + ] +} + source_set("unit_tests") { testonly = true sources = [ "credential_manager_unittest.cc", + "hash_detection_unittest.cc", "patterns_unittest.cc", + "payload_generator_unittest.cc", + "privacy_guard_unittest.cc", "server_config_loader_unittest.cc", ] deps = [ @@ -64,3 +93,21 @@ source_set("unit_tests") { "//testing/gtest", ] } + +source_set("browser_tests") { + testonly = true + sources = [ "content_scraper_browsertest.cc" ] + deps = [ + ":browser", + "//base/test:test_support", + "//brave/components/constants", + "//brave/components/web_discovery/common", + "//brave/components/web_discovery/common:mojom", + "//chrome/test:test_support", + "//content/test:test_support", + "//net:test_support", + "//services/service_manager/public/cpp", + "//testing/gtest", + ] + defines = [ "HAS_OUT_OF_PROC_TEST_RUNNER" ] +} diff --git a/components/web_discovery/browser/DEPS b/components/web_discovery/browser/DEPS index 1c0aaedc075e..fec5d49105b7 100644 --- a/components/web_discovery/browser/DEPS +++ b/components/web_discovery/browser/DEPS @@ -1,9 +1,11 @@ include_rules = [ "-content", 
"+services/network/public", + "+content/public/browser", "+extensions/buildflags/buildflags.h", "+services/network/test", "+services/service_manager/public/cpp", + "+third_party/boringssl/src/include", "+third_party/re2", "+third_party/zlib", ] diff --git a/components/web_discovery/browser/content_scraper.cc b/components/web_discovery/browser/content_scraper.cc new file mode 100644 index 000000000000..d1a4887f0b60 --- /dev/null +++ b/components/web_discovery/browser/content_scraper.cc @@ -0,0 +1,381 @@ +/* Copyright (c) 2024 The Brave Authors. All rights reserved. + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at https://mozilla.org/MPL/2.0/. */ + +#include "brave/components/web_discovery/browser/content_scraper.h" + +#include + +#include "base/json/json_reader.h" +#include "base/json/json_writer.h" +#include "base/ranges/algorithm.h" +#include "base/strings/string_split.h" +#include "base/task/thread_pool.h" +#include "brave/components/web_discovery/browser/patterns.h" +#include "brave/components/web_discovery/browser/privacy_guard.h" +#include "brave/components/web_discovery/browser/util.h" + +namespace web_discovery { + +namespace { + +constexpr char kUrlAttrId[] = "url"; +constexpr char kCountryCodeAttrId[] = "ctry"; +constexpr char kFieldsValueKey[] = "fields"; +constexpr char kIdValueKey[] = "id"; +constexpr char kUrlValueKey[] = "url"; + +constexpr char kRefineSplitFuncId[] = "splitF"; +constexpr char kRefineMaskURLFuncId[] = "maskU"; +constexpr char kRefineParseURLFuncId[] = "parseU"; +constexpr char kRefineJsonExtractFuncId[] = "json"; + +constexpr char kParseURLQueryExtractType[] = "qs"; + +std::string RefineSplit(const std::string& value, + const std::string& delimiter, + int index) { + auto split = base::SplitStringUsingSubstr( + value, delimiter, base::WhitespaceHandling::KEEP_WHITESPACE, + base::SPLIT_WANT_ALL); + std::string 
encoded_result; + if (index < 0 || static_cast(index) >= split.size()) { + encoded_result = value; + } else { + encoded_result = split[index]; + } + return DecodeURLComponent(encoded_result); +} + +std::optional RefineParseURL(const std::string& value, + const std::string& extract_type, + const std::string& key) { + if (extract_type != kParseURLQueryExtractType) { + return std::nullopt; + } + GURL url(value); + if (!url.is_valid() || !url.has_query()) { + return std::nullopt; + } + auto query_value = ExtractValueFromQueryString(url.query_piece(), key); + return query_value; +} + +std::optional RefineJsonExtract(const std::string& value, + const std::string& path, + bool extract_objects) { + auto parsed = base::JSONReader::Read(value); + if (!parsed || !parsed->is_dict()) { + return std::nullopt; + } + const auto* found_value = parsed->GetDict().FindByDottedPath(path); + if (!found_value) { + return std::nullopt; + } + if (found_value->is_string()) { + return found_value->GetString(); + } + if ((found_value->is_dict() || found_value->is_list()) && !extract_objects) { + return std::nullopt; + } + std::string encoded_value; + if (!base::JSONWriter::Write(*found_value, &encoded_value)) { + return std::nullopt; + } + return encoded_value; +} + +} // namespace + +PageScrapeResult::PageScrapeResult(GURL url, std::string id) + : url(url), id(id) {} +PageScrapeResult::~PageScrapeResult() = default; + +ContentScraper::ContentScraper(const ServerConfigLoader* server_config_loader, + RegexUtil* regex_util) + : sequenced_task_runner_(base::ThreadPool::CreateSequencedTaskRunner({})), + server_config_loader_(server_config_loader), + regex_util_(regex_util) {} + +ContentScraper::~ContentScraper() = default; + +base::Value PageScrapeResult::SerializeToValue() { + base::Value::Dict result; + base::Value::Dict fields_dict; + + for (const auto& [root_selector, inner_fields] : fields) { + base::Value::List list; + for (const auto& values : inner_fields) { + list.Append(values.Clone()); 
+ } + fields_dict.Set(root_selector, std::move(list)); + } + + result.Set(kFieldsValueKey, std::move(fields_dict)); + result.Set(kIdValueKey, id); + result.Set(kUrlValueKey, url.spec()); + return base::Value(std::move(result)); +} + +std::unique_ptr PageScrapeResult::FromValue( + const base::Value& value) { + if (!value.is_dict()) { + return nullptr; + } + const auto& dict = value.GetDict(); + const auto* fields_dict = dict.FindDict(kFieldsValueKey); + const auto* id = dict.FindString(kIdValueKey); + const auto* url = dict.FindString(kUrlValueKey); + + if (!fields_dict || !id || !url) { + return nullptr; + } + + auto result = std::make_unique(GURL(*url), *id); + for (const auto [root_selector, inner_fields_val] : *fields_dict) { + const auto* inner_fields_list = inner_fields_val.GetIfList(); + if (!inner_fields_list) { + continue; + } + for (const auto& values : *inner_fields_list) { + const auto* values_dict = values.GetIfDict(); + if (!values_dict) { + continue; + } + result->fields[root_selector].push_back(values_dict->Clone()); + } + } + + return result; +} + +void ContentScraper::ScrapePage(const GURL& url, + bool is_strict_scrape, + mojom::DocumentExtractor* document_extractor, + PageScrapeResultCallback callback) { + const auto* url_details = + server_config_loader_->GetLastPatterns().GetMatchingURLPattern( + url, is_strict_scrape); + if (!url_details) { + return; + } + auto interim_result = + std::make_unique(url, url_details->id); + + std::vector select_requests; + for (const auto& [selector, group] : url_details->scrape_rule_groups) { + auto select_request = mojom::SelectRequest::New(); + select_request->root_selector = selector; + for (const auto& [report_key, rule] : group) { + if (rule->rule_type == ScrapeRuleType::kStandard) { + ProcessStandardRule(report_key, *rule, selector, url, + interim_result.get()); + continue; + } + auto attribute_request = mojom::SelectAttributeRequest::New(); + attribute_request->sub_selector = rule->sub_selector; + 
attribute_request->attribute = rule->attribute; + attribute_request->key = report_key; + + select_request->attribute_requests.push_back( + std::move(attribute_request)); + } + select_requests.push_back(std::move(select_request)); + } + + document_extractor->QueryElementAttributes( + std::move(select_requests), + base::BindOnce(&ContentScraper::OnScrapedElementAttributes, + base::Unretained(this), is_strict_scrape, + std::move(interim_result), std::move(callback))); +} + +void ContentScraper::ParseAndScrapePage( + const GURL& url, + bool is_strict_scrape, + std::unique_ptr prev_result, + std::string html, + PageScrapeResultCallback callback) { + const auto* url_details = + server_config_loader_->GetLastPatterns().GetMatchingURLPattern( + url, is_strict_scrape); + if (!url_details) { + return; + } + auto interim_result = std::move(prev_result); + + std::vector select_requests; + for (const auto& [selector, group] : url_details->scrape_rule_groups) { + rust_document_extractor::SelectRequest select_request; + select_request.root_selector = selector; + for (const auto& [report_key, rule] : group) { + if (rule->rule_type == ScrapeRuleType::kStandard) { + ProcessStandardRule(report_key, *rule, selector, url, + interim_result.get()); + continue; + } + rust_document_extractor::SelectAttributeRequest attribute_request{ + .sub_selector = rule->sub_selector.value_or(""), + .key = report_key, + .attribute = rule->attribute, + }; + select_request.attribute_requests.push_back(std::move(attribute_request)); + } + select_requests.push_back(std::move(select_request)); + } + + sequenced_task_runner_->PostTaskAndReplyWithResult( + FROM_HERE, + base::BindOnce(&rust_document_extractor::query_element_attributes, html, + select_requests), + base::BindOnce(&ContentScraper::OnRustElementAttributes, + weak_ptr_factory_.GetWeakPtr(), is_strict_scrape, + std::move(interim_result), std::move(callback))); +} + +void ContentScraper::ProcessStandardRule(const std::string& report_key, + const 
ScrapeRule& rule, + const std::string& root_selector, + const GURL& url, + PageScrapeResult* scrape_result) { + std::string value; + if (rule.attribute == kUrlAttrId) { + value = url.spec(); + } else if (rule.attribute == kCountryCodeAttrId) { + value = server_config_loader_->GetLastServerConfig().location; + } + auto refined_value = ExecuteRefineFunctions(rule.functions_applied, value); + if (!refined_value) { + return; + } + auto& fields = scrape_result->fields[root_selector]; + if (fields.empty()) { + fields.emplace_back(); + } + fields[0].Set(report_key, *refined_value); +} + +void ContentScraper::OnScrapedElementAttributes( + bool is_strict_scrape, + std::unique_ptr scrape_result, + PageScrapeResultCallback callback, + std::vector attribute_results) { + const auto* url_details = + server_config_loader_->GetLastPatterns().GetMatchingURLPattern( + scrape_result->url, is_strict_scrape); + if (!url_details) { + return; + } + for (const auto& attribute_result : attribute_results) { + const auto rule_group = + url_details->scrape_rule_groups.find(attribute_result->root_selector); + if (rule_group == url_details->scrape_rule_groups.end()) { + continue; + } + base::Value::Dict attribute_values; + for (const auto& [key, value_str] : attribute_result->attribute_values) { + ProcessAttributeValue(rule_group->second, *scrape_result, key, value_str, + attribute_values); + } + scrape_result->fields[attribute_result->root_selector].push_back( + std::move(attribute_values)); + } + std::move(callback).Run(std::move(scrape_result)); +} + +void ContentScraper::OnRustElementAttributes( + bool is_strict_scrape, + std::unique_ptr scrape_result, + PageScrapeResultCallback callback, + rust::Vec attribute_results) { + const auto* url_details = + server_config_loader_->GetLastPatterns().GetMatchingURLPattern( + scrape_result->url, is_strict_scrape); + if (!url_details) { + return; + } + for (const auto& attribute_result : attribute_results) { + const auto root_selector = 
std::string(attribute_result.root_selector); + const auto rule_group = url_details->scrape_rule_groups.find(root_selector); + if (rule_group == url_details->scrape_rule_groups.end()) { + continue; + } + base::Value::Dict attribute_values; + for (const auto& pair : attribute_result.attribute_pairs) { + ProcessAttributeValue( + rule_group->second, *scrape_result, std::string(pair.key), + pair.value.empty() ? std::nullopt + : std::make_optional(pair.value), + attribute_values); + } + scrape_result->fields[root_selector].push_back(std::move(attribute_values)); + } + std::move(callback).Run(std::move(scrape_result)); +} + +std::optional ContentScraper::ExecuteRefineFunctions( + const RefineFunctionList& function_list, + std::string value) { + std::optional result = value; + for (const auto& function_args : function_list) { + if (function_args.empty()) { + continue; + } + const auto& func_name = function_args[0]; + if (func_name == kRefineSplitFuncId) { + if (function_args.size() >= 3 && function_args[1].is_string() && + function_args[2].is_int()) { + result = RefineSplit(*result, function_args[1].GetString(), + function_args[2].GetInt()); + } + } else if (func_name == kRefineMaskURLFuncId) { + result = MaskURL(*regex_util_, GURL(value)); + } else if (func_name == kRefineParseURLFuncId) { + if (function_args.size() >= 3 && function_args[1].is_string() && + function_args[2].is_string()) { + result = RefineParseURL(*result, function_args[1].GetString(), + function_args[2].GetString()); + } + } else if (func_name == kRefineJsonExtractFuncId) { + if (function_args.size() >= 3 && function_args[1].is_string() && + function_args[2].is_bool()) { + result = RefineJsonExtract(*result, function_args[1].GetString(), + function_args[2].GetBool()); + } + } + if (!result) { + break; + } + } + return result; +} + +void ContentScraper::ProcessAttributeValue( + const ScrapeRuleGroup& rule_group, + PageScrapeResult& scrape_result, + std::string key, + std::optional value_str, + 
base::Value::Dict& attribute_values) { + const auto rule = rule_group.find(key); + if (rule == rule_group.end()) { + return; + } + base::Value value; + if (value_str) { + value_str = + ExecuteRefineFunctions(rule->second->functions_applied, *value_str); + + if (value_str) { + if (rule->second->rule_type == ScrapeRuleType::kSearchQuery || + rule->second->rule_type == ScrapeRuleType::kWidgetTitle) { + scrape_result.query = value_str; + } + + value = base::Value(*value_str); + } + } + attribute_values.Set(key, std::move(value)); +} + +} // namespace web_discovery diff --git a/components/web_discovery/browser/content_scraper.h b/components/web_discovery/browser/content_scraper.h new file mode 100644 index 000000000000..4c6eea43c388 --- /dev/null +++ b/components/web_discovery/browser/content_scraper.h @@ -0,0 +1,119 @@ +/* Copyright (c) 2024 The Brave Authors. All rights reserved. + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at https://mozilla.org/MPL/2.0/. 
*/ + +#ifndef BRAVE_COMPONENTS_WEB_DISCOVERY_BROWSER_CONTENT_SCRAPER_H_ +#define BRAVE_COMPONENTS_WEB_DISCOVERY_BROWSER_CONTENT_SCRAPER_H_ + +#include +#include +#include + +#include "base/containers/flat_map.h" +#include "base/functional/callback.h" +#include "base/values.h" +#include "brave/components/web_discovery/browser/document_extractor/rust/src/lib.rs.h" +#include "brave/components/web_discovery/browser/patterns.h" +#include "brave/components/web_discovery/browser/regex_util.h" +#include "brave/components/web_discovery/browser/server_config_loader.h" +#include "brave/components/web_discovery/common/web_discovery.mojom.h" +#include "url/gurl.h" + +namespace web_discovery { + +struct PageScrapeResult { + PageScrapeResult(GURL url, std::string id); + ~PageScrapeResult(); + + PageScrapeResult(const PageScrapeResult&) = delete; + PageScrapeResult& operator=(const PageScrapeResult&) = delete; + + base::Value SerializeToValue(); + static std::unique_ptr FromValue(const base::Value& dict); + + GURL url; + // A map of DOM selectors to list of scraped values embedded in a Dict. + // Each dict contains arbitrary keys (defined in the patterns) to scraped + // values. + base::flat_map> fields; + std::string id; + + // Only available for non-strict scrapes with "searchQuery"/"widgetTitle" + // scrape rules + std::optional query; +}; + +// Extracts attribute values from the page DOM for reporting purposes. +// ContentScraper utilizes the following techniques: +// +// a) Extraction within the current page in the renderer (via `ScrapePage`). +// The `mojom::DocumentExtractor` is used to request attribute values +// from the current DOM in the view. Typically, this is used to extract a +// search query, and decide whether the page is worthy of investigation +// and reporting. +// b) Parsing and extracting HTML from a double fetch. This follows +// the extraction in a). Used to extract all other needed details +// from the page i.e. search results.
Uses a Rust library for DOM +// operations, in respect of Rule of Two. +class ContentScraper { + public: + using PageScrapeResultCallback = + base::OnceCallback)>; + + ContentScraper(const ServerConfigLoader* server_config_loader, + RegexUtil* regex_util); + ~ContentScraper(); + + ContentScraper(const ContentScraper&) = delete; + ContentScraper& operator=(const ContentScraper&) = delete; + + // For initial page scrape in renderer + void ScrapePage(const GURL& url, + bool is_strict_scrape, + mojom::DocumentExtractor* document_extractor, + PageScrapeResultCallback callback); + // For subsequent double fetches after initial scrape + void ParseAndScrapePage(const GURL& url, + bool is_strict_scrape, + std::unique_ptr prev_result, + std::string html, + PageScrapeResultCallback callback); + + private: + void ProcessStandardRule(const std::string& report_key, + const ScrapeRule& rule, + const std::string& root_selector, + const GURL& url, + PageScrapeResult* scrape_result); + void OnScrapedElementAttributes( + bool is_strict_scrape, + std::unique_ptr scrape_result, + PageScrapeResultCallback callback, + std::vector attribute_results); + void OnRustElementAttributes( + bool is_strict_scrape, + std::unique_ptr scrape_result, + PageScrapeResultCallback callback, + rust::Vec attribute_results); + + std::optional ExecuteRefineFunctions( + const RefineFunctionList& function_list, + std::string value); + void ProcessAttributeValue(const ScrapeRuleGroup& rule_group, + PageScrapeResult& scrape_result, + std::string key, + std::optional value_str, + base::Value::Dict& attribute_values); + + scoped_refptr sequenced_task_runner_; + + raw_ptr server_config_loader_; + raw_ptr regex_util_; + + base::WeakPtrFactory weak_ptr_factory_{this}; +}; + +} // namespace web_discovery + +#endif // BRAVE_COMPONENTS_WEB_DISCOVERY_BROWSER_CONTENT_SCRAPER_H_ diff --git a/components/web_discovery/browser/content_scraper_browsertest.cc b/components/web_discovery/browser/content_scraper_browsertest.cc new 
file mode 100644 index 000000000000..f2705d149019 --- /dev/null +++ b/components/web_discovery/browser/content_scraper_browsertest.cc @@ -0,0 +1,283 @@ +/* Copyright (c) 2024 The Brave Authors. All rights reserved. + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at https://mozilla.org/MPL/2.0/. */ + +#include "brave/components/web_discovery/browser/content_scraper.h" + +#include +#include + +#include "base/files/file_path.h" +#include "base/files/file_util.h" +#include "base/path_service.h" +#include "base/test/bind.h" +#include "base/test/scoped_feature_list.h" +#include "brave/components/constants/brave_paths.h" +#include "brave/components/web_discovery/browser/patterns.h" +#include "brave/components/web_discovery/browser/server_config_loader.h" +#include "brave/components/web_discovery/common/features.h" +#include "brave/components/web_discovery/common/web_discovery.mojom.h" +#include "chrome/test/base/chrome_test_utils.h" +#include "content/public/browser/web_contents.h" +#include "content/public/test/browser_test.h" +#include "content/public/test/browser_test_base.h" +#include "content/public/test/browser_test_utils.h" +#include "content/public/test/content_mock_cert_verifier.h" +#include "net/dns/mock_host_resolver.h" +#include "services/service_manager/public/cpp/interface_provider.h" +#include "testing/gtest/include/gtest/gtest.h" +#include "third_party/re2/src/re2/re2.h" + +#if BUILDFLAG(IS_ANDROID) +#include "chrome/test/base/android/android_browser_test.h" +#else +#include "chrome/test/base/in_process_browser_test.h" +#endif + +namespace web_discovery { + +class WebDiscoveryContentScraperTest : public PlatformBrowserTest { + public: + WebDiscoveryContentScraperTest() + : scoped_features_(features::kBraveWebDiscoveryNative) {} + + // PlatformBrowserTest: + void SetUpOnMainThread() override { + 
PlatformBrowserTest::SetUpOnMainThread(); + base::FilePath data_path = + base::PathService::CheckedGet(brave::DIR_TEST_DATA); + data_path = data_path.AppendASCII("web_discovery"); + + host_resolver()->AddRule("*", "127.0.0.1"); + test_server_.ServeFilesFromDirectory(data_path); + mock_cert_verifier_.mock_cert_verifier()->set_default_result(net::OK); + ASSERT_TRUE(test_server_.Start()); + + InitScraper(); + run_loop_ = std::make_unique(); + + ASSERT_TRUE(base::ReadFileToString(data_path.AppendASCII("page.html"), + &page_content_)); + } + + void SetUpCommandLine(base::CommandLine* command_line) override { + PlatformBrowserTest::SetUpCommandLine(command_line); + mock_cert_verifier_.SetUpCommandLine(command_line); + } + + void SetUpInProcessBrowserTestFixture() override { + PlatformBrowserTest::SetUpInProcessBrowserTestFixture(); + mock_cert_verifier_.SetUpInProcessBrowserTestFixture(); + } + + void TearDownInProcessBrowserTestFixture() override { + mock_cert_verifier_.TearDownInProcessBrowserTestFixture(); + PlatformBrowserTest::TearDownInProcessBrowserTestFixture(); + } + + protected: + mojo::Remote LoadTestPageAndGetExtractor() { + mojo::Remote remote; + + auto url = test_server_.GetURL("example.com", "/page.html"); + auto* contents = chrome_test_utils::GetActiveWebContents(this); + EXPECT_TRUE(content::NavigateToURL(contents, url)); + auto* render_frame_host = contents->GetPrimaryMainFrame(); + + if (render_frame_host) { + render_frame_host->GetRemoteInterfaces()->GetInterface( + remote.BindNewPipeAndPassReceiver()); + } + return remote; + } + + std::string page_content_; + std::unique_ptr scraper_; + std::unique_ptr run_loop_; + + private: + void InitScraper() { + server_config_loader_ = std::make_unique( + nullptr, base::FilePath(), nullptr, base::DoNothing(), + base::DoNothing()); + auto server_config = std::make_unique(); + server_config->location = "us"; + server_config_loader_->SetLastServerConfigForTesting( + std::move(server_config)); + + auto 
patterns_group = std::make_unique(); + std::vector normal_patterns(1); + std::vector strict_patterns(1); + normal_patterns[0].url_regex = + std::make_unique("^https:\\/\\/example\\.com"); + normal_patterns[0].id = "ex1"; + strict_patterns[0].url_regex = + std::make_unique("^https:\\/\\/example\\.com"); + strict_patterns[0].id = "ex1"; + + auto* normal_rule_group = + &normal_patterns[0].scrape_rule_groups["#b .result1"]; + auto scrape_rule1 = std::make_unique(); + scrape_rule1->sub_selector = "a"; + scrape_rule1->rule_type = ScrapeRuleType::kOther; + scrape_rule1->attribute = "href"; + normal_rule_group->insert_or_assign("href", std::move(scrape_rule1)); + auto scrape_rule2 = std::make_unique(); + scrape_rule2->sub_selector = "a"; + scrape_rule2->rule_type = ScrapeRuleType::kOther; + scrape_rule2->attribute = "textContent"; + normal_rule_group->insert_or_assign("text", std::move(scrape_rule2)); + auto scrape_rule3 = std::make_unique(); + scrape_rule3->sub_selector = "#query"; + scrape_rule3->rule_type = ScrapeRuleType::kSearchQuery; + scrape_rule3->attribute = "textContent"; + normal_rule_group->insert_or_assign("q", std::move(scrape_rule3)); + normal_rule_group = &normal_patterns[0].scrape_rule_groups["dont>match"]; + auto scrape_rule4 = std::make_unique(); + scrape_rule4->rule_type = ScrapeRuleType::kStandard; + scrape_rule4->attribute = "url"; + scrape_rule4->functions_applied.emplace_back(); + scrape_rule4->functions_applied[0].Append(base::Value("parseU")); + scrape_rule4->functions_applied[0].Append(base::Value("qs")); + scrape_rule4->functions_applied[0].Append(base::Value("q")); + + normal_rule_group->insert_or_assign("q2", std::move(scrape_rule4)); + + patterns_group->normal_patterns = std::move(normal_patterns); + + auto* strict_rule_group = + &strict_patterns[0].scrape_rule_groups["#b #result2"]; + scrape_rule1 = std::make_unique(); + scrape_rule1->sub_selector = "a"; + scrape_rule1->rule_type = ScrapeRuleType::kOther; + scrape_rule1->attribute = 
"textContent"; + strict_rule_group->insert_or_assign("text", std::move(scrape_rule1)); + scrape_rule2 = std::make_unique(); + scrape_rule2->sub_selector = "#input1"; + scrape_rule2->rule_type = ScrapeRuleType::kOther; + scrape_rule2->attribute = "value"; + strict_rule_group->insert_or_assign("input", std::move(scrape_rule2)); + strict_rule_group = &strict_patterns[0].scrape_rule_groups["dont>match"]; + scrape_rule3 = std::make_unique(); + scrape_rule3->rule_type = ScrapeRuleType::kStandard; + scrape_rule3->attribute = "ctry"; + strict_rule_group->insert_or_assign("ctry", std::move(scrape_rule3)); + + patterns_group->strict_patterns = std::move(strict_patterns); + + server_config_loader_->SetLastPatternsForTesting(std::move(patterns_group)); + + scraper_ = std::make_unique(server_config_loader_.get(), + ®ex_util_); + } + + content::ContentMockCertVerifier mock_cert_verifier_; + net::EmbeddedTestServer test_server_{net::EmbeddedTestServer::TYPE_HTTPS}; + base::test::ScopedFeatureList scoped_features_; + RegexUtil regex_util_; + std::unique_ptr server_config_loader_; +}; + +IN_PROC_BROWSER_TEST_F(WebDiscoveryContentScraperTest, RendererScrape) { + auto extractor = LoadTestPageAndGetExtractor(); + ASSERT_TRUE(extractor.is_bound() && extractor.is_connected()); + + GURL url("https://example.com/page?q=testquery"); + scraper_->ScrapePage( + url, false, extractor.get(), + base::BindLambdaForTesting( + [&](std::unique_ptr scrape_result) { + [&] { + ASSERT_TRUE(scrape_result); + EXPECT_EQ(scrape_result->url, url); + EXPECT_EQ(scrape_result->fields.size(), 2u); + EXPECT_EQ(scrape_result->id, "ex1"); + + EXPECT_EQ(scrape_result->query, "A query"); + + auto field_map_it = scrape_result->fields.find("#b .result1"); + ASSERT_TRUE(field_map_it != scrape_result->fields.end()); + const auto* fields = &field_map_it->second; + + ASSERT_EQ(fields->size(), 2u); + + const auto* href_value = (*fields)[0].FindString("href"); + const auto* text_value = (*fields)[0].FindString("text"); + 
const auto* query_value_str = (*fields)[0].FindString("q"); + ASSERT_TRUE(href_value); + ASSERT_TRUE(text_value); + ASSERT_TRUE(query_value_str); + EXPECT_EQ(*href_value, "https://example.com/foo1"); + EXPECT_EQ(*text_value, "Foo1"); + EXPECT_EQ(*query_value_str, "A query"); + + href_value = (*fields)[1].FindString("href"); + text_value = (*fields)[1].FindString("text"); + const auto* query_value = (*fields)[1].Find("q"); + ASSERT_TRUE(href_value); + ASSERT_TRUE(text_value); + ASSERT_TRUE(query_value); + EXPECT_EQ(*href_value, "https://example.com/foo2"); + EXPECT_EQ(*text_value, "Foo2"); + EXPECT_TRUE(query_value->is_none()); + + field_map_it = scrape_result->fields.find("dont>match"); + ASSERT_TRUE(field_map_it != scrape_result->fields.end()); + fields = &field_map_it->second; + + ASSERT_EQ(fields->size(), 1u); + const auto* url_query_value = (*fields)[0].FindString("q2"); + ASSERT_TRUE(url_query_value); + EXPECT_EQ(*url_query_value, "testquery"); + }(); + run_loop_->Quit(); + })); + run_loop_->Run(); +} + +IN_PROC_BROWSER_TEST_F(WebDiscoveryContentScraperTest, RustParseAndScrape) { + GURL url("https://example.com/page.html"); + + auto prev_scrape_result = std::make_unique(url, "ex1"); + scraper_->ParseAndScrapePage( + url, true, std::move(prev_scrape_result), page_content_, + base::BindLambdaForTesting( + [&](std::unique_ptr scrape_result) { + [&] { + ASSERT_TRUE(scrape_result); + EXPECT_EQ(scrape_result->url, url); + EXPECT_EQ(scrape_result->fields.size(), 2u); + EXPECT_EQ(scrape_result->id, "ex1"); + + EXPECT_FALSE(scrape_result->query); + + auto field_map_it = scrape_result->fields.find("#b #result2"); + ASSERT_TRUE(field_map_it != scrape_result->fields.end()); + const auto* fields = &field_map_it->second; + + ASSERT_EQ(fields->size(), 1u); + + const auto* text_value = (*fields)[0].FindString("text"); + const auto* input_value = (*fields)[0].FindString("input"); + ASSERT_TRUE(text_value); + ASSERT_TRUE(input_value); + EXPECT_EQ(*text_value, "Foo3"); + 
EXPECT_EQ(*input_value, "Foo4"); + + field_map_it = scrape_result->fields.find("dont>match"); + ASSERT_TRUE(field_map_it != scrape_result->fields.end()); + fields = &field_map_it->second; + + ASSERT_EQ(fields->size(), 1u); + + const auto* ctry_value = (*fields)[0].FindString("ctry"); + ASSERT_TRUE(ctry_value); + EXPECT_EQ(*ctry_value, "us"); + }(); + run_loop_->Quit(); + })); + run_loop_->Run(); +} + +} // namespace web_discovery diff --git a/components/web_discovery/browser/document_extractor/rust/.gitignore b/components/web_discovery/browser/document_extractor/rust/.gitignore new file mode 100644 index 000000000000..436cbd991cdd --- /dev/null +++ b/components/web_discovery/browser/document_extractor/rust/.gitignore @@ -0,0 +1,2 @@ +target +example/main diff --git a/components/web_discovery/browser/document_extractor/rust/BUILD.gn b/components/web_discovery/browser/document_extractor/rust/BUILD.gn new file mode 100644 index 000000000000..70314a5d9669 --- /dev/null +++ b/components/web_discovery/browser/document_extractor/rust/BUILD.gn @@ -0,0 +1,24 @@ +# Copyright (c) 2024 The Brave Authors. All rights reserved. +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +import("//build/rust/rust_static_library.gni") + +rust_static_library("rust_lib") { + crate_name = "document_extractor" + crate_root = "src/lib.rs" + allow_unsafe = true + + edition = "2021" + sources = [ "src/lib.rs" ] + + visibility = [ "//brave/components/web_discovery/browser:*" ] + + cxx_bindings = [ "src/lib.rs" ] + + deps = [ + "//brave/third_party/rust/html5ever/v0_25:lib", + "//brave/third_party/rust/kuchikiki/v0_8:lib", + ] +} diff --git a/components/web_discovery/browser/document_extractor/rust/Cargo.lock b/components/web_discovery/browser/document_extractor/rust/Cargo.lock new file mode 100644 index 000000000000..dd4a2a1e7410 --- /dev/null +++ b/components/web_discovery/browser/document_extractor/rust/Cargo.lock @@ -0,0 +1,767 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "autocfg" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bitflags" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "cc" +version = "1.0.98" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41c270e7540d725e65ac7f1b212ac8ce349719624d7bcff99f8e2e488e8cf03f" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "convert_case" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e" + +[[package]] +name = "cssparser" +version = "0.27.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "754b69d351cdc2d8ee09ae203db831e005560fc6030da058f86ad60c92a9cb0a" +dependencies = [ + "cssparser-macros", + "dtoa-short", + "itoa", + "matches", + "phf", + "proc-macro2", + "quote", + "smallvec", + "syn 1.0.109", +] + +[[package]] +name = "cssparser-macros" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331" +dependencies = [ + "quote", + "syn 2.0.66", +] + +[[package]] +name = "cxx" +version = "1.0.122" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb497fad022245b29c2a0351df572e2d67c1046bcef2260ebc022aec81efea82" +dependencies = [ + "cc", + "cxxbridge-flags", + "cxxbridge-macro", + "link-cplusplus", +] + +[[package]] +name = "cxxbridge-flags" +version = "1.0.122" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "688c799a4a846f1c0acb9f36bb9c6272d9b3d9457f3633c7753c6057270df13c" + +[[package]] +name = "cxxbridge-macro" +version = "1.0.122" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "928bc249a7e3cd554fd2e8e08a426e9670c50bbfc9a621653cfa9accc9641783" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", +] + +[[package]] +name = "derive_more" +version = "0.99.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fb810d30a7c1953f91334de7244731fc3f3c10d7fe163338a35b9f640960321" +dependencies = [ + "convert_case", + "proc-macro2", + "quote", + "rustc_version", + "syn 1.0.109", +] + +[[package]] +name = "document-extractor-cxx" 
+version = "0.1.0" +dependencies = [ + "cxx", + "html5ever", + "kuchikiki", +] + +[[package]] +name = "dtoa" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dcbb2bf8e87535c23f7a8a321e364ce21462d0ff10cb6407820e8e96dfff6653" + +[[package]] +name = "dtoa-short" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbaceec3c6e4211c79e7b1800fb9680527106beb2f9c51904a3210c03a448c74" +dependencies = [ + "dtoa", +] + +[[package]] +name = "futf" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df420e2e84819663797d1ec6544b13c5be84629e7bb00dc960d6917db2987843" +dependencies = [ + "mac", + "new_debug_unreachable", +] + +[[package]] +name = "fxhash" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" +dependencies = [ + "byteorder", +] + +[[package]] +name = "getrandom" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce" +dependencies = [ + "cfg-if", + "libc", + "wasi 0.9.0+wasi-snapshot-preview1", +] + +[[package]] +name = "getrandom" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" +dependencies = [ + "cfg-if", + "libc", + "wasi 0.11.0+wasi-snapshot-preview1", +] + +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" + +[[package]] +name = "html5ever" +version = "0.25.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5c13fb08e5d4dfc151ee5e88bae63f7773d61852f3bdc73c9f4b9e1bde03148" +dependencies = [ + "log", + "mac", + 
"markup5ever", + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "indexmap" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" +dependencies = [ + "autocfg", + "hashbrown", +] + +[[package]] +name = "itoa" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4" + +[[package]] +name = "kuchikiki" +version = "0.8.4-speedreader" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af921794306a6885fc7510012ce12015d2cf0bfdba82fb217b9c2caf324a4618" +dependencies = [ + "cssparser", + "html5ever", + "indexmap", + "matches", + "selectors", +] + +[[package]] +name = "libc" +version = "0.2.155" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" + +[[package]] +name = "link-cplusplus" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d240c6f7e1ba3a28b0249f774e6a9dd0175054b52dfbb61b16eb8505c3785c9" +dependencies = [ + "cc", +] + +[[package]] +name = "lock_api" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" + +[[package]] +name = "mac" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" + +[[package]] +name = "markup5ever" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "a24f40fb03852d1cdd84330cddcaf98e9ec08a7b7768e952fad3b4cf048ec8fd" +dependencies = [ + "log", + "phf", + "phf_codegen", + "string_cache", + "string_cache_codegen", + "tendril", +] + +[[package]] +name = "matches" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2532096657941c2fea9c289d370a250971c689d4f143798ff67113ec042024a5" + +[[package]] +name = "new_debug_unreachable" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086" + +[[package]] +name = "nodrop" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb" + +[[package]] +name = "once_cell" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" + +[[package]] +name = "parking_lot" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets", +] + +[[package]] +name = "phf" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3dfb61232e34fcb633f43d12c58f83c1df82962dcdfa565a4e866ffc17dafe12" +dependencies = [ + "phf_macros", + "phf_shared 0.8.0", + "proc-macro-hack", +] + +[[package]] +name = "phf_codegen" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"cbffee61585b0411840d3ece935cce9cb6321f01c45477d30066498cd5e1a815" +dependencies = [ + "phf_generator 0.8.0", + "phf_shared 0.8.0", +] + +[[package]] +name = "phf_generator" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17367f0cc86f2d25802b2c26ee58a7b23faeccf78a396094c13dced0d0182526" +dependencies = [ + "phf_shared 0.8.0", + "rand 0.7.3", +] + +[[package]] +name = "phf_generator" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d5285893bb5eb82e6aaf5d59ee909a06a16737a8970984dd7746ba9283498d6" +dependencies = [ + "phf_shared 0.10.0", + "rand 0.8.5", +] + +[[package]] +name = "phf_macros" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f6fde18ff429ffc8fe78e2bf7f8b7a5a5a6e2a8b58bc5a9ac69198bbda9189c" +dependencies = [ + "phf_generator 0.8.0", + "phf_shared 0.8.0", + "proc-macro-hack", + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "phf_shared" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c00cf8b9eafe68dde5e9eaa2cef8ee84a9336a47d566ec55ca16589633b65af7" +dependencies = [ + "siphasher", +] + +[[package]] +name = "phf_shared" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096" +dependencies = [ + "siphasher", +] + +[[package]] +name = "ppv-lite86" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" + +[[package]] +name = "precomputed-hash" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" + +[[package]] +name = "proc-macro-hack" +version = "0.5.20+deprecated" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc375e1527247fe1a97d8b7156678dfe7c1af2fc075c9a4db3690ecd2a148068" + +[[package]] +name = "proc-macro2" +version = "1.0.84" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec96c6a92621310b51366f1e28d05ef11489516e93be030060e5fc12024a49d6" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rand" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03" +dependencies = [ + "getrandom 0.1.16", + "libc", + "rand_chacha 0.2.2", + "rand_core 0.5.1", + "rand_hc", + "rand_pcg", +] + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha 0.3.1", + "rand_core 0.6.4", +] + +[[package]] +name = "rand_chacha" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402" +dependencies = [ + "ppv-lite86", + "rand_core 0.5.1", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core 0.6.4", +] + +[[package]] +name = "rand_core" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" +dependencies = [ + "getrandom 0.1.16", +] + +[[package]] +name = "rand_core" 
+version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom 0.2.15", +] + +[[package]] +name = "rand_hc" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" +dependencies = [ + "rand_core 0.5.1", +] + +[[package]] +name = "rand_pcg" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16abd0c1b639e9eb4d7c50c0b8100b0d0f849be2349829c740fe8e6eb4816429" +dependencies = [ + "rand_core 0.5.1", +] + +[[package]] +name = "redox_syscall" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "469052894dcb553421e483e4209ee581a45100d31b4018de03e5a7ad86374a7e" +dependencies = [ + "bitflags 2.5.0", +] + +[[package]] +name = "rustc_version" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" +dependencies = [ + "semver", +] + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "selectors" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df320f1889ac4ba6bc0cdc9c9af7af4bd64bb927bccdf32d81140dc1f9be12fe" +dependencies = [ + "bitflags 1.3.2", + "cssparser", + "derive_more", + "fxhash", + "log", + "matches", + "phf", + "phf_codegen", + "precomputed-hash", + "servo_arc", + "smallvec", + "thin-slice", +] + +[[package]] +name = "semver" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b" + +[[package]] +name = "serde" 
+version = "1.0.203" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7253ab4de971e72fb7be983802300c30b5a7f0c2e56fab8abfc6a214307c0094" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.203" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "500cbc0ebeb6f46627f50f3f5811ccf6bf00643be300b4c3eabc0ef55dc5b5ba" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", +] + +[[package]] +name = "servo_arc" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d98238b800e0d1576d8b6e3de32827c2d74bee68bb97748dcf5071fb53965432" +dependencies = [ + "nodrop", + "stable_deref_trait", +] + +[[package]] +name = "siphasher" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" + +[[package]] +name = "smallvec" +version = "1.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" + +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + +[[package]] +name = "string_cache" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f91138e76242f575eb1d3b38b4f1362f10d3a43f47d182a5b359af488a02293b" +dependencies = [ + "new_debug_unreachable", + "once_cell", + "parking_lot", + "phf_shared 0.10.0", + "precomputed-hash", + "serde", +] + +[[package]] +name = "string_cache_codegen" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6bb30289b722be4ff74a408c3cc27edeaad656e06cb1fe8fa9231fa59c728988" +dependencies = [ + "phf_generator 0.10.0", + "phf_shared 0.10.0", + "proc-macro2", + "quote", +] + +[[package]] 
+name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.66" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c42f3f41a2de00b01c0aaad383c5a45241efc8b2d1eda5661812fda5f3cdcff5" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tendril" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d24a120c5fc464a3458240ee02c299ebcb9d67b5249c8848b09d639dca8d7bb0" +dependencies = [ + "futf", + "mac", + "utf-8", +] + +[[package]] +name = "thin-slice" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8eaa81235c7058867fa8c0e7314f33dcce9c215f535d1913822a2b3f5e289f3c" + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "utf-8" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" + +[[package]] +name = "wasi" +version = "0.9.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + +[[package]] +name = "windows-targets" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb" 
+dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" diff --git a/components/web_discovery/browser/document_extractor/rust/Cargo.toml 
b/components/web_discovery/browser/document_extractor/rust/Cargo.toml new file mode 100644 index 000000000000..b96f953a1aed --- /dev/null +++ b/components/web_discovery/browser/document_extractor/rust/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "document-extractor" +version = "0.1.0" +authors = ["Darnell Andries "] +edition = "2021" +license = "MPL-2.0" + +[dependencies] +cxx = "1" +kuchikiki = "0.8.4-speedreader" +html5ever = "0.25" + +[lib] +name = "document_extractor_cxx" +crate-type = ["rlib"] diff --git a/components/web_discovery/browser/document_extractor/rust/src/lib.rs b/components/web_discovery/browser/document_extractor/rust/src/lib.rs new file mode 100644 index 000000000000..4db51614a395 --- /dev/null +++ b/components/web_discovery/browser/document_extractor/rust/src/lib.rs @@ -0,0 +1,124 @@ +/* Copyright (c) 2024 The Brave Authors. All rights reserved. + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at https://mozilla.org/MPL/2.0/. */ + +use std::collections::HashMap; + +use cxx::{CxxString, CxxVector}; +use html5ever::tree_builder::TreeSink; +use kuchikiki::{ + iter::{Descendants, Elements, Select}, + parse_html, + traits::TendrilSink, +}; + +#[cxx::bridge(namespace = "rust_document_extractor")] +mod ffi { + pub struct SelectAttributeRequest { + /// An optional selector for an element within the current selected element. + /// The attribute will be retrieved from the embedded element. + /// If not needed, an empty string should be provided. + pub sub_selector: String, + /// Arbitrary ID used for storing the scraped result. + pub key: String, + /// Name of the attribute to scrape. + pub attribute: String, + } + + pub struct SelectRequest { + /// The DOM selector for the element to scrape. + pub root_selector: String, + /// Scrape requests for the selected element. 
+ pub attribute_requests: Vec<SelectAttributeRequest>, + } + + pub struct AttributePair { + /// Arbitrary ID for the scraped result. + pub key: String, + /// The scraped value. Will be empty if attribute is not available. + pub value: String, + } + + pub struct AttributeResult { + /// The DOM selector for the scraped element. + pub root_selector: String, + /// A list of arbitrary IDs and scraped value pairs. + pub attribute_pairs: Vec<AttributePair>, + } + + extern "Rust" { + /// Extracts DOM attributes from the result of a double fetch. + fn query_element_attributes( + html: &CxxString, + requests: &CxxVector<SelectRequest>, + ) -> Vec<AttributeResult>; + } +} + +use ffi::*; + +const TEXT_CONTENT_ATTRIBUTE_NAME: &str = "textContent"; + +fn extract_attributes_from_nodes( + root_selector: &str, + attribute_requests: &[SelectAttributeRequest], + nodes: Select<Elements<Descendants>>, + results: &mut Vec<AttributeResult>, +) { + for node in nodes { + let mut attribute_map = HashMap::new(); + for attribute_request in attribute_requests { + let sub_node = match attribute_request.sub_selector.is_empty() { + false => match node.as_node().select_first(&attribute_request.sub_selector) { + Ok(e) => Some(e), + Err(_) => { + attribute_map.insert(attribute_request.key.clone(), String::new()); + continue; + } + }, + true => None, + }; + let node_to_query = sub_node.as_ref().unwrap_or(&node).as_node(); + + let attribute_value = match attribute_request.attribute == TEXT_CONTENT_ATTRIBUTE_NAME { + true => node_to_query.text_contents(), + false => node_to_query + .as_element() + .and_then(|element| { + let attributes = element.attributes.borrow(); + attributes.get(attribute_request.attribute.as_str()).map(|v| v.to_string()) + }) + .unwrap_or_default(), + }; + attribute_map.insert(attribute_request.key.clone(), attribute_value); + } + results.push(AttributeResult { + root_selector: root_selector.to_string(), + attribute_pairs: attribute_map + .into_iter() + .map(|(key, value)| AttributePair { key, value }) + .collect(), + }); + } +} + +pub fn query_element_attributes( + html: &CxxString, +
requests: &CxxVector<SelectRequest>, +) -> Vec<AttributeResult> { + let mut sink = parse_html().one(html.to_str().unwrap_or_default()); + let mut results = Vec::new(); + let document = sink.get_document(); + for request in requests { + if let Ok(nodes) = document.select(&request.root_selector) { + extract_attributes_from_nodes( + &request.root_selector, + &request.attribute_requests, + nodes, + &mut results, + ); + } + } + results +} diff --git a/components/web_discovery/browser/hash_detection.cc b/components/web_discovery/browser/hash_detection.cc new file mode 100644 index 000000000000..d2970c2faf0b --- /dev/null +++ b/components/web_discovery/browser/hash_detection.cc @@ -0,0 +1,1489 @@ +/* Copyright (c) 2024 The Brave Authors. All rights reserved. + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at https://mozilla.org/MPL/2.0/. */ + +#include "brave/components/web_discovery/browser/hash_detection.h" + +#include + +#include "base/containers/fixed_flat_map.h" +#include "brave/components/web_discovery/browser/util.h" + +namespace web_discovery { + +constexpr size_t kClassifierTokenCount = 62; + +// Markov chain transition matrix for hash classifier +constexpr float + kClassifierTransitionMatrix[kClassifierTokenCount][kClassifierTokenCount] = + {{ + -1.839225984234144, -1.8009413231413045, -2.5864601561900273, + -2.779077348461893, -2.41154163187108, -2.8701669802592216, + -3.010853183897427, -2.9831997811803244, -3.0258844950086354, + -3.028549665472028, -4.922879838696041, -4.503195676571833, + -4.598154161855354, -5.027344698225922, -5.217723368153992, + -5.366375752342653, -5.195909278900248, -3.2940850310881387, + -5.181468249477294, -6.110371416838184, -5.766202229535159, + -5.393352786265537, -4.920024681004049, -6.049294408676102, + -6.399015490326156, -4.690163484884073, -7.459851804835359, + -5.825337077308845, -4.675000760327133, -5.3635239851672845, +
-6.331713706782754, -6.169284763337202, -5.951151191317628, + -5.768078242705815, -7.332129653597336, -6.766704624275413, + -6.01222435569866, -5.268617743244878, -6.279416117827666, + -5.861972559492891, -5.838071468059843, -6.267250173459698, + -6.283981033030117, -6.454759516784266, -6.5195694451700765, + -6.83526729571271, -6.458157413359671, -6.359696067797872, + -6.250241538141395, -6.647172527007435, -6.973987597161365, + -6.4598607011160425, -7.685547265665763, -6.5912026847359835, + -5.794718807999361, -6.474629988987015, -7.135098972554303, + -6.880960523371609, -6.747429130747086, -7.733118660641065, + -8.398636456057481, -7.6323973914922005, + }, + { + -2.11332750230783, -2.3060786022344897, -2.3740965556720726, + -2.5398307619465608, -2.463417892707307, -2.130953109899374, + -2.8009573227909073, -2.9401508827086436, -2.748847204575824, + -2.8414237914951976, -5.010884149435608, -4.7716590964777925, + -4.868359687249472, -5.322380370933768, -5.037006822424893, + -5.279762905036231, -5.635270908532894, -2.9219817650555973, + -5.50179037155463, -6.5406369920023, -6.311742070591071, + -5.516975693542294, -5.7251712834206305, -6.083331008626942, + -6.409455776722731, -4.856277028595425, -7.356357334609669, + -6.255977591877104, -5.042089425488359, -5.723688703460408, + -6.9306288690141455, -5.879866127104103, -6.269949621534028, + -6.969512838475713, -7.475373123729266, -6.802062729887527, + -6.566109586768108, -6.661695002244713, -6.634070998648396, + -6.128265317507457, -6.0654489711795785, -6.79728322915323, + -6.99559673771516, -6.996654379273295, -7.671972036661277, + -7.840255377779173, -7.132455920432357, -7.233113706563278, + -6.737551430374816, -7.2706736012479585, -7.430290364348156, + -6.991903776514698, -8.559572276072503, -7.231105001502197, + -6.117005677632925, -6.669675284003577, -7.87022016648111, + -7.635233211015519, -7.18343028641663, -8.53463332772525, + -8.567176875457722, -7.573077285525099, + }, + { + -1.6313545940843748, 
-2.457209014039589, -2.565380346827546, + -2.767788786784649, -2.6386222432933284, -2.595661570703591, + -2.6893427324813977, -2.7198733792574403, -2.720533202987637, + -2.8467380805237736, -5.029918409868431, -4.178710463028381, + -5.07588590483658, -5.057436988529895, -4.899938336310833, + -5.271744381251988, -5.715419865537325, -3.1739071041721116, + -5.367102243680773, -6.224018730172038, -5.866973141306898, + -5.730015933050199, -5.360958031920617, -6.20201967617667, + -6.328452890969734, -4.837569916970598, -7.520833339090353, + -5.8325529174204185, -4.950768880962046, -5.339171801246006, + -6.500538943869661, -6.141626179853517, -5.826929659866057, + -6.371549418153106, -7.095602530626063, -6.863928261303842, + -6.26532998038965, -6.486382994639529, -6.568804701240378, + -5.928030195562504, -6.138501621140467, -6.375855129864318, + -6.672567424416514, -6.87690014131182, -7.500698430681298, + -7.486404619424152, -7.110483757622713, -6.948864783187929, + -6.7689909475117735, -7.44680608044408, -7.305175362028806, + -6.490004000406482, -8.278142463867583, -7.100043692626031, + -6.262757631704339, -6.785644569035485, -7.917859578552653, + -6.966883288690608, -6.7663306639529575, -7.827911341889713, + -7.709275282211631, -7.650980633730635, + }, + { + -2.2483023852308777, -2.2601431921520803, -2.543211818297523, + -2.6290975996017654, -2.597012293697696, -2.425038774903284, + -2.6464962853598264, -2.7268820909874645, -2.7170988469992086, + -2.6384412943652213, -4.8059842634060495, -4.801920790322523, + -4.881995682865955, -4.399130557719554, -4.977525552072196, + -5.254333597613871, -5.4861286120783905, -3.172333855510997, + -5.311009967578493, -6.3787775808955045, -6.060833750357407, + -5.179488625881995, -5.284849141539821, -6.185226477248653, + -6.342526765978808, -5.0477193482508715, -7.229508117457252, + -5.92913163950562, -4.781563610047119, -5.175693021409531, + -6.686598785853124, -5.953265540842049, -5.765108625614716, + -6.249636653394711, 
-7.053405713688212, -6.871084156894258, + -4.746700678954438, -4.435206614193364, -6.2492260600816145, + -5.437566316202071, -5.90294982336378, -6.5708663077474965, + -6.4774110336687505, -6.642451113295277, -7.490560332753359, + -7.638809897851904, -7.037026821604173, -6.792453878834064, + -6.557926605402683, -7.183535297458448, -7.556615154408894, + -6.675849393697125, -7.929241490870054, -6.920447880012594, + -5.937211886970303, -6.536908132533395, -7.266281062975223, + -7.138583909596181, -6.9407325511841, -7.611188320541975, + -8.063402408224212, -7.7539194100649205, + }, + { + -2.0350349956205984, -2.348602646771946, -2.41594411405956, + -2.651370395735765, -2.563410442434685, -2.706962611141542, + -2.7917112923152776, -2.7483543933766477, -2.6785026736567357, + -2.8111298756441037, -4.759837736546805, -4.20618015484438, + -4.787453629928326, -4.659538126267765, -5.015532806050122, + -5.056335259588051, -5.339468177159593, -3.040667184111712, + -5.203144447478613, -6.2264472230490675, -5.7097357671058155, + -5.585194192596145, -5.035187712987706, -5.542988699592872, + -6.206189420830567, -4.864552817623237, -7.387161020524848, + -5.130725109803191, -4.710070938057912, -5.015532806050122, + -5.563665939332496, -5.8412571088023135, -5.744609136156329, + -6.206575148897066, -6.842735398083401, -6.794361813799185, + -5.850672107736916, -6.088449236501511, -6.348519254409916, + -5.725588459198469, -6.064735388153122, -6.479956671206651, + -6.2209527037314265, -6.70094736915976, -7.3405268040104845, + -7.140183173146506, -6.467358872953416, -6.7233339772218645, + -6.184068000080206, -6.381516598393398, -7.406185056529545, + -6.368731036096427, -7.402351186018824, -6.6091509166536255, + -5.9623588984476195, -6.352529061520146, -7.133337180638517, + -7.24783614109371, -6.901266731308386, -7.234806640803376, + -7.432126848507545, -7.494477663675224, + }, + { + -2.0215812860615885, -2.455637406643175, -2.4330253990141046, + -2.6113902739466517, -2.6033787533338284, 
-2.6337646574813993, + -2.7111759743895725, -2.7471499108067454, -2.721672070749707, + -2.7644985821403028, -4.771735346669196, -4.967298931497775, + -4.806933772233218, -4.796223062219612, -4.702049659123834, + -4.615986584200591, -5.422222957790021, -3.024083897225106, + -5.341473161524181, -5.691932137893965, -5.870247680316981, + -5.6516120254419615, -4.85258639108815, -5.9684706438872475, + -6.205102036008844, -4.762283950640148, -7.106211026775939, + -5.754565801069223, -4.28552037163959, -5.083154204506742, + -6.514533306380853, -5.91282033178054, -5.801594696670316, + -6.211408552601299, -6.884100000075919, -6.5060032575028, + -6.051550266183317, -6.1528452495165125, -6.237040028392324, + -5.845664774991169, -5.83750622157883, -6.376088248894053, + -6.3936457603588925, -6.58430154569624, -7.071724850704769, + -7.213982427018076, -6.56730052905341, -6.608473779316545, + -6.403307671270629, -6.973970456584425, -7.485700648480843, + -6.5060032575028, -7.126013654072119, -6.592294210440276, + -5.525669597265563, -6.0726189713973024, -6.168884085714272, + -7.057863242618145, -6.702467385782517, -7.84259108644045, + -7.932742183434748, -6.889071012797939, + }, + { + -2.3515175517946965, -2.4895377785815813, -2.455794452354539, + -2.6177065611128625, -2.3916765698655924, -2.55893036105207, + -2.621684157393488, -2.6988517931658524, -2.630805311213068, + -2.6634082117951667, -4.639123440452939, -4.108116635400416, + -4.850674635561831, -4.879642999369406, -5.020506071184806, + -5.079702479761731, -5.3856932140647995, -2.922465393995544, + -5.272571266829349, -6.095451326503016, -5.703507123198923, + -5.5861660464475555, -5.144197286007818, -5.722031888813277, + -6.28586346876822, -4.872555039237073, -6.886991750607913, + -5.521389523166842, -4.712785087276782, -4.712557943607434, + -6.258849400356963, -5.781080159458192, -5.683518571573409, + -6.497978247391754, -6.801813554699944, -6.640350228026399, + -6.23670827447975, -6.480528140170159, -6.503409353749256, + 
-6.035279500709713, -6.187868456644626, -6.634900623258835, + -6.227883924459252, -6.665648019797556, -7.5457712876636505, + -7.422426740346121, -6.235666064901307, -6.769319721223498, + -6.343344078699495, -6.975730165249003, -7.353206363454935, + -6.635677322327007, -7.937813375439674, -6.762240045635436, + -6.147006740966892, -6.741296871790193, -7.715813334827543, + -7.183705495090042, -6.938232467377743, -7.593211012735211, + -8.021194984378726, -7.326496213127361, + }, + { + -2.404503840183417, -2.43761707422273, -2.4108252303609095, + -2.2999425986283435, -2.542843475674806, -2.6029429346724786, + -2.584141922741018, -2.5855127818195496, -2.5510561981552526, + -2.647681869737238, -4.782820501953572, -4.808208666935694, + -4.979563332219715, -5.008211525808261, -5.046432738628459, + -5.142457459370653, -5.485491564024772, -3.1144173778919617, + -5.417825855041213, -6.245547233803318, -5.938268453446564, + -5.963941744764968, -5.450843208258146, -6.059373833665894, + -6.223182084343739, -4.942745700375246, -7.208409268623897, + -6.038143491827719, -4.6277437642171355, -4.892513996642899, + -6.694766741586792, -6.117821568685912, -5.949820065666706, + -6.502648045547855, -6.835922059321975, -6.694766741586792, + -6.361312919572388, -6.403294392961231, -6.118769885494771, + -5.676056523505988, -6.406451957946552, -6.5244722133881785, + -6.6333977952105005, -6.6574953467895615, -7.573786078663716, + -7.446115322000925, -6.883692335590661, -6.903250344490318, + -6.3498910941692, -6.905331510694142, -7.588115904218541, + -6.879623153983935, -8.035692497676832, -6.939233062369824, + -5.959077527634608, -6.324513515108117, -7.549688527084656, + -7.120692921566297, -6.988939594552065, -7.891731808755324, + -7.727259481120012, -7.430143376434873, + }, + { + -2.2924692182301323, -2.450539739657926, -2.3769703926915984, + -2.532604294663703, -2.55393918066343, -2.609718771364289, + -2.6246425013179655, -2.661744915258029, -2.6011009513028984, + -2.6236780353341946, 
-4.669477044233787, -4.1521285006409165, + -4.955491059584871, -4.818890308190988, -5.030853379357302, + -5.002039294312803, -5.278250342600015, -3.076829272811893, + -5.307731406455737, -5.826171280206974, -6.022604681193609, + -5.8364861502494865, -5.222160875948972, -5.86734719439116, + -6.263453577434854, -5.1217643044016885, -7.32073988901422, + -5.758783848671988, -4.666476541899311, -5.259001445170042, + -6.374002568031498, -6.083417735921755, -5.695962812152638, + -6.490715185357981, -6.417147740477192, -6.55681384079998, + -5.431378631989581, -5.843302320137157, -6.369727752361356, + -5.638321273173884, -6.50456582229188, -6.275027142828877, + -6.210627212225794, -6.774279201200845, -7.54625217416398, + -7.377527245335997, -6.968174323388823, -6.961544464850153, + -6.5738576027632725, -6.808653231033201, -7.247759185607984, + -6.667823395181603, -8.06567857870257, -6.862038055489511, + -5.923075085846576, -6.772454383176216, -7.732534132174032, + -6.980444415980637, -6.935456028765855, -7.926350068393722, + -7.409760739910981, -7.357691884279851, + }, + { + -2.395140177772492, -2.3972905877762423, -2.4380054501613637, + -2.59514809923619, -2.5304957080424493, -2.553494441315067, + -2.592658670737186, -2.6234135177213647, -2.5068257242437872, + -2.493912396989218, -4.415128005729072, -4.8162395242315545, + -4.805681457561184, -4.959588437581088, -5.013259465745109, + -5.070148436368585, -5.778302005668334, -3.0289910877331776, + -5.527143864017425, -6.11105402601701, -6.123887370951674, + -5.975929983744964, -5.442482986164537, -6.151079378646925, + -6.120416006196215, -5.065655600056955, -7.071974876907828, + -5.957253719340739, -4.781773843113505, -5.354948164056644, + -6.520450343119245, -6.293473674919533, -5.817016517849025, + -6.85914069525248, -6.81356318675616, -6.777584585720929, + -5.97764525003172, -6.226877237430227, -6.02961643394924, + -5.9729353291005545, -6.418715298488868, -5.532354351604477, + -6.803710890313149, -6.889653320113874, 
-7.354541848692839, + -7.502757793000282, -6.938924369120657, -6.214291571987144, + -6.749334119435077, -7.237514956739779, -7.394443065445024, + -6.457482478746571, -7.947443614261728, -6.86121538479334, + -5.698583920623846, -6.845759126556648, -7.683896617545187, + -7.180188461548061, -6.757725777071325, -7.8704825731256, + -7.881944016644606, -7.107203568791572, + }, + { + -6.7847523397462, -4.956382101946025, -5.55168468117927, + -6.54694976738503, -6.679027997805541, -6.920449307304025, + -6.651631877885628, -7.028532297007316, -6.819873006155468, + -6.9668156836069475, -5.07824502580267, -3.6978375281608122, + -3.2158001218369128, -3.221520357616586, -3.5888259593254497, + -4.18118917224718, -3.0226572253435795, -3.971027081545084, + -3.562365839815367, -6.241860395850777, -3.738063429007306, + -2.286143322088719, -2.8154088123469325, -1.9366972011829824, + -6.402763696295053, -4.09673956284193, -6.7632686126933175, + -2.217315348390325, -2.7898081670861736, -2.2970370063901697, + -2.7943042308650328, -4.45525252224197, -5.6170558845712195, + -5.634999368405547, -4.204524838877218, -5.247540598697068, + -7.679731693977602, -7.496500850432476, -7.694782629326121, + -7.660949127015419, -8.444625153562612, -8.02242837580043, + -7.9107342568656245, -7.844229700045118, -8.496568431723452, + -8.90528981102539, -7.744710798608082, -7.796294560812227, + -7.4084137546993, -8.54315037261458, -8.96995880381818, + -7.702981796781846, -9.818649422090688, -7.982756374523151, + -7.087136791691802, -7.726484291615801, -9.302472289566692, + -8.264363329731667, -8.520253749464962, -9.573314897467998, + -10.041792973404897, -9.03352354731742, + }, + { + -5.530908158078372, -5.12660572425173, -5.285205739999349, + -5.724471900365845, -5.873312125055024, -6.16534879225732, + -6.148934708170209, -6.3610443530793574, -6.311033932504697, + -6.085846837289822, -1.9318371570117603, -3.9897678769234868, + -4.264721024425275, -4.508665668126993, -1.397989588361542, + 
-5.165043688633805, -5.1354703161064155, -4.710076673495516, + -2.2812710196631696, -5.744808587095395, -6.153180999051661, + -2.7741841902487896, -4.878049696777151, -5.0409257316634655, + -2.3898944653854226, -5.13595610674021, -7.841822106027579, + -2.8471437524600627, -3.4340095911141377, -4.619316513108799, + -2.3549200090866647, -5.921518406898233, -5.302848234969303, + -6.786138406296854, -4.207750888408638, -6.039012800612939, + -7.841822106027579, -7.704962923310382, -7.065002449743519, + -7.589296290560376, -8.260096172428325, -8.035112694192108, + -7.871344545293901, -7.140827013169332, -8.545938317958099, + -8.836893715754849, -7.993349965830133, -7.27788665694764, + -7.741565502287929, -7.888977243944857, -8.111485672976682, + -7.875096894912451, -10.091297373707675, -7.392485113590403, + -6.863268166852988, -7.610229500226842, -8.901213681542407, + -8.92241588919301, -8.313038591698524, -9.803615301255896, + -9.310181420201774, -9.00812940746704, + }, + { + -5.262771475279199, -4.784242541566078, -5.27970172852834, + -5.684918512820975, -5.945292208575153, -6.218979090786592, + -6.228233983716081, -6.354509212158639, -6.204315840281487, + -6.442281485719593, -2.526983467749325, -5.114296740462473, + -4.530006859098648, -5.422625873895182, -2.9651924820539826, + -5.615416770210166, -5.759915004501759, -0.738654569892248, + -3.9951800832331146, -7.496424744983864, -2.271649654828947, + -3.6688863968618732, -4.479075949205008, -6.406971077861438, + -2.7704548687771466, -5.064362048924221, -7.65209604855975, + -4.10990241837356, -4.3379100438872245, -3.144791234860585, + -4.257400003418622, -6.678114421469913, -6.866487433947222, + -7.697728752690109, -5.8045538215356425, -6.443871311173693, + -8.079096309219214, -8.153204281372934, -8.17767713789166, + -8.353264662504271, -8.864090286270262, -8.820319623214933, + -8.693771874337664, -8.686253041923635, -8.948055666387596, + -9.842684901480572, -8.909864867655205, -8.569719225667685, + -8.285794202093616, 
-9.08611589448313, -9.094531091408415, + -7.571541334321345, -10.622843459030147, -8.818180584966184, + -7.823031278179709, -8.265917771336019, -8.826764328657575, + -9.481851947393176, -9.0917181500318, -8.558120089824332, + -9.233307603986084, -10.138335143581529, + }, + { + -6.352576341887475, -4.375482811098582, -5.097247890129581, + -5.253867383491415, -5.300921501088017, -5.719261065235271, + -5.695077377144448, -4.664604980788103, -6.0509261540753565, + -6.1211148128546835, -2.6955840583648807, -4.0885439717448415, + -4.601612684872268, -4.3272866151564005, -0.8335121453767717, + -4.368698782518719, -4.841722524158204, -3.6648788033138118, + -2.5172646408008483, -5.8838060715240985, -5.03345350892465, + -4.275021730152804, -4.6405776530763765, -5.25643229403688, + -3.133213396702524, -3.881602832644344, -7.333792367288475, + -3.6375800962202542, -3.280343679384232, -4.168603006615407, + -3.0158762344692915, -4.957870530657444, -4.673357005846336, + -7.071279326505898, -4.732292586433726, -6.102987428262127, + -6.308187629826426, -6.692855118141101, -6.912844895031934, + -6.811275201992611, -7.387021770702892, -6.866694309094154, + -6.971677700053924, -7.120054614261141, -7.672123196561181, + -8.47105299909571, -7.06564216575394, -7.303313817631059, + -6.877175818825198, -7.588372692026248, -7.710844848156796, + -7.012073355807918, -9.487490679574803, -7.203069557208429, + -6.086293297912648, -6.450936411500557, -8.098699438256325, + -7.677018501414594, -7.313496820953744, -9.611543328244782, + -8.931691690166081, -8.112931574673333, + }, + { + -6.278777370845214, -4.701340011999448, -5.078865738235898, + -5.8830885176004895, -6.165050673343658, -6.274964098411205, + -6.212416394644692, -6.400778791567437, -6.741692863598321, + -6.816576094912807, -3.3928904712107735, -3.566247086795751, + -3.646180662937257, -3.523693462975528, -4.243767227003855, + -4.080529251966207, -3.795135190780714, -3.596123418433819, + -2.7581828008765474, -6.336206897605354, 
-4.412794119233608, + -2.7790238541384125, -3.7937492847610685, -1.8714655708089012, + -4.039189587014222, -3.917817104421266, -7.57147815542886, + -1.7211826197805813, -2.454732555101985, -3.258178620019967, + -3.831607640543702, -4.745787338078659, -3.8173867609921763, + -3.6132042458306612, -5.788388393117924, -5.078865738235898, + -6.509442541403006, -6.358094961721853, -6.982655840165303, + -5.955633264954192, -7.203135532877834, -6.519814112808369, + -6.508748303549079, -6.5805671825510945, -7.521390752829464, + -8.307422019239775, -6.987204748691672, -6.90870144071007, + -6.578332663638785, -7.263487211998134, -7.281252691733005, + -6.635300404511988, -9.747550691415777, -6.939413018359865, + -5.8716986926591, -6.717215468733563, -8.061948046761787, + -7.4055504084105825, -7.052394044113555, -9.471951436870118, + -9.440772718621666, -8.392393495069179, + }, + { + -6.019277337678054, -4.809610930457335, -5.353723551448762, + -5.676382755872694, -5.70371603264992, -5.986064759126338, + -5.8139317591377235, -5.978144349530662, -6.042881895447475, + -6.198360420377549, -2.2921047448703056, -4.34095023979577, + -4.345648109454055, -4.367598309292761, -2.0155741702111323, + -3.0132780221373907, -4.923111941566577, -4.563679111444075, + -2.223531060167143, -6.691102686207805, -5.506010794027101, + -3.0594865563826104, -4.849221407410736, -4.504665385487454, + -1.9294270482862954, -4.87704758753489, -7.8125932274537675, + -2.476262613634102, -3.9821539473427885, -2.8193399945915623, + -2.274864811699418, -5.85738068573614, -5.424700477670406, + -7.189965927648669, -6.389275800912233, -5.638934468614081, + -7.03745392086323, -7.341255375194895, -6.260863632640313, + -7.279063354938447, -7.679061834829245, -7.811259004440631, + -7.236144890792278, -7.5275849533863886, -7.494018129747305, + -9.1181262319292, -7.907769904821474, -7.576977708715964, + -7.061731490444577, -8.083943001314976, -8.105182737815888, + -7.709906510180343, -8.65367988774833, -7.560240916360441, 
+ -6.676421938949557, -6.335428678009885, -9.006382193489584, + -8.143473650809202, -7.5376758563683515, -10.154666091954931, + -9.349927845986523, -8.656780665426579, + }, + { + -6.45948206141902, -4.541389017548509, -5.0469147172405115, + -5.714893702071352, -5.93765911006474, -6.159588168280474, + -6.484829040506417, -6.54643138634695, -6.777423978144398, + -6.472536180151478, -2.0992585177991696, -4.351360197286945, + -4.810779680772453, -4.478347221038041, -1.057979923268165, + -4.503803429976553, -4.369600328148692, -3.1761274441594316, + -2.5925706700973334, -6.132316774171815, -5.0527495258256305, + -3.548319221739584, -4.665298532220545, -4.239092161682817, + -2.8823289365699227, -3.9799558061803935, -7.613527691788449, + -2.9149052754992715, -3.0856952412535055, -3.988705873974733, + -3.519423658731669, -5.380012776918168, -5.236242140888087, + -7.294418265937878, -5.378236234528395, -5.653900468202214, + -6.843339306558724, -6.1506932535462235, -7.23734725322546, + -6.366553451469326, -7.364460455464448, -6.868708500684183, + -6.652923233094938, -6.285920796794185, -7.434940854111734, + -8.332992180665768, -7.045904469629325, -7.3935637933916185, + -6.8666551118723405, -7.771896007559911, -7.6868589648739984, + -6.982133113853507, -9.79784886428876, -7.098382406877942, + -6.05045863483477, -6.643853499354418, -7.8988076492249855, + -7.62952803313489, -7.2191872449632415, -9.030593711575094, + -9.78510983851133, -8.512650620040239, + }, + { + -7.312010848930209, -5.9704021836204655, -6.3878250976208735, + -6.964626238636961, -6.946983784483292, -7.4124339237241115, + -7.3181741966378775, -7.680384414313297, -7.712878247789743, + -7.554150900935073, -2.446039713635401, -5.2426041270797, + -5.947299500896444, -4.663409565765878, -1.9484553212571183, + -5.736061416021277, -5.712018325729332, -4.9933347157551395, + -3.124667578606162, -6.881965531986077, -5.837272445819192, + -3.4071965596735665, -4.466780007835062, -3.7439634287862087, + -2.4245394005655605, 
-2.7481043832730636, -7.96925132102691, + -3.001717011027925, -4.3613498567188325, -1.0557054893757776, + -3.70597885137566, -6.306264213716016, -4.571775536818785, + -8.174476930790583, -5.896948417633875, -6.225037976827956, + -7.408704473732679, -7.482057817244962, -7.278478028822127, + -7.6772808871048746, -8.27951403118039, -7.874318428531343, + -7.723948471543991, -7.700568997316141, -8.91379761462879, + -8.048158534515101, -7.944081716081054, -7.980577820654186, + -7.290733450482923, -8.159308709317411, -8.547715834521, + -7.821812931153412, -10.179602578494553, -7.600205085285631, + -6.935111692528849, -7.541317747952087, -8.80008790436004, + -8.364689804291782, -8.08874001915325, -10.681459532277783, + -10.11675467483489, -9.167510387022356, + }, + { + -7.888212115175222, -6.644858293408979, -6.899926832152373, + -7.644887076927795, -7.6845832447435445, -7.532598742461448, + -8.35541557609491, -8.047237610771814, -8.039272966898704, + -8.151069111852731, -3.668932909455333, -4.782855975285661, + -2.6659061839419613, -2.8967154778709228, -2.0223904901109755, + -4.446771913881909, -3.000219123980515, -4.8870757508472185, + -5.930891109758962, -6.823383785692898, -3.2455278628878323, + -2.6715253337166462, -3.4540042231373063, -1.613293110190054, + -3.32378892967727, -4.249589417392827, -7.22956773062339, + -3.479217249500404, -2.6394006861426296, -2.505331420610824, + -5.608431497999796, -3.9128781321838506, -6.817651617834854, + -5.899129031817539, -6.838124773860243, -4.798082958969038, + -6.707198867504101, -6.862144714309829, -6.946702102337892, + -6.119321735858142, -7.3106242225785305, -7.047703884629593, + -7.1735103563413185, -7.1142937780684905, -7.8727953781736435, + -7.684274173500164, -7.105593531784784, -7.043298950514618, + -6.705339482889372, -7.153500510194026, -7.545964351744817, + -6.429130146514012, -9.208169648866278, -7.146978747947639, + -6.381569697965211, -6.863775814304458, -8.274721978073412, + -7.87653370028425, -7.678419897035876, 
-9.611509357662127, + -9.195484489338961, -8.64028017837942, + }, + { + -5.479179331884548, -5.058052917950845, -5.462604366790335, + -6.042749073104725, -5.955329948912708, -5.958323963125313, + -6.182592584875092, -6.134964535885837, -5.987223312688662, + -6.092926822690444, -1.4103668710151773, -5.3350267357418115, + -4.970584606980225, -5.289457186272422, -1.770010358993832, + -4.901554513033236, -5.37688758694302, -5.089624713826659, + -3.3520314691389803, -5.392942008652894, -4.734460344193294, + -5.4364649785873, -5.373540303482963, -5.098468738623112, + -1.787587681664556, -3.2734283276295595, -6.018573842572738, + -5.287155689284143, -2.4508369623232906, -5.268931950327691, + -2.1042922431924556, -5.627005270227098, -5.60465202444635, + -5.974953220096848, -5.917202163476144, -5.89303609762896, + -7.4048682036926285, -7.92851451588768, -7.027392005486696, + -7.609083745121319, -7.38594019380711, -7.373517673808553, + -7.797674914928869, -7.69894007424318, -6.9721762828842495, + -7.92851451588768, -7.9072371174403955, -7.498269378781029, + -7.24617825143195, -7.836141195756666, -7.972477639308797, + -8.030297210197622, -7.424161406627308, -8.130380668754604, + -5.985681292336846, -7.548459123304884, -7.876146530370365, + -7.886403030537553, -8.383576565135217, -8.509870290459508, + -8.104405182351345, -7.3612475812167375, + }, + { + -4.615704248273826, -5.059874046144977, -5.605352624766998, + -6.125932548297868, -6.262849675716521, -6.79239877505316, + -6.990929985784472, -6.841047905444769, -7.1584533214794765, + -6.9886390340379165, -2.1061188583709898, -4.694960766195293, + -5.294767434713961, -4.877455498156362, -1.7372915500183557, + -4.54913156517679, -5.204152324138941, -4.129384627657894, + -2.037945671503165, -6.073264053143508, -4.589028985169175, + -2.990753289694604, -4.778969963958759, -4.689721987339014, + -2.425686266522293, -4.746481928875957, -7.836127606736178, + -3.351158263461062, -3.2418172438356554, -2.1018059989954563, + 
-2.7036646563447677, -4.616611132654885, -4.89196974360223, + -7.672674534246606, -4.594182160042578, -5.702759391924034, + -6.928687676907251, -6.888038111045066, -6.879281938695605, + -7.0247524516915965, -7.464610089759517, -7.314413547152238, + -7.29014621148154, -7.293244587314209, -7.623992355607218, + -8.372647679876952, -7.108856380340105, -7.38925322665568, + -6.931385277679899, -7.915280634635728, -8.074744053353632, + -6.836124212582984, -10.164407149105006, -7.372318020898097, + -6.283267109907934, -7.0071153102054895, -7.702596214111655, + -7.848216404055182, -7.332806708624255, -9.549591811814969, + -9.295811291038868, -8.128717117098297, + }, + { + -6.86596902684244, -5.052082122851932, -5.150933488599681, + -5.292208274141252, -6.214682697483703, -6.745331904339627, + -6.68552490120206, -6.760045968476806, -7.227305715576431, + -6.950863382929711, -2.1215717821722215, -3.9228106947364623, + -4.840908762978201, -3.0664686008437925, -1.6983460154315366, + -4.061987675375524, -4.329017422576037, -4.468146834761754, + -2.0559567932823986, -6.284656471404593, -4.495653917304999, + -2.239702601986903, -4.059167724800704, -4.675038779649125, + -2.554078005507218, -4.208324039380938, -7.846798279854166, + -4.952711862629422, -3.1510988852700317, -2.942373372199169, + -3.3895078376634635, -4.604176621456193, -5.1138376420493366, + -7.9144140567925305, -4.640693910453494, -5.102920313867999, + -6.555881088992486, -6.826369644359554, -6.799686147757116, + -6.863359893400418, -7.558312671802701, -7.067443904093005, + -7.193279183729173, -7.1899393531824245, -7.705592764618944, + -8.457679157012771, -7.256276285899237, -7.186610639983997, + -7.0503828029559665, -7.591798854832744, -8.06246715675167, + -7.178062400973902, -9.768878866076495, -6.917516203514602, + -5.665418470446385, -7.260143611855673, -6.937390775844807, + -7.687607497719584, -7.3485921408206325, -9.2384661090352, + -9.464847655623185, -7.897922355187247, + }, + { + -6.774920173861653, 
-5.204290861824316, -5.43566671911157, + -6.296930641076284, -6.565626027570306, -6.7373590105822805, + -6.991252066666452, -7.063233360250443, -7.2676834795551315, + -7.1966044322333405, -2.083909994196951, -4.067585265323549, + -5.182321460673428, -4.835732283228186, -1.980079058156674, + -5.0283663440075825, -5.072641431371989, -4.6392056435133515, + -2.7086094993416667, -6.770304135451181, -5.008181972697398, + -1.1019554288598505, -3.2594369265598493, -5.6765883311043766, + -2.736784393019736, -3.364824539297133, -7.918041582638827, + -5.789527217909832, -3.4282422713486094, -4.498435097687583, + -3.6322465613937016, -5.355734234640129, -5.0804532195051415, + -7.17597860187687, -4.9759313787901425, -6.718299350460867, + -6.452973839496266, -6.266241964550636, -6.756169624516277, + -6.43275589071587, -7.149632742116933, -6.722481373881418, + -6.632646871978412, -6.5830057991087605, -7.441299494170494, + -7.5325638154552745, -7.295254585596671, -7.196283868037615, + -6.683920964952739, -7.4835386813502325, -7.587428922371031, + -6.992558402580507, -9.54477866233574, -7.0691430439756715, + -6.0008086042221205, -6.0079129112663265, -8.273962947039259, + -7.457398913104675, -7.183861348039058, -9.235518989239411, + -9.247910721534573, -8.527701618931673, + }, + { + -6.725963719695806, -4.710012269685353, -4.981543035572536, + -5.7693914443296395, -5.741238652017211, -6.34104225410854, + -6.461701921452123, -6.616580803658851, -7.050213688876461, + -6.932988782385651, -2.7442254377569215, -3.902092293780571, + -4.081087684032349, -1.8459531867558991, -2.260196124051401, + -3.8113055093318327, -2.1904142607255017, -3.5077174805325915, + -3.070625904070985, -5.688404814159279, -3.676818033328958, + -3.705969932950152, -4.175966474127823, -3.555777846153588, + -3.6599169276373167, -4.233380646142029, -7.565625193952616, + -4.758247211588438, -2.668505149650637, -2.593317887541856, + -3.982238613532973, -4.922572014698711, -4.457568202222798, + -6.922768742308341, 
-4.874097132395777, -3.7366670217502587, + -6.099051002533345, -5.950541812233132, -6.754321355464054, + -6.000835251677606, -6.818602573551164, -6.538250446625281, + -6.5136752899401404, -5.7389354920585625, -6.837502673710269, + -7.922940958163197, -6.4879709082904045, -6.559235554477551, + -6.415308634074, -6.963830229106114, -7.464928001069955, + -6.466545899251641, -9.489635645899149, -6.3635319575989815, + -5.527598474760337, -6.407003965795449, -7.463914829757348, + -7.418626873140783, -6.4607730713017455, -8.850715648219035, + -9.544876513609825, -7.984492491801492, + }, + { + -7.251231594323855, -5.21374696908488, -5.698223064924641, + -6.1375976604763816, -6.322511629217009, -6.768362468268195, + -6.494106816635844, -6.457670861321346, -6.860741199932812, + -6.755693520691695, -4.0580333694338515, -3.9411669781297105, + -3.7467312772977017, -3.0227837077197126, -3.61529958808048, + -3.574469853485688, -3.2007714947116837, -4.080705573827712, + -4.670471138732819, -5.7966968651863855, -4.155602316322404, + -2.9527040483599185, -2.862751521261682, -2.037725742388079, + -3.7818488691194445, -3.113228390392548, -8.164264994327704, + -1.8458475827404217, -2.5467378386179824, -2.7020967605017185, + -3.188124830525815, -3.6799312814828857, -3.4909044267832696, + -5.197365837878935, -5.202188998354364, -5.870321468395694, + -7.4721345906728445, -7.508740887837122, -7.308294052687071, + -7.524695844262695, -8.528908107915612, -7.92499206108361, + -7.684820971302881, -8.246116020758024, -8.52404191826444, + -9.16793672064164, -7.835273241415741, -7.178620119286367, + -7.559608480967977, -8.499112535221915, -8.875184344633446, + -7.775193695995215, -10.198665810583236, -8.01769231637253, + -6.730988787795164, -7.673144441641045, -9.323628945401888, + -8.330457169191774, -8.013022138590646, -9.77099654516406, + -9.908396414566504, -9.416350685452109, + }, + { + -6.383306695601549, -4.903894061372386, -5.294037540593016, + -5.450500954330478, -5.333223053427145, 
-6.308171725769449, + -6.710407117707228, -6.887542628987831, -6.847572675445971, + -7.035457528925254, -2.2105034037003235, -5.0202198363584465, + -4.228806005807516, -4.2610399361561155, -2.630714925229401, + -3.899363165669206, -4.911168632565807, -1.9598915311908123, + -2.357698156549484, -6.874278408705108, -5.4856719078268386, + -2.9453898821076563, -4.76865138002776, -5.849682782165729, + -2.1582691520417923, -3.13711767038794, -7.932068702852963, + -2.1341546758789423, -3.3759594869296796, -3.525124340216131, + -3.4812218611704324, -5.083766155674364, -5.7936328268294535, + -4.353576622920111, -5.907706290260866, -6.135534326871947, + -6.846532994187875, -4.609063889576182, -7.503287789431182, + -7.483445868632177, -7.2287173521187755, -7.853210127940644, + -7.155334806184752, -7.407188463381393, -8.090673733029602, + -9.081164896953064, -7.857960730699241, -7.896800564015505, + -7.262268886290009, -8.128645226782451, -8.177248142566354, + -7.485412437604219, -9.720755973044048, -7.9739596445621235, + -6.796539470357211, -6.907958941482774, -8.646989682962468, + -8.283628546124925, -7.797896489956591, -6.6686665609139695, + -9.771081056931564, -8.750720965163202, + }, + { + -4.685181991517679, -3.1394221383869034, -4.505733999919456, + -4.7128201143378075, -4.721709061755053, -4.800636320344724, + -4.867216645426291, -4.85864761270119, -4.820149134568306, + -4.881081685563463, -3.6610088191091923, -4.523883318425133, + -4.405422178721609, -4.511747247142908, -4.4973757423052545, + -4.410859930021738, -4.692381446660533, -3.970040309575452, + -4.061847858828575, -4.724689689893191, -4.678033998252392, + -4.350556098635668, -4.167208374486401, -4.450909776342023, + -4.644417387453407, -3.724277324458299, -4.630737283549326, + -4.509337607422754, -3.787718752781497, -4.309323510802315, + -0.6823718671879156, -4.449775346271411, -4.306372205167735, + -4.410859930021738, -4.920232593125288, -4.3914208729895154, + -6.478377211171838, -6.487035273914953, 
-6.801150603434889, + -6.531487036485786, -6.875258575588611, -6.039010551387992, + -6.9553012832621475, -5.362670489144429, -7.042312660251777, + -7.027274782887236, -6.983472160228843, -6.58757650313683, + -6.732157731947938, -5.820321350423163, -6.504579583565862, + -6.710178825229162, -6.395685495326725, -6.710178825229162, + -6.559138567816296, -5.802621773323762, -6.789316145787886, + -6.8131267944816045, -6.29749346914751, -7.1889161344436525, + -7.012459697102096, -6.888161980424519, + }, + { + -6.973841065843466, -5.090695336189662, -5.341561221116127, + -6.0115634498929555, -6.202945132409144, -6.628849553394376, + -6.911669409716161, -7.0009992844957765, -6.886081347969349, + -6.711294966663227, -2.260331825727248, -3.839334077735597, + -3.9667628422059718, -3.420241689905642, -1.8735787833427253, + -4.188499103605226, -3.691171678101274, -3.8593630820739957, + -2.3524981891706576, -6.231341006564324, -3.7662373591100775, + -3.6465568924129386, -3.4963200738799767, -3.4321214183238204, + -2.593050419274572, -4.346868167488547, -7.8364379197408365, + -3.990422275679548, -2.904818352752614, -2.3714612152432646, + -3.1378637759089556, -4.357037863124861, -4.651314292504279, + -6.760867254793699, -4.107313187039242, -4.3976920060255535, + -6.440759053046417, -6.465477098310146, -6.582182413654962, + -6.610599862587676, -6.978263755093254, -6.719449335464573, + -6.730879435939417, -6.514035039942995, -7.843934206718764, + -7.66305587798569, -6.780807453646412, -6.810345466083276, + -6.347157031161333, -7.321885120582923, -7.779113558339937, + -6.602957069625781, -9.658525717989464, -6.893034874465941, + -5.640541915214837, -6.61808064554899, -7.825297811959789, + -7.623352248005446, -6.729273803531229, -8.625119353004427, + -9.394209194493895, -8.033505455437194, + }, + { + -5.604855175474509, -4.462285490693087, -4.571350651692403, + -5.056493889833258, -5.072440143885144, -5.681064638387167, + -6.280536665394942, -5.74786868753166, -6.300402104581103, + 
-6.443541857970619, -3.08781816164275, -3.6971443171127936, + -2.396573090959646, -4.216785762291264, -2.055069601332964, + -4.2400357108180975, -4.510301966657801, -2.8725223515174108, + -2.985980079879496, -6.151031964062443, -4.17853136290412, + -3.739735408911567, -4.177743865892257, -4.758224336321508, + -3.406169247786832, -2.7407038589797272, -6.9336687884910315, + -4.884332887441078, -2.4403221799860293, -1.8263508274951405, + -3.4054696763623764, -4.562592776233452, -4.420217843796468, + -6.782722037918705, -4.9372619598181355, -5.457485531187882, + -6.256287610141627, -6.611108985431037, -6.565907361549403, + -6.49534726380481, -7.061567128112425, -6.599672447239019, + -6.7013251912547664, -6.80746927442344, -7.516987128232732, + -8.06324629910308, -6.809992050813993, -6.860502751231873, + -6.646463246903641, -7.480934599971211, -7.54322475844959, + -6.575185095427639, -8.985698479921282, -6.994899040702809, + -5.767168821265772, -6.530694737719788, -7.839611467995152, + -7.435063610819411, -7.091053743823944, -8.707954377459634, + -9.10781723001913, -8.370947238445352, + }, + { + -6.890344120648169, -5.02940421086846, -5.408867487897182, + -5.967857793669728, -6.047653312106489, -6.46237897058548, + -6.749954722267399, -6.888388607925276, -6.98955227182629, + -7.21719797180092, -2.4868466816371, -4.771074471877903, + -3.6481121610473157, -4.664656454469179, -1.6817234068168874, + -4.586866350603287, -4.845552155508254, -3.154508356873059, + -2.57905575846229, -6.254276196451527, -4.921506816489567, + -4.4705470709445265, -1.6402347840968394, -5.1280286653720175, + -2.882844123251368, -4.595878517780293, -8.231899529453244, + -3.131328577959143, -2.841674626039629, -3.3059284308174086, + -3.1776616460738145, -4.478455963747706, -4.881954395151054, + -7.651052083171771, -4.569886902888001, -4.27044926534142, + -6.859624364774909, -6.620591583748609, -7.035724951332805, + -7.0226528697654516, -7.663432064808081, -6.880725735179707, + -7.096836038369936, 
-7.001238775261635, -7.94609099384926, + -8.742881147291776, -7.2506609077408095, -7.3153280695559095, + -7.053568548019118, -7.728589206441718, -8.033869830584925, + -6.95515998225557, -10.250083546611185, -7.317576103473792, + -6.34314386192865, -6.83930777142871, -8.443181097280567, + -7.8493114938202, -7.476614929901965, -9.60010853791645, + -8.54213954241637, -8.677952686589895, + }, + { + -7.97530600380511, -6.415691397743754, -6.861539964259856, + -7.354178676035024, -7.5544322703441, -7.956843940965374, + -8.00285595557334, -7.941715059369074, -7.997284910523884, + -7.042078762400056, -4.593656623473984, -3.4358427231129607, + -2.813804032113097, -3.8443096208464707, -2.253236786554918, + -3.523372060796657, -3.771274119941123, -4.213005093082092, + -4.26676474950711, -6.604661414352346, -4.071781743539046, + -3.0636392294004997, -2.748766550188664, -1.5979085155684218, + -6.096125476162904, -3.8319741165296572, -8.437883856353995, + -2.362166563210972, -2.079080404566457, -2.423752746988421, + -6.1373971409737065, -6.112261605870961, -6.033619491127531, + -6.187046440930159, -5.880551407088361, -5.508879098944167, + -8.771401153617497, -8.582218506443597, -8.081702252336468, + -7.820268405959624, -9.01805471242806, -8.991966276343762, + -8.037776271878622, -8.339328192194554, -9.20125771484982, + -9.932145223392613, -8.711282886564021, -8.633361864553784, + -8.75433589306331, -8.724333642759511, -9.49070501282617, + -8.37573721938598, -10.194509487860104, -8.552742688310644, + -7.968342726152964, -7.956843940965374, -9.863152351905661, + -9.598817643450872, -9.08877973142313, -9.278218755985948, + -10.729432663205156, -9.20125771484982, + }, + { + -5.1527222081970026, -5.012025127950751, -5.39388426501389, + -6.217687012129963, -6.355002880531065, -5.6351154829069525, + -6.363993950090922, -6.903492332891861, -6.853964796193012, + -7.129436704737862, -2.71361594954537, -4.917813116205636, + -4.909342477322127, -4.834757647401019, -1.3606202815267094, + 
-5.049186428683174, -4.831934627067998, -4.02878090419557, + -0.9857025802248848, -6.9583248115172545, -5.928056254423655, + -5.1548164498001166, -5.532591229650075, -5.575217341325883, + -2.1159432194090155, -4.2897086796604516, -7.362881241689201, + -5.200780422733714, -4.007732553855012, -4.899317914996637, + -5.732412626945404, -5.409409253401815, -4.570479874572386, + -6.004563148213606, -4.806034206660294, -6.602559371961363, + -8.008146936015933, -8.264296783798311, -8.114951495245993, + -7.067733915701082, -8.653122572902513, -8.40456220860418, + -8.35558820400772, -8.362968311305343, -9.101577865942044, + -9.242891399008622, -8.693532111240389, -8.468328826963168, + -8.348262163915647, -8.71436619814323, -8.123659823137777, + -7.872345394856871, -9.1093600063841, -8.693532111240389, + -7.279281672853908, -7.536521246398329, -8.924437667890087, + -8.99854564004381, -8.061492916721225, -8.91796515338447, + -7.579574252897618, -8.90514446495541, + }, + { + -6.625065979493934, -4.788580474517243, -5.256407824340383, + -5.37832984858721, -5.2038978790684185, -6.079823689638168, + -6.317086566978479, -6.646320764762257, -6.72855886299923, + -6.886116701148103, -1.505097000684665, -4.82588581184553, + -5.253602398681012, -4.989577089424305, -1.5758741362397704, + -5.185784176269271, -4.916501452893276, -3.8159863951228723, + -1.6041923545980379, -5.633841418592661, -5.7199839934992545, + -5.2161585690547705, -5.369003667873634, -3.3721189713516244, + -2.661522576130661, -4.670108701549678, -7.227790153984301, + -5.183916064386551, -2.494756625753895, -3.869077211017653, + -4.27104038948766, -6.0954898063825675, -4.71167529774246, + -7.136943681210728, -5.987593808088792, -6.590446143960144, + -7.403791666759668, -6.814987319540992, -7.030531087602099, + -7.451894781778061, -8.262224216961, -7.8389014914528286, + -7.199379152151521, -7.871278420177289, -7.002535480112671, + -8.630549778119708, -8.06204504276704, -7.169938203345767, + -7.199379152151521, 
-8.169374062997537, -8.264930578558742, + -7.090662928646722, -9.550754409315, -8.033689817011913, + -7.078178371984477, -7.674437552058881, -7.921898411023797, + -8.116604027017473, -8.273093889197904, -9.39660372948774, + -7.831846788472939, -9.063739434242727, + }, + { + -5.33959692367457, -3.901875433911095, -3.3263523840890916, + -4.682859878096868, -5.0569946532038434, -4.833678156338227, + -5.439237690659609, -5.4278954140556746, -5.662170026427569, + -5.462315637942154, -3.751806551579704, -4.660118258928943, + -3.8333548860836233, -3.8811153084238335, -3.4958051088141735, + -4.553750303632283, -4.534932311997922, -1.8458046740483725, + -3.0281790617647277, -4.436160927561686, -4.943995470762378, + -4.549402848736927, -3.9218705969950047, -5.328568099202016, + -4.452091297246621, -1.4302390343932077, -6.080922600826194, + -5.1148485422425125, -3.3219189169449392, -2.621905648275213, + -4.3789508674983555, -4.262632175345694, -5.088920047216344, + -3.3849839728869298, -3.307281062205608, -5.974439120423745, + -4.379829216259262, -6.791884017661267, -6.8166991867809905, + -6.068955964208674, -7.1565271312491765, -6.683586642330131, + -6.801736314104279, -6.532793078538745, -7.518256845849533, + -8.152370494825284, -7.391505140210389, -6.7194113688482755, + -6.416271872702784, -7.279027156783699, -6.4871905993829495, + -6.66039719741122, -8.204669994228134, -6.878895394650897, + -6.094660190607558, -6.427558674237414, -8.084652320770335, + -7.339319387039819, -7.347830076707727, -6.950948712290955, + -8.303341521735163, -7.468823387994359, + }, + { + -5.431835629645527, -3.900968951139421, -4.151004765492447, + -4.646528675532386, -4.521804796114552, -5.283128043243618, + -5.29007437093391, -5.66335521876192, -5.740643416211578, + -5.366660147370001, -2.670098748915231, -3.286242237340954, + -3.3585141055181613, -3.755602707715123, -2.5586532961090027, + -3.6926656242369105, -3.8330192649391925, -3.254475467118251, + -3.8290307266838655, -5.019290301102854, 
-4.363753678748955, + -2.9684754808494547, -3.151618081678543, -3.50657051212598, + -2.895083629045828, -2.91665360144011, -6.285849431998168, + -3.6228229122699127, -2.152751777383897, -3.3162167836347605, + -4.2488413058475105, -4.652746019284857, -3.9288228848114928, + -5.349158624766704, -5.572231075081914, -5.261791793324819, + -5.624778734167247, -5.278347157643276, -5.3043854022704995, + -5.210237912616575, -6.346428699255328, -5.93048158286466, + -5.615084400228674, -5.89255288484716, -5.916841563358977, + -6.777270925569916, -6.237681480853062, -5.483828321441287, + -5.599254877723378, -5.735607736245962, -6.332645938959907, + -5.443956990177872, -7.787390135984574, -5.974679054270741, + -4.487700164671342, -5.484479363130949, -7.787390135984574, + -6.624646575217306, -5.915838554247808, -7.256570295689133, + -7.872330812633658, -7.3300889926342885, + }, + { + -5.8962959205599565, -5.022482812723318, -5.262967235210922, + -5.638331471894197, -5.778457744433211, -6.3109845765456285, + -6.29163873510587, -6.56832765910669, -6.49255510063436, + -6.3650100589758205, -2.863492599093995, -4.089024629780227, + -5.154949557468251, -4.484159695248834, -0.9343644420039627, + -4.498161263246394, -4.6279862283262885, -3.982370058053253, + -2.2035282537140866, -6.199482179547703, -4.571665803362194, + -4.084960905072187, -4.755413554835734, -4.800997636581884, + -3.6971982708382995, -4.566795129611478, -6.871351961660363, + -5.099526707333125, -4.135788994836005, -3.1706024468180507, + -2.212641008744242, -5.078715683958774, -3.5811841558979225, + -6.741940888008861, -4.856098125468122, -3.980907107020626, + -6.712325967469931, -6.644006723492454, -7.153784678226001, + -6.945055157447498, -6.898020208742524, -7.021563043483541, + -7.173168545047049, -6.854391346353318, -7.669859657878135, + -8.284314569862415, -6.895321151773359, -7.513205847832758, + -6.795366054682441, -7.556635405760094, -7.827916592672458, + -7.241272132349116, -8.35137880044296, 
-7.187505708193456, + -6.1188193168155305, -7.139919638088829, -8.075324765986299, + -7.379987004046535, -7.364785842714484, -8.480789874094464, + -8.527936652520165, -7.717773013574032, + }, + { + -5.1596391409467905, -4.446625879300866, -4.792661465824471, + -5.092676376197713, -3.486026851337387, -5.293817992318437, + -5.4207097441730605, -5.031624556083574, -5.433394903700377, + -5.123410202650332, -4.715036303793935, -3.7021536789683007, + -3.7870302329121537, -4.088809455118969, -4.812132309601868, + -4.898021981482298, -4.704305701448848, -5.045386241156256, + -4.694501701352228, -7.439823297251669, -3.3979941838940024, + -2.7387677229303073, -3.4751598040428164, -2.0375451283648167, + -7.179408351140228, -3.811103778633784, -5.971540702552751, + -2.9729698958508197, -3.825149191015764, -4.478865937874288, + -2.6641628726801234, -5.3193066285353545, -5.996634270150407, + -6.375549146164451, -6.785390628872414, -6.572829186888741, + -4.124487206195906, -4.535138151336436, -3.7135200209337587, + -3.96463850525401, -4.554386339641352, -4.765674647825141, + -4.429638997109137, -5.277664924510665, -5.02086262836912, + -5.937153360243275, -4.7565501004863675, -3.693750268171921, + -3.749678820831146, -3.176252392796863, -5.722496422806384, + -4.546642548744449, -5.417354036326088, -3.1803577086920454, + -3.749362815140414, -4.066652137558128, -4.660532401294013, + -5.1570534898617675, -5.430844966067104, -5.284241491875228, + -5.5748604240003194, -5.63236491848051, + }, + { + -2.1543734650260515, -4.349659150265929, -4.518901599180162, + -4.334150339570116, -5.146931928181808, -4.726677895862385, + -3.8738862022041083, -5.547738126032732, -4.965611539500847, + -5.329386916158828, -2.0510193107742145, -7.017067715762613, + -6.5514024881154755, -6.782731624016124, -2.1387030138865364, + -6.887331394106641, -7.26098839501858, -5.915573580719614, + -2.792353319034718, -6.475549145597215, -8.196805549243091, + -2.7290425937620233, -6.57338139483425, -7.482751683863832, 
+ -2.7069370493135643, -6.6217549791184664, -7.959675755954141, + -2.6929063910157374, -6.402701413055781, -7.378081061299943, + -3.038722067403394, -7.146279447242619, -7.442205589469482, + -7.777354199160186, -5.436617340201765, -7.9268859331311505, + -4.21410095816366, -3.7159537926574395, -3.274052690014408, + -3.8709022387814955, -3.903841048493916, -4.907248138723605, + -5.564815386850924, -5.634025390029099, -4.794669640998671, + -6.37044055083756, -5.773624479216047, -4.984713243886536, + -4.705003878814023, -5.884409585684786, -4.869643914101749, + -5.8105008762596295, -6.426399204485604, -4.552569937605819, + -4.688840192155229, -5.1496039404596194, -5.387063525747035, + -5.940338138344011, -5.40158748887255, -6.8282736444630405, + -6.302506440698851, -6.49077786629904, + }, + { + -5.042027972011844, -4.299718672656642, -4.243324982365076, + -1.9825142998026875, -4.725280240738409, -4.751810055033314, + -5.528415080508104, -5.372269658882421, -5.58057814379269, + -5.634199235231788, -2.092585085927561, -5.359310514239916, + -4.924244936355461, -6.182928172882705, -4.358300013368391, + -5.644135441890917, -5.3193472574280785, -2.4216711986248063, + -3.5454438576615357, -6.6553797837670805, -6.406683158028254, + -3.42063060974259, -6.230496589801815, -5.313180742512414, + -2.1141313157147192, -6.38251279710044, -7.199431055161392, + -3.210947634389801, -4.823896962521179, -5.599497627038855, + -3.7500872171087103, -6.903338504201014, -6.960201366095558, + -7.542682978767983, -5.613234106766742, -6.36476085164199, + -3.6714819678600925, -4.875454772186028, -4.20471280907303, + -3.884017303250544, -3.4828969984685907, -4.20471280907303, + -5.449056802096191, -3.411103801364231, -4.476181634325785, + -6.299489461288767, -4.386290645201218, -5.042027972011844, + -4.789932598747867, -5.333885415874105, -4.1803254304220925, + -4.950988261330972, -6.251222720318932, -4.73156059252596, + -4.355150404465494, -4.750641830601388, -5.174666905853347, + 
-5.78646844695934, -5.852762894799176, -6.180486169227153, + -6.353098911894147, -6.096324376905443, + }, + { + -3.621153540136533, -3.927545190670303, -4.761298021301781, + -5.2327070159671, -5.390836480884121, -5.489330016767771, + -5.419938023343771, -5.638503085521277, -4.333907884333748, + -4.5925613698517225, -2.3774699118392917, -7.240027999198052, + -5.856199730573355, -5.309006462636489, -2.1188535037876086, + -6.988713570917146, -7.259929153515347, -5.733217613565709, + -2.1988937611323722, -6.914879308906094, -7.285375819176511, + -7.471829613255376, -6.297483936486472, -6.369085087302357, + -3.0144535413397078, -6.131611710433321, -8.510182710405289, + -3.290870578196505, -5.79050447130796, -7.191941812530414, + -3.661034886200146, -5.933160771709483, -6.992552347224311, + -6.430741168725453, -6.006448567479316, -7.6405790925037875, + -3.820123280601132, -4.60874181430768, -4.552214016916188, + -4.152125265844404, -1.8440074384079603, -4.33229280995845, + -4.748982594711727, -5.590043992369601, -4.083551769494654, + -5.885005729822436, -5.570371226770897, -4.35431953325285, + -5.037889407059404, -5.600505333614971, -5.2234842891983275, + -5.239347146606377, -6.012203979049936, -3.2506791222758458, + -3.493367719618491, -4.386449689183688, -4.5929103490909995, + -3.6635121789617022, -5.259536257638402, -6.071656848963408, + -5.913358144413303, -6.637031915632404, + }, + { + -2.697794466436189, -3.604662393389068, -4.011263957500705, + -4.728080882146555, -4.781372179196779, -5.33155887684335, + -5.439355447299057, -5.586922158039964, -5.370633615250003, + -5.513150772897161, -4.398260316869362, -5.11207163673225, + -4.190082883743458, -3.4501177344839546, -6.597640067453566, + -5.490434815062624, -5.150911470048515, -4.837107457567751, + -2.85030853825846, -7.320346050255055, -5.997701716211621, + -3.4753179136511876, -4.2866759373606715, -3.397053452568678, + -6.703360230660674, -4.113003141200578, -6.444877312901155, + -3.074043686998346, 
-3.452623303723489, -5.193702907090925, + -3.9327969395172757, -4.131242383720524, -5.418238523858134, + -3.7478131640115406, -6.184652086682414, -7.063123185287982, + -3.7394230709797665, -3.4179694218787144, -4.267288710081795, + -3.0709940324857143, -4.263518677341253, -4.701005637323521, + -4.417094629495229, -4.569236359692398, -4.473887479448818, + -5.8432973258667005, -4.728080882146555, -3.754953367214181, + -4.1263095154005, -3.415012032977706, -4.81618158386264, + -4.691820172692138, -5.666572324000903, -2.795735372125381, + -3.2437129699651224, -3.9019638438387987, -3.582993942690341, + -5.112804505947904, -4.894364079117085, -4.722110715160052, + -6.056219323109372, -5.5022692727096265, + }, + { + -4.619141531251806, -4.242478414047348, -4.655112011634125, + -5.260049492712331, -5.018630839221239, -5.434973330528597, + -5.291682300504695, -5.652426966065128, -4.133100006910822, + -4.834429582625707, -1.998550183209591, -5.7433143168994505, + -6.845817661060527, -6.562531200081784, -2.4549848852751466, + -6.626189051853761, -6.290387731057782, -5.567486289416045, + -2.141146196531367, -7.242549089199937, -7.104679294976816, + -2.8517566389677937, -5.192894636686687, -7.082206439124757, + -2.3092018462234853, -6.71448165899944, -8.017189304011065, + -2.1705324264442973, -5.663122255181876, -6.392769837961348, + -2.584044215625091, -7.28700085177077, -6.816997222525035, + -7.454881724409931, -7.498844847831047, -6.3210545294507545, + -3.9234738704665255, -5.210974225191004, -4.156261002958894, + -4.841496749848799, -4.597299789336757, -4.421170202790724, + -4.975365923256807, -5.672123216040852, -4.474317672329112, + -6.090327349926446, -5.367408011032831, -4.491383872387231, + -5.280618880960439, -5.815780620152271, -4.640826054386648, + -5.541761398177608, -6.307308608546119, -4.360911011272527, + -3.968855752517438, -5.060904184340462, -5.092811178129705, + -5.9935197902566175, -5.119506808592894, -5.086750553518014, + -6.052587021943599, 
-5.38489332537385, + }, + { + -5.261046281621696, -4.9148297535258045, -5.093521542269181, + -5.160962823064713, -5.492794680129424, -5.583178741597693, + -5.608843109973599, -5.828958322395465, -5.818217080564052, + -5.6351835265880075, -1.9431063436877503, -5.762127613913009, + -6.364566555601998, -6.310693565661846, -1.465488331318766, + -6.653479397439911, -6.983078754773362, -4.701955198551832, + -2.920630198813372, -7.381717897811127, -7.114655112562081, + -3.628618850962897, -4.741931970713727, -6.050483313874263, + -2.961525855445553, -6.683481647743711, -7.823550650090166, + -2.1208799042866553, -6.236585593508124, -6.735576759627112, + -3.3232120191551315, -6.891511561247872, -6.08336298799342, + -7.9413336857465495, -5.554867108771802, -7.122498290023107, + -4.193090909121505, -3.901105949199606, -4.152689701478908, + -4.935827900365578, -3.224590180736388, -5.347946392964479, + -5.205112607677643, -4.550306897288771, -4.485129674572183, + -6.303724896345753, -5.654950567328784, -4.928779413034516, + -5.0770940681272325, -5.30867433061095, -4.570963767879653, + -4.7737511552658995, -6.146602446129337, -3.6472116840395126, + -4.583317229489316, -4.247999881283233, -5.050961927850385, + -5.554867108771802, -5.239253651630843, -5.752077278059507, + -6.0889495947020595, -6.114482896707225, + }, + { + -4.8357984388643835, -4.657872493543467, -5.438431611883497, + -5.481947249319659, -5.480513557317811, -5.739017089171186, + -5.670890393572083, -5.859121927588727, -6.047318448825703, + -5.930658377913943, -1.7234534048021994, -6.808376835001858, + -7.018097365983928, -6.515303913915201, -2.0800327647225068, + -6.985307543160936, -6.434021280478344, -5.690138581877, + -3.0546210789668655, -7.433612809945593, -7.609892052283585, + -6.846949109788098, -7.145930737493813, -6.771237288052402, + -1.3242305561151027, -6.88123818326673, -7.934388097858083, + -6.599387031125742, -6.8029859863669815, -6.569147145936024, + -2.3927324727103167, -7.200418922777882, 
-7.33738477785104, + -7.62201341281593, -4.893045313052039, -5.366877919534872, + -4.1899951004809015, -5.453656819480563, -4.694403309779647, + -3.431804500645092, -4.0124147615767685, -5.525942614164559, + -5.309719505694924, -5.568828205702649, -4.124028746206436, + -6.415604553692113, -5.4275025413513065, -4.984699762805498, + -5.237511197353998, -5.157641365469638, -4.101047614464405, + -5.085610237260755, -5.550223017871613, -4.869440755282619, + -4.489173830779152, -4.234321454353582, -5.041242413079193, + -5.780689785571754, -5.231908941805328, -5.964947451392574, + -5.752089170738539, -6.139854701747302, + }, + { + -5.508465007700513, -4.839258034686576, -5.109472414097966, + -5.519275923804729, -5.707703038161169, -5.439902336263216, + -5.7235763873174585, -5.821556795677663, -5.734300350680434, + -5.610247702010455, -5.910709501108635, -5.7235763873174585, + -4.435262434557772, -3.8610478472011964, -5.758858201462099, + -6.0632537548876195, -6.044700346991872, -4.117878794499082, + -6.575328598054043, -7.243783166023616, -6.403478341127383, + -5.083829983484629, -3.317712944869911, -1.7163886672099804, + -5.622223893057171, -5.543479525545615, -7.373836294271814, + -4.235327303674574, -3.4757995308184078, -3.8216678856997803, + -6.863010670505823, -5.619817153026606, -5.966922645949188, + -7.096625521687328, -7.1845942946332855, -6.5629060780554855, + -3.9611313057581787, -4.164449795424729, -3.6745940531223313, + -2.809054169189011, -3.303860323066671, -4.356804692686432, + -4.075943929085389, -4.993775889109478, -3.5055329961999946, + -5.4956443192714515, -3.896020865817486, -3.604273312212492, + -3.5622476861654384, -2.789152302491872, -3.9102728885246876, + -3.0660244750660355, -5.631909198791635, -4.404134333185408, + -2.9033092297609358, -3.155554830818952, -5.387104150696246, + -4.341290047595106, -5.258585299895267, -5.109472414097966, + -5.821556795677663, -5.14024407276472, + }, + { + -4.364789554126129, -4.918384854279969, -4.918384854279969, 
+ -5.1613310328903585, -4.587530609962979, -4.784853461655446, + -5.006160465162354, -5.0403784224726955, -4.994002260682545, + -5.402493089707247, -1.4796200682550344, -6.181000119218348, + -6.701776073837507, -6.252825853789604, -2.1508860947624866, + -6.086590434747274, -5.772240115213331, -5.482914657017821, + -3.9652464014134288, -6.426672783592586, -6.401671481387169, + -6.285261129542758, -5.890845857621178, -6.3772800282630095, + -1.9660163181458141, -6.770768945324459, -7.107241181945672, + -5.362398632505369, -5.920923312858457, -6.7530693682250575, + -2.0597919147266994, -6.701776073837507, -6.505661194911217, + -6.5476253940102485, -6.637237552699936, -6.7530693682250575, + -3.9354569646113213, -4.092932522818745, -4.814706424805127, + -4.961309898997003, -4.355174095426687, -4.949681861001884, + -4.869194610089197, -5.046727650151355, -4.8507000274530325, + -5.115948027042541, -4.814706424805127, -4.689097748317353, + -4.863875448611597, -5.043547997233975, -3.6732539774605253, + -3.9481153614832447, -5.340799520701906, -4.835115296436334, + -4.068688911208753, -5.024679512929592, -4.753487802377227, + -4.994002260682545, -5.247044551164388, -4.698046353893368, + -5.307182909902921, -5.227776132298511, + }, + { + -5.115394338583977, -4.7561420736309445, -5.007842676853897, + -5.66123448019507, -5.033665707964376, -5.645764558422938, + -5.535809351386801, -5.907471144471458, -5.828086115216893, + -6.105296887801378, -1.6832402388074508, -7.437524027650993, + -5.590313253886724, -6.862982589498895, -3.0491034044747836, + -5.586172461220692, -6.632458930887062, -5.218606625703514, + -2.4710973040085813, -7.398809515470303, -7.268447697607059, + -2.7116510959031834, -6.686853002952861, -3.7188303108266303, + -1.9217106058640063, -6.098376444956805, -7.548749662761217, + -2.711068175455316, -6.088084758920257, -7.1630871819492326, + -2.4953178542046213, -6.5371487510827375, -6.531815405107375, + -6.592107635363495, -5.218606625703514, -7.464192274733154, + 
-4.025855346121542, -5.098759278079445, -4.812664759541027, + -5.369662433402117, -3.845996309584077, -5.031290409935469, + -4.627804324836165, -5.549657248245595, -3.554062953084761, + -6.151486270270753, -4.8426621708268565, -4.640563668589926, + -5.069989313821911, -5.128381534110789, -4.6191257693799175, + -5.164313543336852, -5.9721996171719525, -5.146851716173189, + -4.482065653234942, -4.222339216728002, -5.190396010216277, + -5.978297197040071, -4.681519433426748, -6.057830350562455, + -5.619789072019677, -5.749664153441089, + }, + { + -4.867406343870729, -4.4533740975545415, -4.953434176237852, + -5.384635998990067, -5.328952912513263, -5.6593669190947695, + -5.597013688377717, -5.757806991908022, -5.710273291442284, + -6.117674508577314, -2.0752354231922467, -5.330274789771179, + -7.239817294655618, -4.653127950557675, -1.9861891376978962, + -6.498730651806554, -6.748830013109291, -5.799220777458779, + -1.8880861157511444, -7.060476365999801, -6.592340150928861, + -6.615981913985901, -5.98705432616025, -6.055682832549346, + -2.2283490142554703, -6.244583360441343, -7.060476365999801, + -6.9543698600052535, -6.005072831662928, -6.675112437212724, + -3.324941220245065, -6.737960340872387, -6.846328377594169, + -7.276184938826493, -5.466076330930241, -7.601607339261121, + -3.2284338811241238, -4.7571452826690335, -4.2772171644143535, + -3.794375211195705, -2.9321390456478555, -4.655146114713912, + -4.556474587206883, -4.658518799192551, -3.9133691163329902, + -6.159223511490186, -4.874928318325417, -3.9851607325065794, + -4.593769151695071, -4.865742449976033, -4.192747084853396, + -4.158562847663465, -5.583291346122616, -4.6687056444995445, + -4.403457644910037, -4.135470875670939, -4.338591952168042, + -5.484425469598437, -5.10713123845697, -4.666659956776251, + -5.507845743806536, -5.56639905255811, + }, + { + -5.367738120634485, -4.778432275797196, -5.153988612512787, + -4.9880684755260525, -5.022992659762126, -5.315533041364984, + -5.937669186281741, 
-6.10637419308453, -5.985149724514735, + -6.0666028696576335, -1.4917500304139726, -5.449683054485573, + -5.538947192058466, -5.596026010244295, -2.1356770840200325, + -7.153812526227932, -7.094389105757131, -6.013018623523528, + -2.2775252846627545, -6.010847070010019, -7.246405313055757, + -6.755173383190167, -6.697507741340358, -5.530860894627108, + -1.9981251565724831, -5.698525293621089, -8.083802102460249, + -4.860110306811463, -5.882753428685626, -6.945748081740843, + -2.8863534391722627, -7.100819996087421, -6.962461562714583, + -7.231590227270615, -4.4224533361891325, -6.0688990819179836, + -3.292438554454217, -4.782234561746934, -4.459461169483883, + -4.359771877270199, -3.9163352754091854, -5.15214868559078, + -5.0863471755948115, -5.622152314836516, -4.041658687615878, + -6.321199217523649, -5.22160122153078, -5.082053480720112, + -4.742403534262838, -4.896452531238822, -4.577416633798337, + -4.325814840936012, -5.22258692885554, -5.570564946046022, + -3.7753199382595333, -4.751597592285044, -5.351900685535859, + -5.538947192058466, -3.6838339356894148, -5.717756655548977, + -5.9396873504379775, -6.193602560418941, + }, + { + -4.999514188504179, -4.700587505853424, -4.67022803135639, + -4.81576740495976, -4.92872993147181, -5.418517964945142, + -5.4525477135314535, -5.471965799388554, -5.601583292691941, + -5.294558257397029, -2.0675057720915833, -6.606510097282728, + -6.66295140818768, -6.268297216183731, -1.8472066532219233, + -6.801101746668497, -6.464629510047284, -5.518112402024337, + -2.6153829107633095, -5.40926720517299, -7.042937430994859, + -6.122880216325227, -4.984950824316282, -6.978988706394586, + -2.5341534752438712, -6.335738496979263, -7.22903971062872, + -5.047082605423289, -6.606510097282728, -7.195517018590076, + -3.3881221963080814, -6.764734102497622, -6.935691900641261, + -6.06802185577318, -5.518112402024337, -6.21743879895024, + -3.306166689176508, -4.5835098665078435, -4.096018906794089, + -2.7778687007936327, 
-3.149770331333854, -4.522505810044474, + -3.6065186143693406, -4.579485716208118, -3.9009018998360694, + -5.695109350702764, -4.592420415540891, -4.39850374434322, + -4.858371250530563, -4.687974726037726, -4.284600731462279, + -4.828050056815601, -5.7714823294873385, -4.760362534007699, + -3.6910492160852075, -3.1337486268025883, -5.114702793098268, + -5.044521784561615, -4.990993138772245, -5.21262268525524, + -5.1495981689488834, -4.939033399841534, + }, + { + -5.573013579156251, -4.7557456323553735, -4.835888625843566, + -5.4310032481645445, -5.375816832597254, -5.748275644237248, + -5.617589203744957, -5.426286258286405, -5.527732874623095, + -6.027336973645122, -5.771265162461947, -3.4363190171520683, + -4.863649827007062, -5.342810536129083, -3.9603112459777443, + -3.1741279621323484, -6.522548369609561, -4.089429329592385, + -5.667234434234122, -6.573100648772392, -5.74180312973163, + -3.6814950844272993, -4.555731535734934, -2.60141208694855, + -6.057840427938536, -2.6538446056843625, -7.3352407008192895, + -2.4633202784204524, -3.0508881488790776, -5.069846636151324, + -3.6495408996227328, -4.548858656447172, -5.7613477258046, + -6.031638055544512, -6.61856302284915, -5.764642621701453, + -4.906092884516815, -3.945751808422034, -4.075923906392578, + -4.145352412824341, -4.3284585910787134, -4.1375398730875474, + -4.386633312115669, -4.864991208831262, -5.2459274106602996, + -5.389330551763976, -4.366047982499911, -3.5236520927129136, + -3.6216686341149815, -3.0467401241562313, -4.4457511661828155, + -3.28056139498962, -5.728982441302569, -3.2282776746379773, + -3.3483163872180257, -3.6120365041642786, -4.280940073114231, + -4.240597235287379, -4.439591885822281, -4.571620648562269, + -5.13801612348307, -5.583972592945972, + }, + { + -5.648871675811755, -4.741030782968909, -5.070478143556586, + -5.038332518583169, -4.984450311189462, -5.652113169735926, + -5.878916005474925, -6.018576120040821, -5.698633185370818, + -5.924757364823166, 
-1.8468585970227798, -6.844251516414859, + -7.126600242891277, -6.081398705962876, -2.934905888757429, + -3.6595354362414967, -6.817864761241664, -3.5921374614243615, + -2.9623018840998165, -7.15537920744132, -5.837035508229937, + -2.956367800752568, -5.44072677540776, -6.135188880549014, + -2.35874447462609, -6.817864761241664, -7.744626792983114, + -1.829808862763535, -4.656579250518651, -7.012765100242478, + -3.346772137573546, -6.156466278996299, -6.772055225210369, + -5.459294948136628, -6.2464145156592386, -6.772055225210369, + -3.9054574859103, -5.259720143195114, -3.484916480113615, + -4.59888775210554, -4.26703708287839, -5.280773552392946, + -4.568219053541838, -5.236954932422101, -4.227597595795837, + -6.429913235626207, -5.14878310004672, -4.299244656170703, + -5.066854951187166, -5.722730736949879, -4.343868104023761, + -4.895290258966649, -5.924757364823166, -4.2366225736701635, + -3.439140235431567, -5.290903070631802, -4.913738130330258, + -5.32777360644013, -5.700335313441349, -4.118883335232663, + -6.285399962372192, -5.825384891009962, + }, + { + -4.134798672989471, -4.089847285127205, -4.1376763728170864, + -4.175860159787245, -3.5867437074551245, -4.073498147125676, + -4.247208240584712, -4.485742942219219, -4.161001045383496, + -4.76664532768562, -4.782994465687151, -5.224827217966189, + -5.79734641073752, -5.371880635922686, -5.362028339479674, + -4.805217602471861, -5.696541711615555, -5.1428140663053545, + -4.243997964954463, -5.696541711615555, -6.074978147335799, + -4.388579193765571, -4.887134724939747, -5.433124261163405, + -5.127188748402274, -5.465212575714905, -5.5681605449673475, + -3.871109027280912, -4.3522115495946965, -4.887134724939747, + -1.4812734394569864, -4.9697213159490214, -5.42265296129611, + -5.5681605449673475, -5.696541711615555, -5.592551998091507, + -3.9025017396887742, -4.303421385425264, -4.377529357578986, + -3.2036759521599856, -4.414847120586181, -4.370230055097374, + -4.283218678107745, -4.293269013961246, 
-4.570900750559526, + -4.639893622046477, -4.473572406598963, -3.944368319081564, + -3.9659778034144195, -4.663991173625537, -3.393956618621509, + -4.489832927470744, -3.9232159440763374, -4.221810049979101, + -3.677082874537429, -4.095356940938174, -3.0285527594662907, + -4.570900750559526, -4.348646483430199, -4.449666885745409, + -4.481669616831582, -4.21556002963393, + }, + { + -5.468543074118657, -4.3409301123745765, -4.939132602788878, + -5.030955774234888, -5.486636817561356, -5.444018684255573, + -5.476853371252285, -5.808607963491769, -5.908385471304435, + -5.666153323427702, -2.5849374499856705, -5.739356727450997, + -6.452595908093631, -6.391754248837386, -1.3630653142736813, + -7.218432822021854, -6.741508749931544, -4.321907198551311, + -2.3295168721177877, -7.554905058643067, -7.600367432719824, + -7.660265574300893, -6.068262723688894, -7.064282142194595, + -2.4089621725210324, -6.261701120589188, -7.218432822021854, + -7.400754378815808, -6.405740490812126, -6.66108718262097, + -3.202908170420881, -7.345184527660997, -7.318516280578836, + -7.17951740577218, -6.2346317986209705, -7.685266876506311, + -3.523815890500982, -5.001129001779655, -4.254551658580372, + -3.1791480369827805, -3.272238460048792, -4.979751531007288, + -4.696520976240299, -4.996816791561475, -3.7821441205484283, + -6.068262723688894, -4.767838317222632, -4.961310103104565, + -3.3169412686460493, -4.254142074302682, -3.686717843919221, + -4.781622132973809, -6.395232513213711, -4.882290151848487, + -4.035101934573064, -4.142657840794326, -4.886134831709296, + -5.154991621688459, -5.261248596393322, -5.763145589415012, + -5.307128202144016, -5.4187681732866855, + }, + { + -3.7153348719791315, -4.23835637018879, -5.401341822408315, + -5.655381048015406, -5.3230099791384085, -5.723838105173554, + -5.787159933132683, -6.191069830974041, -6.118044695959151, + -6.512733837722823, -2.85919837658876, -7.902248813542757, + -2.4061387894065027, -6.693770047296114, -2.250325724429897, + 
-7.350962550860164, -6.005451461342365, -3.2057646919026945, + -3.1269276063809657, -8.282234836349936, -4.929356031958588, + -4.893377430923357, -4.350779231381914, -5.039126698093967, + -3.0950598675329433, -2.702149075140166, -7.065140944061303, + -6.4656293685353505, -6.6671525062961585, -1.5532677412410396, + -3.407685858090993, -6.917395410231305, -4.818860209735889, + -8.330553413620743, -4.697144805661785, -7.44034239452964, + -4.201345166462018, -5.036551747250591, -4.447052124631202, + -4.271306064673767, -4.3875307527168115, -5.68024420720805, + -5.463555006909888, -4.6680077696105, -4.729585831829687, + -6.877248342217893, -5.7894972929813875, -4.744562286019245, + -4.863271676535759, -5.996776708148691, -5.115645847928164, + -4.782587554878788, -6.69569868820252, -5.6198664278662305, + -4.372289459396023, -3.8304137647360905, -5.143464614214886, + -5.692911512408815, -5.442384299786921, -6.144390895990383, + -6.553756817069401, -6.771887826560066, + }, + { + -5.439192275092616, -4.781176186054329, -4.766861598657258, + -5.578787193555129, -5.543563426336324, -5.577507602849504, + -5.742587353208953, -6.240425781448133, -6.11031213333427, + -6.461710020172159, -2.6665339561656087, -5.7471225083743445, + -6.597455272456702, -6.655866034613116, -2.1677366785812078, + -7.063212610820987, -6.615344837207477, -1.8542823223629723, + -2.8203019407701473, -7.921874229858505, -5.964600322152877, + -7.4190807777897785, -7.115398363991557, -7.50316389500032, + -2.434370037607787, -6.75042461723813, -8.501692725111447, + -2.436966652419961, -5.508344242912733, -7.262628600974241, + -3.3318853658660776, -6.320468489121669, -4.662752576421778, + -7.96269622437876, -4.3951216252530045, -7.773454224740232, + -3.4248833433841024, -4.6171877485771935, -4.856615893655904, + -5.148452521314832, -3.262215858586344, -5.067705520626301, + -4.995134827791466, -4.808655260848847, -4.056803079326151, + -6.372894286461519, -5.283769734998518, -4.869131701587735, + 
-4.356292992089062, -5.580068423711178, -4.205055407055328, + -5.300108397353307, -6.413362236014366, -4.081705328198343, + -3.5651654031689133, -4.4607321960690065, -4.914213385439241, + -3.3562768189902368, -5.301077859377855, -5.2191716350828905, + -5.66568217643417, -5.148452521314832, + }, + { + -4.692472771166889, -4.3008934499719516, -4.549181741142988, + -4.955265087292482, -5.083773336506403, -5.36724487642184, + -5.286555965171697, -5.606625988867734, -5.3561026998685985, + -5.6766361544403825, -6.310506725070788, -5.345083304618988, + -6.354631529979725, -5.436838168221035, -2.778870257014558, + -5.888394383529467, -5.518132910614282, -3.971194340356585, + -5.541198183545278, -7.353160360090853, -5.418980550821029, + -3.590537431731367, -3.3241709789128793, -1.8944436557320132, + -5.91226186493611, -3.390599447549114, -7.2559966116372046, + -3.6807421830309885, -3.742794171598555, -5.40143624117012, + -6.914247317915148, -6.268246915780905, -5.966865998970962, + -6.802329401711162, -7.18453764765506, -6.063197107909394, + -4.040088147996806, -3.4233375319614976, -2.7095546985241823, + -4.374927246403353, -3.562959147374605, -4.404434128912793, + -4.579296940419071, -4.391681565015401, -4.438151297809303, + -5.606625988867734, -4.3282656378478706, -4.1449534915722195, + -4.0950638220693705, -3.3913758451036427, -5.0164696546102965, + -4.409796072054178, -6.030044900592494, -3.187603169643488, + -2.500811269450856, -3.4121731416766146, -5.192891489277063, + -4.804034117568558, -4.273312053475689, -5.056716535118889, + -5.514880875227905, -5.219114684376165, + }, + { + -5.262101982971463, -4.751704716461968, -4.4750082149096455, + -5.006514043913175, -5.07515161250067, -5.251452255054804, + -5.285938431125974, -5.392274080942006, -5.414469813333791, + -5.840086736879263, -2.606359904105076, -6.823463761969788, + -6.89757173412351, -5.234645136738424, -1.8016665052081617, + -5.931967688708849, -5.7146991018376605, -5.437169400849897, + -1.672652532530341, 
-6.72630001351614, -6.773702252410724, + -5.308112288620296, -6.00575385876808, -6.6810434219280195, + -2.3521748840468923, -5.96169386897405, -7.419447194076086, + -6.095225261598572, -5.008173795331539, -6.397795946544104, + -5.514028406375509, -5.907173030095633, -6.070893160939042, + -4.572171963363293, -6.91979487090822, -6.823463761969788, + -3.666029218824578, -4.169934399745995, -4.315857524665681, + -2.6071122929751187, -3.822642418908326, -5.326212330263914, + -4.3600448319184775, -4.917707706137853, -3.8075287810982776, + -5.953110125282659, -5.187841070540621, -4.792366055507543, + -4.8403533292283845, -5.164264339976624, -4.320022461964966, + -3.8952676954062846, -5.745470760504414, -4.036941945426083, + -4.228361366337563, -4.708930900478682, -5.247223918945283, + -5.006514043913175, -3.8952676954062846, -4.574322501826521, + -5.886970322778113, -5.748949024880739, + }, + { + -5.49714261021104, -4.668772759709356, -4.7793028170607235, + -5.269011517385791, -5.403728165571614, -5.600190986110487, + -5.607143505425369, -5.736034518493389, -5.749350294469161, + -5.980569259788917, -1.8463761496649909, -6.7957313909118255, + -6.842614976810675, -6.277301167760615, -1.6016725181239624, + -6.834646807161499, -6.483542908272222, -3.3781298559126016, + -1.9147238137262874, -7.264209466848723, -6.97044834832056, + -6.2503937148406905, -6.323821183395508, -6.466922027036182, + -2.304012035094697, -6.232849405189781, -7.182292344380837, + -4.500040235600472, -6.194465162181466, -7.045955900828706, + -3.7335540179496816, -6.633976111699347, -6.224191342446667, + -7.560054849939665, -6.541029999189903, -7.7391030813886506, + -3.9953827115751284, -5.225208894727398, -4.2623981472183505, + -4.983820106246462, -3.5900722944719905, -4.880831478599072, + -4.625152137233465, -5.350160419862394, -4.464551422640724, + -6.03257404477923, -5.25911044640308, -4.290672925686517, + -4.802954305142684, -5.287422079228971, -4.798800612773991, + -4.757191363453292, 
-5.977196575310277, -4.844383490556383, + -2.946883968159507, -4.887603513509017, -5.795997983260946, + -5.6188395451885595, -4.719156549714065, -5.921535728204724, + -6.1778672707724285, -5.694577804815043, + }, + { + -4.469097220436953, -3.6430767584970343, -3.7844067498952314, + -3.931680914923784, -4.272838155799217, -4.129621978555971, + -4.297357772973536, -4.384369149963165, -4.338992745625113, + -4.178787025578902, -4.976420213651742, -4.292052720743842, + -5.461928029433443, -5.887479896108345, -4.57562801124297, + -5.526466550571014, -6.262964172004573, -4.4462034005711, + -5.238784478119233, -6.017291507630335, -6.355745905455539, + -5.968972930359527, -5.520424236115051, -6.328346931267425, + -5.70515833931439, -4.218953067304238, -6.6434279779073195, + -5.648805402763258, -5.5635078222513625, -4.870360641691417, + -6.412904319295488, -1.949196049230905, -5.968972930359527, + -4.4990139814749455, -6.027241838483503, -6.442757282445169, + -4.057336183871974, -4.448263133534111, -4.000410247075965, + -3.7940068236242506, -4.150834895195163, -4.166267300233975, + -4.566303934367847, -4.3901887590164295, -4.028468199871122, + -4.376661982718227, -4.915384323065373, -3.014652447863089, + -3.369082406994758, -4.61381880997536, -4.55706599338291, + -3.5814225774035497, -4.577972678202224, -4.001728638829222, + -3.270401473211861, -3.264703452097223, -3.9718368746319084, + -3.132988733879366, -4.361423592613295, -2.595191993338583, + -4.516519898988561, -4.912100247864183, + }, + { + -3.9699474093476774, -4.189042372135887, -4.058989243887689, + -4.102030995146257, -4.171520021443684, -4.318254103615893, + -4.424253392850214, -4.758810531535327, -4.504029895277935, + -4.414714369803455, -3.0879080140350674, -5.469976217597951, + -3.790334046490602, -5.366435538657111, -3.235086137375257, + -5.727805326900051, -5.408282648592611, -4.971959552124542, + -5.070590155566169, -5.076706382583605, -6.181472536826093, + -5.921961341341008, -4.9999725883522155, 
-5.5070174892783, + -1.5568628976878147, -5.5070174892783, -6.110013572843948, + -4.0678979887767985, -5.089052218405905, -5.200643283814367, + -3.6661684098537215, -4.557328477002297, -5.479108701161223, + -5.159821289294111, -5.479108701161223, -5.616579691789827, + -3.7152580200502454, -4.23556238777078, -4.212031890360586, + -3.866007780968386, -4.0678979887767985, -4.4633463186414915, + -4.142178063159669, -4.318254103615893, -4.276053749125516, + -4.772293881872614, -4.332726136224427, -4.11133338780857, + -3.9089648990693253, -4.309670359924501, -3.9127889955077286, + -4.338574106106851, -4.528549512452254, -4.149433234040841, + -3.5292744136497762, -3.6009751629760713, -4.350373653038005, + -4.828122331425556, -4.047963773875981, -4.665125047458004, + -4.673288358097166, -4.633122316371831, + }, + { + -4.806719860900339, -3.6507473420444714, -4.429814962058902, + -4.347330952102998, -4.3949170222076255, -4.698366552912107, + -4.809448375553543, -4.35423944244681, -4.534137594904308, + -4.3611959922401695, -2.871768725634745, -4.598399529122678, + -5.564587232141456, -5.519124858064699, -2.4250566837898457, + -5.688201188108633, -5.967149580591659, -5.01834957015221, + -2.505498094634157, -5.3939617151106924, -6.3813483686685775, + -5.360223575478843, -5.821732580733155, -5.742268409378909, + -2.5427453130265225, -6.0299504818306895, -5.949907774157153, + -6.048642614842842, -6.106911522966818, -6.522426966928483, + -1.9368412502165815, -5.384205540165328, -3.2469989725553616, + -5.166904264475346, -4.662348253722951, -5.93295821584338, + -4.052787789271272, -3.8964417188805776, -4.725629703670472, + -3.994277013931218, -3.570836586604418, -4.883136091344702, + -4.20182336843176, -4.894970548991704, -4.1342203512338624, + -5.159122124033291, -4.442985425248647, -4.740819869164446, + -4.583031498894364, -4.589588899440523, -4.4058064220068935, + -4.87144005158151, -4.8541485544714496, -4.806719860900339, + -4.241282205172307, -4.937530163410501, 
-4.382360803432213,
+          -5.0116381355642226, -4.368201274828579, -4.782492565565015,
+          -4.544565218066568, -4.4563316672015425,
+      }};
+
+// Maps an alphanumeric character to its row/column index in
+// kClassifierTransitionMatrix: '0'-'9' -> 0-9, 'a'-'z' -> 10-35,
+// 'A'-'Z' -> 36-61. Characters missing from the map are not scored.
+constexpr auto kTokenMap = base::MakeFixedFlatMap<char, size_t>(
+    {{'1', 1},  {'0', 0},  {'3', 3},  {'2', 2},  {'5', 5},  {'4', 4},
+     {'7', 7},  {'6', 6},  {'9', 9},  {'8', 8},  {'A', 36}, {'C', 38},
+     {'B', 37}, {'E', 40}, {'D', 39}, {'G', 42}, {'F', 41}, {'I', 44},
+     {'H', 43}, {'K', 46}, {'J', 45}, {'M', 48}, {'L', 47}, {'O', 50},
+     {'N', 49}, {'Q', 52}, {'P', 51}, {'S', 54}, {'R', 53}, {'U', 56},
+     {'T', 55}, {'W', 58}, {'V', 57}, {'Y', 60}, {'X', 59}, {'Z', 61},
+     {'a', 10}, {'c', 12}, {'b', 11}, {'e', 14}, {'d', 13}, {'g', 16},
+     {'f', 15}, {'i', 18}, {'h', 17}, {'k', 20}, {'j', 19}, {'m', 22},
+     {'l', 21}, {'o', 24}, {'n', 23}, {'q', 26}, {'p', 25}, {'s', 28},
+     {'r', 27}, {'u', 30}, {'t', 29}, {'w', 32}, {'v', 31}, {'y', 34},
+     {'x', 33}, {'z', 35}});
+
+// Base cut-off for the classifier. A string whose averaged transition
+// probability falls below this value (scaled by the caller's multiplier) is
+// reported as a hash.
+constexpr double kClassifierThreshold = 0.015;
+
+// Returns true if |value| looks like a hash rather than natural text.
+//
+// |value| is first passed through RegexUtil::TransformToAlphanumeric
+// (presumably stripping non-alphanumeric characters - confirm in
+// regex_util.cc). Every adjacent character pair found in kTokenMap then
+// contributes its log-probability from kClassifierTransitionMatrix. The
+// exponential of the mean log-probability is compared against
+// |threshold_multiplier| * kClassifierThreshold.
+//
+// Returns false when no character pair could be scored (empty or
+// single-character input after the transformation).
+bool IsHashLikely(RegexUtil& regex_util,
+                  std::string value,
+                  double threshold_multiplier) {
+  regex_util.TransformToAlphanumeric(value);
+
+  double log_prob_sum = 0.0;
+  size_t add_count = 0;
+  // `i + 1 < length()` rather than `i < length() - 1`: the latter wraps
+  // around for an empty string because length() is unsigned, which would
+  // turn this loop into an out-of-bounds read.
+  for (size_t i = 0; i + 1 < value.length(); i++) {
+    const auto matrix_pos_a = kTokenMap.find(value[i]);
+    const auto matrix_pos_b = kTokenMap.find(value[i + 1]);
+    if (matrix_pos_a == kTokenMap.end() || matrix_pos_b == kTokenMap.end()) {
+      continue;
+    }
+
+    log_prob_sum +=
+        kClassifierTransitionMatrix[matrix_pos_a->second][matrix_pos_b->second];
+    add_count++;
+  }
+
+  if (add_count == 0) {
+    // Nothing was scored, so there is no evidence of a hash. The previous
+    // `return 1.0;` (a probability) was implicitly converted to `true`.
+    return false;
+  }
+
+  const double prob = std::exp(log_prob_sum / add_count);
+  return prob < (threshold_multiplier * kClassifierThreshold);
+}
+
+}  // namespace web_discovery
diff --git a/components/web_discovery/browser/hash_detection.h b/components/web_discovery/browser/hash_detection.h
new file mode 100644
index 000000000000..c6461aa40bc4
--- /dev/null
+++ b/components/web_discovery/browser/hash_detection.h
@@ -0,0 +1,24 @@
+/* Copyright (c) 2024 The Brave Authors.
All rights reserved.
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at https://mozilla.org/MPL/2.0/. */
+
+#ifndef BRAVE_COMPONENTS_WEB_DISCOVERY_BROWSER_HASH_DETECTION_H_
+#define BRAVE_COMPONENTS_WEB_DISCOVERY_BROWSER_HASH_DETECTION_H_
+
+#include <string>
+
+#include "brave/components/web_discovery/browser/regex_util.h"
+
+namespace web_discovery {
+
+// Uses a pre-trained Markov chain classifier to detect the likelihood
+// of a hash in a given piece of text. Used in privacy guard functions
+// for detecting potentially private URLs/queries.
+//
+// |value| is taken by value because the implementation normalizes it in
+// place before scoring. |threshold_multiplier| scales the classifier's base
+// probability threshold: values above 1.0 make the check flag more strings
+// as hashes, values below 1.0 make it flag fewer.
+bool IsHashLikely(RegexUtil& regex_util,
+                  std::string value,
+                  double threshold_multiplier = 1.0);
+
+}  // namespace web_discovery
+
+#endif  // BRAVE_COMPONENTS_WEB_DISCOVERY_BROWSER_HASH_DETECTION_H_
diff --git a/components/web_discovery/browser/hash_detection_unittest.cc b/components/web_discovery/browser/hash_detection_unittest.cc
new file mode 100644
index 000000000000..94e6b944b28b
--- /dev/null
+++ b/components/web_discovery/browser/hash_detection_unittest.cc
@@ -0,0 +1,29 @@
+
+/* Copyright (c) 2024 The Brave Authors. All rights reserved.
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at https://mozilla.org/MPL/2.0/.
*/
+
+#include "brave/components/web_discovery/browser/hash_detection.h"
+
+#include "brave/components/web_discovery/browser/regex_util.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace web_discovery {
+
+// Exercises the classifier with its default threshold multiplier: ordinary
+// words and phrases (including one very long dictionary word) must not be
+// flagged, while random-looking tokens must be.
+TEST(WebDiscoveryHashDetectionTest, Basic) {
+  RegexUtil regex_util;
+
+  constexpr const char* kOrdinaryText[] = {
+      "test",
+      "this is a test query",
+      "pneumonoultramicroscopicsilicovolcanoconiosis",
+  };
+  for (const char* text : kOrdinaryText) {
+    EXPECT_FALSE(IsHashLikely(regex_util, text));
+  }
+
+  constexpr const char* kHashLikeText[] = {
+      "N46iSNekpT:08ca45b7d7ea58ee:88dcbe4446168966a1",
+      "2btjjy78REtmYkkW0csHUbJZOstRXoWdX1mGrmmfeHI",
+  };
+  for (const char* text : kHashLikeText) {
+    EXPECT_TRUE(IsHashLikely(regex_util, text));
+  }
+}
+
+}  // namespace web_discovery
diff --git a/components/web_discovery/browser/payload_generator.cc b/components/web_discovery/browser/payload_generator.cc
new file mode 100644
index 000000000000..2c631c15ad54
--- /dev/null
+++ b/components/web_discovery/browser/payload_generator.cc
@@ -0,0 +1,208 @@
+/* Copyright (c) 2024 The Brave Authors. All rights reserved.
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at https://mozilla.org/MPL/2.0/.
*/
+
+#include "brave/components/web_discovery/browser/payload_generator.h"
+
+#include <optional>
+#include <string_view>
+#include <utility>
+
+#include "base/containers/fixed_flat_set.h"
+#include "base/logging.h"
+#include "base/strings/string_number_conversions.h"
+#include "brave/components/web_discovery/browser/privacy_guard.h"
+
+namespace web_discovery {
+
+namespace {
+
+constexpr char kCountryCodeFieldName[] = "ctry";
+constexpr char kSearchResultKey[] = "r";
+constexpr char kSearchResultURLKey[] = "u";
+// Joined search-result payloads with fewer entries than this are dropped.
+constexpr size_t kMinSearchResultSize = 4;
+
+constexpr char kAliveAction[] = "alive";
+constexpr char kStatusFieldName[] = "status";
+constexpr char kTimestampFieldName[] = "t";
+
+// Actions for which joined results get the private-URL filter and the
+// minimum-result-count check.
+constexpr auto kQueryActions = base::MakeFixedFlatSet<std::string_view>(
+    {"query", "anon-query", "widgetTitle"});
+
+// A value has content if it is a non-empty string, or any non-string value
+// other than "none".
+bool ValueHasContent(const base::Value& value) {
+  if (const auto* value_str = value.GetIfString()) {
+    return !value_str->empty();
+  }
+  return !value.is_none();
+}
+
+// Returns true if at least one field of at least one nested dictionary in
+// |dict| has content. Top-level entries that are not dictionaries are
+// ignored.
+bool AggregatedDictHasContent(const base::Value::Dict& dict) {
+  for (const auto [_k1, value] : dict) {
+    const auto* value_dict = value.GetIfDict();
+    if (!value_dict) {
+      continue;
+    }
+    for (const auto [_k2, sub_value] : *value_dict) {
+      if (ValueHasContent(sub_value)) {
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+// Returns true if |dict| is a search result (|rule.key| is the search result
+// key) whose "u" URL is considered private by IsPrivateURLLikely. Results
+// without a URL are never considered private.
+bool IsPrivateResult(RegexUtil& regex_util,
+                     const PayloadRule& rule,
+                     const PatternsURLDetails* matching_url_details,
+                     const base::Value::Dict& dict) {
+  if (rule.key != kSearchResultKey) {
+    return false;
+  }
+  const auto* url = dict.FindString(kSearchResultURLKey);
+  if (!url) {
+    return false;
+  }
+  return IsPrivateURLLikely(regex_util, GURL(*url), matching_url_details);
+}
+
+// Returns true if |rule| describes search results and fewer than
+// kMinSearchResultSize of them remain.
+bool ShouldDropSearchResultPayload(const PayloadRule& rule,
+                                   size_t result_size) {
+  if (rule.key != kSearchResultKey) {
+    return false;
+  }
+  return result_size < kMinSearchResultSize;
+}
+
+// Wraps |inner_payload| in the outer message: {action, payload}.
+base::Value::Dict CreatePayloadDict(const PayloadRuleGroup& rule_group,
+                                    base::Value::Dict inner_payload) {
+  base::Value::Dict payload;
+  payload.Set(kActionKey, rule_group.action);
+  payload.Set(kInnerPayloadKey, std::move(inner_payload));
+  return payload;
+}
+
+// Builds the dictionary for a "join" rule: every non-empty scraped entry is
+// stored under a consecutive numeric string key ("0", "1", ...). For query
+// actions, private search results are skipped before numbering. Returns
+// std::nullopt if nothing with content remains, or (query actions only) if
+// too few search results remain.
+std::optional<base::Value> GenerateClusteredJoinedPayload(
+    RegexUtil& regex_util,
+    bool is_query_action,
+    const PayloadRule& rule,
+    const PatternsURLDetails* matching_url_details,
+    const std::vector<base::Value::Dict>& attribute_values) {
+  base::Value::Dict joined_data;
+  size_t counter = 0;
+  for (const auto& value : attribute_values) {
+    if (value.empty()) {
+      continue;
+    }
+    if (is_query_action &&
+        IsPrivateResult(regex_util, rule, matching_url_details, value)) {
+      VLOG(1) << "Omitting private search result";
+      continue;
+    }
+    joined_data.Set(base::NumberToString(counter++), value.Clone());
+  }
+  if (!AggregatedDictHasContent(joined_data)) {
+    VLOG(1) << "Skipped joined clustered payload due to lack of content";
+    return std::nullopt;
+  }
+  if (is_query_action &&
+      ShouldDropSearchResultPayload(rule, joined_data.size())) {
+    VLOG(1) << "Skipped search result payload due to too few results";
+    return std::nullopt;
+  }
+  return base::Value(std::move(joined_data));
+}
+
+// Builds one clustered payload from all rules of |rule_group|. The whole
+// payload is abandoned (std::nullopt) as soon as any rule has no scraped
+// values, produces no joined data, or has an empty/missing string value.
+std::optional<base::Value::Dict> GenerateClusteredPayload(
+    RegexUtil& regex_util,
+    const PayloadRuleGroup& rule_group,
+    const PatternsURLDetails* matching_url_details,
+    const PageScrapeResult* scrape_result) {
+  // Depends only on the group, so compute it once instead of per rule.
+  const bool is_query_action = kQueryActions.contains(rule_group.action);
+
+  base::Value::Dict inner_payload;
+  for (const auto& rule : rule_group.rules) {
+    const auto attribute_values_it = scrape_result->fields.find(rule.selector);
+    if (attribute_values_it == scrape_result->fields.end() ||
+        attribute_values_it->second.empty()) {
+      VLOG(1) << "Skipped clustered payload due to no values for root "
+                 "selector, action = "
+              << rule_group.action;
+      return std::nullopt;
+    }
+    base::Value payload_rule_data;
+    if (rule.is_join) {
+      auto joined_payload = GenerateClusteredJoinedPayload(
+          regex_util, is_query_action, rule, matching_url_details,
+          attribute_values_it->second);
+      if (!joined_payload) {
+        VLOG(1) << "Skipped joined clustered payload, action = "
+                << rule_group.action;
+        return std::nullopt;
+      }
+      payload_rule_data = std::move(*joined_payload);
+    } else {
+      // Non-join rules only use the first scraped entry; the vector is known
+      // to be non-empty from the check above.
+      const auto* value = attribute_values_it->second[0].FindString(rule.key);
+      if (!value || value->empty()) {
+        VLOG(1) << "Skipped non-joined clustered payload, action = "
+                << rule_group.action;
+        return std::nullopt;
+      }
+      payload_rule_data = base::Value(*value);
+    }
+    inner_payload.Set(rule.key, std::move(payload_rule_data));
+  }
+  return CreatePayloadDict(rule_group, std::move(inner_payload));
+}
+
+// Appends one payload to |payloads| per scraped entry stored under
+// |rule_group.key|, each tagged with the configured country code.
+void GenerateSinglePayloads(const ServerConfig& server_config,
+                            const PayloadRuleGroup& rule_group,
+                            const PageScrapeResult* scrape_result,
+                            std::vector<base::Value::Dict>& payloads) {
+  const auto attribute_values_it = scrape_result->fields.find(rule_group.key);
+  if (attribute_values_it == scrape_result->fields.end()) {
+    return;
+  }
+  for (const auto& attribute_value : attribute_values_it->second) {
+    auto dict = attribute_value.Clone();
+    dict.Set(kCountryCodeFieldName, server_config.location);
+    payloads.push_back(CreatePayloadDict(rule_group, std::move(dict)));
+  }
+}
+
+}  // namespace
+
+// Walks every payload rule group of |url_details|: query/clustered groups
+// yield at most one payload each, single/single groups yield one payload per
+// scraped entry. Other combinations are ignored. |url_details| and
+// |scrape_result| are dereferenced unconditionally and must be non-null.
+std::vector<base::Value::Dict> GenerateQueryPayloads(
+    const ServerConfig& server_config,
+    RegexUtil& regex_util,
+    const PatternsURLDetails* url_details,
+    std::unique_ptr<PageScrapeResult> scrape_result) {
+  std::vector<base::Value::Dict> payloads;
+  for (const auto& rule_group : url_details->payload_rule_groups) {
+    if (rule_group.rule_type == PayloadRuleType::kQuery &&
+        rule_group.result_type == PayloadResultType::kClustered) {
+      auto payload = GenerateClusteredPayload(regex_util, rule_group,
+                                              url_details, scrape_result.get());
+      if (payload) {
+        payloads.push_back(std::move(*payload));
+      }
+    } else if (rule_group.rule_type == PayloadRuleType::kSingle &&
+               rule_group.result_type == PayloadResultType::kSingle) {
+      GenerateSinglePayloads(server_config, rule_group, scrape_result.get(),
+                             payloads);
+    }
+  }
+  return payloads;
+}
+
+// Builds {action: "alive", payload: {status: true, t: |date_hour|,
+// ctry: server_config.location}}.
+base::Value::Dict GenerateAlivePayload(const ServerConfig& server_config,
+                                       std::string date_hour) {
+  base::Value::Dict payload;
+  payload.Set(kActionKey, kAliveAction);
+  base::Value::Dict inner_payload;
+  inner_payload.Set(kStatusFieldName, true);
+  // |date_hour| is a by-value sink parameter; move it instead of copying.
+  inner_payload.Set(kTimestampFieldName, std::move(date_hour));
+  inner_payload.Set(kCountryCodeFieldName, server_config.location);
+  payload.Set(kInnerPayloadKey, std::move(inner_payload));
+  return payload;
+}
+
+}  // namespace web_discovery
diff --git a/components/web_discovery/browser/payload_generator.h b/components/web_discovery/browser/payload_generator.h
new file mode 100644
index 000000000000..b234c5872abc
--- /dev/null
+++ b/components/web_discovery/browser/payload_generator.h
@@ -0,0 +1,38 @@
+/* Copyright (c) 2024 The Brave Authors. All rights reserved.
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at https://mozilla.org/MPL/2.0/. */
+
+#ifndef BRAVE_COMPONENTS_WEB_DISCOVERY_BROWSER_PAYLOAD_GENERATOR_H_
+#define BRAVE_COMPONENTS_WEB_DISCOVERY_BROWSER_PAYLOAD_GENERATOR_H_
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "base/values.h"
+#include "brave/components/web_discovery/browser/content_scraper.h"
+#include "brave/components/web_discovery/browser/patterns.h"
+#include "brave/components/web_discovery/browser/regex_util.h"
+
+namespace web_discovery {
+
+// Keys of the outer message dictionary produced by the generators below.
+inline constexpr char kActionKey[] = "action";
+inline constexpr char kInnerPayloadKey[] = "payload";
+
+// Generates "query" messages using the payload generation rules
+// and scraped data for a given site. |url_details| and |scrape_result|
+// must be non-null.
+std::vector<base::Value::Dict> GenerateQueryPayloads(
+    const ServerConfig& server_config,
+    RegexUtil& regex_util,
+    const PatternsURLDetails* url_details,
+    std::unique_ptr<PageScrapeResult> scrape_result);
+
+// Generates an "alive" message to indicate an opted-in
+// status to the server.
+base::Value::Dict GenerateAlivePayload(const ServerConfig& server_config,
+                                       std::string date_hour);
+
+}  // namespace web_discovery
+
+#endif  // BRAVE_COMPONENTS_WEB_DISCOVERY_BROWSER_PAYLOAD_GENERATOR_H_
diff --git a/components/web_discovery/browser/payload_generator_unittest.cc b/components/web_discovery/browser/payload_generator_unittest.cc
new file mode 100644
index 000000000000..853f6c816c9c
--- /dev/null
+++ b/components/web_discovery/browser/payload_generator_unittest.cc
@@ -0,0 +1,283 @@
+/* Copyright (c) 2024 The Brave Authors. All rights reserved.
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at https://mozilla.org/MPL/2.0/. */
+
+#include "brave/components/web_discovery/browser/payload_generator.h"
+
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "base/strings/string_number_conversions.h"
+#include "brave/components/web_discovery/browser/content_scraper.h"
+#include "brave/components/web_discovery/browser/patterns.h"
+#include "brave/components/web_discovery/browser/regex_util.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace web_discovery {
+
+// Fixture providing a "us" server config and URL details with two rule
+// groups: a single/single group keyed on ".single-element", and a
+// query/clustered group ("query" action) with a joined "r" rule on
+// "#results" plus a plain "qurl" rule.
+class WebDiscoveryPayloadGeneratorTest : public testing::Test {
+ public:
+  ~WebDiscoveryPayloadGeneratorTest() override = default;
+
+  // testing::Test:
+  void SetUp() override {
+    server_config_ = std::make_unique<ServerConfig>();
+    server_config_->location = "us";
+
+    url_details_ = std::make_unique<PatternsURLDetails>();
+
+    url_details_->payload_rule_groups = std::vector<PayloadRuleGroup>(2);
+
+    auto& single_group = url_details_->payload_rule_groups[0];
+    single_group.key = ".single-element";
+    single_group.rule_type = PayloadRuleType::kSingle;
+    single_group.result_type = PayloadResultType::kSingle;
+    single_group.action = "single_action";
+
+    auto& query_group = url_details_->payload_rule_groups[1];
+    query_group.key = "query_group";
+    query_group.rule_type = PayloadRuleType::kQuery;
+    query_group.result_type = PayloadResultType::kClustered;
+    query_group.action = "query";
+    query_group.rules = std::vector<PayloadRule>(2);
+
+    auto& join_rule = query_group.rules[0];
+    join_rule.selector = "#results";
+    join_rule.key = "r";
+    join_rule.is_join = true;
+
+    auto& qurl_rule = query_group.rules[1];
+    qurl_rule.selector = "qurl";
+    qurl_rule.key = "qurl";
+    qurl_rule.is_join = false;
+  }
+
+ protected:
+  std::vector<base::Value::Dict> GenerateQueryPayloadsHelper(
+      std::unique_ptr<PageScrapeResult> scrape_result) {
+    return GenerateQueryPayloads(*server_config_, regex_util_,
+                                 url_details_.get(), std::move(scrape_result));
+  }
+
+  std::unique_ptr<ServerConfig> server_config_;
+
+ private:
+  RegexUtil regex_util_;
+  std::unique_ptr<PatternsURLDetails> url_details_;
+};
+
+// Two single entries plus four joined results should produce two
+// "single_action" payloads followed by one "query" payload.
+TEST_F(WebDiscoveryPayloadGeneratorTest, GenerateQueryPayloads) {
+  GURL test_url("https://example.com/test");
+  auto scrape_result = std::make_unique<PageScrapeResult>(test_url, "test_id");
+
+  std::vector<base::Value::Dict> single_dicts;
+  base::Value::Dict single_dict1;
+  single_dict1.Set("ab", "value1");
+  single_dict1.Set("cd", "value2");
+  single_dicts.push_back(std::move(single_dict1));
+  base::Value::Dict single_dict2;
+  single_dict2.Set("ef", "value3");
+  single_dict2.Set("gh", "value4");
+  single_dicts.push_back(std::move(single_dict2));
+  scrape_result->fields[".single-element"] = std::move(single_dicts);
+
+  std::vector<base::Value::Dict> result_dicts1;
+  base::Value::Dict result_dict1, result_dict2, result_dict3, result_dict4,
+      result_dict5;
+  result_dict1.Set("njk", "joinvalue1");
+  result_dict2.Set("abc", "joinvalue2");
+  result_dict3.Set("njk", "joinvalue3");
+  result_dict4.Set("abc", "joinvalue4");
+  result_dicts1.push_back(std::move(result_dict1));
+  result_dicts1.push_back(std::move(result_dict2));
+  result_dicts1.push_back(std::move(result_dict3));
+  result_dicts1.push_back(std::move(result_dict4));
+  std::vector<base::Value::Dict> result_dicts2;
+  result_dict5.Set("qurl", "https://example.com/test1");
+  result_dicts2.push_back(std::move(result_dict5));
+  scrape_result->fields["#results"] = std::move(result_dicts1);
+  scrape_result->fields["qurl"] = std::move(result_dicts2);
+
+  auto payloads = GenerateQueryPayloadsHelper(std::move(scrape_result));
+  ASSERT_EQ(payloads.size(), 3u);
+
+  const auto* payload = &payloads[0];
+  const auto* action = payload->FindString(kActionKey);
+  const auto* inner_payload = payload->FindDict(kInnerPayloadKey);
+  ASSERT_TRUE(action && inner_payload);
+  EXPECT_EQ(*action, "single_action");
+
+  EXPECT_EQ(inner_payload->size(), 3u);
+
+  const auto* ctry = inner_payload->FindString("ctry");
+  const auto* val1 = inner_payload->FindString("ab");
+  const auto* val2 = inner_payload->FindString("cd");
+  ASSERT_TRUE(ctry && val1 && val2);
+  EXPECT_EQ(*ctry, "us");
+  EXPECT_EQ(*val1, "value1");
+  EXPECT_EQ(*val2, "value2");
+
+  payload = &payloads[1];
+  action = payload->FindString(kActionKey);
+  inner_payload = payload->FindDict(kInnerPayloadKey);
+  ASSERT_TRUE(action && inner_payload);
+  EXPECT_EQ(*action, "single_action");
+
+  EXPECT_EQ(inner_payload->size(), 3u);
+
+  ctry = inner_payload->FindString("ctry");
+  val1 = inner_payload->FindString("ef");
+  val2 = inner_payload->FindString("gh");
+  ASSERT_TRUE(ctry && val1 && val2);
+  EXPECT_EQ(*ctry, "us");
+  EXPECT_EQ(*val1, "value3");
+  EXPECT_EQ(*val2, "value4");
+
+  payload = &payloads[2];
+  action = payload->FindString(kActionKey);
+  inner_payload = payload->FindDict(kInnerPayloadKey);
+  ASSERT_TRUE(action && inner_payload);
+  EXPECT_EQ(*action, "query");
+
+  EXPECT_EQ(inner_payload->size(), 2u);
+
+  const auto* qurl = inner_payload->FindString("qurl");
+  const auto* r_dict = inner_payload->FindDict("r");
+  ASSERT_TRUE(qurl && r_dict);
+  EXPECT_EQ(*qurl, "https://example.com/test1");
+  EXPECT_EQ(r_dict->size(), 4u);
+  const auto* r0_dict = r_dict->FindDict("0");
+  const auto* r1_dict = r_dict->FindDict("1");
+  const auto* r2_dict = r_dict->FindDict("2");
+  const auto* r3_dict = r_dict->FindDict("3");
+  ASSERT_TRUE(r0_dict && r1_dict && r2_dict && r3_dict);
+  EXPECT_EQ(r0_dict->size(), 1u);
+  EXPECT_EQ(r1_dict->size(), 1u);
+  EXPECT_EQ(r2_dict->size(), 1u);
+  EXPECT_EQ(r3_dict->size(), 1u);
+  const auto* r0_val = r0_dict->FindString("njk");
+  const auto* r1_val = r1_dict->FindString("abc");
+  const auto* r2_val = r2_dict->FindString("njk");
+  const auto* r3_val = r3_dict->FindString("abc");
+  // All four values are dereferenced below, so all four must be checked
+  // (the last operand used to be a duplicated r2_val).
+  ASSERT_TRUE(r0_val && r1_val && r2_val && r3_val);
+  EXPECT_EQ(*r0_val, "joinvalue1");
+  EXPECT_EQ(*r1_val, "joinvalue2");
+  EXPECT_EQ(*r2_val, "joinvalue3");
+  EXPECT_EQ(*r3_val, "joinvalue4");
+}
+
+// The alive payload carries the action, status, timestamp and country code.
+TEST_F(WebDiscoveryPayloadGeneratorTest, GenerateAlivePayload) {
+  std::string date_hour = "2023051509";
+
+  auto alive_payload = GenerateAlivePayload(*server_config_, date_hour);
+
+  const auto* action = alive_payload.FindString("action");
+  const auto* inner_payload = alive_payload.FindDict("payload");
+
+  ASSERT_TRUE(action && inner_payload);
+
+  EXPECT_EQ(*action, "alive");
+  const auto* ctry = inner_payload->FindString("ctry");
+  const auto* ts = inner_payload->FindString("t");
+  const auto status = inner_payload->FindBool("status");
+
+  ASSERT_TRUE(ctry && ts && status);
+  EXPECT_EQ(*ctry, "us");
+  EXPECT_EQ(*ts, date_hour);
+  EXPECT_EQ(*status, true);
+}
+
+// Five results where the second one has a private-looking (long numeric)
+// host: it must be removed and the remaining four renumbered 0..3.
+TEST_F(WebDiscoveryPayloadGeneratorTest, ExcludePrivateResult) {
+  GURL test_url("https://example.com/search");
+  auto scrape_result = std::make_unique<PageScrapeResult>(test_url, "test_id");
+
+  std::vector<base::Value::Dict> result_dicts1;
+  for (int i = 0; i < 5; i++) {
+    base::Value::Dict result_dict;
+    std::string url = "https://example.com/result";
+    if (i == 1) {
+      url = "https://423947892374892879.com/example";
+    } else {
+      url += base::NumberToString(i == 0 ? 0 : i - 1);
+    }
+    result_dict.Set("u", url);
+    result_dicts1.push_back(std::move(result_dict));
+  }
+  scrape_result->fields["#results"] = std::move(result_dicts1);
+  std::vector<base::Value::Dict> result_dicts2;
+  base::Value::Dict qurl_dict;
+  qurl_dict.Set("qurl", "https://example.com/test1");
+  result_dicts2.push_back(std::move(qurl_dict));
+  scrape_result->fields["qurl"] = std::move(result_dicts2);
+
+  auto payloads = GenerateQueryPayloadsHelper(std::move(scrape_result));
+  ASSERT_EQ(payloads.size(), 1u);
+
+  const auto* payload = &payloads[0];
+  const auto* inner_payload = payload->FindDict("payload");
+  // Assert before dereferencing; a missing inner payload should fail the
+  // test rather than crash it.
+  ASSERT_TRUE(inner_payload);
+  const auto* r_dict = inner_payload->FindDict("r");
+  ASSERT_TRUE(r_dict);
+
+  ASSERT_EQ(r_dict->size(), 4u);
+
+  for (int i = 0; i < 4; i++) {
+    const auto* ri_dict = r_dict->FindDict(base::NumberToString(i));
+    ASSERT_TRUE(ri_dict);
+
+    const auto* url = ri_dict->FindString("u");
+    ASSERT_TRUE(url);
+
+    EXPECT_EQ(*url, "https://example.com/result" + base::NumberToString(i));
+  }
+}
+
+// Only three search results: below the minimum, so no payload is produced.
+TEST_F(WebDiscoveryPayloadGeneratorTest, ShouldDropSearchResult) {
+  GURL test_url("https://example.com/search");
+  auto scrape_result = std::make_unique<PageScrapeResult>(test_url, "test_id");
+
+  std::vector<base::Value::Dict> result_dicts;
+  for (int i = 0; i < 3; i++) {
+    base::Value::Dict result_dict;
+    result_dict.Set("u",
+                    "https://example.com/result" + base::NumberToString(i));
+    result_dicts.push_back(std::move(result_dict));
+  }
+  scrape_result->fields["#results"] = std::move(result_dicts);
+
+  std::vector<base::Value::Dict> qurl_dicts;
+  base::Value::Dict qurl_dict;
+  qurl_dict.Set("qurl", "https://example.com/test1");
+  qurl_dicts.push_back(std::move(qurl_dict));
+  scrape_result->fields["qurl"] = std::move(qurl_dicts);
+
+  auto payloads = GenerateQueryPayloadsHelper(std::move(scrape_result));
+  ASSERT_EQ(payloads.size(), 0u);
+}
+
+// Results whose only field is an empty string or a none value carry no
+// content, so no payload is produced.
+TEST_F(WebDiscoveryPayloadGeneratorTest, ContentMissing) {
+  GURL test_url("https://example.com/search");
+  auto scrape_result = std::make_unique<PageScrapeResult>(test_url, "test_id");
+
+  std::vector<base::Value::Dict> result_dicts;
+  for (int i = 0; i < 9; i++) {
+    base::Value::Dict result_dict;
+    if (i < 5) {
+      result_dict.Set("x", "");
+    } else {
+      result_dict.Set("x", base::Value());
+    }
+    result_dicts.push_back(std::move(result_dict));
+  }
+  scrape_result->fields["#results"] = std::move(result_dicts);
+
+  std::vector<base::Value::Dict> qurl_dicts;
+  base::Value::Dict qurl_dict;
+  qurl_dict.Set("qurl", "https://example.com/test1");
+  qurl_dicts.push_back(std::move(qurl_dict));
+  scrape_result->fields["qurl"] = std::move(qurl_dicts);
+
+  auto payloads = GenerateQueryPayloadsHelper(std::move(scrape_result));
+  ASSERT_EQ(payloads.size(), 0u);
+}
+
+}  // namespace web_discovery
diff --git a/components/web_discovery/browser/privacy_guard.cc b/components/web_discovery/browser/privacy_guard.cc
new file mode 100644
index 000000000000..889af91a7fe4
--- /dev/null
+++ b/components/web_discovery/browser/privacy_guard.cc
@@ -0,0 +1,259 @@
+/* Copyright (c) 2024 The Brave Authors. All rights reserved.
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at https://mozilla.org/MPL/2.0/.
*/ + +#include "brave/components/web_discovery/browser/privacy_guard.h" + +#include "base/logging.h" +#include "base/strings/strcat.h" +#include "base/strings/string_split.h" +#include "base/strings/string_util.h" +#include "brave/components/web_discovery/browser/hash_detection.h" +#include "brave/components/web_discovery/browser/regex_util.h" +#include "brave/components/web_discovery/browser/util.h" +#include "url/url_constants.h" +#include "url/url_util.h" + +namespace web_discovery { + +namespace { + +constexpr size_t kMaxSearchEngineRefLength = 8; +constexpr size_t kMaxQueryLength = 50; +constexpr size_t kMaxQueryNumberLength = 7; +constexpr size_t kMaxQuerySplitLength = 7; +constexpr size_t kMaxQueryWordLength = 20; +constexpr size_t kHashCheckMinimumLength = 13; +constexpr double kHashCheckThresholdMultiplier = 1.5; + +constexpr size_t kMaxQueryStringLength = 30; +constexpr size_t kMaxQueryStringParts = 4; +constexpr size_t kMaxQueryStringOrPathNumberLength = 12; +constexpr size_t kMaxPathPartLength = 18; +constexpr size_t kMinPathPartHashCheckLength = 13; +constexpr size_t kMinSegmentHashCheckLength = 16; + +constexpr size_t kMaxDotSplitDomainSize = 6; +constexpr size_t kMaxHyphenSplitDomainSize = 4; +constexpr size_t kMaxDomainNumberLength = 5; + +constexpr char kDefaultSearchPrefix[] = "search?q="; + +constexpr char kOnionSiteSuffix[] = ".onion"; +constexpr char kLocalDomainSuffix[] = ".local"; +constexpr char kLocalhost[] = "localhost"; + +constexpr char kGoogleHostSubstring[] = "google"; +constexpr char kGoogleURLQueryParam[] = "url"; +constexpr char kMaskedURLSuffix[] = "/ (PROTECTED)"; + +bool ContainsForbiddenKeywords(RegexUtil& regex_util, const GURL& url) { + auto path_and_query = + base::StrCat({url.path_piece(), "?", url.query_piece()}); + if (regex_util.CheckPathAndQueryStringKeywords(path_and_query)) { + return true; + } + if (!url.ref_piece().empty() && + regex_util.CheckQueryStringOrRefKeywords("#" + url.ref())) { + return true; + } + if 
(!url.query_piece().empty() && + regex_util.CheckQueryStringOrRefKeywords("?" + url.query())) { + return true; + } + return false; +} + +bool IsPrivateDomainLikely(RegexUtil& regex_util, const std::string_view host) { + auto dot_split = + base::SplitString(host, ".", base::WhitespaceHandling::KEEP_WHITESPACE, + base::SPLIT_WANT_ALL); + if (dot_split.size() > kMaxDotSplitDomainSize) { + return true; + } + if (regex_util.CheckForLongNumber(host, kMaxDomainNumberLength)) { + return true; + } + auto hyphen_split = + base::SplitString(host, "-", base::WhitespaceHandling::KEEP_WHITESPACE, + base::SPLIT_WANT_ALL); + if (hyphen_split.size() > kMaxHyphenSplitDomainSize) { + return true; + } + return false; +} + +} // namespace + +bool IsPrivateURLLikely(RegexUtil& regex_util, + const GURL& url, + const PatternsURLDetails* matching_url_details) { + if (!url.SchemeIs("https")) { + VLOG(1) << "Ignoring URL due to non-HTTPS scheme"; + return true; + } + if (url.HostIsIPAddress()) { + VLOG(1) << "Ignoring URL due to IP address host"; + return true; + } + if (url.has_username() || url.has_password()) { + VLOG(1) << "Ignoring URL due to inclusion of credentials"; + return true; + } + if (url.has_port() && url.port_piece() != "443") { + VLOG(1) << "Ignoring URL due to non-standard port"; + return true; + } + if (matching_url_details && matching_url_details->is_search_engine) { + if (url.has_ref() && url.ref_piece().length() > kMaxSearchEngineRefLength) { + VLOG(1) << "Ignoring search engine URL due to long ref"; + return true; + } + } + auto host_piece = url.host_piece(); + if (host_piece.ends_with(kOnionSiteSuffix) || + host_piece.ends_with(kLocalDomainSuffix) || host_piece == kLocalhost) { + VLOG(1) << "Ignoring URL due a local host or onion site"; + return true; + } + if (IsPrivateDomainLikely(regex_util, url.host_piece())) { + VLOG(1) << "Ignoring URL due likely private domain"; + return true; + } + return false; +} + +bool IsPrivateQueryLikely(RegexUtil& regex_util, const 
std::string& query) { + if (query.length() > kMaxQueryLength) { + VLOG(1) << "Ignoring query due to long length"; + return true; + } + auto split = + base::SplitString(query, " ", base::WhitespaceHandling::KEEP_WHITESPACE, + base::SPLIT_WANT_NONEMPTY); + if (split.size() > kMaxQuerySplitLength) { + VLOG(1) << "Ignoring query due to long split length"; + return true; + } + if (regex_util.CheckForLongNumber(query, kMaxQueryNumberLength)) { + VLOG(1) << "Ignoring query due to long number"; + return true; + } + if (regex_util.CheckForEmail(query)) { + VLOG(1) << "Ignoring query due to inclusion of email"; + return true; + } + if (regex_util.CheckQueryHTTPCredentials(query)) { + VLOG(1) << "Ignoring query due to potential inclusion of HTTP credentials"; + return true; + } + for (const auto& word : split) { + if (word.length() > kMaxQueryWordLength) { + VLOG(1) << "Ignoring query due to long word"; + return true; + } + } + if (query.length() >= kHashCheckMinimumLength) { + if (IsHashLikely(regex_util, query, kHashCheckThresholdMultiplier)) { + VLOG(1) << "Ignoring query due to potential inclusion of hash"; + return true; + } + } + return false; +} + +GURL GeneratePrivateSearchURL(const GURL& original_url, + const std::string& query, + const PatternsURLDetails& matching_url_details) { + url::RawCanonOutputT<char> query_encoded; + url::EncodeURIComponent(query, &query_encoded); + std::string query_encoded_str(query_encoded.view()); + base::ReplaceSubstringsAfterOffset(&query_encoded_str, 0, "%20", "+"); + + return GURL( + base::StrCat({original_url.scheme(), url::kStandardSchemeSeparator, + original_url.host(), "/", + matching_url_details.search_template_prefix.value_or( + kDefaultSearchPrefix), + query_encoded_str})); +} + +bool ShouldDropLongURL(RegexUtil& regex_util, const GURL& url) { + if (regex_util.CheckForEmail(url.spec())) { + return true; + } + if (!url.query_piece().empty()) { + if (url.query_piece().size() > kMaxQueryStringLength) { + return true; + } + auto
query_parts = base::SplitString( + url.query_piece(), "&;", base::WhitespaceHandling::KEEP_WHITESPACE, + base::SplitResult::SPLIT_WANT_ALL); + if (query_parts.size() > kMaxQueryStringParts) { + return true; + } + if (regex_util.CheckForLongNumber(url.query_piece(), + kMaxQueryStringOrPathNumberLength)) { + return true; + } + } + if (!url.path_piece().empty()) { + if (regex_util.CheckForLongNumber(url.path_piece(), + kMaxQueryStringOrPathNumberLength)) { + return true; + } + } + auto path_parts = base::SplitString(url.path_piece(), "/._ -:+;", + base::WhitespaceHandling::KEEP_WHITESPACE, + base::SPLIT_WANT_ALL); + for (const auto& path_part : path_parts) { + if (path_part.length() > kMaxPathPartLength) { + return true; + } + if (path_part.length() >= kMinPathPartHashCheckLength && + IsHashLikely(regex_util, path_part)) { + return true; + } + } + auto path_segments = base::SplitString( + url.path_piece(), "/", base::WhitespaceHandling::KEEP_WHITESPACE, + base::SPLIT_WANT_ALL); + for (const auto& path_segment : path_segments) { + std::string alphanumeric_path_segment = path_segment; + regex_util.TransformToAlphanumeric(alphanumeric_path_segment); + if (alphanumeric_path_segment.length() >= kMinSegmentHashCheckLength && + IsHashLikely(regex_util, alphanumeric_path_segment)) { + return true; + } + } + return ContainsForbiddenKeywords(regex_util, url); +} + +std::optional<std::string> MaskURL(RegexUtil& regex_util, const GURL& url) { + if (!url.SchemeIsHTTPOrHTTPS() || !url.is_valid()) { + return std::nullopt; + } + + if (!ShouldDropLongURL(regex_util, url)) { + return url.spec(); + } + + if (url.host_piece().find(kGoogleHostSubstring) != std::string::npos && + url.has_query()) { + auto google_url_param = + ExtractValueFromQueryString(url.query_piece(), kGoogleURLQueryParam); + if (google_url_param) { + GURL decoded_embedded_url(*google_url_param); + if (!decoded_embedded_url.is_valid()) { + return std::nullopt; + } + return MaskURL(regex_util, decoded_embedded_url); + } + } + +
return base::StrCat({url.scheme(), url::kStandardSchemeSeparator, url.host(), + kMaskedURLSuffix}); +} + +} // namespace web_discovery diff --git a/components/web_discovery/browser/privacy_guard.h b/components/web_discovery/browser/privacy_guard.h new file mode 100644 index 000000000000..36c2fcc971b4 --- /dev/null +++ b/components/web_discovery/browser/privacy_guard.h @@ -0,0 +1,44 @@ +/* Copyright (c) 2024 The Brave Authors. All rights reserved. + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at https://mozilla.org/MPL/2.0/. */ + +#ifndef BRAVE_COMPONENTS_WEB_DISCOVERY_BROWSER_PRIVACY_GUARD_H_ +#define BRAVE_COMPONENTS_WEB_DISCOVERY_BROWSER_PRIVACY_GUARD_H_ + +#include + +#include "brave/components/web_discovery/browser/patterns.h" +#include "brave/components/web_discovery/browser/regex_util.h" +#include "url/gurl.h" + +namespace web_discovery { + +// Checks if a URL is likely to be private based on various criteria. +// If true, the page should not be investigated or reported. +bool IsPrivateURLLikely(RegexUtil& regex_util, + const GURL& url, + const PatternsURLDetails* matching_url_details); + +// Determines if a search query is likely to contain private information. +// If true, the search query should not be investigated or reported. +bool IsPrivateQueryLikely(RegexUtil& regex_util, const std::string& query); + +// Generates a simple search URL (without additional query parameters) +// based on the original search URL and query. Used for the double fetch +// to ensure that the user's profile is not involved in the query. +GURL GeneratePrivateSearchURL(const GURL& original_url, + const std::string& query, + const PatternsURLDetails& matching_url_details); + +// Checks if a URL should be dropped due to its length or content. +// Currently only used for determining whether to mask a URL +// in the function below. 
+bool ShouldDropLongURL(RegexUtil& regex_util, const GURL& url); + +// Masks a URL to protect privacy. Returns nullopt if URL is invalid. +std::optional<std::string> MaskURL(RegexUtil& regex_util, const GURL& url); + +} // namespace web_discovery + +#endif // BRAVE_COMPONENTS_WEB_DISCOVERY_BROWSER_PRIVACY_GUARD_H_ diff --git a/components/web_discovery/browser/privacy_guard_unittest.cc b/components/web_discovery/browser/privacy_guard_unittest.cc new file mode 100644 index 000000000000..e67871a298d5 --- /dev/null +++ b/components/web_discovery/browser/privacy_guard_unittest.cc @@ -0,0 +1,169 @@ +/* Copyright (c) 2024 The Brave Authors. All rights reserved. + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at https://mozilla.org/MPL/2.0/. */ + +#include "brave/components/web_discovery/browser/privacy_guard.h" + +#include "brave/components/web_discovery/browser/patterns.h" +#include "brave/components/web_discovery/browser/regex_util.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace web_discovery { + +class WebDiscoveryPrivacyGuardTest : public testing::Test { + public: + ~WebDiscoveryPrivacyGuardTest() override = default; + + // testing::Test: + void SetUp() override { + search_engine_pattern_.is_search_engine = true; + search_engine_pattern_.search_template_prefix = "find?testquery="; + } + + protected: + PatternsURLDetails search_engine_pattern_; + RegexUtil regex_util_; +}; + +TEST_F(WebDiscoveryPrivacyGuardTest, IsPrivateURLLikely) { + EXPECT_FALSE(IsPrivateURLLikely(regex_util_, + GURL("https://www.search1.com/search?q=test"), + &search_engine_pattern_)); + EXPECT_FALSE(IsPrivateURLLikely( + regex_util_, + GURL("https://search2.com/search?query=testing+a+nice+query"), + &search_engine_pattern_)); + EXPECT_FALSE(IsPrivateURLLikely( + regex_util_, + GURL( + "https://search2.com/search?query=quick+brown+fox+jumped&country=us"), +
&search_engine_pattern_)); + EXPECT_FALSE(IsPrivateURLLikely( + regex_util_, GURL("https://www.website.com/page/test"), nullptr)); + + EXPECT_TRUE(IsPrivateURLLikely( + regex_util_, GURL("http://www.website.com/page/test"), nullptr)); + EXPECT_TRUE(IsPrivateURLLikely( + regex_util_, GURL("https://88.88.88.88/page/test"), nullptr)); + EXPECT_TRUE(IsPrivateURLLikely( + regex_util_, GURL("https://website.com:8443/page/test"), nullptr)); + EXPECT_TRUE(IsPrivateURLLikely( + regex_util_, GURL("https://user:pass@website.com/page/test"), nullptr)); + EXPECT_TRUE(IsPrivateURLLikely( + regex_util_, GURL("https://www.search1.com/search?q=test#ABCDEFGHIJK"), + &search_engine_pattern_)); + + EXPECT_TRUE(IsPrivateURLLikely( + regex_util_, GURL("https://a.nested.sub.domain.website.co.uk/test/page"), + &search_engine_pattern_)); + EXPECT_TRUE(IsPrivateURLLikely( + regex_util_, GURL("https://abc192738284732929abc.com/test/page"), + &search_engine_pattern_)); + EXPECT_TRUE(IsPrivateURLLikely( + regex_util_, GURL("https://a-long-hyphenated-web-site.com/test/page"), + &search_engine_pattern_)); +} + +TEST_F(WebDiscoveryPrivacyGuardTest, IsPrivateQueryLikely) { + EXPECT_FALSE(IsPrivateQueryLikely(regex_util_, "test")); + EXPECT_FALSE(IsPrivateQueryLikely(regex_util_, "99 cake recipes")); + EXPECT_FALSE(IsPrivateQueryLikely(regex_util_, "grapefruit and pineapple")); + EXPECT_FALSE(IsPrivateQueryLikely(regex_util_, "a quick brown fox")); + + EXPECT_TRUE(IsPrivateQueryLikely( + regex_util_, + "ABC123ABC123ABC123ABC123ABC123ABC123ABC123ABC123ABC123ABC123")); + EXPECT_TRUE(IsPrivateQueryLikely( + regex_util_, + "a long query that is potentially private and should not be considered")); + EXPECT_TRUE( + IsPrivateQueryLikely(regex_util_, "aliases for me@testemail.com")); + EXPECT_TRUE( + IsPrivateQueryLikely(regex_util_, "access site with user:pass@site.com")); + EXPECT_TRUE(IsPrivateQueryLikely(regex_util_, "php $P$MArzfx58u")); + EXPECT_TRUE(IsPrivateQueryLikely( + regex_util_, 
"Hippopotomonstrosesquippedaliophobia symptoms")); +} + +TEST_F(WebDiscoveryPrivacyGuardTest, GeneratePrivateSearchURL) { + GURL original_url("https://example.com/search?q=aaa&country=us&f=1"); + + EXPECT_EQ(GeneratePrivateSearchURL(original_url, "a simple test query", + search_engine_pattern_) + .spec(), + "https://example.com/find?testquery=a+simple+test+query"); + EXPECT_EQ( + GeneratePrivateSearchURL(original_url, "another simple test query 123", + PatternsURLDetails()) + .spec(), + "https://example.com/search?q=another+simple+test+query+123"); + EXPECT_EQ( + GeneratePrivateSearchURL(original_url, + "special chars @#$%^&=", search_engine_pattern_) + .spec(), + "https://example.com/find?testquery=special+chars+%40%23%24%25%5E%26%3D"); +} + +TEST_F(WebDiscoveryPrivacyGuardTest, ShouldDropLongURL) { + EXPECT_FALSE(ShouldDropLongURL( + regex_util_, GURL("https://www.search1.com/search?q=test"))); + EXPECT_FALSE(ShouldDropLongURL( + regex_util_, + GURL("https://search2.com/search?query=testing+a+nice+query"))); + EXPECT_FALSE(ShouldDropLongURL( + regex_util_, + GURL("https://search2.com/search?query=quick+fox&country=us&d=1"))); + EXPECT_FALSE(ShouldDropLongURL(regex_util_, + GURL("https://www.website.com/page/test"))); + + EXPECT_TRUE(ShouldDropLongURL( + regex_util_, + GURL("https://www.website.com/page/test?id=12823871923991"))); + EXPECT_TRUE(ShouldDropLongURL( + regex_util_, GURL("https://www.website.com/page/test1283192831292"))); + EXPECT_TRUE(ShouldDropLongURL( + regex_util_, GURL("https://www.website.com/page/1283192831292?q=1"))); + EXPECT_TRUE(ShouldDropLongURL( + regex_util_, + GURL("https://www.website.com/page/test?a=1&b=2&c=3&d=4&e=5"))); + EXPECT_TRUE(ShouldDropLongURL( + regex_util_, + GURL("https://www.website.com/page/" + "test?query=a+super+long+query+string+that+is+too+long"))); + EXPECT_TRUE(ShouldDropLongURL( + regex_util_, GURL("https://www.website.com/page/ayLxezLhK1Lh1H1"))); + EXPECT_TRUE(ShouldDropLongURL(regex_util_, + 
GURL("https://www.website.com/page/WebLogic"))); + EXPECT_TRUE(ShouldDropLongURL(regex_util_, + GURL("https://www.website.com/page/admin"))); + EXPECT_TRUE(ShouldDropLongURL(regex_util_, + GURL("https://www.website.com/page/edit/"))); + EXPECT_TRUE(ShouldDropLongURL( + regex_util_, GURL("https://www.website.com/page/doc?share=1"))); + EXPECT_TRUE(ShouldDropLongURL( + regex_util_, GURL("https://www.website.com/page/doc?user=abc"))); + EXPECT_TRUE(ShouldDropLongURL( + regex_util_, GURL("https://www.website.com/page/doc#logout"))); + EXPECT_TRUE(ShouldDropLongURL( + regex_util_, GURL("https://www.website.com/page/doc?password=abc"))); + EXPECT_TRUE(ShouldDropLongURL( + regex_util_, GURL("https://user:pass@www.website.com/page/test"))); + EXPECT_TRUE(ShouldDropLongURL( + regex_util_, GURL("https://www.website.com/page/test?e=test@test.com"))); +} + +TEST_F(WebDiscoveryPrivacyGuardTest, MaskURL) { + GURL url("https://www.website.com/page/test"); + auto masked_url = MaskURL(regex_util_, url); + ASSERT_TRUE(masked_url); + EXPECT_EQ(*masked_url, url); + + masked_url = MaskURL(regex_util_, GURL("https://www.website.com/page/admin")); + ASSERT_TRUE(masked_url); + EXPECT_EQ(*masked_url, "https://www.website.com/ (PROTECTED)"); + + EXPECT_FALSE(MaskURL(regex_util_, GURL("file:///etc"))); +} + +} // namespace web_discovery diff --git a/components/web_discovery/browser/regex_util.cc b/components/web_discovery/browser/regex_util.cc new file mode 100644 index 000000000000..88f19f14aa69 --- /dev/null +++ b/components/web_discovery/browser/regex_util.cc @@ -0,0 +1,121 @@ +// Copyright (c) 2024 The Brave Authors. All rights reserved. +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this file, +// You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +#include "brave/components/web_discovery/browser/regex_util.h" + +#include "base/strings/strcat.h" +#include "base/strings/string_number_conversions.h" + +namespace { + +constexpr char kLongNumberRegexPrefix[] = "[0-9]{"; +constexpr char kLongNumberRegexSuffix[] = ",}"; +constexpr char kEmailRegex[] = + "(?i)[a-z0-9\\-_@]+(@|%40|%(25)+40)[a-z0-9\\-_]+\\.[a-z0-9\\-_]"; +constexpr char kHttpPasswordRegex[] = "[^:]+:[^@]+@"; +constexpr char kNotAlphanumericRegex[] = "[^a-zA-Z0-9]"; +constexpr char kPunctuationRegex[] = "[!\"'()*,-./:;?[\\]^_`{|}~%$=&+#]"; + +constexpr std::array kPathAndQueryStringCheckRegexes = { + "(?i)\\/admin([\\/\\?#=]|$)", + "(?i)\\/wp-admin([\\/\\?#=]|$)", + "(?i)\\/edit([\\/\\?#=]|$)", + "(?i)[&\\?#\\/]share([\\/\\?#=]|$)", + "(?i)[&\\?#\\/;]sharing([\\/\\?#=]|$)", + "(?i)[&\\?#\\/;]logout([\\/\\?#=]|$)", + "(?i)WebLogic", + "(?i)[&\\?#\\/;]token([\\/\\?#=_;]|$)", + "(?i)[&\\?#\\/;]trk([\\/\\?#=_]|$)", + "[&\\?#\\/=;](http|https)(:\\/|\\%3A\\%2F)"}; + +constexpr std::array kQueryStringAndRefCheckRegexes = { + "(?i)[&\\?#_\\-;]user", "(?i)[&\\?#_\\-;]token", + "(?i)[&\\?#_\\-;]auth", "(?i)[&\\?#_\\-;]uid", + "(?i)[&\\?#_\\-;]email", "(?i)[&\\?#_\\-;]usr", + "(?i)[&\\?#_\\-;]pin", "(?i)[&\\?#_\\-;]pwd", + "(?i)[&\\?#_\\-;]password", "(?i)[&\\?#;]u[=#]", + "(?i)[&\\?#;]url[=#]", "(?i)[&\\?#_\\-;]http", + "(?i)[&\\?#_\\-;]ref[=#]", "(?i)[&\\?#_\\-;]red[=#]", + "(?i)[&\\?#_\\-;]trk", "(?i)[&\\?#_\\-;]track", + "(?i)[&\\?#_\\-;]shar", "(?i)[&\\?#_\\-;]login", + "(?i)[&\\?#_\\-;]logout", "(?i)[&\\?#_\\-;]session", +}; + +} // anonymous namespace + +namespace web_discovery { + +RegexUtil::RegexUtil() = default; +RegexUtil::~RegexUtil() = default; + +bool RegexUtil::CheckForEmail(const std::string_view str) { + if (!email_regex_) { + email_regex_.emplace(kEmailRegex); + } + return re2::RE2::PartialMatch(str, *email_regex_); +} + +bool RegexUtil::CheckForLongNumber(const std::string_view str, + size_t max_length) { + if
(!long_number_regexes_.contains(max_length)) { + auto regex_str = base::StrCat({kLongNumberRegexPrefix, + base::NumberToString(max_length + 1), + kLongNumberRegexSuffix}); + long_number_regexes_[max_length] = std::make_unique<re2::RE2>(regex_str); + } + return re2::RE2::PartialMatch(str, *long_number_regexes_[max_length]); +} + +void RegexUtil::RemovePunctuation(std::string& str) { + if (!punctuation_regex_) { + punctuation_regex_.emplace(kPunctuationRegex); + } + re2::RE2::GlobalReplace(&str, *punctuation_regex_, ""); +} + +void RegexUtil::TransformToAlphanumeric(std::string& str) { + if (!non_alphanumeric_regex_) { + non_alphanumeric_regex_.emplace(kNotAlphanumericRegex); + } + re2::RE2::GlobalReplace(&str, *non_alphanumeric_regex_, ""); +} + +bool RegexUtil::CheckPathAndQueryStringKeywords( + const std::string_view path_and_query) { + if (path_and_query_string_keyword_regexes_.empty()) { + for (const auto& regex_str : kPathAndQueryStringCheckRegexes) { + path_and_query_string_keyword_regexes_.emplace_back(regex_str); + } + } + for (const auto& regex : path_and_query_string_keyword_regexes_) { + if (re2::RE2::PartialMatch(path_and_query, regex)) { + return true; + } + } + return false; +} + +bool RegexUtil::CheckQueryStringOrRefKeywords(const std::string_view str) { + if (query_string_and_ref_keyword_regexes_.empty()) { + for (const auto& regex_str : kQueryStringAndRefCheckRegexes) { + query_string_and_ref_keyword_regexes_.emplace_back(regex_str); + } + } + for (const auto& regex : query_string_and_ref_keyword_regexes_) { + if (re2::RE2::PartialMatch(str, regex)) { + return true; + } + } + return false; +} + +bool RegexUtil::CheckQueryHTTPCredentials(const std::string_view str) { + if (!http_password_regex_) { + http_password_regex_.emplace(kHttpPasswordRegex); + } + return re2::RE2::PartialMatch(str, *http_password_regex_); +} + +} // namespace web_discovery diff --git a/components/web_discovery/browser/regex_util.h b/components/web_discovery/browser/regex_util.h new file
mode 100644 index 000000000000..e08dad02fd6e --- /dev/null +++ b/components/web_discovery/browser/regex_util.h @@ -0,0 +1,50 @@ +/* Copyright (c) 2024 The Brave Authors. All rights reserved. + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at https://mozilla.org/MPL/2.0/. */ + +#ifndef BRAVE_COMPONENTS_WEB_DISCOVERY_BROWSER_REGEX_UTIL_H_ +#define BRAVE_COMPONENTS_WEB_DISCOVERY_BROWSER_REGEX_UTIL_H_ + +#include <deque> +#include <memory> +#include <optional> +#include <string> + +#include "base/containers/flat_map.h" +#include "third_party/re2/src/re2/re2.h" + +namespace web_discovery { + +// Lazily creates and caches pre-compiled regexes, mainly used for +// privacy risk assessment of page URLs/contents. +class RegexUtil { + public: + RegexUtil(); + ~RegexUtil(); + + RegexUtil(const RegexUtil&) = delete; + RegexUtil& operator=(const RegexUtil&) = delete; + + bool CheckForEmail(const std::string_view str); + bool CheckForLongNumber(const std::string_view str, size_t max_length); + bool CheckPathAndQueryStringKeywords(const std::string_view path_and_query); + bool CheckQueryStringOrRefKeywords(const std::string_view str); + bool CheckQueryHTTPCredentials(const std::string_view str); + void RemovePunctuation(std::string& str); + void TransformToAlphanumeric(std::string& str); + + private: + std::optional<re2::RE2> email_regex_; + // key is the max_length passed to CheckForLongNumber + base::flat_map<size_t, std::unique_ptr<re2::RE2>> long_number_regexes_; + std::deque<re2::RE2> path_and_query_string_keyword_regexes_; + std::deque<re2::RE2> query_string_and_ref_keyword_regexes_; + std::optional<re2::RE2> http_password_regex_; + std::optional<re2::RE2> punctuation_regex_; + std::optional<re2::RE2> non_alphanumeric_regex_; +}; + +} // namespace web_discovery + +#endif // BRAVE_COMPONENTS_WEB_DISCOVERY_BROWSER_REGEX_UTIL_H_ diff --git a/components/web_discovery/browser/web_discovery_service.cc b/components/web_discovery/browser/web_discovery_service.cc index b2eb0821fbc5..e25166872103 100644 ---
a/components/web_discovery/browser/web_discovery_service.cc +++ b/components/web_discovery/browser/web_discovery_service.cc @@ -5,8 +5,14 @@ #include "brave/components/web_discovery/browser/web_discovery_service.h" +#include <utility> + #include "base/functional/bind.h" +#include "base/strings/stringprintf.h" +#include "brave/components/web_discovery/browser/content_scraper.h" +#include "brave/components/web_discovery/browser/payload_generator.h" #include "brave/components/web_discovery/browser/pref_names.h" +#include "brave/components/web_discovery/browser/privacy_guard.h" #include "brave/components/web_discovery/browser/server_config_loader.h" #include "components/prefs/pref_registry_simple.h" #include "components/prefs/pref_service.h" @@ -70,6 +76,7 @@ void WebDiscoveryService::Start() { } void WebDiscoveryService::Stop() { + content_scraper_ = nullptr; server_config_loader_ = nullptr; credential_manager_ = nullptr; } @@ -92,6 +99,58 @@ void WebDiscoveryService::OnConfigChange() { credential_manager_->JoinGroups(); } -void WebDiscoveryService::OnPatternsLoaded() {} +void WebDiscoveryService::OnPatternsLoaded() { + if (!content_scraper_) { + content_scraper_ = std::make_unique<ContentScraper>( + server_config_loader_.get(), &regex_util_); + } +} + +bool WebDiscoveryService::ShouldExtractFromPage( + const GURL& url, + content::RenderFrameHost* render_frame_host) { + if (!content_scraper_) { + return false; + } + const auto* matching_url_details = + server_config_loader_->GetLastPatterns().GetMatchingURLPattern(url, + false); + if (!matching_url_details) { + return false; + } + VLOG(1) << "URL matched pattern " << matching_url_details->id << ": " << url; + if (IsPrivateURLLikely(regex_util_, url, matching_url_details)) { + return false; + } + return true; +} + +void WebDiscoveryService::StartExtractingFromPage( + const GURL& url, + mojo::Remote<mojom::DocumentExtractor> document_extractor) { + auto remote_id = + document_extractor_remotes_.Add(std::move(document_extractor)); + content_scraper_->ScrapePage( + url,
false, document_extractor_remotes_.Get(remote_id), + base::BindOnce(&WebDiscoveryService::OnContentScraped, + base::Unretained(this), false)); +} + +void WebDiscoveryService::OnContentScraped( + bool is_strict, + std::unique_ptr result) { + if (!result) { + return; + } + const auto& patterns = server_config_loader_->GetLastPatterns(); + auto* original_url_details = + patterns.GetMatchingURLPattern(result->url, is_strict); + if (!original_url_details) { + return; + } + auto payloads = GenerateQueryPayloads( + server_config_loader_->GetLastServerConfig(), regex_util_, + original_url_details, std::move(result)); +} } // namespace web_discovery diff --git a/components/web_discovery/browser/web_discovery_service.h b/components/web_discovery/browser/web_discovery_service.h index 88c6837d60bd..57d82c3f8050 100644 --- a/components/web_discovery/browser/web_discovery_service.h +++ b/components/web_discovery/browser/web_discovery_service.h @@ -12,10 +12,14 @@ #include "base/files/file_path.h" #include "base/memory/raw_ptr.h" +#include "brave/components/web_discovery/browser/content_scraper.h" #include "brave/components/web_discovery/browser/credential_manager.h" +#include "brave/components/web_discovery/browser/regex_util.h" #include "brave/components/web_discovery/browser/server_config_loader.h" +#include "brave/components/web_discovery/common/web_discovery.mojom.h" #include "components/keyed_service/core/keyed_service.h" #include "components/prefs/pref_change_registrar.h" +#include "mojo/public/cpp/bindings/remote_set.h" class PrefRegistrySimple; class PrefService; @@ -50,6 +54,13 @@ class WebDiscoveryService : public KeyedService { // KeyedService: void Shutdown() override; + // Called by `WebDiscoveryTabHelper` to notify on a page load. 
+ bool ShouldExtractFromPage(const GURL& url, + content::RenderFrameHost* render_frame_host); + void StartExtractingFromPage( + const GURL& url, + mojo::Remote document_extractor); + private: void Start(); void Stop(); @@ -59,6 +70,8 @@ class WebDiscoveryService : public KeyedService { void OnConfigChange(); void OnPatternsLoaded(); + void OnContentScraped(bool is_strict, + std::unique_ptr result); raw_ptr local_state_; raw_ptr profile_prefs_; @@ -66,10 +79,15 @@ class WebDiscoveryService : public KeyedService { base::FilePath user_data_dir_; + RegexUtil regex_util_; + scoped_refptr shared_url_loader_factory_; + mojo::RemoteSet document_extractor_remotes_; + std::unique_ptr server_config_loader_; std::unique_ptr credential_manager_; + std::unique_ptr content_scraper_; }; } // namespace web_discovery diff --git a/components/web_discovery/browser/web_discovery_tab_helper.cc b/components/web_discovery/browser/web_discovery_tab_helper.cc new file mode 100644 index 000000000000..f2b8d7fdc144 --- /dev/null +++ b/components/web_discovery/browser/web_discovery_tab_helper.cc @@ -0,0 +1,45 @@ +// Copyright (c) 2024 The Brave Authors. All rights reserved. +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this file, +// You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +#include "brave/components/web_discovery/browser/web_discovery_tab_helper.h" + +#include <utility> + +#include "brave/components/web_discovery/browser/web_discovery_service.h" +#include "content/public/browser/navigation_handle.h" +#include "content/public/browser/render_frame_host.h" +#include "services/service_manager/public/cpp/interface_provider.h" + +namespace web_discovery { + +WebDiscoveryTabHelper::WebDiscoveryTabHelper( + content::WebContents* web_contents, + WebDiscoveryService* web_discovery_service) + : content::WebContentsObserver(web_contents), + content::WebContentsUserData<WebDiscoveryTabHelper>(*web_contents), + web_discovery_service_(web_discovery_service) { + CHECK(web_discovery_service); +} + +WebDiscoveryTabHelper::~WebDiscoveryTabHelper() = default; + +void WebDiscoveryTabHelper::DidFinishLoad( + content::RenderFrameHost* render_frame_host, + const GURL& url) { + if (!render_frame_host->IsInPrimaryMainFrame()) { + return; + } + if (!web_discovery_service_->ShouldExtractFromPage(url, render_frame_host)) { + return; + } + mojo::Remote<mojom::DocumentExtractor> remote; + render_frame_host->GetRemoteInterfaces()->GetInterface( + remote.BindNewPipeAndPassReceiver()); + web_discovery_service_->StartExtractingFromPage(url, std::move(remote)); +} + +WEB_CONTENTS_USER_DATA_KEY_IMPL(WebDiscoveryTabHelper); + +} // namespace web_discovery diff --git a/components/web_discovery/browser/web_discovery_tab_helper.h b/components/web_discovery/browser/web_discovery_tab_helper.h new file mode 100644 index 000000000000..b04de03119d2 --- /dev/null +++ b/components/web_discovery/browser/web_discovery_tab_helper.h @@ -0,0 +1,47 @@ +/* Copyright (c) 2024 The Brave Authors. All rights reserved. + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at https://mozilla.org/MPL/2.0/.
*/ + +#ifndef BRAVE_COMPONENTS_WEB_DISCOVERY_BROWSER_WEB_DISCOVERY_TAB_HELPER_H_ +#define BRAVE_COMPONENTS_WEB_DISCOVERY_BROWSER_WEB_DISCOVERY_TAB_HELPER_H_ + +#include "base/memory/raw_ptr.h" +#include "content/public/browser/web_contents.h" +#include "content/public/browser/web_contents_observer.h" +#include "content/public/browser/web_contents_user_data.h" + +namespace content { +class RenderFrameHost; +} // namespace content + +namespace web_discovery { + +class WebDiscoveryService; + +class WebDiscoveryTabHelper + : public content::WebContentsObserver, + public content::WebContentsUserData<WebDiscoveryTabHelper> { + public: + WebDiscoveryTabHelper(content::WebContents* web_contents, + WebDiscoveryService* web_discovery_service); + ~WebDiscoveryTabHelper() override; + + WebDiscoveryTabHelper(const WebDiscoveryTabHelper&) = delete; + WebDiscoveryTabHelper& operator=(const WebDiscoveryTabHelper&) = delete; + + private: + friend class content::WebContentsUserData<WebDiscoveryTabHelper>; + + // content::WebContentsObserver: + void DidFinishLoad(content::RenderFrameHost* render_frame_host, + const GURL& url) override; + + raw_ptr<WebDiscoveryService> web_discovery_service_; + + WEB_CONTENTS_USER_DATA_KEY_DECL(); +}; + +} // namespace web_discovery + +#endif // BRAVE_COMPONENTS_WEB_DISCOVERY_BROWSER_WEB_DISCOVERY_TAB_HELPER_H_ diff --git a/components/web_discovery/common/BUILD.gn b/components/web_discovery/common/BUILD.gn index 36b3ff34852e..6689b2dc5251 100644 --- a/components/web_discovery/common/BUILD.gn +++ b/components/web_discovery/common/BUILD.gn @@ -4,9 +4,17 @@ # You can obtain one at https://mozilla.org/MPL/2.0/.
import("//brave/components/web_discovery/buildflags/buildflags.gni")
+import("//mojo/public/tools/bindings/mojom.gni")

 assert(enable_web_discovery_native)

+if (enable_web_discovery_native) {
+  mojom("mojom") {
+    sources = [ "web_discovery.mojom" ]
+    deps = [ "//mojo/public/mojom/base" ]
+  }
+}
+
 component("common") {
   output_name = "web_discovery_common"
   sources = [
diff --git a/components/web_discovery/common/web_discovery.mojom b/components/web_discovery/common/web_discovery.mojom
new file mode 100644
index 000000000000..94de01e15d6d
--- /dev/null
+++ b/components/web_discovery/common/web_discovery.mojom
@@ -0,0 +1,36 @@
+// Copyright (c) 2024 The Brave Authors. All rights reserved.
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this file,
+// You can obtain one at https://mozilla.org/MPL/2.0/.
+
+module web_discovery.mojom;
+
+struct SelectAttributeRequest {
+  // An optional selector for an element within the current selected element.
+  // The attribute will be retrieved from the embedded element.
+  string? sub_selector;
+  // Arbitrary ID used for storing the scraped result.
+  string key;
+  // Name of the attribute to scrape.
+  string attribute;
+};
+
+struct SelectRequest {
+  // The DOM selector for the element to scrape.
+  string root_selector;
+  // Scrape requests for the selected element.
+  array<SelectAttributeRequest> attribute_requests;
+};
+
+struct AttributeResult {
+  // The DOM selector for the scraped element.
+  string root_selector;
+  // A map of arbitrary IDs to scraped results. Value will be set to
+  // nullopt if the attribute was not available.
+  map<string, string?> attribute_values;
+};
+
+interface DocumentExtractor {
+  // Extracts DOM attributes from the current page in renderer.
+  QueryElementAttributes(array<SelectRequest> requests) => (array<AttributeResult> results);
+};
diff --git a/components/web_discovery/renderer/BUILD.gn b/components/web_discovery/renderer/BUILD.gn
new file mode 100644
index 000000000000..72d8ba366512
--- /dev/null
+++ b/components/web_discovery/renderer/BUILD.gn
@@ -0,0 +1,24 @@
+# Copyright (c) 2024 The Brave Authors. All rights reserved.
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at https://mozilla.org/MPL/2.0/.
+
+import("//brave/components/web_discovery/buildflags/buildflags.gni")
+
+assert(enable_web_discovery_native)
+
+source_set("renderer") {
+  sources = [
+    "blink_document_extractor.cc",
+    "blink_document_extractor.h",
+  ]
+
+  deps = [
+    "//base",
+    "//brave/components/web_discovery/common:mojom",
+    "//content/public/renderer",
+    "//mojo/public/cpp/bindings",
+    "//services/service_manager/public/cpp",
+    "//third_party/blink/public:blink",
+  ]
+}
diff --git a/components/web_discovery/renderer/DEPS b/components/web_discovery/renderer/DEPS
new file mode 100644
index 000000000000..082311a6a709
--- /dev/null
+++ b/components/web_discovery/renderer/DEPS
@@ -0,0 +1,5 @@
+include_rules = [
+  "+content/public/renderer",
+  "+services/service_manager/public/cpp",
+  "+third_party/blink/public/web",
+]
diff --git a/components/web_discovery/renderer/blink_document_extractor.cc b/components/web_discovery/renderer/blink_document_extractor.cc
new file mode 100644
index 000000000000..87afbaa33600
--- /dev/null
+++ b/components/web_discovery/renderer/blink_document_extractor.cc
@@ -0,0 +1,94 @@
+/* Copyright (c) 2024 The Brave Authors. All rights reserved.
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at https://mozilla.org/MPL/2.0/.
*/ + +#include "brave/components/web_discovery/renderer/blink_document_extractor.h" + +#include +#include + +#include "third_party/blink/public/web/web_element.h" +#include "third_party/blink/public/web/web_local_frame.h" + +namespace web_discovery { + +namespace { + +constexpr char kTextContentAttributeName[] = "textContent"; + +void ProcessAttributeRequests( + std::string root_selector, + const std::vector& requests, + const blink::WebVector& elements, + std::vector& results) { + for (const auto& element : elements) { + auto attributes_result = mojom::AttributeResult::New(); + attributes_result->root_selector = root_selector; + + std::optional sub_element; + const auto* element_to_query = &element; + for (const auto& request : requests) { + if (request->sub_selector) { + auto web_sub_selector = + blink::WebString::FromUTF8(*request->sub_selector); + sub_element = element.QuerySelector(web_sub_selector); + element_to_query = &*sub_element; + } + std::optional attribute_value; + if (!element_to_query->IsNull()) { + if (request->attribute == kTextContentAttributeName) { + attribute_value = element_to_query->TextContent().Utf8(); + } else { + auto attribute_name = blink::WebString::FromUTF8(request->attribute); + auto web_attribute_value = + element_to_query->GetAttribute(attribute_name); + if (!web_attribute_value.IsNull()) { + attribute_value = web_attribute_value.Utf8(); + } + } + } + attributes_result->attribute_values[request->key] = attribute_value; + } + results.push_back(std::move(attributes_result)); + } +} + +} // namespace + +BlinkDocumentExtractor::BlinkDocumentExtractor( + content::RenderFrame* render_frame, + service_manager::BinderRegistry* registry) + : content::RenderFrameObserver(render_frame), render_frame_(render_frame) { + registry->AddInterface(base::BindRepeating( + &BlinkDocumentExtractor::BindReceiver, base::Unretained(this))); +} + +BlinkDocumentExtractor::~BlinkDocumentExtractor() = default; + +void 
BlinkDocumentExtractor::QueryElementAttributes( + std::vector requests, + QueryElementAttributesCallback callback) { + blink::WebDocument document = render_frame_->GetWebFrame()->GetDocument(); + std::vector results; + for (const auto& request : requests) { + auto selector = blink::WebString::FromUTF8(request->root_selector); + auto elements = document.QuerySelectorAll(selector); + ProcessAttributeRequests(request->root_selector, + request->attribute_requests, elements, results); + } + + std::move(callback).Run(std::move(results)); +} + +void BlinkDocumentExtractor::OnDestruct() { + delete this; +} + +void BlinkDocumentExtractor::BindReceiver( + mojo::PendingReceiver receiver) { + receiver_.reset(); + receiver_.Bind(std::move(receiver)); +} + +} // namespace web_discovery diff --git a/components/web_discovery/renderer/blink_document_extractor.h b/components/web_discovery/renderer/blink_document_extractor.h new file mode 100644 index 000000000000..b502872395c3 --- /dev/null +++ b/components/web_discovery/renderer/blink_document_extractor.h @@ -0,0 +1,47 @@ +/* Copyright (c) 2024 The Brave Authors. All rights reserved. + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at https://mozilla.org/MPL/2.0/. */ + +#ifndef BRAVE_COMPONENTS_WEB_DISCOVERY_RENDERER_BLINK_DOCUMENT_EXTRACTOR_H_ +#define BRAVE_COMPONENTS_WEB_DISCOVERY_RENDERER_BLINK_DOCUMENT_EXTRACTOR_H_ + +#include + +#include "brave/components/web_discovery/common/web_discovery.mojom.h" +#include "content/public/renderer/render_frame.h" +#include "content/public/renderer/render_frame_observer.h" +#include "mojo/public/cpp/bindings/receiver.h" +#include "services/service_manager/public/cpp/binder_registry.h" + +namespace web_discovery { + +// Extracts attributes from the current page +// for the native re-implementation of Web Discovery. 
+class BlinkDocumentExtractor : public content::RenderFrameObserver, + public mojom::DocumentExtractor { + public: + BlinkDocumentExtractor(content::RenderFrame* render_frame, + service_manager::BinderRegistry* registry); + ~BlinkDocumentExtractor() override; + + BlinkDocumentExtractor(const BlinkDocumentExtractor&) = delete; + BlinkDocumentExtractor& operator=(const BlinkDocumentExtractor&) = delete; + + // mojom::DocumentExtractor: + void QueryElementAttributes(std::vector requests, + QueryElementAttributesCallback callback) override; + + // RenderFrameObserver: + void OnDestruct() override; + + private: + void BindReceiver(mojo::PendingReceiver receiver); + + raw_ptr render_frame_; + mojo::Receiver receiver_{this}; +}; + +} // namespace web_discovery + +#endif // BRAVE_COMPONENTS_WEB_DISCOVERY_RENDERER_BLINK_DOCUMENT_EXTRACTOR_H_ diff --git a/renderer/sources.gni b/renderer/sources.gni index 0fb4561218a8..6186b47d21d0 100644 --- a/renderer/sources.gni +++ b/renderer/sources.gni @@ -9,6 +9,7 @@ import("//brave/components/brave_vpn/common/buildflags/buildflags.gni") import("//brave/components/playlist/common/buildflags/buildflags.gni") import("//brave/components/speedreader/common/buildflags/buildflags.gni") import("//brave/components/tor/buildflags/buildflags.gni") +import("//brave/components/web_discovery/buildflags/buildflags.gni") import("//third_party/widevine/cdm/widevine.gni") brave_chrome_renderer_sources = [ @@ -60,6 +61,13 @@ if (enable_ai_chat) { ] } +if (enable_web_discovery_native) { + brave_chrome_renderer_deps += [ + "//brave/components/web_discovery/common", + "//brave/components/web_discovery/renderer", + ] +} + if (enable_playlist) { brave_chrome_renderer_deps += [ "//brave/components/playlist/renderer" ] } diff --git a/script/brave_license_helper.py b/script/brave_license_helper.py index 6d88e10c714f..edd295a4bb5e 100644 --- a/script/brave_license_helper.py +++ b/script/brave_license_helper.py @@ -48,7 +48,7 @@ def AddBraveCredits(root, 
prune_paths, special_cases, prune_dirs, os.path.join('brave', 'third_party', 'rust', 'challenge_bypass_ristretto_cxx'), os.path.join('brave', 'third_party', 'rust', 'constellation_cxx'), - os.path.join('brave', 'third_party', 'rust', 'document_extractor_cxx'), + os.path.join('brave', 'third_party', 'rust', 'document_extractor'), os.path.join('brave', 'third_party', 'rust', 'json_cxx'), os.path.join('brave', 'third_party', 'rust', 'filecoin_cxx'), os.path.join('brave', 'third_party', 'rust', 'skus'), diff --git a/test/BUILD.gn b/test/BUILD.gn index 6de317247535..5561383fdfd3 100644 --- a/test/BUILD.gn +++ b/test/BUILD.gn @@ -1060,6 +1060,10 @@ test("brave_browser_tests") { ] } + if (enable_web_discovery_native) { + deps += [ "//brave/components/web_discovery/browser:browser_tests" ] + } + if (ethereum_remote_client_enabled) { sources += [ "//brave/browser/extensions/brave_wallet_apitest.cc" ] diff --git a/third_party/rust/chromium_crates_io/Cargo.lock b/third_party/rust/chromium_crates_io/Cargo.lock index 9d951b56ce23..6d558b233c5c 100644 --- a/third_party/rust/chromium_crates_io/Cargo.lock +++ b/third_party/rust/chromium_crates_io/Cargo.lock @@ -405,6 +405,7 @@ dependencies = [ "brave_wallet", "challenge-bypass-ristretto-cxx", "constellation-cxx", + "document-extractor", "filecoin-cxx", "json-cxx", "skus-cxx", @@ -689,6 +690,15 @@ dependencies = [ "subtle", ] +[[package]] +name = "document-extractor" +version = "0.1.0" +dependencies = [ + "cxx", + "html5ever", + "kuchikiki", +] + [[package]] name = "dtoa" version = "0.4.8" diff --git a/third_party/rust/chromium_crates_io/Cargo.toml b/third_party/rust/chromium_crates_io/Cargo.toml index 7729a986d81c..64324853dfc0 100644 --- a/third_party/rust/chromium_crates_io/Cargo.toml +++ b/third_party/rust/chromium_crates_io/Cargo.toml @@ -27,6 +27,7 @@ constellation-cxx = "0.1" challenge-bypass-ristretto-cxx = "1" zcash = "1" anonymous-credentials = "0.1" +document-extractor = "0.1" [patch.crates-io.futures-retry_v0_5] path = 
"../futures_retry/v0_5/crate" @@ -84,6 +85,10 @@ package = "challenge-bypass-ristretto-cxx" path = "../../../components/web_discovery/browser/anonymous_credentials/rust" package = "anonymous-credentials" +[patch.crates-io.document-extractor_v0_1] +path = "../../../components/web_discovery/browser/document_extractor/rust" +package = "document-extractor" + [patch.crates-io.aho_corasick_v1] path = "../../../../third_party/rust/chromium_crates_io/vendor/aho-corasick-1.1.3" package = "aho-corasick" diff --git a/third_party/rust/document_extractor/v0_1/README.chromium b/third_party/rust/document_extractor/v0_1/README.chromium new file mode 100644 index 000000000000..8a755e33ef70 --- /dev/null +++ b/third_party/rust/document_extractor/v0_1/README.chromium @@ -0,0 +1,8 @@ +Name: document-extractor +URL: https://crates.io/crates/document-extractor +Description: +Version: 0.1.0 +Security Critical: yes +Shipped: yes +License: Mozilla Public License 2.0 +License File: