-
Notifications
You must be signed in to change notification settings - Fork 233
/
reporting_robots.cc
81 lines (73 loc) · 2.97 KB
/
reporting_robots.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
#include "reporting_robots.h"
#include <algorithm>
#include <string>
#include <vector>
#include "absl/strings/ascii.h"
#include "absl/strings/string_view.h"
namespace googlebot {
// Directives that show up frequently in real-world robots.txt files even
// though Google does not act on them. Other search engines may honor them, so
// they are parsed out to let users of the library surface them to their own
// users if they wish.
// They are kept apart from "unknown" tags: these may have legitimate use
// cases, whereas, to the best of our knowledge, any other tag we encounter
// does not (for example, "unicorn" from "unicorn: /value").
// Entries are lowercase; HandleUnknownAction lowercases the directive key
// before comparing against this list.
static const std::vector<std::string> kUnsupportedTags{
    "clean-param",
    "crawl-delay",
    "host",
    "noarchive",
    "noindex",
    "nofollow",
};
void RobotsParsingReporter::Digest(int line_num,
RobotsParsedLine::RobotsTagName parsed_tag) {
if (line_num > last_line_seen_) {
last_line_seen_ = line_num;
}
if (parsed_tag != RobotsParsedLine::kUnknown &&
parsed_tag != RobotsParsedLine::kUnused) {
++valid_directives_;
}
RobotsParsedLine& line = robots_parse_results_[line_num];
line.line_num = line_num;
line.tag_name = parsed_tag;
}
void RobotsParsingReporter::ReportLineMetadata(int line_num,
const LineMetadata& metadata) {
if (line_num > last_line_seen_) {
last_line_seen_ = line_num;
}
RobotsParsedLine& line = robots_parse_results_[line_num];
line.line_num = line_num;
line.is_typo = metadata.is_acceptable_typo;
line.metadata = metadata;
}
void RobotsParsingReporter::HandleRobotsStart() {
last_line_seen_ = 0;
valid_directives_ = 0;
unused_directives_ = 0;
}
// End-of-parse hook. Intentionally does nothing.
void RobotsParsingReporter::HandleRobotsEnd() {
  // Nothing to finalize.
}
void RobotsParsingReporter::HandleUserAgent(int line_num,
absl::string_view line_value) {
Digest(line_num, RobotsParsedLine::kUserAgent);
}
void RobotsParsingReporter::HandleAllow(int line_num,
absl::string_view line_value) {
Digest(line_num, RobotsParsedLine::kAllow);
}
void RobotsParsingReporter::HandleDisallow(int line_num,
absl::string_view line_value) {
Digest(line_num, RobotsParsedLine::kDisallow);
}
void RobotsParsingReporter::HandleSitemap(int line_num,
absl::string_view line_value) {
Digest(line_num, RobotsParsedLine::kSitemap);
}
void RobotsParsingReporter::HandleUnknownAction(int line_num,
absl::string_view action,
absl::string_view line_value) {
RobotsParsedLine::RobotsTagName rtn =
std::count(kUnsupportedTags.begin(), kUnsupportedTags.end(),
absl::AsciiStrToLower(action)) > 0
? RobotsParsedLine::kUnused
: RobotsParsedLine::kUnknown;
unused_directives_++;
Digest(line_num, rtn);
}
} // namespace googlebot