Skip to content

Commit

Permalink
Anonymize requests to robots.txt and favicon.ico
Browse files Browse the repository at this point in the history
  • Loading branch information
kovshenin committed Dec 4, 2021
1 parent 8b7da73 commit f45d842
Showing 1 changed file with 64 additions and 22 deletions.
86 changes: 64 additions & 22 deletions include/common.php
Original file line number Diff line number Diff line change
Expand Up @@ -54,29 +54,13 @@ function key() {
return $cache_key;
}

$cookies = [];
$headers = [];

// Clean up and normalize cookies.
foreach ( $_COOKIE as $key => $value ) {

// Ignore cookies that begin with a _, assume they're JS-only.
if ( substr( $key, 0, 1 ) == '_' ) {
unset( $_COOKIE[ $key ] );
continue;
}

if ( ! in_array( $key, config( 'ignore_cookies' ) ) ) {
$cookies[ $key ] = $value;
}
}

// Clean the URL/query vars
// Break the URL down.
$parsed = parse_url( 'http://example.org' . $_SERVER['REQUEST_URI'] );
$path = $parsed['path'];
$query = $parsed['query'] ?? '';

parse_str( $query, $query_vars );

// Ignore some query vars.
foreach ( $query_vars as $key => $value ) {
if ( in_array( $key, config( 'ignore_query_vars' ) ) ) {
unset( $query_vars[ $key ] );
Expand All @@ -85,14 +69,35 @@ function key() {

$cache_key = [
'https' => $_SERVER['HTTPS'] ?? '',
'method' => $_SERVER['REQUEST_METHOD'] ?? '',
'method' => strtoupper( $_SERVER['REQUEST_METHOD'] ) ?? '',
'host' => strtolower( $_SERVER['HTTP_HOST'] ?? '' ),
'path' => $path,
'query_vars' => $query_vars,
'cookies' => $cookies,
'headers' => $headers,
'cookies' => [],
];

// Return early if this request is anonymized.
if ( anonymize( $cache_key ) ) {
return $cache_key;
}

// Clean up and normalize cookies.
$cookies = [];
foreach ( $_COOKIE as $key => $value ) {

// Ignore cookies that begin with a _, assume they're JS-only.
if ( substr( $key, 0, 1 ) == '_' ) {
unset( $_COOKIE[ $key ] );
continue;
}

if ( ! in_array( $key, config( 'ignore_cookies' ) ) ) {
$cookies[ $key ] = $value;
}
}

$cache_key['cookies'] = $cookies;

return $cache_key;
}

Expand Down Expand Up @@ -150,3 +155,40 @@ function read_metadata( $f ) {
$meta = json_decode( $bytes, true );
return $meta;
}

/**
 * Anonymize a request.
 *
 * Checks whether this request should be anonymized (a GET or HEAD request
 * for /robots.txt or /favicon.ico) and, if so, alters the cache key to
 * reflect that. Also empties the $_COOKIE, $_GET, $_REQUEST and $_POST
 * super-globals so the rest of the request sees no user-specific input.
 *
 * @param array $cache_key The cache key, passed by reference. The 'method'
 *                         and 'path' entries are read; 'query_vars' and
 *                         'cookies' are emptied when the request is
 *                         anonymized. Left untouched otherwise.
 *
 * @return bool True if the request was anonymized.
 */
function anonymize( &$cache_key ) {

	// Don't anonymize POST and other requests that may alter data.
	// A missing method is treated as "not GET/HEAD" without raising a warning.
	$method = $cache_key['method'] ?? '';
	if ( $method !== 'GET' && $method !== 'HEAD' ) {
		return false;
	}

	// TODO: Maybe increase the TTL on these paths.
	$anonymous_paths = [
		'/robots.txt',
		'/favicon.ico',
	];

	// Strict comparison: only an exact string match on the path qualifies.
	if ( ! in_array( $cache_key['path'] ?? '', $anonymous_paths, true ) ) {
		return false;
	}

	// Very anonymous.
	// TODO: Clean php://input too.
	$_COOKIE  = [];
	$_GET     = [];
	$_REQUEST = [];
	$_POST    = [];

	// Strip everything user-specific from the key itself, so the result is
	// anonymous even if the caller passed in a key that already had cookies.
	$cache_key['query_vars'] = [];
	$cache_key['cookies']    = [];

	return true;
}

0 comments on commit f45d842

Please sign in to comment.