pass $Document through all the tokenizer methods

Perl-Critic · Jul 22, 2022 · commit 9b5f30a
1 parent 3cd40bb
Show file tree

Hide file tree

Showing 23 changed files with 106 additions and 114 deletions.
diff --git a/lib/PPI/Lexer.pm b/lib/PPI/Lexer.pm
@@ -239,7 +239,7 @@ sub _lex_document {
 
 	# Start the processing loop
 	my $Token;
-	while ( ref($Token = $self->_get_token) ) {
+	while ( ref($Token = $self->_get_token($Document)) ) {
 		# Add insignificant tokens directly beneath us
 		unless ( $Token->significant ) {
 			$self->_add_element( $Document, $Token );
@@ -264,7 +264,7 @@ sub _lex_document {
 			# Move the lexing down into the statement
 			$self->_add_delayed( $Document );
 			$self->_add_element( $Document, $Statement );
-			$self->_lex_statement( $Statement );
+			$self->_lex_statement( $Statement, $Document );
 
 			next;
 		}
@@ -275,7 +275,7 @@ sub _lex_document {
 			$self->_rollback( $Token );
 			my $Statement = PPI::Statement->new;
 			$self->_add_element( $Document, $Statement );
-			$self->_lex_statement( $Statement );
+			$self->_lex_statement( $Statement, $Document );
 			next;
 		}
 
@@ -384,7 +384,7 @@ my %STATEMENT_CLASSES = (
 );
 
 sub _statement {
-	my ($self, $Parent, $Token) = @_;
+	my ($self, $Parent, $Token, $Document) = @_;
 	# my $self   = shift;
 	# my $Parent = _INSTANCE(shift, 'PPI::Node')  or die "Bad param 1";
 	# my $Token  = _INSTANCE(shift, 'PPI::Token') or die "Bad param 2";
@@ -399,7 +399,7 @@ sub _statement {
 			# Is the next significant token a =>
 			# Read ahead to the next significant token
 			my $Next;
-			while ( $Next = $self->_get_token ) {
+			while ( $Next = $self->_get_token($Document) ) {
 				unless ( $Next->significant ) {
 					push @{$self->{delayed}}, $Next;
 					# $self->_delay_element( $Next );
@@ -620,7 +620,7 @@ sub _statement {
 }
 
 sub _lex_statement {
-	my ($self, $Statement) = @_;
+	my ( $self, $Statement, $Document ) = @_;
 	# my $self      = shift;
 	# my $Statement = _INSTANCE(shift, 'PPI::Statement') or die "Bad param 1";
 
@@ -631,7 +631,7 @@ sub _lex_statement {
 
 	# Begin processing tokens
 	my $Token;
-	while ( ref( $Token = $self->_get_token ) ) {
+	while ( ref( $Token = $self->_get_token($Document) ) ) {
 		# Delay whitespace and comment tokens
 		unless ( $Token->significant ) {
 			push @{$self->{delayed}}, $Token;
@@ -675,12 +675,12 @@ sub _lex_statement {
 
 		# Determine the class for the structure and create it
 		my $method    = $RESOLVE{$Token->content};
-		my $Structure = $self->$method($Statement)->new($Token);
+		my $Structure = $self->$method( $Statement, $Document )->new($Token);
 
 		# Move the lexing down into the Structure
 		$self->_add_delayed( $Statement );
 		$self->_add_element( $Statement, $Structure );
-		$self->_lex_structure( $Structure );
+		$self->_lex_structure( $Structure, $Document );
 	}
 
 	# Was it an error in the tokenizer?
@@ -1130,7 +1130,7 @@ my @CURLY_LOOKAHEAD_CLASSES = (
 # Given a parent element, and a { token to open a structure, determine
 # the class that the structure should be.
 sub _curly {
-	my ($self, $Parent) = @_;
+	my ( $self, $Parent, $Document ) = @_;
 	# my $self   = shift;
 	# my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1";
 
@@ -1230,7 +1230,7 @@ sub _curly {
 	my $Next;
 	my $position = 0;
 	my @delayed;
-	while ( $Next = $self->_get_token ) {
+	while ( $Next = $self->_get_token($Document) ) {
 		unless ( $Next->significant ) {
 			push @delayed, $Next;
 			next;
@@ -1263,13 +1263,13 @@ sub _curly {
 
 
 sub _lex_structure {
-	my ($self, $Structure) = @_;
+	my ( $self, $Structure, $Document ) = @_;
 	# my $self      = shift;
 	# my $Structure = _INSTANCE(shift, 'PPI::Structure') or die "Bad param 1";
 
 	# Start the processing loop
 	my $Token;
-	while ( ref($Token = $self->_get_token) ) {
+	while ( ref($Token = $self->_get_token($Document)) ) {
 		# Is this a direct type token
 		unless ( $Token->significant ) {
 			push @{$self->{delayed}}, $Token;
@@ -1284,11 +1284,11 @@ sub _lex_structure {
 			$self->_add_delayed( $Structure );
 
 			# Determine the class for the Statement and create it
-			my $Statement = $self->_statement($Structure, $Token)->new($Token);
+			my $Statement = $self->_statement($Structure, $Token, $Document)->new($Token);
 
 			# Move the lexing down into the Statement
 			$self->_add_element( $Structure, $Statement );
-			$self->_lex_statement( $Statement );
+			$self->_lex_statement( $Statement, $Document );
 
 			next;
 		}
@@ -1299,7 +1299,7 @@ sub _lex_structure {
 			$self->_rollback( $Token );
 			my $Statement = PPI::Statement->new;
 			$self->_add_element( $Structure, $Statement );
-			$self->_lex_statement( $Statement );
+			$self->_lex_statement( $Statement, $Document );
 			next;
 		}
 
@@ -1363,7 +1363,7 @@ sub _lex_structure {
 
 # Get the next token for processing, handling buffering
 sub _get_token {
-	shift(@{$_[0]->{buffer}}) or $_[0]->{Tokenizer}->get_token;
+	shift(@{$_[0]->{buffer}}) or $_[0]->{Tokenizer}->get_token($_[1]);
 }
 
 # Old long version of the above

diff --git a/lib/PPI/Token/ArrayIndex.pm b/lib/PPI/Token/ArrayIndex.pm
@@ -39,7 +39,7 @@ our @ISA = "PPI::Token";
 # Tokenizer Methods
 
 sub __TOKENIZER__on_char {
-	my $t = $_[1];
+	my ( undef, $t, $Document ) = @_;
 
 	# Suck in till the end of the arrayindex
 	pos $t->{line} = $t->{line_cursor};
@@ -49,7 +49,7 @@ sub __TOKENIZER__on_char {
 	}
 
 	# End of token
-	$t->_finalize_token->__TOKENIZER__on_char( $t );
+	$t->_finalize_token->__TOKENIZER__on_char( $t, $Document );
 }
 
 1;

diff --git a/lib/PPI/Token/Attribute.pm b/lib/PPI/Token/Attribute.pm
@@ -88,14 +88,13 @@ sub parameters {
 # Tokenizer Methods
 
 sub __TOKENIZER__on_char {
-	my $class = shift;
-	my $t     = shift;
+	my ( $class, $t, $Document ) = @_;
 	my $char  = substr( $t->{line}, $t->{line_cursor}, 1 );
 
 	# Unless this is a '(', we are finished.
 	unless ( $char eq '(' ) {
 		# Finalise and recheck
-		return $t->_finalize_token->__TOKENIZER__on_char( $t );
+		return $t->_finalize_token->__TOKENIZER__on_char( $t, $Document );
 	}
 
 	# This is a bar(...) style attribute.
@@ -111,7 +110,7 @@ sub __TOKENIZER__on_char {
 
 	# Found the end of the attribute
 	$t->{token}->{content} .= $string;
-	$t->_finalize_token->__TOKENIZER__on_char( $t );
+	$t->_finalize_token->__TOKENIZER__on_char( $t, $Document );
 }
 
 # Scan for a close braced, and take into account both escaping,

diff --git a/lib/PPI/Token/Cast.pm b/lib/PPI/Token/Cast.pm
@@ -52,14 +52,14 @@ our %POSTFIX = map { $_ => 1 } (
 # A cast is either % @ $ or $#
 # and also postfix dereference are %* @* $* $#*
 sub __TOKENIZER__on_char {
-	my $t    = $_[1];
+	my ( undef, $t, $Document ) = @_;
 	my $char = substr( $t->{line}, $t->{line_cursor}, 1 );
 
 	# Are we still an operator if we add the next character
 	my $content = $t->{token}->{content};
 	return 1 if $POSTFIX{ $content . $char };
 
-	$t->_finalize_token->__TOKENIZER__on_char( $t );
+	$t->_finalize_token->__TOKENIZER__on_char( $t, $Document );
 }
 
 1;

diff --git a/lib/PPI/Token/Comment.pm b/lib/PPI/Token/Comment.pm
@@ -71,11 +71,11 @@ sub significant() { '' }
 # Most stuff goes through __TOKENIZER__commit.
 # This is such a rare case, do char at a time to keep the code small
 sub __TOKENIZER__on_char {
-	my $t = $_[1];
+	my ( undef, $t, $Document ) = @_;
 
 	# Make sure not to include the trailing newline
 	if ( substr( $t->{line}, $t->{line_cursor}, 1 ) eq "\n" ) {
-		return $t->_finalize_token->__TOKENIZER__on_char( $t );
+		return $t->_finalize_token->__TOKENIZER__on_char( $t, $Document );
 	}
 
 	1;

diff --git a/lib/PPI/Token/DashedWord.pm b/lib/PPI/Token/DashedWord.pm
@@ -50,7 +50,7 @@ C<content> because C<-Foo'Bar> expands to C<-Foo::Bar>.
 # Tokenizer Methods
 
 sub __TOKENIZER__on_char {
-	my $t = $_[1];
+	my ( undef, $t, $Document ) = @_;
 
 	# Suck to the end of the dashed bareword
 	pos $t->{line} = $t->{line_cursor};
@@ -68,7 +68,7 @@ sub __TOKENIZER__on_char {
 		$t->{class} = $t->{token}->set_class( 'Word' );
 	}
 
-	$t->_finalize_token->__TOKENIZER__on_char( $t );
+	$t->_finalize_token->__TOKENIZER__on_char( $t, $Document );
 }
 
 1;

diff --git a/lib/PPI/Token/HereDoc.pm b/lib/PPI/Token/HereDoc.pm
@@ -169,7 +169,7 @@ sub _is_match_indent {
 
 # Parse in the entire here-doc in one call
 sub __TOKENIZER__on_char {
-	my ( $self, $t ) = @_;
+	my ( $self, $t, $Document ) = @_;
 
 	# We are currently located on the first char after the <<
 
@@ -182,7 +182,7 @@ sub __TOKENIZER__on_char {
 	if ( $t->{line} !~ m/\G( ~? \s* (?: "[^"]*" | '[^']*' | `[^`]*` | \\?\w+ ) )/gcx ) {
 		# Degenerate to a left-shift operation
 		$t->{token}->set_class('Operator');
-		return $t->_finalize_token->__TOKENIZER__on_char( $t );
+		return $t->_finalize_token->__TOKENIZER__on_char( $t, $Document );
 	}
 
 	# Add the rest of the token, work out what type it is,
@@ -254,7 +254,7 @@ sub __TOKENIZER__on_char {
 			}
 
 			# The HereDoc is now fully parsed
-			return $t->_finalize_token->__TOKENIZER__on_char( $t );
+			return $t->_finalize_token->__TOKENIZER__on_char( $t, $Document );
 		}
 
 		# Add the line
@@ -300,7 +300,7 @@ sub __TOKENIZER__on_char {
 	$token->{_damaged} = 1;
 
 	# The HereDoc is not fully parsed
-	$t->_finalize_token->__TOKENIZER__on_char( $t );
+	$t->_finalize_token->__TOKENIZER__on_char( $t, $Document );
 }
 
 1;

diff --git a/lib/PPI/Token/Magic.pm b/lib/PPI/Token/Magic.pm
@@ -51,7 +51,7 @@ our $VERSION = '1.277';
 our @ISA = "PPI::Token::Symbol";
 
 sub __TOKENIZER__on_char {
-	my $t = $_[1];
+	my ( undef, $t, $Document ) = @_;
 
 	# $c is the candidate new content
 	my $c = $t->{token}->{content} . substr( $t->{line}, $t->{line_cursor}, 1 );
@@ -69,13 +69,13 @@ sub __TOKENIZER__on_char {
 			if ( $c =~ /^\$\'\d$/ ) {
 				# In this case, we have a magic plus a digit.
 				# Save the CURRENT token, and rerun the on_char
-				return $t->_finalize_token->__TOKENIZER__on_char( $t );
+				return $t->_finalize_token->__TOKENIZER__on_char( $t, $Document );
 			}
 
 			# A symbol in the style $_foo or $::foo or $'foo.
 			# Overwrite the current token
 			$t->{class} = $t->{token}->set_class('Symbol');
-			return PPI::Token::Symbol->__TOKENIZER__on_char( $t );
+			return PPI::Token::Symbol->__TOKENIZER__on_char( $t, $Document );
 		}
 
 		if ( $c =~ /^\$\$\w/ ) {
@@ -107,13 +107,13 @@ sub __TOKENIZER__on_char {
 			# This is really an index dereferencing cast, although
 			# it has the same two chars as the magic variable $#.
 			$t->{class} = $t->{token}->set_class('Cast');
-			return $t->_finalize_token->__TOKENIZER__on_char( $t );
+			return $t->_finalize_token->__TOKENIZER__on_char( $t, $Document );
 		}
 
 		if ( $c =~ /^(\$\#)\w/ ) {
 			# This is really an array index thingy ( $#array )
 			$t->{token} = PPI::Token::ArrayIndex->new( "$1" );
-			return PPI::Token::ArrayIndex->__TOKENIZER__on_char( $t );
+			return PPI::Token::ArrayIndex->__TOKENIZER__on_char( $t, $Document );
 		}
 
 		if ( $c =~ /^\$\^\w+$/o ) {
@@ -169,7 +169,7 @@ sub __TOKENIZER__on_char {
 	}
 
 	# End the current magic token, and recheck
-	$t->_finalize_token->__TOKENIZER__on_char( $t );
+	$t->_finalize_token->__TOKENIZER__on_char( $t, $Document );
 }
 
 # Our version of canonical is plain simple

diff --git a/lib/PPI/Token/Number.pm b/lib/PPI/Token/Number.pm
@@ -76,8 +76,7 @@ sub _literal {
 # Tokenizer Methods
 
 sub __TOKENIZER__on_char {
-	my $class = shift;
-	my $t     = shift;
+	my ( $class, $t, $Document ) = @_;
 	my $char  = substr( $t->{line}, $t->{line_cursor}, 1 );
 
 	# Allow underscores straight through
@@ -118,7 +117,7 @@ sub __TOKENIZER__on_char {
 
 	# Doesn't fit a special case, or is after the end of the token
 	# End of token.
-	$t->_finalize_token->__TOKENIZER__on_char( $t );
+	$t->_finalize_token->__TOKENIZER__on_char( $t, $Document );
 }
 
 1;

diff --git a/lib/PPI/Token/Number/Binary.pm b/lib/PPI/Token/Number/Binary.pm
@@ -72,8 +72,7 @@ sub literal {
 # Tokenizer Methods
 
 sub __TOKENIZER__on_char {
-	my $class = shift;
-	my $t     = shift;
+	my ( undef, $t, $Document ) = @_;
 	my $char  = substr( $t->{line}, $t->{line_cursor}, 1 );
 
 	# Allow underscores straight through
@@ -89,7 +88,7 @@ sub __TOKENIZER__on_char {
 
 	# Doesn't fit a special case, or is after the end of the token
 	# End of token.
-	$t->_finalize_token->__TOKENIZER__on_char( $t );
+	$t->_finalize_token->__TOKENIZER__on_char( $t, $Document );
 }
 
 1;

diff --git a/lib/PPI/Token/Number/Exp.pm b/lib/PPI/Token/Number/Exp.pm
@@ -86,8 +86,7 @@ sub literal {
 # Tokenizer Methods
 
 sub __TOKENIZER__on_char {
-	my $class = shift;
-	my $t     = shift;
+	my ( undef, $t, $Document ) = @_;
 	my $char  = substr( $t->{line}, $t->{line_cursor}, 1 );
 
         # To get here, the token must have already encountered an 'E'
@@ -109,7 +108,7 @@ sub __TOKENIZER__on_char {
 			$t->{class} = $t->{token}->set_class('Number');
 			$t->_new_token('Operator', '.');
 			$t->_new_token('Word', $word);
-			return $t->{class}->__TOKENIZER__on_char( $t );
+			return $t->{class}->__TOKENIZER__on_char( $t, $Document );
 		}
 		else {
 			$t->{token}->{_error} = "Illegal character in exponent '$char'";
@@ -118,7 +117,7 @@ sub __TOKENIZER__on_char {
 
 	# Doesn't fit a special case, or is after the end of the token
 	# End of token.
-	$t->_finalize_token->__TOKENIZER__on_char( $t );
+	$t->_finalize_token->__TOKENIZER__on_char( $t, $Document );
 }
 
 1;