Skip to content

Commit

Permalink
pass $Document through all the tokenizer methods
Browse files Browse the repository at this point in the history
  • Loading branch information
wchristian committed Jul 22, 2022
1 parent 3cd40bb commit 9b5f30a
Show file tree
Hide file tree
Showing 23 changed files with 106 additions and 114 deletions.
34 changes: 17 additions & 17 deletions lib/PPI/Lexer.pm
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,7 @@ sub _lex_document {

# Start the processing loop
my $Token;
while ( ref($Token = $self->_get_token) ) {
while ( ref($Token = $self->_get_token($Document)) ) {
# Add insignificant tokens directly beneath us
unless ( $Token->significant ) {
$self->_add_element( $Document, $Token );
Expand All @@ -264,7 +264,7 @@ sub _lex_document {
# Move the lexing down into the statement
$self->_add_delayed( $Document );
$self->_add_element( $Document, $Statement );
$self->_lex_statement( $Statement );
$self->_lex_statement( $Statement, $Document );

next;
}
Expand All @@ -275,7 +275,7 @@ sub _lex_document {
$self->_rollback( $Token );
my $Statement = PPI::Statement->new;
$self->_add_element( $Document, $Statement );
$self->_lex_statement( $Statement );
$self->_lex_statement( $Statement, $Document );
next;
}

Expand Down Expand Up @@ -384,7 +384,7 @@ my %STATEMENT_CLASSES = (
);

sub _statement {
my ($self, $Parent, $Token) = @_;
my ($self, $Parent, $Token, $Document) = @_;
# my $self = shift;
# my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1";
# my $Token = _INSTANCE(shift, 'PPI::Token') or die "Bad param 2";
Expand All @@ -399,7 +399,7 @@ sub _statement {
# Is the next significant token a =>
# Read ahead to the next significant token
my $Next;
while ( $Next = $self->_get_token ) {
while ( $Next = $self->_get_token($Document) ) {
unless ( $Next->significant ) {
push @{$self->{delayed}}, $Next;
# $self->_delay_element( $Next );
Expand Down Expand Up @@ -620,7 +620,7 @@ sub _statement {
}

sub _lex_statement {
my ($self, $Statement) = @_;
my ( $self, $Statement, $Document ) = @_;
# my $self = shift;
# my $Statement = _INSTANCE(shift, 'PPI::Statement') or die "Bad param 1";

Expand All @@ -631,7 +631,7 @@ sub _lex_statement {

# Begin processing tokens
my $Token;
while ( ref( $Token = $self->_get_token ) ) {
while ( ref( $Token = $self->_get_token($Document) ) ) {
# Delay whitespace and comment tokens
unless ( $Token->significant ) {
push @{$self->{delayed}}, $Token;
Expand Down Expand Up @@ -675,12 +675,12 @@ sub _lex_statement {

# Determine the class for the structure and create it
my $method = $RESOLVE{$Token->content};
my $Structure = $self->$method($Statement)->new($Token);
my $Structure = $self->$method( $Statement, $Document )->new($Token);

# Move the lexing down into the Structure
$self->_add_delayed( $Statement );
$self->_add_element( $Statement, $Structure );
$self->_lex_structure( $Structure );
$self->_lex_structure( $Structure, $Document );
}

# Was it an error in the tokenizer?
Expand Down Expand Up @@ -1130,7 +1130,7 @@ my @CURLY_LOOKAHEAD_CLASSES = (
# Given a parent element, and a { token to open a structure, determine
# the class that the structure should be.
sub _curly {
my ($self, $Parent) = @_;
my ( $self, $Parent, $Document ) = @_;
# my $self = shift;
# my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1";

Expand Down Expand Up @@ -1230,7 +1230,7 @@ sub _curly {
my $Next;
my $position = 0;
my @delayed;
while ( $Next = $self->_get_token ) {
while ( $Next = $self->_get_token($Document) ) {
unless ( $Next->significant ) {
push @delayed, $Next;
next;
Expand Down Expand Up @@ -1263,13 +1263,13 @@ sub _curly {


sub _lex_structure {
my ($self, $Structure) = @_;
my ( $self, $Structure, $Document ) = @_;
# my $self = shift;
# my $Structure = _INSTANCE(shift, 'PPI::Structure') or die "Bad param 1";

# Start the processing loop
my $Token;
while ( ref($Token = $self->_get_token) ) {
while ( ref($Token = $self->_get_token($Document)) ) {
# Is this a direct type token
unless ( $Token->significant ) {
push @{$self->{delayed}}, $Token;
Expand All @@ -1284,11 +1284,11 @@ sub _lex_structure {
$self->_add_delayed( $Structure );

# Determine the class for the Statement and create it
my $Statement = $self->_statement($Structure, $Token)->new($Token);
my $Statement = $self->_statement($Structure, $Token, $Document)->new($Token);

# Move the lexing down into the Statement
$self->_add_element( $Structure, $Statement );
$self->_lex_statement( $Statement );
$self->_lex_statement( $Statement, $Document );

next;
}
Expand All @@ -1299,7 +1299,7 @@ sub _lex_structure {
$self->_rollback( $Token );
my $Statement = PPI::Statement->new;
$self->_add_element( $Structure, $Statement );
$self->_lex_statement( $Statement );
$self->_lex_statement( $Statement, $Document );
next;
}

Expand Down Expand Up @@ -1363,7 +1363,7 @@ sub _lex_structure {

# Get the next token for processing, handling buffering
sub _get_token {
shift(@{$_[0]->{buffer}}) or $_[0]->{Tokenizer}->get_token;
shift(@{$_[0]->{buffer}}) or $_[0]->{Tokenizer}->get_token($_[1]);
}

# Old long version of the above
Expand Down
4 changes: 2 additions & 2 deletions lib/PPI/Token/ArrayIndex.pm
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ our @ISA = "PPI::Token";
# Tokenizer Methods

sub __TOKENIZER__on_char {
my $t = $_[1];
my ( undef, $t, $Document ) = @_;

# Suck in till the end of the arrayindex
pos $t->{line} = $t->{line_cursor};
Expand All @@ -49,7 +49,7 @@ sub __TOKENIZER__on_char {
}

# End of token
$t->_finalize_token->__TOKENIZER__on_char( $t );
$t->_finalize_token->__TOKENIZER__on_char( $t, $Document );
}

1;
Expand Down
7 changes: 3 additions & 4 deletions lib/PPI/Token/Attribute.pm
Original file line number Diff line number Diff line change
Expand Up @@ -88,14 +88,13 @@ sub parameters {
# Tokenizer Methods

sub __TOKENIZER__on_char {
my $class = shift;
my $t = shift;
my ( $class, $t, $Document ) = @_;
my $char = substr( $t->{line}, $t->{line_cursor}, 1 );

# Unless this is a '(', we are finished.
unless ( $char eq '(' ) {
# Finalise and recheck
return $t->_finalize_token->__TOKENIZER__on_char( $t );
return $t->_finalize_token->__TOKENIZER__on_char( $t, $Document );
}

# This is a bar(...) style attribute.
Expand All @@ -111,7 +110,7 @@ sub __TOKENIZER__on_char {

# Found the end of the attribute
$t->{token}->{content} .= $string;
$t->_finalize_token->__TOKENIZER__on_char( $t );
$t->_finalize_token->__TOKENIZER__on_char( $t, $Document );
}

# Scan for a closing brace, and take into account both escaping,
Expand Down
4 changes: 2 additions & 2 deletions lib/PPI/Token/Cast.pm
Original file line number Diff line number Diff line change
Expand Up @@ -52,14 +52,14 @@ our %POSTFIX = map { $_ => 1 } (
# A cast is either % @ $ or $#
# and the postfix dereference forms are %* @* $* $#*
sub __TOKENIZER__on_char {
my $t = $_[1];
my ( undef, $t, $Document ) = @_;
my $char = substr( $t->{line}, $t->{line_cursor}, 1 );

# Are we still an operator if we add the next character
my $content = $t->{token}->{content};
return 1 if $POSTFIX{ $content . $char };

$t->_finalize_token->__TOKENIZER__on_char( $t );
$t->_finalize_token->__TOKENIZER__on_char( $t, $Document );
}

1;
Expand Down
4 changes: 2 additions & 2 deletions lib/PPI/Token/Comment.pm
Original file line number Diff line number Diff line change
Expand Up @@ -71,11 +71,11 @@ sub significant() { '' }
# Most stuff goes through __TOKENIZER__commit.
# This is such a rare case that we go a char at a time to keep the code small
sub __TOKENIZER__on_char {
my $t = $_[1];
my ( undef, $t, $Document ) = @_;

# Make sure not to include the trailing newline
if ( substr( $t->{line}, $t->{line_cursor}, 1 ) eq "\n" ) {
return $t->_finalize_token->__TOKENIZER__on_char( $t );
return $t->_finalize_token->__TOKENIZER__on_char( $t, $Document );
}

1;
Expand Down
4 changes: 2 additions & 2 deletions lib/PPI/Token/DashedWord.pm
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ C<content> because C<-Foo'Bar> expands to C<-Foo::Bar>.
# Tokenizer Methods

sub __TOKENIZER__on_char {
my $t = $_[1];
my ( undef, $t, $Document ) = @_;

# Suck to the end of the dashed bareword
pos $t->{line} = $t->{line_cursor};
Expand All @@ -68,7 +68,7 @@ sub __TOKENIZER__on_char {
$t->{class} = $t->{token}->set_class( 'Word' );
}

$t->_finalize_token->__TOKENIZER__on_char( $t );
$t->_finalize_token->__TOKENIZER__on_char( $t, $Document );
}

1;
Expand Down
8 changes: 4 additions & 4 deletions lib/PPI/Token/HereDoc.pm
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ sub _is_match_indent {

# Parse in the entire here-doc in one call
sub __TOKENIZER__on_char {
my ( $self, $t ) = @_;
my ( $self, $t, $Document ) = @_;

# We are currently located on the first char after the <<

Expand All @@ -182,7 +182,7 @@ sub __TOKENIZER__on_char {
if ( $t->{line} !~ m/\G( ~? \s* (?: "[^"]*" | '[^']*' | `[^`]*` | \\?\w+ ) )/gcx ) {
# Degenerate to a left-shift operation
$t->{token}->set_class('Operator');
return $t->_finalize_token->__TOKENIZER__on_char( $t );
return $t->_finalize_token->__TOKENIZER__on_char( $t, $Document );
}

# Add the rest of the token, work out what type it is,
Expand Down Expand Up @@ -254,7 +254,7 @@ sub __TOKENIZER__on_char {
}

# The HereDoc is now fully parsed
return $t->_finalize_token->__TOKENIZER__on_char( $t );
return $t->_finalize_token->__TOKENIZER__on_char( $t, $Document );
}

# Add the line
Expand Down Expand Up @@ -300,7 +300,7 @@ sub __TOKENIZER__on_char {
$token->{_damaged} = 1;

# The HereDoc is not fully parsed
$t->_finalize_token->__TOKENIZER__on_char( $t );
$t->_finalize_token->__TOKENIZER__on_char( $t, $Document );
}

1;
Expand Down
12 changes: 6 additions & 6 deletions lib/PPI/Token/Magic.pm
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ our $VERSION = '1.277';
our @ISA = "PPI::Token::Symbol";

sub __TOKENIZER__on_char {
my $t = $_[1];
my ( undef, $t, $Document ) = @_;

# $c is the candidate new content
my $c = $t->{token}->{content} . substr( $t->{line}, $t->{line_cursor}, 1 );
Expand All @@ -69,13 +69,13 @@ sub __TOKENIZER__on_char {
if ( $c =~ /^\$\'\d$/ ) {
# In this case, we have a magic plus a digit.
# Save the CURRENT token, and rerun the on_char
return $t->_finalize_token->__TOKENIZER__on_char( $t );
return $t->_finalize_token->__TOKENIZER__on_char( $t, $Document );
}

# A symbol in the style $_foo or $::foo or $'foo.
# Overwrite the current token
$t->{class} = $t->{token}->set_class('Symbol');
return PPI::Token::Symbol->__TOKENIZER__on_char( $t );
return PPI::Token::Symbol->__TOKENIZER__on_char( $t, $Document );
}

if ( $c =~ /^\$\$\w/ ) {
Expand Down Expand Up @@ -107,13 +107,13 @@ sub __TOKENIZER__on_char {
# This is really an index dereferencing cast, although
# it has the same two chars as the magic variable $#.
$t->{class} = $t->{token}->set_class('Cast');
return $t->_finalize_token->__TOKENIZER__on_char( $t );
return $t->_finalize_token->__TOKENIZER__on_char( $t, $Document );
}

if ( $c =~ /^(\$\#)\w/ ) {
# This is really an array index thingy ( $#array )
$t->{token} = PPI::Token::ArrayIndex->new( "$1" );
return PPI::Token::ArrayIndex->__TOKENIZER__on_char( $t );
return PPI::Token::ArrayIndex->__TOKENIZER__on_char( $t, $Document );
}

if ( $c =~ /^\$\^\w+$/o ) {
Expand Down Expand Up @@ -169,7 +169,7 @@ sub __TOKENIZER__on_char {
}

# End the current magic token, and recheck
$t->_finalize_token->__TOKENIZER__on_char( $t );
$t->_finalize_token->__TOKENIZER__on_char( $t, $Document );
}

# Our version of canonical is plain simple
Expand Down
5 changes: 2 additions & 3 deletions lib/PPI/Token/Number.pm
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,7 @@ sub _literal {
# Tokenizer Methods

sub __TOKENIZER__on_char {
my $class = shift;
my $t = shift;
my ( $class, $t, $Document ) = @_;
my $char = substr( $t->{line}, $t->{line_cursor}, 1 );

# Allow underscores straight through
Expand Down Expand Up @@ -118,7 +117,7 @@ sub __TOKENIZER__on_char {

# Doesn't fit a special case, or is after the end of the token
# End of token.
$t->_finalize_token->__TOKENIZER__on_char( $t );
$t->_finalize_token->__TOKENIZER__on_char( $t, $Document );
}

1;
Expand Down
5 changes: 2 additions & 3 deletions lib/PPI/Token/Number/Binary.pm
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,7 @@ sub literal {
# Tokenizer Methods

sub __TOKENIZER__on_char {
my $class = shift;
my $t = shift;
my ( undef, $t, $Document ) = @_;
my $char = substr( $t->{line}, $t->{line_cursor}, 1 );

# Allow underscores straight through
Expand All @@ -89,7 +88,7 @@ sub __TOKENIZER__on_char {

# Doesn't fit a special case, or is after the end of the token
# End of token.
$t->_finalize_token->__TOKENIZER__on_char( $t );
$t->_finalize_token->__TOKENIZER__on_char( $t, $Document );
}

1;
Expand Down
7 changes: 3 additions & 4 deletions lib/PPI/Token/Number/Exp.pm
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,7 @@ sub literal {
# Tokenizer Methods

sub __TOKENIZER__on_char {
my $class = shift;
my $t = shift;
my ( undef, $t, $Document ) = @_;
my $char = substr( $t->{line}, $t->{line_cursor}, 1 );

# To get here, the token must have already encountered an 'E'
Expand All @@ -109,7 +108,7 @@ sub __TOKENIZER__on_char {
$t->{class} = $t->{token}->set_class('Number');
$t->_new_token('Operator', '.');
$t->_new_token('Word', $word);
return $t->{class}->__TOKENIZER__on_char( $t );
return $t->{class}->__TOKENIZER__on_char( $t, $Document );
}
else {
$t->{token}->{_error} = "Illegal character in exponent '$char'";
Expand All @@ -118,7 +117,7 @@ sub __TOKENIZER__on_char {

# Doesn't fit a special case, or is after the end of the token
# End of token.
$t->_finalize_token->__TOKENIZER__on_char( $t );
$t->_finalize_token->__TOKENIZER__on_char( $t, $Document );
}

1;
Expand Down
Loading

0 comments on commit 9b5f30a

Please sign in to comment.