From 082b961bdd4e71cfa947f4157bfe1585b7be7ad1 Mon Sep 17 00:00:00 2001 From: Christian Walde Date: Tue, 19 Jul 2022 13:26:12 +0200 Subject: [PATCH] proof-of-concept for feature-tracking and perl sub signatures (see #273) --- lib/PPI/Document.pm | 44 +++++++--- lib/PPI/Element.pm | 18 +++- lib/PPI/Lexer.pm | 42 ++++----- lib/PPI/Statement/Include.pm | 34 ++++++++ lib/PPI/Token.pm | 1 + lib/PPI/Token/ArrayIndex.pm | 4 +- lib/PPI/Token/Attribute.pm | 7 +- lib/PPI/Token/Cast.pm | 4 +- lib/PPI/Token/Comment.pm | 4 +- lib/PPI/Token/DashedWord.pm | 4 +- lib/PPI/Token/HereDoc.pm | 8 +- lib/PPI/Token/Magic.pm | 12 +-- lib/PPI/Token/Number.pm | 5 +- lib/PPI/Token/Number/Binary.pm | 5 +- lib/PPI/Token/Number/Exp.pm | 7 +- lib/PPI/Token/Number/Float.pm | 7 +- lib/PPI/Token/Number/Hex.pm | 5 +- lib/PPI/Token/Number/Octal.pm | 5 +- lib/PPI/Token/Number/Version.pm | 9 +- lib/PPI/Token/Operator.pm | 8 +- lib/PPI/Token/Prototype.pm | 5 +- lib/PPI/Token/Signature.pm | 55 ++++++++++++ lib/PPI/Token/Structure.pm | 2 +- lib/PPI/Token/Symbol.pm | 12 +-- lib/PPI/Token/Unknown.pm | 30 +++---- lib/PPI/Token/Whitespace.pm | 27 ++++-- lib/PPI/Token/Word.pm | 21 +++-- lib/PPI/Tokenizer.pm | 12 +-- t/feature_tracking.t | 145 ++++++++++++++++++++++++++++++++ 29 files changed, 409 insertions(+), 133 deletions(-) create mode 100644 lib/PPI/Token/Signature.pm create mode 100644 t/feature_tracking.t diff --git a/lib/PPI/Document.pm b/lib/PPI/Document.pm index 950c1250..3681e638 100644 --- a/lib/PPI/Document.pm +++ b/lib/PPI/Document.pm @@ -128,17 +128,22 @@ In all cases, the document is considered to be "anonymous" and not tied back to where it was created from. Specifically, if you create a PPI::Document from a filename, the document will B remember where it was created from. +Returns a C object, or C if parsing fails. +L objects can also be thrown if there are parsing problems. + The constructor also takes attribute flags. -At this time, the only available attribute is the C flag. +=head3 readonly -Setting C to true will allow various systems to provide -additional optimisations and caching. Note that because C is an -optimisation flag, it is off by default and you will need to explicitly -enable it. +Setting C to true will allow various systems to provide additional +optimisations and caching. Note that because C is an optimisation +flag, it is off by default and you will need to explicitly enable it. -Returns a C object, or C if parsing fails. -L objects can also be thrown if there are parsing problems. +=head3 feature_mods + +Setting feature_mods with a hashref allows defining perl parsing features to be +enabled for the whole document. (e.g. when the code is assumed to be run as a +oneliner) =cut @@ -181,25 +186,25 @@ sub new { my $document = $CACHE->get_document($file_contents); return $class->_setattr( $document, %attr ) if $document; - $document = PPI::Lexer->lex_source( $$file_contents ); + $document = PPI::Lexer->lex_source( $$file_contents, %attr ); if ( $document ) { # Save in the cache $CACHE->store_document( $document ); - return $class->_setattr( $document, %attr ); + return $document; } } else { - my $document = PPI::Lexer->lex_file( $source ); + my $document = PPI::Lexer->lex_file( $source, %attr ); return $class->_setattr( $document, %attr ) if $document; } } elsif ( _SCALAR0($source) ) { - my $document = PPI::Lexer->lex_source( $$source ); - return $class->_setattr( $document, %attr ) if $document; + my $document = PPI::Lexer->lex_source( $$source, %attr ); + return $document if $document; } elsif ( _ARRAY0($source) ) { $source = join '', map { "$_\n" } @$source; - my $document = PPI::Lexer->lex_source( $source ); - return $class->_setattr( $document, %attr ) if $document; + my $document = PPI::Lexer->lex_source( $source, %attr ); + return $document if $document; } else { $class->_error("Unknown object or reference was passed to PPI::Document::new"); @@ -229,6 +234,7 @@ sub _setattr { my ($class, $document, %attr) = @_; $document->{readonly} = !! $attr{readonly}; $document->{filename} = $attr{filename}; + $document->{feature_mods} = $attr{feature_mods}; return $document; } @@ -344,6 +350,16 @@ sub tab_width { $self->{tab_width} = shift; } +=head2 feature_mods { feature_name => $enabled } + +=cut + +sub feature_mods { + my $self = shift; + return $self->{feature_mods} unless @_; + $self->{feature_mods} = shift; +} + =pod =head2 save diff --git a/lib/PPI/Element.pm b/lib/PPI/Element.pm index 2ee74d53..5b16a285 100644 --- a/lib/PPI/Element.pm +++ b/lib/PPI/Element.pm @@ -467,9 +467,25 @@ sub previous_token { } } +sub presumed_features { + my ($self) = @_; + + my @feature_mods; + my $walker = $self; + while ($walker) { + my $sib_walk = $walker; + while ($sib_walk) { + push @feature_mods, $sib_walk if $sib_walk->can("feature_mods"); + $sib_walk = $sib_walk->sprevious_sibling; + } + $walker = $walker->parent; + } + my %feature_mods = map %{$_}, reverse grep defined, map $_->feature_mods, + @feature_mods; - + return \%feature_mods; +} ##################################################################### # Manipulation diff --git a/lib/PPI/Lexer.pm b/lib/PPI/Lexer.pm index ce547c3d..c36c81db 100644 --- a/lib/PPI/Lexer.pm +++ b/lib/PPI/Lexer.pm @@ -143,6 +143,7 @@ sub lex_file { unless ( defined $file ) { return $self->_error("Did not pass a filename to PPI::Lexer::lex_file"); } + my %args = @_; # Create the Tokenizer my $Tokenizer = eval { @@ -154,7 +155,7 @@ sub lex_file { return $self->_error( $errstr ); } - $self->lex_tokenizer( $Tokenizer ); + $self->lex_tokenizer( $Tokenizer, %args ); } =pod @@ -175,6 +176,7 @@ sub lex_source { unless ( defined $source and not ref $source ) { return $self->_error("Did not pass a string to PPI::Lexer::lex_source"); } + my %args = @_; # Create the Tokenizer and hand off to the next method my $Tokenizer = eval { @@ -186,7 +188,7 @@ sub lex_source { return $self->_error( $errstr ); } - $self->lex_tokenizer( $Tokenizer ); + $self->lex_tokenizer( $Tokenizer, %args ); } =pod @@ -206,9 +208,11 @@ sub lex_tokenizer { return $self->_error( "Did not pass a PPI::Tokenizer object to PPI::Lexer::lex_tokenizer" ) unless $Tokenizer; + my %args = @_; # Create the empty document my $Document = PPI::Document->new; + ref($Document)->_setattr( $Document, %args ) if keys %args; # Lex the token stream into the document $self->{Tokenizer} = $Tokenizer; @@ -239,7 +243,7 @@ sub _lex_document { # Start the processing loop my $Token; - while ( ref($Token = $self->_get_token) ) { + while ( ref($Token = $self->_get_token($Document)) ) { # Add insignificant tokens directly beneath us unless ( $Token->significant ) { $self->_add_element( $Document, $Token ); @@ -264,7 +268,7 @@ sub _lex_document { # Move the lexing down into the statement $self->_add_delayed( $Document ); $self->_add_element( $Document, $Statement ); - $self->_lex_statement( $Statement ); + $self->_lex_statement( $Statement, $Document ); next; } @@ -275,7 +279,7 @@ sub _lex_document { $self->_rollback( $Token ); my $Statement = PPI::Statement->new; $self->_add_element( $Document, $Statement ); - $self->_lex_statement( $Statement ); + $self->_lex_statement( $Statement, $Document ); next; } @@ -384,7 +388,7 @@ my %STATEMENT_CLASSES = ( ); sub _statement { - my ($self, $Parent, $Token) = @_; + my ($self, $Parent, $Token, $Document) = @_; # my $self = shift; # my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1"; # my $Token = _INSTANCE(shift, 'PPI::Token') or die "Bad param 2"; @@ -399,7 +403,7 @@ sub _statement { # Is the next significant token a => # Read ahead to the next significant token my $Next; - while ( $Next = $self->_get_token ) { + while ( $Next = $self->_get_token($Document) ) { unless ( $Next->significant ) { push @{$self->{delayed}}, $Next; # $self->_delay_element( $Next ); @@ -620,7 +624,7 @@ sub _statement { } sub _lex_statement { - my ($self, $Statement) = @_; + my ( $self, $Statement, $Document ) = @_; # my $self = shift; # my $Statement = _INSTANCE(shift, 'PPI::Statement') or die "Bad param 1"; @@ -631,7 +635,7 @@ sub _lex_statement { # Begin processing tokens my $Token; - while ( ref( $Token = $self->_get_token ) ) { + while ( ref( $Token = $self->_get_token($Document) ) ) { # Delay whitespace and comment tokens unless ( $Token->significant ) { push @{$self->{delayed}}, $Token; @@ -675,12 +679,12 @@ sub _lex_statement { # Determine the class for the structure and create it my $method = $RESOLVE{$Token->content}; - my $Structure = $self->$method($Statement)->new($Token); + my $Structure = $self->$method( $Statement, $Document )->new($Token); # Move the lexing down into the Structure $self->_add_delayed( $Statement ); $self->_add_element( $Statement, $Structure ); - $self->_lex_structure( $Structure ); + $self->_lex_structure( $Structure, $Document ); } # Was it an error in the tokenizer? @@ -1130,7 +1134,7 @@ my @CURLY_LOOKAHEAD_CLASSES = ( # Given a parent element, and a { token to open a structure, determine # the class that the structure should be. sub _curly { - my ($self, $Parent) = @_; + my ( $self, $Parent, $Document ) = @_; # my $self = shift; # my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1"; @@ -1230,7 +1234,7 @@ sub _curly { my $Next; my $position = 0; my @delayed; - while ( $Next = $self->_get_token ) { + while ( $Next = $self->_get_token($Document) ) { unless ( $Next->significant ) { push @delayed, $Next; next; @@ -1263,13 +1267,13 @@ sub _curly { sub _lex_structure { - my ($self, $Structure) = @_; + my ( $self, $Structure, $Document ) = @_; # my $self = shift; # my $Structure = _INSTANCE(shift, 'PPI::Structure') or die "Bad param 1"; # Start the processing loop my $Token; - while ( ref($Token = $self->_get_token) ) { + while ( ref($Token = $self->_get_token($Document)) ) { # Is this a direct type token unless ( $Token->significant ) { push @{$self->{delayed}}, $Token; @@ -1284,11 +1288,11 @@ sub _lex_structure { $self->_add_delayed( $Structure ); # Determine the class for the Statement and create it - my $Statement = $self->_statement($Structure, $Token)->new($Token); + my $Statement = $self->_statement($Structure, $Token, $Document)->new($Token); # Move the lexing down into the Statement $self->_add_element( $Structure, $Statement ); - $self->_lex_statement( $Statement ); + $self->_lex_statement( $Statement, $Document ); next; } @@ -1299,7 +1303,7 @@ sub _lex_structure { $self->_rollback( $Token ); my $Statement = PPI::Statement->new; $self->_add_element( $Structure, $Statement ); - $self->_lex_statement( $Statement ); + $self->_lex_statement( $Statement, $Document ); next; } @@ -1363,7 +1367,7 @@ sub _lex_structure { # Get the next token for processing, handling buffering sub _get_token { - shift(@{$_[0]->{buffer}}) or $_[0]->{Tokenizer}->get_token; + shift(@{$_[0]->{buffer}}) or $_[0]->{Tokenizer}->get_token($_[1]); } # Old long version of the above diff --git a/lib/PPI/Statement/Include.pm b/lib/PPI/Statement/Include.pm index ae8d3120..156a6cf1 100644 --- a/lib/PPI/Statement/Include.pm +++ b/lib/PPI/Statement/Include.pm @@ -48,6 +48,8 @@ use strict; use PPI::Statement (); use PPI::Statement::Include::Perl6 (); +use feature (); + our $VERSION = '1.277'; our @ISA = "PPI::Statement"; @@ -236,6 +238,38 @@ sub arguments { return @args; } +sub feature_mods { + my ($self) = @_; + + my %known = ( signatures => 1 ); + + return if $self->type eq "require"; + + if ( my $perl_version = $self->version ) { + ## crude proof of concept hack due to above + return { signatures => 1 } if $perl_version >= 5.035; + + # # tried using feature.pm here, but it is impossible to install + # # future versions of it, so e.g. a 5.20 install cannot know about + # # 5.36 features + # $perl_version = join ".", # + # ( split /\./, $perl_version )[0], + # 0 + ( split /\./, $perl_version )[1]; + # my $bundle = $feature::feature_bundle{$perl_version}; + # return { map +( $_ => 1 ), %{$bundle} }; + } + + if ( $self->module eq "feature" ) { + my @features = grep $known{$_}, map $_->literal, + map $_->isa("PPI::Structure::List") ? $_->children : $_, + $self->arguments; + my $on_or_off = $self->type eq "use" ? 1 : 0; + return { map +( $_ => $on_or_off ), @features } if @features; + } + + return; +} + 1; =pod diff --git a/lib/PPI/Token.pm b/lib/PPI/Token.pm index e261fb18..41609116 100644 --- a/lib/PPI/Token.pm +++ b/lib/PPI/Token.pm @@ -70,6 +70,7 @@ use PPI::Token::Separator (); use PPI::Token::Data (); use PPI::Token::End (); use PPI::Token::Prototype (); +use PPI::Token::Signature (); use PPI::Token::Attribute (); use PPI::Token::Unknown (); diff --git a/lib/PPI/Token/ArrayIndex.pm b/lib/PPI/Token/ArrayIndex.pm index 84005710..495e01dd 100644 --- a/lib/PPI/Token/ArrayIndex.pm +++ b/lib/PPI/Token/ArrayIndex.pm @@ -39,7 +39,7 @@ our @ISA = "PPI::Token"; # Tokenizer Methods sub __TOKENIZER__on_char { - my $t = $_[1]; + my ( undef, $t, $Document ) = @_; # Suck in till the end of the arrayindex pos $t->{line} = $t->{line_cursor}; @@ -49,7 +49,7 @@ sub __TOKENIZER__on_char { } # End of token - $t->_finalize_token->__TOKENIZER__on_char( $t ); + $t->_finalize_token->__TOKENIZER__on_char( $t, $Document ); } 1; diff --git a/lib/PPI/Token/Attribute.pm b/lib/PPI/Token/Attribute.pm index cb5e80a7..74d30283 100644 --- a/lib/PPI/Token/Attribute.pm +++ b/lib/PPI/Token/Attribute.pm @@ -88,14 +88,13 @@ sub parameters { # Tokenizer Methods sub __TOKENIZER__on_char { - my $class = shift; - my $t = shift; + my ( $class, $t, $Document ) = @_; my $char = substr( $t->{line}, $t->{line_cursor}, 1 ); # Unless this is a '(', we are finished. unless ( $char eq '(' ) { # Finalise and recheck - return $t->_finalize_token->__TOKENIZER__on_char( $t ); + return $t->_finalize_token->__TOKENIZER__on_char( $t, $Document ); } # This is a bar(...) style attribute. @@ -111,7 +110,7 @@ sub __TOKENIZER__on_char { # Found the end of the attribute $t->{token}->{content} .= $string; - $t->_finalize_token->__TOKENIZER__on_char( $t ); + $t->_finalize_token->__TOKENIZER__on_char( $t, $Document ); } # Scan for a close braced, and take into account both escaping, diff --git a/lib/PPI/Token/Cast.pm b/lib/PPI/Token/Cast.pm index f0144b14..b5030309 100644 --- a/lib/PPI/Token/Cast.pm +++ b/lib/PPI/Token/Cast.pm @@ -52,14 +52,14 @@ our %POSTFIX = map { $_ => 1 } ( # A cast is either % @ $ or $# # and also postfix dereference are %* @* $* $#* sub __TOKENIZER__on_char { - my $t = $_[1]; + my ( undef, $t, $Document ) = @_; my $char = substr( $t->{line}, $t->{line_cursor}, 1 ); # Are we still an operator if we add the next character my $content = $t->{token}->{content}; return 1 if $POSTFIX{ $content . $char }; - $t->_finalize_token->__TOKENIZER__on_char( $t ); + $t->_finalize_token->__TOKENIZER__on_char( $t, $Document ); } 1; diff --git a/lib/PPI/Token/Comment.pm b/lib/PPI/Token/Comment.pm index ba0f82c0..8750c419 100644 --- a/lib/PPI/Token/Comment.pm +++ b/lib/PPI/Token/Comment.pm @@ -71,11 +71,11 @@ sub significant() { '' } # Most stuff goes through __TOKENIZER__commit. # This is such a rare case, do char at a time to keep the code small sub __TOKENIZER__on_char { - my $t = $_[1]; + my ( undef, $t, $Document ) = @_; # Make sure not to include the trailing newline if ( substr( $t->{line}, $t->{line_cursor}, 1 ) eq "\n" ) { - return $t->_finalize_token->__TOKENIZER__on_char( $t ); + return $t->_finalize_token->__TOKENIZER__on_char( $t, $Document ); } 1; diff --git a/lib/PPI/Token/DashedWord.pm b/lib/PPI/Token/DashedWord.pm index 61ca51b6..7843192e 100644 --- a/lib/PPI/Token/DashedWord.pm +++ b/lib/PPI/Token/DashedWord.pm @@ -50,7 +50,7 @@ C because C<-Foo'Bar> expands to C<-Foo::Bar>. # Tokenizer Methods sub __TOKENIZER__on_char { - my $t = $_[1]; + my ( undef, $t, $Document ) = @_; # Suck to the end of the dashed bareword pos $t->{line} = $t->{line_cursor}; @@ -68,7 +68,7 @@ sub __TOKENIZER__on_char { $t->{class} = $t->{token}->set_class( 'Word' ); } - $t->_finalize_token->__TOKENIZER__on_char( $t ); + $t->_finalize_token->__TOKENIZER__on_char( $t, $Document ); } 1; diff --git a/lib/PPI/Token/HereDoc.pm b/lib/PPI/Token/HereDoc.pm index 6d9ab5d6..5f88735b 100644 --- a/lib/PPI/Token/HereDoc.pm +++ b/lib/PPI/Token/HereDoc.pm @@ -169,7 +169,7 @@ sub _is_match_indent { # Parse in the entire here-doc in one call sub __TOKENIZER__on_char { - my ( $self, $t ) = @_; + my ( $self, $t, $Document ) = @_; # We are currently located on the first char after the << @@ -182,7 +182,7 @@ sub __TOKENIZER__on_char { if ( $t->{line} !~ m/\G( ~? \s* (?: "[^"]*" | '[^']*' | `[^`]*` | \\?\w+ ) )/gcx ) { # Degenerate to a left-shift operation $t->{token}->set_class('Operator'); - return $t->_finalize_token->__TOKENIZER__on_char( $t ); + return $t->_finalize_token->__TOKENIZER__on_char( $t, $Document ); } # Add the rest of the token, work out what type it is, @@ -254,7 +254,7 @@ sub __TOKENIZER__on_char { } # The HereDoc is now fully parsed - return $t->_finalize_token->__TOKENIZER__on_char( $t ); + return $t->_finalize_token->__TOKENIZER__on_char( $t, $Document ); } # Add the line @@ -300,7 +300,7 @@ sub __TOKENIZER__on_char { $token->{_damaged} = 1; # The HereDoc is not fully parsed - $t->_finalize_token->__TOKENIZER__on_char( $t ); + $t->_finalize_token->__TOKENIZER__on_char( $t, $Document ); } 1; diff --git a/lib/PPI/Token/Magic.pm b/lib/PPI/Token/Magic.pm index 208c493e..37747197 100644 --- a/lib/PPI/Token/Magic.pm +++ b/lib/PPI/Token/Magic.pm @@ -51,7 +51,7 @@ our $VERSION = '1.277'; our @ISA = "PPI::Token::Symbol"; sub __TOKENIZER__on_char { - my $t = $_[1]; + my ( undef, $t, $Document ) = @_; # $c is the candidate new content my $c = $t->{token}->{content} . substr( $t->{line}, $t->{line_cursor}, 1 ); @@ -69,13 +69,13 @@ sub __TOKENIZER__on_char { if ( $c =~ /^\$\'\d$/ ) { # In this case, we have a magic plus a digit. # Save the CURRENT token, and rerun the on_char - return $t->_finalize_token->__TOKENIZER__on_char( $t ); + return $t->_finalize_token->__TOKENIZER__on_char( $t, $Document ); } # A symbol in the style $_foo or $::foo or $'foo. # Overwrite the current token $t->{class} = $t->{token}->set_class('Symbol'); - return PPI::Token::Symbol->__TOKENIZER__on_char( $t ); + return PPI::Token::Symbol->__TOKENIZER__on_char( $t, $Document ); } if ( $c =~ /^\$\$\w/ ) { @@ -107,13 +107,13 @@ sub __TOKENIZER__on_char { # This is really an index dereferencing cast, although # it has the same two chars as the magic variable $#. $t->{class} = $t->{token}->set_class('Cast'); - return $t->_finalize_token->__TOKENIZER__on_char( $t ); + return $t->_finalize_token->__TOKENIZER__on_char( $t, $Document ); } if ( $c =~ /^(\$\#)\w/ ) { # This is really an array index thingy ( $#array ) $t->{token} = PPI::Token::ArrayIndex->new( "$1" ); - return PPI::Token::ArrayIndex->__TOKENIZER__on_char( $t ); + return PPI::Token::ArrayIndex->__TOKENIZER__on_char( $t, $Document ); } if ( $c =~ /^\$\^\w+$/o ) { @@ -169,7 +169,7 @@ sub __TOKENIZER__on_char { } # End the current magic token, and recheck - $t->_finalize_token->__TOKENIZER__on_char( $t ); + $t->_finalize_token->__TOKENIZER__on_char( $t, $Document ); } # Our version of canonical is plain simple diff --git a/lib/PPI/Token/Number.pm b/lib/PPI/Token/Number.pm index df3901f4..c9dd11c3 100644 --- a/lib/PPI/Token/Number.pm +++ b/lib/PPI/Token/Number.pm @@ -76,8 +76,7 @@ sub _literal { # Tokenizer Methods sub __TOKENIZER__on_char { - my $class = shift; - my $t = shift; + my ( $class, $t, $Document ) = @_; my $char = substr( $t->{line}, $t->{line_cursor}, 1 ); # Allow underscores straight through @@ -118,7 +117,7 @@ sub __TOKENIZER__on_char { # Doesn't fit a special case, or is after the end of the token # End of token. - $t->_finalize_token->__TOKENIZER__on_char( $t ); + $t->_finalize_token->__TOKENIZER__on_char( $t, $Document ); } 1; diff --git a/lib/PPI/Token/Number/Binary.pm b/lib/PPI/Token/Number/Binary.pm index 5ade9397..212fb601 100644 --- a/lib/PPI/Token/Number/Binary.pm +++ b/lib/PPI/Token/Number/Binary.pm @@ -72,8 +72,7 @@ sub literal { # Tokenizer Methods sub __TOKENIZER__on_char { - my $class = shift; - my $t = shift; + my ( undef, $t, $Document ) = @_; my $char = substr( $t->{line}, $t->{line_cursor}, 1 ); # Allow underscores straight through @@ -89,7 +88,7 @@ sub __TOKENIZER__on_char { # Doesn't fit a special case, or is after the end of the token # End of token. - $t->_finalize_token->__TOKENIZER__on_char( $t ); + $t->_finalize_token->__TOKENIZER__on_char( $t, $Document ); } 1; diff --git a/lib/PPI/Token/Number/Exp.pm b/lib/PPI/Token/Number/Exp.pm index 06db05d2..0955ed0f 100644 --- a/lib/PPI/Token/Number/Exp.pm +++ b/lib/PPI/Token/Number/Exp.pm @@ -86,8 +86,7 @@ sub literal { # Tokenizer Methods sub __TOKENIZER__on_char { - my $class = shift; - my $t = shift; + my ( undef, $t, $Document ) = @_; my $char = substr( $t->{line}, $t->{line_cursor}, 1 ); # To get here, the token must have already encountered an 'E' @@ -109,7 +108,7 @@ sub __TOKENIZER__on_char { $t->{class} = $t->{token}->set_class('Number'); $t->_new_token('Operator', '.'); $t->_new_token('Word', $word); - return $t->{class}->__TOKENIZER__on_char( $t ); + return $t->{class}->__TOKENIZER__on_char( $t, $Document ); } else { $t->{token}->{_error} = "Illegal character in exponent '$char'"; @@ -118,7 +117,7 @@ sub __TOKENIZER__on_char { # Doesn't fit a special case, or is after the end of the token # End of token. - $t->_finalize_token->__TOKENIZER__on_char( $t ); + $t->_finalize_token->__TOKENIZER__on_char( $t, $Document ); } 1; diff --git a/lib/PPI/Token/Number/Float.pm b/lib/PPI/Token/Number/Float.pm index 3ef061f7..4909dae7 100644 --- a/lib/PPI/Token/Number/Float.pm +++ b/lib/PPI/Token/Number/Float.pm @@ -70,9 +70,8 @@ sub literal { # Tokenizer Methods sub __TOKENIZER__on_char { - my $class = shift; - my $t = shift; - my $char = substr( $t->{line}, $t->{line_cursor}, 1 ); + my ( $class, $t, $Document ) = @_; + my $char = substr( $t->{line}, $t->{line_cursor}, 1 ); # Allow underscores straight through return 1 if $char eq '_'; @@ -112,7 +111,7 @@ sub __TOKENIZER__on_char { # Doesn't fit a special case, or is after the end of the token # End of token. - $t->_finalize_token->__TOKENIZER__on_char( $t ); + $t->_finalize_token->__TOKENIZER__on_char( $t, $Document ); } 1; diff --git a/lib/PPI/Token/Number/Hex.pm b/lib/PPI/Token/Number/Hex.pm index c0b4a0ed..7feb5c87 100644 --- a/lib/PPI/Token/Number/Hex.pm +++ b/lib/PPI/Token/Number/Hex.pm @@ -67,8 +67,7 @@ sub literal { # Tokenizer Methods sub __TOKENIZER__on_char { - my $class = shift; - my $t = shift; + my ( undef, $t, $Document ) = @_; my $char = substr( $t->{line}, $t->{line_cursor}, 1 ); # Allow underscores straight through @@ -80,7 +79,7 @@ sub __TOKENIZER__on_char { # Doesn't fit a special case, or is after the end of the token # End of token. - $t->_finalize_token->__TOKENIZER__on_char( $t ); + $t->_finalize_token->__TOKENIZER__on_char( $t, $Document ); } 1; diff --git a/lib/PPI/Token/Number/Octal.pm b/lib/PPI/Token/Number/Octal.pm index e1801710..6a88c4ca 100644 --- a/lib/PPI/Token/Number/Octal.pm +++ b/lib/PPI/Token/Number/Octal.pm @@ -68,8 +68,7 @@ sub literal { # Tokenizer Methods sub __TOKENIZER__on_char { - my $class = shift; - my $t = shift; + my ( undef, $t, $Document ) = @_; my $char = substr( $t->{line}, $t->{line_cursor}, 1 ); # Allow underscores straight through @@ -85,7 +84,7 @@ sub __TOKENIZER__on_char { # Doesn't fit a special case, or is after the end of the token # End of token. - $t->_finalize_token->__TOKENIZER__on_char( $t ); + $t->_finalize_token->__TOKENIZER__on_char( $t, $Document ); } 1; diff --git a/lib/PPI/Token/Number/Version.pm b/lib/PPI/Token/Number/Version.pm index 3494439e..be099b98 100644 --- a/lib/PPI/Token/Number/Version.pm +++ b/lib/PPI/Token/Number/Version.pm @@ -70,8 +70,7 @@ sub literal { # Tokenizer Methods sub __TOKENIZER__on_char { - my $class = shift; - my $t = shift; + my ( undef, $t, $Document ) = @_; my $char = substr( $t->{line}, $t->{line_cursor}, 1 ); # Allow digits @@ -106,11 +105,11 @@ sub __TOKENIZER__on_char { # Doesn't fit a special case, or is after the end of the token # End of token. - $t->_finalize_token->__TOKENIZER__on_char( $t ); + $t->_finalize_token->__TOKENIZER__on_char( $t, $Document ); } sub __TOKENIZER__commit { - my $t = $_[1]; + my ( undef, $t, $Document ) = @_; # Capture the rest of the token pos $t->{line} = $t->{line_cursor}; @@ -128,7 +127,7 @@ sub __TOKENIZER__commit { # This is a v-string $t->{line_cursor} += length $content; $t->_new_token( 'Number::Version', $content ); - $t->_finalize_token->__TOKENIZER__on_char($t); + $t->_finalize_token->__TOKENIZER__on_char( $t, $Document ); } 1; diff --git a/lib/PPI/Token/Operator.pm b/lib/PPI/Token/Operator.pm index b3178da3..699c88c2 100644 --- a/lib/PPI/Token/Operator.pm +++ b/lib/PPI/Token/Operator.pm @@ -55,7 +55,7 @@ our @ISA = "PPI::Token"; # Tokenizer Methods sub __TOKENIZER__on_char { - my $t = $_[1]; + my ( undef, $t, $Document ) = @_; my $char = substr( $t->{line}, $t->{line_cursor}, 1 ); # Are we still an operator if we add the next character @@ -72,7 +72,7 @@ sub __TOKENIZER__on_char { if ( $char =~ /^[0-9]$/ ) { # This is a decimal number $t->{class} = $t->{token}->set_class('Number::Float'); - return $t->{class}->__TOKENIZER__on_char( $t ); + return $t->{class}->__TOKENIZER__on_char( $t, $Document ); } } @@ -87,7 +87,7 @@ sub __TOKENIZER__on_char { # This is a here-doc. # Change the class and move to the HereDoc's own __TOKENIZER__on_char method. $t->{class} = $t->{token}->set_class('HereDoc'); - return $t->{class}->__TOKENIZER__on_char( $t ); + return $t->{class}->__TOKENIZER__on_char( $t, $Document ); } } @@ -96,7 +96,7 @@ sub __TOKENIZER__on_char { if $content eq '<>' or $content eq '<<>>'; # Finalize normally - $t->_finalize_token->__TOKENIZER__on_char( $t ); + $t->_finalize_token->__TOKENIZER__on_char( $t, $Document ); } 1; diff --git a/lib/PPI/Token/Prototype.pm b/lib/PPI/Token/Prototype.pm index 1f654b7c..acffcd5e 100644 --- a/lib/PPI/Token/Prototype.pm +++ b/lib/PPI/Token/Prototype.pm @@ -54,8 +54,7 @@ our $VERSION = '1.277'; our @ISA = "PPI::Token"; sub __TOKENIZER__on_char { - my $class = shift; - my $t = shift; + my ( undef, $t, $Document ) = @_; # Suck in until we find the closing paren (or the end of line) pos $t->{line} = $t->{line_cursor}; @@ -67,7 +66,7 @@ sub __TOKENIZER__on_char { return 0 unless $1 =~ /\)$/; # Found the closing paren - $t->_finalize_token->__TOKENIZER__on_char( $t ); + $t->_finalize_token->__TOKENIZER__on_char( $t, $Document ); } =pod diff --git a/lib/PPI/Token/Signature.pm b/lib/PPI/Token/Signature.pm new file mode 100644 index 00000000..0ecf8556 --- /dev/null +++ b/lib/PPI/Token/Signature.pm @@ -0,0 +1,55 @@ +package PPI::Token::Signature; + +=pod + +=head1 NAME + +PPI::Token::Signature - A subroutine signature descriptor + +=head1 INHERITANCE + + PPI::Token::Signature + isa PPI::Token::Prototype + isa PPI::Token + isa PPI::Element + +=head1 SYNOPSIS + + TODO: document + +=head1 DESCRIPTION + + TODO: document + +=cut + +use strict; +use PPI::Token::Prototype (); + +our $VERSION = '1.276'; + +our @ISA = "PPI::Token::Prototype"; + +1; + +=pod + +=head1 SUPPORT + +See the L in the main module. + +=head1 AUTHOR + +Adam Kennedy Eadamk@cpan.orgE + +=head1 COPYRIGHT + +Copyright 2001 - 2011 Adam Kennedy. + +This program is free software; you can redistribute +it and/or modify it under the same terms as Perl itself. + +The full text of the license can be found in the +LICENSE file included with this module. + +=cut diff --git a/lib/PPI/Token/Structure.pm b/lib/PPI/Token/Structure.pm index 0e0ecd1a..a07e0ad3 100644 --- a/lib/PPI/Token/Structure.pm +++ b/lib/PPI/Token/Structure.pm @@ -66,7 +66,7 @@ my %CLOSES = ( sub __TOKENIZER__on_char { # Structures are one character long, always. # Finalize and process again. - $_[1]->_finalize_token->__TOKENIZER__on_char( $_[1] ); + $_[1]->_finalize_token->__TOKENIZER__on_char( $_[1], $_[2] ); } sub __TOKENIZER__commit { diff --git a/lib/PPI/Token/Symbol.pm b/lib/PPI/Token/Symbol.pm index 20295883..6fdcbef9 100644 --- a/lib/PPI/Token/Symbol.pm +++ b/lib/PPI/Token/Symbol.pm @@ -158,7 +158,7 @@ sub symbol_type { # Tokenizer Methods sub __TOKENIZER__on_char { - my $t = $_[1]; + my ( undef, $t, $Document ) = @_; # Suck in till the end of the symbol pos $t->{line} = $t->{line_cursor}; @@ -171,7 +171,7 @@ sub __TOKENIZER__on_char { my $content = $t->{token}->{content}; if ( $content eq '@_' or $content eq '$_' ) { $t->{class} = $t->{token}->set_class( 'Magic' ); - return $t->_finalize_token->__TOKENIZER__on_char( $t ); + return $t->_finalize_token->__TOKENIZER__on_char( $t, $Document ); } # Shortcut for most of the X:: symbols @@ -183,17 +183,17 @@ sub __TOKENIZER__on_char { $t->{line_cursor}++; $t->{class} = $t->{token}->set_class( 'Magic' ); } - return $t->_finalize_token->__TOKENIZER__on_char( $t ); + return $t->_finalize_token->__TOKENIZER__on_char( $t, $Document ); } if ( $content =~ /^[\$%*@&]::(?:[^\w]|$)/ ) { my $current = substr( $content, 0, 3, '' ); $t->{token}->{content} = $current; $t->{line_cursor} -= length( $content ); - return $t->_finalize_token->__TOKENIZER__on_char( $t ); + return $t->_finalize_token->__TOKENIZER__on_char( $t, $Document ); } if ( $content =~ /^(?:\$|\@)\d+/ ) { $t->{class} = $t->{token}->set_class( 'Magic' ); - return $t->_finalize_token->__TOKENIZER__on_char( $t ); + return $t->_finalize_token->__TOKENIZER__on_char( $t, $Document ); } # Trim off anything we oversucked... @@ -213,7 +213,7 @@ sub __TOKENIZER__on_char { $t->{token}->{content} = $1; } - $t->_finalize_token->__TOKENIZER__on_char( $t ); + $t->_finalize_token->__TOKENIZER__on_char( $t, $Document ); } 1; diff --git a/lib/PPI/Token/Unknown.pm b/lib/PPI/Token/Unknown.pm index 5a0599e1..80076a87 100644 --- a/lib/PPI/Token/Unknown.pm +++ b/lib/PPI/Token/Unknown.pm @@ -45,7 +45,7 @@ our @ISA = "PPI::Token"; # Tokenizer Methods sub __TOKENIZER__on_char { - my ( $self, $t ) = @_; # Self and Tokenizer + my ( $self, $t, $Document ) = @_; # Self and Tokenizer my $c = $t->{token}->{content}; # Current token my $char = substr( $t->{line}, $t->{line_cursor}, 1 ); # Current character @@ -55,7 +55,7 @@ sub __TOKENIZER__on_char { if ( $char =~ /\d/ ) { # bitwise operator $t->{class} = $t->{token}->set_class( 'Operator' ); - return $t->_finalize_token->__TOKENIZER__on_char( $t ); + return $t->_finalize_token->__TOKENIZER__on_char( $t, $Document ); } if ( $char =~ /[\w:]/ ) { @@ -95,7 +95,7 @@ sub __TOKENIZER__on_char { return $self->_as_cast_or_op($t) if $self->_is_cast_or_op($char); $t->{class} = $t->{token}->set_class( 'Operator' ); - return $t->_finalize_token->__TOKENIZER__on_char( $t ); + return $t->_finalize_token->__TOKENIZER__on_char( $t, $Document ); @@ -133,7 +133,7 @@ sub __TOKENIZER__on_char { # Must be a cast $t->{class} = $t->{token}->set_class( 'Cast' ); - return $t->_finalize_token->__TOKENIZER__on_char( $t ); + return $t->_finalize_token->__TOKENIZER__on_char( $t, $Document ); @@ -171,7 +171,7 @@ sub __TOKENIZER__on_char { # Must be a cast $t->{class} = $t->{token}->set_class( 'Cast' ); - return $t->_finalize_token->__TOKENIZER__on_char( $t ); + return $t->_finalize_token->__TOKENIZER__on_char( $t, $Document ); @@ -186,7 +186,7 @@ sub __TOKENIZER__on_char { } if ( $char eq '[' ) { $t->{class} = $t->{token}->set_class( 'Cast' ); - return $t->_finalize_token->__TOKENIZER__on_char( $t ); + return $t->_finalize_token->__TOKENIZER__on_char( $t, $Document ); } } } @@ -195,7 +195,7 @@ sub __TOKENIZER__on_char { if ( $char =~ /\d/ ) { # bitwise operator $t->{class} = $t->{token}->set_class( 'Operator' ); - return $t->_finalize_token->__TOKENIZER__on_char( $t ); + return $t->_finalize_token->__TOKENIZER__on_char( $t, $Document ); } # Is it a magic variable? @@ -227,7 +227,7 @@ sub __TOKENIZER__on_char { # Probably the mod operator $t->{class} = $t->{token}->set_class( 'Operator' ); - return $t->{class}->__TOKENIZER__on_char( $t ); + return $t->{class}->__TOKENIZER__on_char( $t, $Document ); @@ -245,7 +245,7 @@ sub __TOKENIZER__on_char { if ( $char =~ /\d/ ) { # bitwise operator $t->{class} = $t->{token}->set_class( 'Operator' ); - return $t->_finalize_token->__TOKENIZER__on_char( $t ); + return $t->_finalize_token->__TOKENIZER__on_char( $t, $Document ); } if ( $char =~ /[\w:]/ ) { @@ -261,7 +261,7 @@ sub __TOKENIZER__on_char { # Probably the binary and operator $t->{class} = $t->{token}->set_class( 'Operator' ); - return $t->{class}->__TOKENIZER__on_char( $t ); + return $t->{class}->__TOKENIZER__on_char( $t, $Document ); @@ -285,7 +285,7 @@ sub __TOKENIZER__on_char { # The numeric negative operator $t->{class} = $t->{token}->set_class( 'Operator' ); - return $t->{class}->__TOKENIZER__on_char( $t ); + return $t->{class}->__TOKENIZER__on_char( $t, $Document ); @@ -303,12 +303,12 @@ sub __TOKENIZER__on_char { # This : is an attribute indicator $t->{class} = $t->{token}->set_class( 'Operator' ); $t->{token}->{_attribute} = 1; - return $t->_finalize_token->__TOKENIZER__on_char( $t ); + return $t->_finalize_token->__TOKENIZER__on_char( $t, $Document ); } # It MIGHT be a label, but it's probably the ?: trinary operator $t->{class} = $t->{token}->set_class( 'Operator' ); - return $t->{class}->__TOKENIZER__on_char( $t ); + return $t->{class}->__TOKENIZER__on_char( $t, $Document ); } # erm... @@ -326,10 +326,10 @@ sub _is_cast_or_op { } sub _as_cast_or_op { - my ( $self, $t ) = @_; + my ( undef, $t, $Document ) = @_; my $class = _cast_or_op( $t ); $t->{class} = $t->{token}->set_class( $class ); - return $t->_finalize_token->__TOKENIZER__on_char( $t ); + return $t->_finalize_token->__TOKENIZER__on_char( $t, $Document ); } sub _prev_significant_w_cursor { diff --git a/lib/PPI/Token/Whitespace.pm b/lib/PPI/Token/Whitespace.pm index 2874b3da..f5687e5b 100644 --- a/lib/PPI/Token/Whitespace.pm +++ b/lib/PPI/Token/Whitespace.pm @@ -192,12 +192,13 @@ sub __TOKENIZER__on_line_start { } sub __TOKENIZER__on_char { - my $t = $_[1]; + my ( undef, $t, $Document ) = @_; my $c = substr $t->{line}, $t->{line_cursor}, 1; my $char = ord $c; # Do we definitely know what something is? - return $COMMITMAP{$char}->__TOKENIZER__commit($t) if $COMMITMAP{$char}; + return $COMMITMAP{$char}->__TOKENIZER__commit( $t, $Document ) + if $COMMITMAP{$char}; # Handle the simple option first return $CLASSMAP{$char} if $CLASSMAP{$char}; @@ -212,8 +213,21 @@ sub __TOKENIZER__on_char { # 2. The one before that is the word 'sub'. # 3. The one before that is a 'structure' - # Get the three previous significant tokens - my @tokens = $t->_previous_significant_tokens(3); + # Get at least the three previous significant tokens, and extend the + # retrieval range to include at least one token that can walk the + # already generated tree. (i.e. has a parent) + my ( $tokens_to_get, @tokens ) = (3); + while ( !@tokens or ( $tokens[-1] and !$tokens[-1]->parent ) ) { + @tokens = $t->_previous_significant_tokens($tokens_to_get); + last if @tokens < $tokens_to_get; + $tokens_to_get++; + } + + my ($closest_parented_token) = grep $_->parent, @tokens; + die "no parented element found" unless # + $closest_parented_token ||= $Document; + return 'Signature' + if $closest_parented_token->presumed_features->{signatures}; # A normal subroutine declaration my $p1 = $tokens[1]; @@ -402,7 +416,7 @@ sub __TOKENIZER__on_char { # Otherwise, commit like a normal bareword, including x # operator followed by whitespace. - return PPI::Token::Word->__TOKENIZER__commit($t); + return PPI::Token::Word->__TOKENIZER__commit( $t, $Document ); } elsif ( $char == 45 ) { # $char eq '-' # Look for an obvious operator operand context @@ -415,7 +429,8 @@ sub __TOKENIZER__on_char { } } elsif ( $char >= 128 ) { # Outside ASCII - return 'PPI::Token::Word'->__TOKENIZER__commit($t) if $c =~ /\w/; + return 'PPI::Token::Word'->__TOKENIZER__commit( $t, $Document ) + if $c =~ /\w/; return 'Whitespace' if $c =~ /\s/; } diff --git a/lib/PPI/Token/Word.pm b/lib/PPI/Token/Word.pm index 32e8cc94..689537d1 100644 --- a/lib/PPI/Token/Word.pm +++ b/lib/PPI/Token/Word.pm @@ -118,8 +118,7 @@ sub method_call { sub __TOKENIZER__on_char { - my $class = shift; - my $t = shift; + my ( undef, $t, $Document ) = @_; # Suck in till the end of the bareword pos $t->{line} = $t->{line_cursor}; @@ -147,26 +146,26 @@ sub __TOKENIZER__on_char { # Check for a Perl keyword that is forced to be a normal word instead if ( $t->__current_token_is_forced_word ) { $t->{class} = $t->{token}->set_class( 'Word' ); - return $t->{class}->__TOKENIZER__on_char( $t ); + return $t->{class}->__TOKENIZER__on_char( $t, $Document ); } # Check for a quote like operator. %QUOTELIKE must be subset of %KEYWORDS if ( $QUOTELIKE{$word} ) { $t->{class} = $t->{token}->set_class( $QUOTELIKE{$word} ); - return $t->{class}->__TOKENIZER__on_char( $t ); + return $t->{class}->__TOKENIZER__on_char( $t, $Document ); } # Or one of the word operators. %OPERATOR must be subset of %KEYWORDS if ( $OPERATOR{$word} ) { $t->{class} = $t->{token}->set_class( 'Operator' ); - return $t->_finalize_token->__TOKENIZER__on_char( $t ); + return $t->_finalize_token->__TOKENIZER__on_char( $t, $Document ); } } # Unless this is a simple identifier, at this point # it has to be a normal bareword if ( $word =~ /\:/ ) { - return $t->_finalize_token->__TOKENIZER__on_char( $t ); + return $t->_finalize_token->__TOKENIZER__on_char( $t, $Document ); } # If the NEXT character in the line is a colon, this @@ -184,7 +183,7 @@ sub __TOKENIZER__on_char { } # Finalise and process the character again - $t->_finalize_token->__TOKENIZER__on_char( $t ); + $t->_finalize_token->__TOKENIZER__on_char( $t, $Document ); } @@ -192,7 +191,7 @@ sub __TOKENIZER__on_char { # We are committed to being a bareword. # Or so we would like to believe. sub __TOKENIZER__commit { - my ($class, $t) = @_; + my ( $class, $t, $Document ) = @_; # Our current position is the first character of the bareword. # Capture the bareword. @@ -216,7 +215,7 @@ sub __TOKENIZER__commit { if ( __current_token_is_attribute($t) ) { $t->_new_token( 'Attribute', $word ); return ($t->{line_cursor} >= $t->{line_length}) ? 0 - : $t->{class}->__TOKENIZER__on_char($t); + : $t->{class}->__TOKENIZER__on_char( $t, $Document ); } # Check for the end of the file @@ -281,7 +280,7 @@ sub __TOKENIZER__commit { # Special Case: A Quote-like operator $t->_new_token( $QUOTELIKE{$word}, $word ); return ($t->{line_cursor} >= $t->{line_length}) ? 0 - : $t->{class}->__TOKENIZER__on_char( $t ); + : $t->{class}->__TOKENIZER__on_char( $t, $Document ); } elsif ( $OPERATOR{$word} && ($word ne 'x' || $t->_current_x_is_operator) ) { # Word operator @@ -324,7 +323,7 @@ sub __TOKENIZER__commit { $t->_finalize_token; return 0; } - $t->_finalize_token->__TOKENIZER__on_char($t); + $t->_finalize_token->__TOKENIZER__on_char( $t, $Document ); } diff --git a/lib/PPI/Tokenizer.pm b/lib/PPI/Tokenizer.pm index d2cbfa7f..71d5bdad 100644 --- a/lib/PPI/Tokenizer.pm +++ b/lib/PPI/Tokenizer.pm @@ -279,7 +279,7 @@ reached the end of the file, or C on error. =cut sub get_token { - my $self = shift; + my ( $self, $Document ) = @_; # Shortcut for EOF if ( $self->{token_eof} @@ -300,7 +300,7 @@ sub get_token { # can start to convert code to exception-based code. my $rv = eval { # No token, we need to get some more - while ( $line_rv = $self->_process_next_line ) { + while ( $line_rv = $self->_process_next_line($Document) ) { # If there is something in the buffer, return it # The defined() prevents a ton of calls to PPI::Util::TRUE if ( defined( my $token = $self->{tokens}->[ $self->{token_cursor} ] ) ) { @@ -510,7 +510,7 @@ sub _char { # Returns 0 if EOF # Returns undef on error sub _process_next_line { - my $self = shift; + my ( $self, $Document ) = @_; # Fill the line buffer my $rv; @@ -536,7 +536,7 @@ sub _process_next_line { } # If we can't deal with the entire line, process char by char - while ( $rv = $self->_process_next_char ) {} + while ( $rv = $self->_process_next_char($Document) ) {} unless ( defined $rv ) { PPI::Exception->throw("Error at line $self->{line_count}, character $self->{line_cursor}"); } @@ -564,7 +564,7 @@ sub _process_next_line { # called, it has been fairly heavily in-lined, so the code # might look a bit ugly and duplicated. sub _process_next_char { - my $self = shift; + my ( $self, $Document ) = @_; ### FIXME - This checks for a screwed up condition that triggers ### several warnings, amongst other things. @@ -578,7 +578,7 @@ sub _process_next_char { # Pass control to the token class my $result; - unless ( $result = $self->{class}->__TOKENIZER__on_char( $self ) ) { + unless ( $result = $self->{class}->__TOKENIZER__on_char( $self, $Document ) ) { # undef is error. 0 is "Did stuff ourself, you don't have to do anything" return defined $result ? 1 : undef; } diff --git a/t/feature_tracking.t b/t/feature_tracking.t new file mode 100644 index 00000000..42e5283c --- /dev/null +++ b/t/feature_tracking.t @@ -0,0 +1,145 @@ +#!/usr/bin/perl + +use lib 't/lib'; +use PPI::Test::pragmas; +use Test::More tests => 3 + ( $ENV{AUTHOR_TESTING} ? 1 : 0 ); + +use B 'perlstring'; + +use PPI (); + +#use DB::Skip subs => [ +# qw( PPI::Document::new PPI::Lexer::lex_source PPI::Lexer::new +# PPI::Lexer::_clear PPI::Lexer::(eval) PPI::Lexer::X_TOKENIZER +# PPI::Tokenizer::new PPI::Lexer::lex_tokenizer PPI::Node::new ), +# qr/^PPI::Tokenizer::__ANON__.*237.*$/ +#]; + +sub test_document; + +FEATURE_TRACKING: { + test_document + <<'END_PERL', + sub meep(&$) {} + use 5.035; + sub marp($left, $right) {} +END_PERL + [ + 'PPI::Statement::Sub' => 'sub meep(&$) {}', + 'PPI::Token::Word' => 'sub', + 'PPI::Token::Word' => 'meep', + 'PPI::Token::Prototype' => '(&$)', + 'PPI::Structure::Block' => '{}', + 'PPI::Token::Structure' => '{', + 'PPI::Token::Structure' => '}', + 'PPI::Statement::Include' => 'use 5.035;', + 'PPI::Token::Word' => 'use', + 'PPI::Token::Number::Float' => '5.035', + 'PPI::Token::Structure' => ';', + 'PPI::Statement::Sub' => 'sub marp($left, $right) {}', + 'PPI::Token::Word' => 'sub', + 'PPI::Token::Word' => 'marp', + 'PPI::Token::Signature' => '($left, $right)', # !!!!!!!!!!!!!!!!!!!! + 'PPI::Structure::Block' => '{}', + 'PPI::Token::Structure' => '{', + 'PPI::Token::Structure' => '}', + ], + "enabling of features"; +} + +DOCUMENT_FEATURES: { + test_document + <<'END_PERL', + sub meep(&$) {} + sub marp($left, $right) {} +END_PERL + [ + 'PPI::Statement::Sub' => 'sub meep(&$) {}', + 'PPI::Token::Word' => 'sub', + 'PPI::Token::Word' => 'meep', + 'PPI::Token::Signature' => '(&$)', + 'PPI::Structure::Block' => '{}', + 'PPI::Token::Structure' => '{', + 'PPI::Token::Structure' => '}', + 'PPI::Statement::Sub' => 'sub marp($left, $right) {}', + 'PPI::Token::Word' => 'sub', + 'PPI::Token::Word' => 'marp', + 'PPI::Token::Signature' => '($left, $right)', + 'PPI::Structure::Block' => '{}', + 'PPI::Token::Structure' => '{', + 'PPI::Token::Structure' => '}', + ], + "document-level default features", + feature_mods => { signatures => 1 }; +} + +DISABLE_FEATURE: { + $DB::single = $DB::single = 1; + test_document + <<'END_PERL', + sub meep(&$) {} + no feature 'signatures'; + sub marp($left, $right) {} +END_PERL + [ + 'PPI::Statement::Sub' => 'sub meep(&$) {}', + 'PPI::Token::Word' => 'sub', + 'PPI::Token::Word' => 'meep', + 'PPI::Token::Signature' => '(&$)', + 'PPI::Structure::Block' => '{}', + 'PPI::Token::Structure' => '{', + 'PPI::Token::Structure' => '}', + 'PPI::Statement::Include' => q|no feature 'signatures';|, + 'PPI::Token::Word' => 'no', + 'PPI::Token::Word' => 'feature', + 'PPI::Token::Quote::Single' => q|'signatures'|, + 'PPI::Token::Structure' => ';', + 'PPI::Statement::Sub' => 'sub marp($left, $right) {}', + 'PPI::Token::Word' => 'sub', + 'PPI::Token::Word' => 'marp', + 'PPI::Token::Prototype' => '($left, $right)', + 'PPI::Structure::Block' => '{}', + 'PPI::Token::Structure' => '{', + 'PPI::Token::Structure' => '}', + ], + "disabling of features", + feature_mods => { signatures => 1 }; +} + +### TODO from ppi_token_unknown.t , deduplicate + +sub one_line_explain { + my ($data) = @_; + my @explain = explain $data; + s/\n//g for @explain; + return join "", @explain; +} + +sub main_level_line { + return "" if not $TODO; + my @outer_final; + my $level = 0; + while ( my @outer = caller( $level++ ) ) { + @outer_final = @outer; + } + return "l $outer_final[2] - "; +} + +sub test_document { + local $Test::Builder::Level = $Test::Builder::Level + 1; + my ( $code, $expected, $msg, @args ) = @_; + $msg = perlstring $code if !defined $msg; + + my $d = PPI::Document->new( \$code, @args ); + my $tokens = $d->find( sub { $_[1]->significant } ); + $tokens = [ map { ref($_), $_->content } @$tokens ]; + + my $ok = is_deeply( $tokens, $expected, main_level_line . $msg ); + if ( !$ok ) { + diag ">>> $code -- $msg\n"; + diag one_line_explain $tokens; + diag one_line_explain $expected; + } + + return; +}