#!/usr/bin/env perl
############################################################
#
#    perltidy - a perl script indenter and formatter
#
#    Copyright (c) 2000, 2001 by Steven L. Hancock
#    Distributed under the GPL license agreement; see file COPYING
#
#    This program is free software; you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation; either version 2 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program; if not, write to the Free Software
#    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
#    For brief instructions, try 'perltidy -h'.
#    For more complete documentation, try 'man perltidy'.
#
#    This script was formatted with the command:
#
#      perltidy perltidy.pl
#
#    Code Contributions:
#      Thanks to Michael Cartmell for adaptation to VMS and for v-string help
#
############################################################

require 5.004;
$^W = 1;    # turn on warnings
use strict;
use vars qw{
  $VERSION
};

use IO::File;

BEGIN {

    # The version number is taken from the check-in date recorded in the
    # RCS identification line below: the yyyy/mm/dd field is squeezed into
    # a single run of digits.  If no date field is found, the whole
    # (chomped) line is used unchanged.
    my $rcs_id = <<'RCS';
    $Id: perltidy.pl,v 1.127 2001/03/28 19:33:31 steve Exp steve $
RCS
    chomp $rcs_id;
    if ( $rcs_id =~ m{^.*\s+(\d+)/(\d+)/(\d+).*$} ) {
        $VERSION = $1 . $2 . $3;
    }
    else {
        $VERSION = $rcs_id;
    }
}

=pod

Here is a map of the flow of data from the input file to the output
file:

LineSource-->Tokenizer-->Formatter-->VerticalAligner-->FileWriter-->
      input                         groups                 output
      lines   tokens      lines       of     lines          lines
                                     lines

The names correspond to the package names responsible for the unit processes.

The overall process is controlled by the "main" package.

LineSource is the stream of input lines

Tokenizer analyzes a line and breaks it into tokens, peeking ahead
if necessary.  A token is any section of the input line which should be
manipulated as a single entity during formatting.  For example, a single
',' character is a token, and so is an entire side comment.  It handles
the complexities of Perl syntax, such as distinguishing between '<<' as
a shift operator and as a here-document, or distinguishing between '/'
as a divide symbol and as a pattern delimiter.  

Formatter inserts and deletes whitespace between tokens, and breaks
sequences of tokens at appropriate points as output lines.  It bases its
decisions on the default rules as modified by any command-line options. 

VerticalAligner collects groups of lines together and tries to line up
certain tokens, such as '=>', '#', and '=' by adding whitespace. 

FileWriter simply writes lines to the output stream.

The Logger package, not shown, records significant events and warning
messages.  It writes a .LOG file, which is automatically saved if
perltidy detects an error.  The .LOG file may also be saved with a
'-log' flag.

Some comments refer to test files, most of which are in the test directory.

=cut
# Main driver: process the command line, then tidy each input file in turn.
#
# For every name in @ARGV (or '-', meaning STDIN, when no name is given)
# this routine:
#   - skips files which do not exist, are not regular text files, or carry
#     one of perltidy's own output extensions,
#   - chooses the output file name from the -o / -st flags or from the
#     input file name,
#   - creates the source, sink, logger, debugger, formatter and tokenizer
#     objects and passes the tokenizer and formatter to process_this_file(),
#   - closes the files, optionally runs the syntax check, and finishes the
#     log.
#
# Takes no arguments.  Dies if the output options conflict (-o with -st,
# -o or -st with more than one input file, or -o without a file name).
sub main {

    # VMS file names are restricted to a 40.40 format, so
    # we append _tdy instead of .tdy, etc.  
    my $dot;
    my $dot_pattern;
    if ( $^O eq 'VMS' ) {
        $dot         = '_';
        $dot_pattern = '_';
    }
    else {
        $dot         = '.';
        $dot_pattern = '\.';    # must escape for use in regex
    }

    # handle command line options
    my ( $rOpts, $config_file, $rraw_options ) = process_command_line();
    Perltidy::Formatter::check_options($rOpts);
    if ( $rOpts->{'html'} ) {
        Perltidy::HtmlWriter->check_options($rOpts);
    }

    # create a diagnostics object if requested
    my $diagnostics_object = undef;
    if ( $rOpts->{'DIAGNOSTICS'} ) {
        $diagnostics_object = Perltidy::Diagnostics->new();
    }

    # loop to process all files in argument list
    unshift ( @ARGV, '-' ) unless @ARGV;
    my $number_of_files = @ARGV;
    my $input_file;

    # Test for definedness rather than truth so that a file which happens
    # to be named '0' does not silently end the loop.
    while ( defined( $input_file = shift @ARGV ) ) {
        my $fileroot;

        #---------------------------------------------------------------
        # determine the input file name
        #---------------------------------------------------------------
        if ( $input_file eq '-' ) {    # '-' indicates input from STDIN
            $fileroot = "perltidy";    # root name to use for .ERR, .LOG, etc
        }
        else {
            $fileroot = $input_file;

            unless ( -e $input_file ) {
                print "skipping file: $input_file: does not exist\n";
                next;
            }

            unless ( -f $input_file ) {
                print "skipping: $input_file: not a regular file\n";
                next;
            }

            unless ( ( -T $input_file ) || $rOpts->{'force-read-binary'} ) {
                print "skipping file: $input_file: Non-text\n";
                next;
            }
        }

        # Skip files with same extension as the output files
        # because this can lead to a messy situation
        # with files like script.tdy.tdy.tdy ... when you rerun
        # perltidy over and over with wildcard input
        my $output_extension = $rOpts->{'html'} ? "html" : "tdy";

        if ( ( $input_file =~
          /($dot_pattern)($output_extension|LOG|DEBUG|ERR|TEE|TMPI|TMPO)$/ )
          || ( $input_file eq 'DIAGNOSTICS' ) )
        {
            print "skipping file: $input_file: wrong extension\n";
            next;
        }

        # the 'source_object' supplies a method to read the input file
        my $source_object = Perltidy::LineSource->new( $input_file, $rOpts );
        next unless ($source_object);

        # register this file name with the Diagnostics package
        $diagnostics_object->set_input_file($input_file) if $diagnostics_object;

        #---------------------------------------------------------------
        # determine the output file name
        #---------------------------------------------------------------
        my $output_file = undef;

        if ( $rOpts->{'outfile'} ) {

            if ( $number_of_files <= 1 ) {

                if ( $rOpts->{'standard-output'} ) {
                    die "You may not use -o and -st together\n";
                }
                $output_file = $rOpts->{outfile};

                # make sure user gives a file name after -o
                if ( $output_file =~ /^-/ ) {
                    die "You must specify a valid filename after -o\n";
                }
            }
            else {
                die "You may not use -o with more than one input file\n";
            }
        }
        elsif ( $rOpts->{'standard-output'} ) {
            $output_file = '-';

            if ( $number_of_files <= 1 ) {
            }
            else {
                die "You may not use -st with more than one input file\n";
            }
        }
        elsif ( $input_file eq '-' ) {

            # input from STDIN goes to STDOUT by default
            $output_file = $input_file;
        }
        else {
            $output_file = $fileroot . $dot . $output_extension;
        }

        # the 'sink_object' knows how to write the output file
        my $tee_file = $fileroot . $dot . "TEE";
        my $sink_object =
          Perltidy::LineSink->new( $output_file, $tee_file, $rOpts );

        #---------------------------------------------------------------
        # initialize the error logger
        #---------------------------------------------------------------
        my $warning_file = $fileroot . $dot . "ERR";
        my $log_file     = $fileroot . $dot . "LOG";

        my $logger_object =
          Perltidy::Logger->new( $rOpts, $log_file, $warning_file );
        write_logfile_header( $rOpts, $logger_object, $config_file,
          $rraw_options );

        #---------------------------------------------------------------
        # initialize the debug object, if any
        #---------------------------------------------------------------
        my $debugger_object = undef;
        if ( $rOpts->{DEBUG} ) {
            $debugger_object =
              Perltidy::Debugger->new( $fileroot . $dot . "DEBUG" );
        }

        #---------------------------------------------------------------
        # create a formatter for this file : html writer or pretty printer
        #---------------------------------------------------------------
        my $formatter;
        if ( $rOpts->{'html'} ) {
            $formatter = Perltidy::HtmlWriter->new( $fileroot, $output_file );
        }

        else {
            $formatter = Perltidy::Formatter->new(
              logger_object      => $logger_object,
              diagnostics_object => $diagnostics_object,
              sink_object        => $sink_object,
            );
        }

        #---------------------------------------------------------------
        # create the tokenizer for this file
        #---------------------------------------------------------------
        my $tokenizer = Perltidy::Tokenizer->new(
          source_object      => $source_object,
          logger_object      => $logger_object,
          debugger_object    => $debugger_object,
          diagnostics_object => $diagnostics_object,
          starting_level     => $rOpts->{'starting-indentation-level'},
          tabs               => $rOpts->{'tabs'},
          indent_columns     => $rOpts->{'indent-columns'},
          look_for_hash_bang => $rOpts->{'look-for-hash-bang'},
        );

        #---------------------------------------------------------------
        # process the file
        #---------------------------------------------------------------
        process_this_file( $tokenizer, $formatter );

        #---------------------------------------------------------------
        # clean up and report errors
        #---------------------------------------------------------------
        $source_object->close_input_file();
        $sink_object->close_output_file() if $sink_object;
        if ($debugger_object) { $debugger_object->close_debug_file(); }

        my $infile_syntax_ok = 0;    # -1 no  0=don't know   1 yes
        if ($output_file) {
            chmod 0755, $output_file;

            # the sink object is tested here, as it is for the close above,
            # so that a missing sink cannot cause a method call on undef
            if ( $logger_object && $sink_object && $rOpts->{'check-syntax'} )
            {
                my $ifname = $source_object->get_input_file_copy_name();
                my $ofname = $sink_object->get_output_file_copy();
                $infile_syntax_ok =
                  check_syntax( $ifname, $ofname, $logger_object, $rOpts );
            }
        }
        $source_object->unlink_copy();
        $sink_object->unlink_copy() if $sink_object;
        if ($logger_object) { $logger_object->finish($infile_syntax_ok) }

    }    # end of loop to process all files

}    # end of main program

# Write the standard opening lines of the log file.
#
# Parameters:
#   $rOpts         - reference to the hash of final option values
#   $logger_object - object providing a write_logfile_entry() method
#   $config_file   - name of the configuration file which was read, or a
#                    false value if none was used
#   $rraw_options  - reference to the list of options as originally given
#
# Side effect: when the DEBUG or show-options flag is set, the 'logfile'
# option is switched on in %$rOpts and the complete parameter set is
# written to the log, one "--name=value" entry per option, sorted by name
# so that log files from different runs can be compared.
sub write_logfile_header {
    my ( $rOpts, $logger_object, $config_file, $rraw_options ) = @_;
    $logger_object->write_logfile_entry(
     "perltidy version $VERSION log file on a $^O system, OLD_PERL_VERSION=$]\n"
    );
    my $options_string = join ( ' ', @$rraw_options );

    if ($config_file) {
        $logger_object->write_logfile_entry(
          "Found Configuration File >>> $config_file \n");
    }
    $logger_object->write_logfile_entry(
      "Configuration and command line parameters for this run:\n");
    $logger_object->write_logfile_entry("$options_string\n");

    if ( $rOpts->{'DEBUG'} || $rOpts->{'show-options'} ) {
        $rOpts->{'logfile'} = 1;    # force logfile to be saved
        $logger_object->write_logfile_entry("Final parameter set for this run\n"
        );
        $logger_object->write_logfile_entry(
          "------------------------------------\n");

        # sort the names: plain hash order differs from run to run
        foreach my $name ( sort keys %{$rOpts} ) {

            # an option without a value is shown as empty rather than
            # triggering an 'uninitialized value' warning
            my $value = defined( $rOpts->{$name} ) ? $rOpts->{$name} : '';
            $logger_object->write_logfile_entry( '--' . "$name=$value\n" );
        }
        $logger_object->write_logfile_entry(
          "------------------------------------\n");
    }
    $logger_object->write_logfile_entry(
      "To find error messages search for 'WARNING' with your editor\n");
}

# Build the final option set for this run.
#
# The steps are:
#   1. define the long option names understood by GetOptions,
#   2. load the built-in defaults by running them through GetOptions,
#   3. define the short abbreviations and style aliases (%expansion),
#   4. unless -npro/-noprofile was given, prepend the contents of the
#      configuration file to @ARGV,
#   5. repeatedly expand abbreviations in @ARGV until only long names
#      remain (with a pass limit to catch circular definitions),
#   6. let GetOptions parse @ARGV, handle the informational options
#      (help, version, dump-*), which exit, and
#   7. resolve interactions among the options.
#
# Takes no arguments; reads and modifies @ARGV, leaving in it whatever
# GetOptions does not consume.
#
# Returns a three element list:
#   - reference to the options hash,
#   - the name of the configuration file used ("" if none),
#   - reference to the list of raw dash options seen before expansion.
#
# Dies if GetOptions rejects the options or if alias expansion does not
# terminate.
sub process_command_line {
    use Getopt::Long;

    ######################################################################
    # Note: a few options are not documented in the man page and usage
    # message. This is because these are experimental or debug options and
    # may or may not be retained in future versions.
    ######################################################################

    # here is a summary of the Getopt codes:
    # <none> does not take an argument
    # =s takes a mandatory string
    # :s takes an optional string
    # =i takes a mandatory integer
    # :i takes an optional integer
    # ! does not take an argument and may be negated
    #  i.e., -foo and -nofoo are allowed
    # a double dash signals the end of the options list
    #
    #---------------------------------------------------------------
    # Define the option string passed to GetOptions.
    #---------------------------------------------------------------
    my @option_string = qw
      (
      DEBUG!
      DIAGNOSTICS!
      look-for-hash-bang!
      dump-defaults!
      dump-options!
      dump-long-names!
      dump-short-names!
      dump-token-types!
      dump-want-left-space!
      dump-want-right-space!
      version
      npro
      noprofile
      html!
      recombine!
      add-newlines!
      add-semicolons!
      add-whitespace!
      blanks-before-blocks!
      blanks-before-comments!
      blanks-before-subs!
      swallow-optional-blank-lines!
      brace-tightness=i
      check-syntax!
      comma-break-flag=i
      continuation-indentation=i
      cuddled-else!
      delete-block-comments!
      delete-pod!
      delete-side-comments!
      delete-semicolons!
      force-read-binary!
      fuzzy-line-length!
      help
      indent-block-comments!
      indent-columns=i
      delete-old-newlines!
      delete-old-whitespace!
      logfile!
      logfile-gap:i
      maximum-fields-per-table=i
      maximum-consecutive-blank-lines=i
      maximum-line-length=i
      maximum-space-to-comment=i
      minimum-space-to-comment=i
      maximum-whitespace-columns=i
      opening-brace-on-new-line!
      outdent-long-lines!
      indent-closing-brace!
      outfile=s
      paren-tightness=i
      quiet!
      space-for-semicolon!
      space-terminal-semicolon!
      show-options!
      square-bracket-tightness=i
      starting-indentation-level=i
      standard-output!
      tabs!
      tee-block-comments!
      tee-pod!
      tee-side-comments!
      tidy-output!
      break-after-comma-arrows!
      want-left-space=s
      nowant-left-space=s
      want-right-space=s
      nowant-right-space=s
    );

    # The Perltidy::HtmlWriter will add its own options to the string
    Perltidy::HtmlWriter->make_getopt_long_names( \@option_string );

    #---------------------------------------------------------------
    # Assign default values to the above options here, except
    # for 'outfile' and 'help'.
    # These settings should approximate the perlstyle(1) suggestions.
    #---------------------------------------------------------------
    my @defaults = qw(
      nohtml
      recombine
      add-newlines
      add-whitespace
      blanks-before-blocks
      blanks-before-comments
      blanks-before-subs
      brace-tightness=1
      check-syntax
      comma-break-flag=0
      continuation-indentation=2
      fuzzy-line-length
      indent-block-comments
      indent-columns=4
      nodelete-old-whitespace
      nologfile
      maximum-fields-per-table=40
      maximum-consecutive-blank-lines=1
      maximum-line-length=80
      maximum-space-to-comment=16
      minimum-space-to-comment=4
      maximum-whitespace-columns=32
      nocuddled-else
      outdent-long-lines
      noswallow-optional-blank-lines
      delete-old-newlines
      noquiet
      paren-tightness=1
      space-for-semicolon
      delete-semicolons
      add-semicolons
      noshow-options
      square-bracket-tightness=1
      notabs
      tidy-output
      nobreak-after-comma-arrows
    );

    #---------------------------------------------------------------
    # set the defaults by passing the above list through GetOptions
    #---------------------------------------------------------------
    my %Opts = ();
    {

        # GetOptions reads @ARGV, so substitute a temporary copy
        local @ARGV;
        my $i;

        for $i (@defaults) { push @ARGV, "--" . $i }

        if ( !GetOptions( \%Opts, @option_string ) ) {
            die "Programming Bug: error in setting default options";
        }
    }

    #---------------------------------------------------------------
    # Define abbreviations which will be expanded into the above primitives.
    # These may be defined recursively.
    #
    # Every key must be unique: a repeated key silently replaces the
    # earlier definition.
    #---------------------------------------------------------------
    my %expansion = (
      'v'                   => [qw(version)],
      'x'                   => [qw(look-for-hash-bang)],
      'nx'                  => [qw(nolook-for-hash-bang)],
      'l'                   => [qw(maximum-line-length)],
      'g'                   => [qw(logfile-gap)],
      'opt'                 => [qw(show-options)],
      'nso'                 => [qw(noshow-options)],
      'ci'                  => [qw(continuation-indentation)],
      'sil'                 => [qw(starting-indentation-level)],
      'mbl'                 => [qw(maximum-consecutive-blank-lines)],
      'sbt'                 => [qw(square-bracket-tightness)],
      'bt'                  => [qw(brace-tightness)],
      'pt'                  => [qw(paren-tightness)],
      'bbb'                 => [qw(blanks-before-blocks)],
      'nbbb'                => [qw(noblanks-before-blocks)],
      'bbc'                 => [qw(blanks-before-comments)],
      'nbbc'                => [qw(noblanks-before-comments)],
      'bbs'                 => [qw(blanks-before-subs)],
      'nbbs'                => [qw(noblanks-before-subs)],
      'sob'                 => [qw(swallow-optional-blank-lines)],
      'nsob'                => [qw(noswallow-optional-blank-lines)],
      'dws'                 => [qw(delete-old-whitespace)],
      'ndws'                => [qw(nodelete-old-whitespace)],
      'aws'                 => [qw(add-whitespace)],
      'naws'                => [qw(noadd-whitespace)],
      'dnl'                 => [qw(delete-old-newlines)],
      'ndnl'                => [qw(nodelete-old-newlines)],
      'anl'                 => [qw(add-newlines)],
      'nanl'                => [qw(noadd-newlines)],
      'oll'                 => [qw(outdent-long-lines)],
      'noll'                => [qw(nooutdent-long-lines)],
      'fll'                 => [qw(fuzzy-line-length)],
      'nfll'                => [qw(nofuzzy-line-length)],
      'ce'                  => [qw(cuddled-else)],
      'nce'                 => [qw(nocuddled-else)],
      'h'                   => [qw(help)],
      'st'                  => [qw(standard-output)],
      'log'                 => [qw(logfile)],
      'nlog'                => [qw(nologfile)],
      'D'                   => [qw(DEBUG)],
      'nD'                  => [qw(noDEBUG)],
      'I'                   => [qw(DIAGNOSTICS)],
      'nI'                  => [qw(noDIAGNOSTICS)],
      'syn'                 => [qw(check-syntax)],
      'nsyn'                => [qw(nocheck-syntax)],
      'q'                   => [qw(quiet)],
      'msc'                 => [qw(minimum-space-to-comment)],
      'xsc'                 => [qw(maximum-space-to-comment)],
      'o'                   => [qw(outfile)],
      't'                   => [qw(tabs)],
      'nt'                  => [qw(notabs)],
      'sfs'                 => [qw(space-for-semicolon)],
      'nsfs'                => [qw(nospace-for-semicolon)],
      'sts'                 => [qw(space-terminal-semicolon)],
      'nsts'                => [qw(nospace-terminal-semicolon)],
      'i'                   => [qw(indent-columns)],
      'ibc'                 => [qw(indent-block-comments)],
      'nibc'                => [qw(noindent-block-comments)],
      'dbc'                 => [qw(delete-block-comments)],
      'ndbc'                => [qw(nodelete-block-comments)],
      'tbc'                 => [qw(tee-block-comments)],
      'ntbc'                => [qw(notee-block-comments)],
      'dsc'                 => [qw(delete-side-comments)],
      'ndsc'                => [qw(nodelete-side-comments)],
      'tsc'                 => [qw(tee-side-comments)],
      'ntsc'                => [qw(notee-side-comments)],
      'freeze-newlines'     => [qw(noadd-newlines nodelete-old-newlines)],
      'fnl'                 => [qw(freeze-newlines)],
      'freeze-whitespace'   => [qw(noadd-whitespace nodelete-old-whitespace)],
      'fws'                 => [qw(freeze-whitespace)],
      'indent-only'         => [qw(freeze-newlines freeze-whitespace)],
      'io'                  => [qw(indent-only)],
      'delete-all-comments' => [qw(delete-block-comments delete-side-comments)],
      'nodelete-all-comments' =>
      [qw(nodelete-block-comments nodelete-side-comments)],
      'dac' => [qw(delete-all-comments)],

      # this must name the 'nodelete-all-comments' alias defined above;
      # 'no-delete-all-comments' is neither an alias nor an option
      'ndac'               => [qw(nodelete-all-comments)],
      'cb'                 => [qw(comma-break-flag)],
      'bl'                 => [qw(opening-brace-on-new-line)],
      'nbl'                => [qw(noopening-brace-on-new-line)],
      'icb'                => [qw(indent-closing-brace)],
      'nicb'               => [qw(noindent-closing-brace)],
      'br'                 => [qw(noopening-brace-on-new-line)],
      'nbr'                => [qw(opening-brace-on-new-line)],
      'tee-all-comments'   => [qw(tee-block-comments tee-side-comments)],
      'notee-all-comments' => [qw(notee-block-comments notee-side-comments)],
      'tac'                => [qw(tee-all-comments)],
      'ntac'               => [qw(notee-all-comments)],
      'dp'                 => [qw(delete-pod)],
      'tp'                 => [qw(tee-pod)],
      'ntp'                => [qw(notee-pod)],
      'f'                  => [qw(force-read-binary)],
      'nf'                 => [qw(noforce-read-binary)],
      'mft'                => [qw(maximum-fields-per-table)],

      # NOTE: the semicolon flags were previously keyed 'dsc' and 'ndsc',
      # which collided with (and silently replaced) the side comment
      # abbreviations above.  They are now 'dsm' and 'ndsm'.
      'dsm'                => [qw(delete-semicolons)],
      'ndsm'               => [qw(nodelete-semicolons)],
      'asc'                => [qw(add-semicolons)],
      'nasc'               => [qw(noadd-semicolons)],
      'nhtml'              => [qw(nohtml)],
      'tdy'                => [qw(tidy-output)],
      'ntdy'               => [qw(notidy-output)],
      'baa'                => [qw(break-after-comma-arrows)],
      'nbaa'               => [qw(nobreak-after-comma-arrows)],
      'wls'                => [qw(want-left-space)],
      'nwls'               => [qw(nowant-left-space)],
      'wrs'                => [qw(want-right-space)],
      'nwrs'               => [qw(nowant-right-space)],

      # 'mangle' originally deleted pod and comments, but to keep it
      # reversible, it no longer does because i have removed:
      #   delete-pod  delete-block-comments delete-side-comments

      # But if you really want to delete them, just use
      #   -mangle -dp -dac

      # An interesting use for 'mangle' is to do this:
      #    perltidy -mangle myfile.pl -st | perltidy -o myfile.pl.new
      # which will form as many one-line blocks as possible

      'mangle' => [
          qw(
          delete-old-newlines
          noadd-newlines
          delete-old-whitespace
          noadd-whitespace
          notabs
          indent-columns=0
          noblanks-before-blocks
          noblanks-before-subs
          maximum-consecutive-blank-lines=0
          maximum-line-length=100000
          delete-semicolons
          noadd-semicolons
          check-syntax
          )
      ],

      # 'extrude' originally deleted pod and comments, but to keep it
      # reversible, it no longer does because i have removed:
      #   delete-pod  delete-block-comments delete-side-comments

      # But if you really want to delete them, just use
      #   extrude -dp -dac
      #
      # An interesting use for 'extrude' is to do this:
      #    perltidy -extrude myfile.pl -st | perltidy -o myfile.pl.new
      # which will break up all one-line blocks.

      'extrude' => [
          qw(
          delete-old-newlines
          delete-old-whitespace
          ci=0
          noadd-whitespace
          nofuzzy-line-length
          notabs indent-columns=0
          noblanks-before-blocks
          noblanks-before-subs
          maximum-consecutive-blank-lines=0
          maximum-line-length=1
          delete-semicolons
          noadd-semicolons
          check-syntax
          )
      ],

      # Additional styles can be added here
    );

    Perltidy::HtmlWriter->make_abbreviated_names( \%expansion );

    # Uncomment next line to dump all expansions for debugging:
    # dump_short_names(\%expansion);

    my ($word);
    my @raw_options        = ();
    my $config_file        = "";
    my $saw_ignore_profile = 0;
    my $i;

    #---------------------------------------------------------------
    # read any .perltidyrc configuration file
    #---------------------------------------------------------------
    foreach $i (@ARGV) {

        # The whole word must be the flag (with one or two leading dashes);
        # a file name which merely ends in '-npro' must not match.
        if ( $i =~ /^--?(npro|noprofile)$/ ) {
            $saw_ignore_profile = 1;
            last;
        }
    }

    unless ($saw_ignore_profile) {
        ( $config_file, my $rconfig_list ) = read_config_file( \%expansion );

        # configuration file entries go first so that the command line
        # entries follow them in the parameter list
        @ARGV = ( @$rconfig_list, @ARGV );
    }

    #---------------------------------------------------------------
    # Now look at the parameter list @ARGV and set the options hash, %Opts
    #---------------------------------------------------------------

    # set a limit to prevent an infinite loop;
    # 10 should be plenty, but it may be increased to allow deeply 
    # nested expansions.
    my $max_passes = 10;

    # keep looping until all expansions have been converted into actual
    # dash parameters..
    for ( my $pass_count = 0 ; $pass_count <= $max_passes ; $pass_count++ ) {
        my @new_argv     = ();
        my $abbrev_count = 0;

        # loop over each item in @ARGV..
        foreach $word (@ARGV) {

            # if it is a dash flag (instead of a file name)..
            if ( $word =~ /^-[-]?([\w\-]+)(.*)/ ) {

                # save the raw input for debug output in case of circular refs
                if ( $pass_count == 0 ) {
                    push ( @raw_options, $word );
                }

                my $abr   = $1;    # the name, without leading dashes
                my $flags = $2;    # anything following the name, e.g. '=4'

                # if we see this dash item in the expansion hash..
                if ( $expansion{$abr} ) {
                    $abbrev_count++;

                    # stuff all of the words that it expands to into the
                    # new arg list for the next pass
                    foreach my $abbrev ( @{ $expansion{$abr} } ) {
                        push ( @new_argv, '--' . $abbrev . $flags );
                    }
                }

                # not in expansion hash, must be actual long name
                else {
                    push ( @new_argv, $word );
                }
            }

            # not a dash item, so just save it for the next pass
            else {
                push ( @new_argv, $word );
            }
        }    # end of this pass

        # update parameter list @ARGV to the new one
        @ARGV = @new_argv;
        last unless ( $abbrev_count > 0 );

        # make sure we are not in an infinite loop
        if ( $pass_count == $max_passes ) {
            print STDERR
"I'm tired. We seem to be in an infinite loop trying to expand aliases.\n";
            print STDERR "Here are the raw options\n";
            local $" = ')(';
            print STDERR "(@raw_options)\n";
            my $num = @new_argv;

            if ( $num < 50 ) {
                print STDERR "After $max_passes passes here is ARGV\n";
                print STDERR "(@new_argv)\n";
            }
            else {
                print STDERR "After $max_passes passes ARGV has $num entries\n";
            }

            if ($config_file) {
                die <<"DIE";
Please check your configuration $config_file for circular-references. 
To deactivate it, use -npro.
DIE
            }
            else {
                die <<'DIE';
Program bug - circular-references in the %expansion hash, probably due to
a recent program change.
DIE
            }
        }    # end of check for circular references
    }    # end of loop over all passes

    # now let GetOptions look at @ARGV and our option string to set the
    # options hash
    if ( !GetOptions( \%Opts, @option_string ) ) {
        die "try 'perltidy --help'\n";
    }

    if ( $Opts{help} ) {
        usage();
        exit 1;
    }

    if ( $Opts{'version'} ) {
        print <<"EOM";
This is perltidy, v$VERSION 

Copyright 2000-2001, Steven L. Hancock

Perltidy is free software and may be copied under the terms of the GNU
General Public License, which is included in the distribution files.

Complete documentation for perltidy can be found using 'man perltidy'
or on the internet at http://perltidy.sourceforge.net.
EOM
        exit 1;
    }

    if ( $Opts{'dump-defaults'} ) {
        dump_defaults(@defaults);
        exit 1;
    }

    if ( $Opts{'dump-long-names'} ) {
        dump_long_names(@option_string);
        exit 1;
    }

    if ( $Opts{'dump-short-names'} ) {
        dump_short_names( \%expansion );
        exit 1;
    }

    if ( $Opts{'dump-options'} ) {
        dump_options( \%Opts );
        exit 1;
    }

    if ( $Opts{'dump-token-types'} ) {
        Perltidy::Tokenizer->dump_token_types(*STDOUT);
        exit 1;
    }

    #---------------------------------------------------------------
    # Now we have to handle any interactions among the options..
    #---------------------------------------------------------------

    # In quiet mode, there is no log file and hence no way to report
    # results of syntax check, so don't do it.
    if ( $Opts{'quiet'} ) {
        $Opts{'check-syntax'} = 0;
    }

    # either html output or tidy output, not both
    if ( $Opts{'html'} ) {
        $Opts{'tidy-output'} = 0;
    }

    # can't check syntax if no output
    if ( !$Opts{'tidy-output'} ) {
        $Opts{'check-syntax'} = 0;
    }

    # see if user set a non-negative logfile-gap
    if ( defined( $Opts{'logfile-gap'} ) && $Opts{'logfile-gap'} >= 0 ) {

        # a zero gap will be taken as a 1
        if ( $Opts{'logfile-gap'} == 0 ) {
            $Opts{'logfile-gap'} = 1;
        }

        # setting a non-negative logfile gap causes logfile to be saved
        $Opts{'logfile'} = 1;
    }

    # not setting logfile gap, or setting it negative, causes default of 50
    else {
        $Opts{'logfile-gap'} = 50;
    }

    # set short-cut flag when only indentation is to be done.
    # Note that the user may or may not have already set the
    # indent-only flag.
    if ( !$Opts{'add-whitespace'} && !$Opts{'delete-old-whitespace'}
      && !$Opts{'add-newlines'} && !$Opts{'delete-old-newlines'} )
    {
        $Opts{'indent-only'} = 1;
    }

    # set shortcut flag if no blanks to be written
    unless ( $Opts{'maximum-consecutive-blank-lines'} ) {
        $Opts{'swallow-optional-blank-lines'} = 1;
    }
    return ( \%Opts, $config_file, \@raw_options );

}    # end of process_command_line

# Debugging aid: print every abbreviation in the expansion hash, in sorted
# order, followed by the list of names to which it expands.
#
# Parameter: reference to a hash mapping each abbreviation to a reference
# to its list of expansions.  Output goes to STDOUT.
sub dump_short_names {
    my $rexpansion = shift;
    print STDOUT "List of abbreviations:\n";
    for my $short_name ( sort keys %{$rexpansion} ) {

        # join with the list separator, exactly as interpolating the
        # list into a double-quoted string would
        my $expanded = join( $", @{ $rexpansion->{$short_name} } );
        print STDOUT $short_name . ' --> ' . $expanded . "\n";
    }
}

sub read_config_file {

    # Locate and parse the user's .perltidyrc file.
    #
    # The file is looked for in the current directory first, and then (if
    # $ENV{HOME} is defined) in the home directory.
    #
    # Each line, after comments and surrounding whitespace are trimmed, may
    # hold plain options, or the pieces of an alias definition of the form
    #     name { -opt1 -opt2 ... }
    # which may be spread over several lines.
    #
    # Given:   $rexpansion - reference to the hash of abbreviation
    #          expansions; each alias defined in the file is added to it
    # Returns: ( $config_file, \@config_list ), where $config_file is the
    #          name of the file which was read ("" if none was found or it
    #          could not be opened) and @config_list holds the plain options
    # Dies on a nested alias, an attempt to redefine an existing
    # abbreviation, or an unexpected '}'.
    my ($rexpansion) = @_;

    my @config_list = ();
    my $config_file = ".perltidyrc";
    if ( defined( $ENV{HOME} ) && !( -e $config_file ) ) {
        $config_file = "$ENV{HOME}/" . $config_file;
    }

    return ( "", \@config_list ) unless ( -e $config_file );

    unless ( open CONFIG, "<$config_file" ) {
        warn "cannot open config file $config_file: $!\n";
        return ( "", \@config_list );
    }

    # name of the alias whose body is currently being read, if any
    my $alias = undef;

    while (<CONFIG>) {
        chomp;
        s/\#.*$//;              # trim comments
        s/^\s*(.*?)\s*$/$1/;    # trim both ends
        next unless $_;

        # a line has the form:  [name {] [options] [}]
        # lines which do not fit this form are ignored
        next unless ( $_ =~ /^((\w+)\s*\{)?([^}]*)(\})?$/ );
        my ( $new_alias, $options, $closing_brace ) = ( $2, $3, $4 );

        if ($new_alias) {
            if ($alias) {
                die
"No '}' seen after $alias and before $new_alias in config file $config_file line $.\n";
            }
            $alias = $new_alias;

            if ( $rexpansion->{$alias} ) {
                my $installed = join ')(', sort keys %{$rexpansion};
                print
"Here is a list of all installed aliases\n($installed)\n";
                die
"Attempting to redefine alias ($alias) in config file $config_file line $.\n";
            }
            $rexpansion->{$alias} = [];
        }

        if ($options) {
            if ($alias) {
                $options =~ s/^\s*\-//;      # remove the leading -
                $options =~ s/(\s)-/$1/g;    # remove the interior -'s
                push @{ $rexpansion->{$alias} }, split /\s+/, $options;
            }
            else {
                push @config_list, split /\s+/, $options;
            }
        }

        if ($closing_brace) {
            unless ($alias) {
                die
"Unexpected '}' seen in config file $config_file line $.\n";
            }
            $alias = undef;
        }
    }
    close CONFIG;

    return ( $config_file, \@config_list );
}

sub dump_long_names {

    # Print a short legend of the Getopt type codes followed by all of
    # the long option names given in @_, sorted, one per line.
    print STDOUT <<EOM;
# Command line long names (passed to GetOptions)
#---------------------------------------------------------------
# here is a summary of the Getopt codes:
# <none> does not take an argument
# =s takes a mandatory string
# :s takes an optional string
# =i takes a mandatory integer
# :i takes an optional integer
# ! does not take an argument and may be negated
#  i.e., -foo and -nofoo are allowed
# a double dash signals the end of the options list
#
#---------------------------------------------------------------
EOM

    for my $long_name ( sort @_ ) {
        print STDOUT "$long_name\n";
    }
}

sub dump_defaults {

    # Print the default command line options given in @_ to standard
    # output, one per line and in sorted order.
    my @defaults = sort @_;
    print STDOUT "Default command line options:\n";

    # loop over the sorted copy; looping over @_ would ignore the sort
    foreach my $option (@defaults) { print STDOUT "$option\n" }
}

sub dump_options {

    # Print the final option settings for this run to standard output
    # as "name=value" lines, sorted by option name.
    #
    # Given: $rOpts - reference to the hash of options
    my ($rOpts) = @_;

    print STDOUT "Final parameter set for this run\n";
    for my $name ( sort keys %{$rOpts} ) {
        my $value = $rOpts->{$name};
        print STDOUT "$name=$value\n";
    }
}

sub usage {

    # Print the brief help message (shown for -h) to standard output.
    # The text interpolates the package variable $VERSION.
    #
    # Switches which take a value are shown with '=n'; this includes
    # -sbt and -mft, whose descriptions refer to a value n.
    print STDOUT <<EOF;
This is perltidy version $VERSION, a perl script indenter.  Usage:

    perltidy [ options ] file1 file2 file3 ...
            (output goes to file1.tdy, file2.tdy, file3.tdy, ...)
    perltidy [ options ] file1 -o outfile
    perltidy [ options ] file1 -st >outfile
    perltidy [ options ] <infile >outfile

Options have short and long forms. Short forms are shown; see
man pages for long forms.

I/O control
 -h      show this help
 -o=file name of the output file (only if single input file)
 -q      deactivate error messages (for running under editor)
 -syn    run perl -c to check syntax (default)
 -log    save .LOG file, which has useful diagnostics
 -g      like -log but writes more detailed .LOG file, for debugging scripts
 -opt    write set of options used to .LOG file
 -npro   ignore .perltidyrc command file 
 -st     send output to standard output

Basic Options:
 -i=n    use n columns per indentation level (default n=4)
 -t      tabs: use one tab character per indentation level
 -nt     no tabs: use n spaces per indentation level (default)
 -io     "indent only": just do indentation, no other formatting.
 -sil=n  set starting indentation level to n;  use if auto detection fails

Whitespace Control
 -fws    freeze whitespace; this disables all whitespace changes
           and disables the following switches:
 -bt=n   sets brace tightness,  n= (0 = loose, 1=default, 2 = tight)
 -pt=n   paren tightness (n=0, 1 or 2)
 -sbt=n  square bracket tightness (n=0, 1, or 2)
 -ci=n   sets continuation indentation=n,  default is n=2 spaces
 -ibc    indent block comments; this is the default
 -ssf    add space before semicolon in for( ; ; )
 -msc=n  minimum spaces to side comment, default 4
 -aws    add whitespace
 -dws    delete old whitespace          

Line Break Control
 -fnl    freeze newlines; this disables all line break changes
            and disables the following switches:
 -anl    add newlines;  ok to introduce new line breaks
 -bbs    add blank line before subs and packages
 -bbc    add blank line before block comments
 -bbb    add blank line between major blocks
 -sob    swallow optional blank lines
 -ce     cuddled else; use this style: '} else {'
 -dnl    delete old newlines
 -mbl=n  maximum consecutive blank lines (default=1)
 -l=n    maximum line length;  default n=80
 -bl     opening brace on new line 

Delete selected text
 -dac    delete all comments 
 -dbc    delete block comments     
 -dsc    delete side comments  
 -dp     delete pod

Send selected text to a '.TEE' file
 -tac    tee all comments          
 -tbc    tee block comments       
 -tsc    tee side comments       
 -tp     tee pod           

Other controls
 -mft=n maximum fields per table; default n=40
 -html  write an html file (see 'man perl2web' for options)
 -x     do not format lines before hash-bang line
 -v     display version number to standard output and quit
 -dump-options      write options used in this run to standard output and quit
 -dump-defaults     write default options to standard output and quit
 -dump-short-names  write all option short names to standard output and quit
 -dump-long-names   write all option long names to standard output and quit

A prefix of "n" negates short form toggle switches, and a prefix of "no"
negates the long forms.  For example, -nt or --notabs mean to indent with
spaces rather than tabs.   Do not bundle switches together.

If you are unable to see this entire text, try "perltidy -h | more"
For more detailed information, and additional options, try "man perltidy",
or go to the perltidy home page at http://perltidy.sourceforge.net
EOF

}

sub process_this_file {

    # Pass every line of tokens from the tokenizer to the formatter,
    # and then let both objects finish their work.
    #
    # Given: $tokenizer - object with get_line() and report_errors()
    #        $formatter - object with write_line() and finish_up()
    my ( $tokenizer, $formatter ) = @_;

    # keep going until the tokenizer returns a false value
    for ( ; ; ) {
        my $line_of_tokens = $tokenizer->get_line();
        last unless ($line_of_tokens);
        $formatter->write_line($line_of_tokens);
    }

    # the formatter finishes first, then the tokenizer reports
    $formatter->finish_up();
    $tokenizer->report_errors();
}

sub check_syntax {

    # Use 'perl -c' to make sure that we did not create bad syntax.
    # This is a very good independent check for programming errors.
    #
    # Given:
    #   $ifname, $ofname - names of the input and output files
    #   $logger_object   - object used to record the results
    #   $rOpts           - reference to the options hash; if the option
    #                      'look-for-hash-bang' is set, perl is run with -x
    # Procedure:
    # - check the syntax of the input file
    # - if it is bad, we are done (it could be an incomplete code snippet)
    # - if it is ok, check the syntax of the output file;
    #   bad output syntax causes a warning, because it implies a code bug!
    # Returns the flag "infile_syntax_ok": =-1 bad, 0 unknown, 1 good
    my ( $ifname, $ofname, $logger_object, $rOpts ) = @_;

    my $rule = "------------------------------------------\n";
    my $infile_syntax_ok = 0;

    # invoke perl with -x if requested
    my $dash_x = "";
    if ( $rOpts->{'look-for-hash-bang'} ) { $dash_x = "-x" }

    # this shouldn't happen unless perltidy.TMPI couldn't be made
    if ( $ifname eq '-' ) {
        $logger_object->write_logfile_entry(
          "Cannot run perl -c on STDIN and STDOUT\n");
        return $infile_syntax_ok;
    }

    $logger_object->write_logfile_entry(
      "checking input file syntax with perl -c...\n");
    $logger_object->write_logfile_entry($rule);

    # Not all operating systems/shells support redirection of the standard
    # error output.
    my $error_redirection = '2>&1';
    if ( $^O eq 'VMS' ) { $error_redirection = "" }

    my $input_report = `perl -c $dash_x $ifname $error_redirection`;
    $logger_object->write_logfile_entry("$input_report\n");

    # a bad input file ends the check
    unless ( $input_report =~ /syntax\s*OK/ ) {
        $infile_syntax_ok = -1;
        $logger_object->write_logfile_entry($rule);
        $logger_object->write_logfile_entry(
          "The output file will not be checked because of input file problems\n"
        );
        return $infile_syntax_ok;
    }

    $infile_syntax_ok = 1;
    $logger_object->write_logfile_entry($rule);
    $logger_object->write_logfile_entry(
      "checking output file syntax with perl -c...\n");
    $logger_object->write_logfile_entry($rule);

    my $output_report = `perl -c $dash_x $ofname $error_redirection`;
    $logger_object->write_logfile_entry("$output_report\n");

    unless ( $output_report =~ /syntax\s*OK/ ) {
        $logger_object->write_logfile_entry($rule);
        $logger_object->warning(
"The output file has a syntax error when tested with perl -c $dash_x $ofname !\n"
        );
        $logger_object->warning(
          "This implies an error in perltidy; the file $ofname is bad\n");
        $logger_object->report_definite_bug();

        # the perl version number will be helpful for diagnosing the problem
        $logger_object->write_logfile_entry(
          `perl -v $dash_x $ofname $error_redirection` . "\n" );
    }
    return $infile_syntax_ok;
}

#####################################################################
#
# the Perltidy::LineSource class supplies an object with a 'get_line()' method
# which returns the next line to be parsed
#
#####################################################################

package Perltidy::LineSource;

sub new {

    # Create a line source object for an input file.
    #
    # Given:   $class      - class name
    #          $input_file - name of the file to read
    #          $rOpts      - reference to the options hash
    # Returns: the new object, or undef (after a warning) if the input
    #          file could not be opened
    # Dies if the copy file perltidy.TMPI is needed but cannot be opened.
    my $class      = shift;
    my $input_file = shift;
    my $rOpts      = shift;

    my $fh = IO::File->new("< $input_file");
    unless ($fh) {
        warn("can't open $input_file: $!\n");
        return undef;
    }

    # in order to check output syntax when standard output is used, we have
    # to make a copy of the file
    my $input_file_copy = undef;
    my $fh_copy;
    if ( $input_file eq '-' && $rOpts->{'check-syntax'} ) {
        $input_file_copy = "perltidy.TMPI";

        # the message is built from two pieces so that it does not
        # contain a raw line break followed by source indentation
        $fh_copy = IO::File->new(">$input_file_copy")
          or die ( "Couldn't open $input_file_copy: $!\n"
              . "It is needed to check syntax; deactivate with -nsyn\n" );
    }

    return bless {
        _fh              => $fh,
        _fh_copy         => $fh_copy,
        _filename        => $input_file,
        _input_file_copy => $input_file_copy,
    }, $class;
}

sub get_input_file_copy_name {

    # Return the name of the copy of the input file if one is recorded
    # in the object; otherwise return the name of the input file itself.
    my ($self) = @_;
    return $self->{_input_file_copy} || $self->{_filename};
}

sub close_input_file {

    # Close the input file handle and, if there is one, the handle of
    # the copy of the input file.
    my ($self) = @_;
    close $self->{_fh};
    if ( $self->{_fh_copy} ) { close $self->{_fh_copy} }
}

sub unlink_copy {

    # Remove the temporary copy of the input file, if one was made.
    #
    # unlink() needs the NAME of the file; passing it the file handle
    # (_fh_copy) would leave the copy on disk.
    my $self            = shift;
    my $input_file_copy = $self->{_input_file_copy};
    unlink($input_file_copy) if $input_file_copy;
}

sub get_line {

    # Read and return the next line of the input file; undef is
    # returned at the end of the file.  Each line read is also written
    # to the copy of the input file if a copy handle exists.
    my $self    = shift;
    my $fh      = $self->{_fh};
    my $fh_copy = $self->{_fh_copy};
    my $line    = <$fh>;

    # test with defined(), not truth: a final line "0" with no newline
    # is false but must still be copied
    if ( defined($line) && $fh_copy ) { print $fh_copy $line; }
    return $line;
}

#####################################################################
#
# the Perltidy::LineSink class supplies a write_line method for 
# actual file writing
#
#####################################################################

package Perltidy::LineSink;

sub new {

    # Create a line sink object, which does the actual file writing.
    #
    # Given:   $class       - class name
    #          $output_file - name of the output file ('-' selects the
    #                         temporary copy perltidy.TMPO, see below)
    #          $tee_file    - name of the tee file; it is not opened here
    #          $rOpts       - reference to the options hash
    # Returns: the new object
    # Dies if a file which is needed cannot be opened.
    my ( $class, $output_file, $tee_file, $rOpts ) = @_;
    my $fh               = undef;
    my $fh_copy          = undef;
    my $fh_tee           = undef;
    my $output_file_copy = "";
    my $output_file_open = 0;

    # the output file is only opened when tidy output is wanted
    if ( $rOpts->{'tidy-output'} ) {
        $fh = IO::File->new(">$output_file")
          or die ("couldn't open output file $output_file: $!\n");
        $output_file_open = 1;

        # in order to check output syntax when standard output is used, we
        # have to make a copy of the file
        if ( $output_file eq '-' && $rOpts->{'check-syntax'} ) {
            $output_file_copy = "perltidy.TMPO";
            $fh_copy          = IO::File->new(">$output_file_copy")
              or die ( "couldn't open $output_file_copy: $!\n"
                  . "It is needed to check syntax; deactivate with -nsyn\n" );
        }
    }

    bless {
        _fh               => $fh,
        _fh_copy          => $fh_copy,
        _fh_tee           => $fh_tee,
        _output_file      => $output_file,
        _output_file_open => $output_file_open,
        _output_file_copy => $output_file_copy,
        _tee_flag         => 0,
        _tee_file         => $tee_file,
        _tee_file_opened  => 0,
    }, $class;
}

sub write_line {

    # Write one line of text to the output file if it is open, to the
    # copy of the output file if there is one, and to the tee file if
    # the tee flag is on.  The tee file is opened the first time that
    # it is needed.
    my ( $self, $line ) = @_;

    if ( $self->{_output_file_open} ) {
        my $fh = $self->{_fh};
        print $fh $line;
    }

    if ( $self->{_output_file_copy} ) {
        my $fh_copy = $self->{_fh_copy};
        print $fh_copy $line;
    }

    if ( $self->{_tee_flag} ) {
        $self->really_open_tee_file() unless ( $self->{_tee_file_opened} );
        my $fh_tee = $self->{_fh_tee};
        print $fh_tee $line;
    }
}

sub get_output_file_copy {

    # Return the name of the copy of the output file if one is recorded
    # in the object; otherwise return the name of the output file itself.
    my ($self) = @_;
    return $self->{_output_file_copy} || $self->{_output_file};
}

sub tee_on {

    # Set the tee flag; write_line sends lines to the tee file while
    # the flag is set.
    my ($self) = @_;
    return $self->{_tee_flag} = 1;
}

sub tee_off {

    # Clear the tee flag; write_line stops sending lines to the tee file.
    my ($self) = @_;
    return $self->{_tee_flag} = 0;
}

sub really_open_tee_file {

    # Open the tee file for writing and record the handle in the object.
    # Dies if the file cannot be opened.
    my ($self) = @_;
    my $tee_file = $self->{_tee_file};

    my $fh_tee = IO::File->new(">$tee_file")
      or die ("couldn't open TEE file $tee_file: $!\n");

    $self->{_tee_file_opened} = 1;
    $self->{_fh_tee} = $fh_tee;
}

sub close_output_file {

    # Close the output file if it is open, the copy of the output file
    # if there is one, and finally the tee file.
    my ($self) = @_;
    if ( $self->{_output_file_open} ) { close $self->{_fh} }
    if ( $self->{_output_file_copy} ) { close $self->{_fh_copy} }
    $self->close_tee_file();
}

sub close_tee_file {

    # Close the tee file if it has been opened, and note that it is
    # no longer open.
    my ($self) = @_;

    if ( $self->{_tee_file_opened} ) {
        my $fh_tee = $self->{_fh_tee};
        close $fh_tee;
        $self->{_tee_file_opened} = 0;
    }
}

sub unlink_copy {

    # Remove the copy of the output file, if the object has the name
    # of one.
    my ($self) = @_;
    my $output_file_copy = $self->{_output_file_copy};
    unlink($output_file_copy) if $output_file_copy;
}

#####################################################################
#
# The Perltidy::Diagnostics class writes the DIAGNOSTICS file, which is 
# useful for program development.
#
# Only one such file is created regardless of the number of input
# files processed.  This allows the results of processing many files 
# to be summarized in a single file.
#
#####################################################################

package Perltidy::Diagnostics;

sub new {

    # Create a diagnostics object.  The message count starts at zero
    # and no file names are known yet.
    my ($class) = @_;
    my $self = {
        _write_diagnostics_count => 0,
        _last_diagnostic_file    => "",
        _input_file              => "",
        _fh                      => undef,
    };
    return bless $self, $class;
}

sub set_input_file {

    # Record the name of the input file; write_diagnostics uses it to
    # label its output.
    my ( $self, $input_file ) = @_;
    return $self->{_input_file} = $input_file;
}

# This is a diagnostic routine which is useful for program development.
# Output from debug messages goes to a file named DIAGNOSTICS, where
# it is labeled by file and line.  This allows many files to be
# scanned at once for some particular condition of interest.
sub write_diagnostics {

    # Write a message to the file DIAGNOSTICS in the current directory.
    # The file is opened on the first call.  A "FILE:" heading is written
    # whenever the input file name differs from the one of the previous
    # call, and each message is prefixed with the input line number
    # obtained from the tokenizer.
    #
    # Given: the text of the message in @_
    # Dies if the file DIAGNOSTICS cannot be opened.
    my $self = shift;

    # note: use 'die' here; this package has no routine named 'death', so
    # calling it would hide the real reason for the failure
    unless ( $self->{_write_diagnostics_count} ) {
        open DIAGNOSTICS, ">DIAGNOSTICS"
          or die("couldn't open DIAGNOSTICS: $!\n");
    }

    my $last_diagnostic_file = $self->{_last_diagnostic_file};
    my $input_file = $self->{_input_file};
    if ( $last_diagnostic_file ne $input_file ) {
        print DIAGNOSTICS "\nFILE:$input_file\n";
    }
    $self->{_last_diagnostic_file} = $input_file;
    my $input_line_number = Perltidy::Tokenizer::get_input_line_number();
    print DIAGNOSTICS "$input_line_number:\t@_";
    $self->{_write_diagnostics_count}++;
}

#####################################################################
#
# The Perltidy::Logger class writes the .LOG and .ERR files
#
#####################################################################

package Perltidy::Logger;

sub new {

    # Create a logger object.
    #
    # Given:   $class        - class name
    #          $rOpts        - reference to the options hash
    #          $log_file     - name of the log file; it is opened here
    #          $warning_file - name of the error output file; any old copy
    #                          is removed here
    # Returns: the new object
    # Dies if the log file cannot be opened.
    my ( $class, $rOpts, $log_file, $warning_file ) = @_;

    my $fh = IO::File->new(">$log_file")
      or die ("couldn't open log file $log_file: $!\n");

    # remove any old error output file
    unlink($warning_file) if ( -e $warning_file );

    my $self = {
        _log_file                => $log_file,
        _fh                      => $fh,
        _fh_warnings             => undef,
        _rOpts                   => $rOpts,
        _last_input_line_written => 0,
        _at_end_of_file          => 0,
        _use_prefix              => 1,
        _block_log_output        => 0,
        _line_information_string => "",
        _wrote_column_headings   => 0,
        _warning_file            => $warning_file,
        _warning_count           => 0,
        _saw_code_bug            => -1,             # -1=no 0=maybe 1=for sure
        _saw_brace_error         => 0,
    };
    bless $self, $class;
}

sub close_log_file {

    # Close the log file, and also the warning file if any warnings
    # have been counted.
    my ($self) = @_;
    close $self->{_fh};
    if ( $self->{_warning_count} ) { close $self->{_fh_warnings} }
}

sub get_use_prefix {

    # Return the current value of the _use_prefix flag.
    my ($self) = @_;
    my $use_prefix = $self->{_use_prefix};
    return $use_prefix;
}

sub block_log_output {

    # Set the flag which makes logfile_output discard its messages.
    my ($self) = @_;
    return $self->{_block_log_output} = 1;
}

sub unblock_log_output {

    # Clear the flag which makes logfile_output discard its messages.
    my ($self) = @_;
    return $self->{_block_log_output} = 0;
}

sub interrupt_logfile {

    # Turn off the line prefix, issue an empty warning line, and write
    # a banner line marking the start of a warning to the logfile.
    my ($self) = @_;
    $self->{_use_prefix} = 0;
    $self->warning("\n");

    my $banner = '#' x 24 . "  WARNING  " . '#' x 25 . "\n";
    $self->write_logfile_entry($banner);
}

sub resume_logfile {

    # Write a closing line of '#' characters to the logfile and turn
    # the line prefix back on.
    my ($self) = @_;
    my $closing_line = '#' x 60 . "\n";
    $self->write_logfile_entry($closing_line);
    $self->{_use_prefix} = 1;
}

sub we_are_at_the_last_line {

    # Note in the logfile that the last line has been reached, and set
    # the end-of-file flag.
    my ($self) = @_;
    $self->write_logfile_entry("Last line\n\n");
    return $self->{_at_end_of_file} = 1;
}

# record some stuff in case we go down in flames
sub black_box {

    # Record some information about the current line in case we go down
    # in flames.
    #
    # The line information string is updated for every line.  The start
    # of the line itself is written to the logfile if the number of lines
    # since the last one written has reached the 'logfile-gap' option, or
    # if the line begins a sub or package.
    #
    # Given: $line_of_tokens     - reference to hash of data for this line
    #        $output_line_number - the corresponding output line number
    my ( $self, $line_of_tokens, $output_line_number ) = @_;

    my $input_line        = $line_of_tokens->{_line_text};
    my $input_line_number = $line_of_tokens->{_line_number};
    my $rlevels           = $line_of_tokens->{_rlevels};
    my $structural_indentation_level = $$rlevels[0];

    # make columns of information for every line in case a logfile message
    # needs to go out
    $self->make_line_information_string( $line_of_tokens, $output_line_number );

    my $rOpts = $self->{_rOpts};
    my $lines_since_last_entry =
      $input_line_number - $self->{_last_input_line_written};

    if ( $lines_since_last_entry >= $rOpts->{'logfile-gap'}
      || $input_line =~ /^\s*(sub|package)\s+(\w+)/ )
    {
        $self->{_last_input_line_written} = $input_line_number;

        # show the code without leading whitespace, with one '.' for each
        # indentation level
        my $out_str = $input_line;
        $out_str =~ s/^\s*//;
        chomp $out_str;
        $out_str = ( '.' x $structural_indentation_level ) . $out_str;

        # long lines are truncated
        if ( length($out_str) > 35 ) {
            $out_str = substr( $out_str, 0, 35 ) . " ....";
        }
        $self->logfile_output( "", "$out_str\n" );
    }
}

sub write_logfile_entry {

    # Send a message to the logfile.  The arguments are joined into a
    # single string.
    my ( $self, @message ) = @_;

    # add leading >>> to avoid confusing error messages and code
    $self->logfile_output( ">>>", "@message" );
}

sub write_column_headings {

    # Write the explanation of the logfile columns, and the column
    # headings, to the logfile, and note that this has been done.
    my ($self) = @_;

    my $fh = $self->{_fh};
    print $fh <<EOM;
The nesting depths in the table below are at the start of the lines.
The indicated output line numbers are not always exact.

in/out   indent  nesting   code + messages; (messages begin with >>>)
lines    levels  depths    (code begins with one '.' per indent level)
-------  ------ --------   -------------------------------------------
EOM
    $self->{_wrote_column_headings} = 1;
}

sub make_line_information_string {

    # Build the string of line numbers, indentation levels and nesting
    # depths which logfile_output writes in front of a logfile message,
    # and save it in the object.  Nothing is done if the line has no
    # input line number.  The column headings are written first if they
    # have not been written yet.
    #
    # Given: $line_of_tokens     - reference to hash of data for this line
    #        $output_line_number - the corresponding output line number
    my ( $self, $line_of_tokens, $output_line_number ) = @_;

    my $input_line_number = $line_of_tokens->{_line_number};
    my $rlevels           = $line_of_tokens->{_rlevels};
    my $structural_indentation_level = $$rlevels[0];

    if ($input_line_number) {
        $self->write_column_headings() unless $self->{_wrote_column_headings};

        # keep logfile columns aligned for scripts up to 999 lines;
        # for longer scripts it doesn't really matter
        my $extra_space = "";
        foreach my $number ( $input_line_number, $output_line_number ) {
            if    ( $number < 10 )  { $extra_space .= "  " }
            elsif ( $number < 100 ) { $extra_space .= " " }
        }

        my $line_numbers = "L$input_line_number->$output_line_number";
        my $levels       = "i"
          . $line_of_tokens->{_python_indentation_level} . "->"
          . $structural_indentation_level;
        my $depths = "("
          . $line_of_tokens->{_paren_depth} . " ["
          . $line_of_tokens->{_square_bracket_depth} . " {"
          . $line_of_tokens->{_curly_brace_depth};

        $self->{_line_information_string} =
          "$line_numbers$extra_space $levels $depths";
    }
}

sub logfile_output {

    # Write a message to the logfile, unless log output is blocked.
    #
    # The message is preceded by the line information string and the
    # prompt, except when we are at the end of the file, when the prefix
    # is turned off, or when there is no line information string.
    #
    # Given: $prompt - text written between the line information and
    #                  the message
    #        $msg    - the message
    my ( $self, $prompt, $msg ) = @_;
    return if ( $self->{_block_log_output} );

    my $text = "$msg";
    if ( !$self->{_at_end_of_file} && $self->{_use_prefix} ) {
        my $line_information_string = $self->{_line_information_string};
        if ($line_information_string) {
            $text = "$line_information_string   $prompt$msg";
        }
    }

    my $fh = $self->{_fh};
    print $fh $text;
}

sub get_saw_brace_error {

    # Return the brace error counter.
    my ($self) = @_;
    my $saw_brace_error = $self->{_saw_brace_error};
    return $saw_brace_error;
}

sub increment_brace_error {

    # Add one to the brace error counter.
    my ($self) = @_;
    return $self->{_saw_brace_error}++;
}

sub brace_warning {

    # Issue a warning about a brace error and count it.  Only the first
    # BRACE_WARNING_LIMIT of these warnings are passed on; when the limit
    # is reached, a final note says that no more will be given.
    #
    # Given: the text of the warning in @_
    my $self = shift;
    use constant BRACE_WARNING_LIMIT => 10;

    my $errors_so_far = $self->{_saw_brace_error};
    $self->warning(@_) if ( $errors_so_far < BRACE_WARNING_LIMIT );

    # count this one
    $errors_so_far++;
    $self->{_saw_brace_error} = $errors_so_far;

    if ( $errors_so_far == BRACE_WARNING_LIMIT ) {
        $self->warning("No further warnings of this type will be given\n");
    }
}

sub warning {

    # Write a warning message to the warning file and to the logfile.
    # Nothing is done in quiet mode.
    #
    # The warning file is opened when the first warning arrives, and the
    # user is told on STDERR to look at it.  Only the first WARNING_LIMIT
    # warnings are written; when the limit is reached, a final note says
    # that no more will be given.  Every call is counted.
    #
    # Given: the text of the warning in @_
    # Dies if the warning file cannot be opened.
    my $self = shift;
    use constant WARNING_LIMIT => 50;

    my $rOpts = $self->{_rOpts};
    unless ( $rOpts->{'quiet'} ) {

        my $warning_count = $self->{_warning_count};
        unless ($warning_count) {
            my $warning_file = $self->{_warning_file};
            my $fh_warnings;

            # note: use 'die' here; this package has no routine named
            # 'death', so calling it would hide the real reason for the
            # failure
            $fh_warnings = IO::File->new(">$warning_file")
              or die("couldn't open $warning_file: $!\n");
            print STDERR "Please see file $warning_file!\n";
            $self->{_fh_warnings} = $fh_warnings;
        }

        my $fh_warnings = $self->{_fh_warnings};
        if ( $warning_count < WARNING_LIMIT ) {

            # with the prefix on, warnings are labeled with the line number
            if ( $self->get_use_prefix() > 0 ) {
                my $input_line_number =
                  Perltidy::Tokenizer::get_input_line_number();
                print $fh_warnings "$input_line_number:\t@_";
                $self->write_logfile_entry("WARNING: @_");
            }
            else {
                print $fh_warnings @_;
                $self->write_logfile_entry(@_);
            }
        }
        $warning_count++;
        $self->{_warning_count} = $warning_count;

        # end the file with a complete line
        if ( $warning_count == WARNING_LIMIT ) {
            print $fh_warnings "No further warnings will be given\n";
        }
    }
}

# programming bug codes:
#   -1 = no bug
#   0 = maybe, not sure.
#   1 = definitely
sub report_possible_bug {

    # Note that a programming bug may have been seen.  The code is
    # raised from -1 (no bug) to 0 (maybe); a code which is already
    # 0 or 1 is left unchanged.
    my ($self) = @_;
    my $bug_code = $self->{_saw_code_bug};
    if ( $bug_code < 0 ) { $bug_code = 0 }
    return $self->{_saw_code_bug} = $bug_code;
}

sub report_definite_bug {

    # Note that a programming bug has definitely been seen (code 1).
    my ($self) = @_;
    return $self->{_saw_code_bug} = 1;
}

sub ask_user_for_bug_report {

    # Ask the user to report a bug in perltidy, if one was seen.
    #
    # A definite bug (code 1) always causes a request.  A possible bug
    # (code 0) causes a request only if the syntax of the input file is
    # known to be good.
    #
    # Given: $infile_syntax_ok - flag: =-1 bad 0 unknown 1 good
    my ( $self, $infile_syntax_ok ) = @_;
    my $saw_code_bug = $self->{_saw_code_bug};

    if ( $saw_code_bug == 1 ) {
        $self->warning(<<EOM);

Oops, you seem to have encountered a bug in perltidy.  Please report it
so that it can be corrected.  Include the smallest possible script which
produces this message, along with the .LOG file if appropriate. See the
manual pages for contact information.  Your efforts are appreciated.  
Thank you!
EOM
    }
    elsif ( $saw_code_bug == 0 && $infile_syntax_ok == 1 ) {
        $self->warning(<<EOM);

You may have encountered a code bug in perltidy.  If you have a valid
script, please report it so that it can be corrected.  Include the
smallest possible script which has the problem, along with the .LOG
file. See the manual pages for contact information.  Thank you!
EOM

    }
}

sub finish {

    # Finish the logging for one file: point the user to the logfile if
    # there were warnings, ask for a bug report if appropriate, close the
    # files, and remove the logfile if it is not needed.
    #
    # Given: $infile_syntax_ok - flag: =-1 bad 0 unknown 1 good
    my ( $self, $infile_syntax_ok ) = @_;

    my $rOpts    = $self->{_rOpts};
    my $log_file = $self->{_log_file};

    # this is the count before the closing messages below are added
    my $warning_count = $self->{_warning_count};

    if ($warning_count) {
        $self->block_log_output();    # avoid echoing this to the logfile
        $self->warning( "The logfile $log_file may contain useful information\n"
        );
        $self->unblock_log_output();

        if ( $self->{_saw_brace_error} && ( $rOpts->{'logfile-gap'} > 1 ) ) {
            $self->warning("Running with -g may help you locate the problem\n");
        }
    }
    $self->ask_user_for_bug_report($infile_syntax_ok);
    $self->close_log_file();

    # delete the log file unless it is needed or wanted
    if ( !$warning_count && !$rOpts->{'logfile'} ) {
        unlink($log_file);
    }
}

#####################################################################
#
# The Perltidy::HtmlWriter class writes a copy of the input stream in html
#
#####################################################################

package Perltidy::HtmlWriter;

# class variables
use vars qw{
  %html_color
  %html_bold
  %html_italic
  %token_short_names
  %short_to_long_names
  $rOpts
  $css_filename
  $css_linkname
};

BEGIN {

    # Build, at compile time, the two lookup tables used for html markup:
    #   %short_to_long_names : short property name => long property name
    #   %token_short_names   : token type          => short property name

    # This is the official list of tokens which may be identified by the
    # user.  Long names are used as getopt keys.  Short names are
    # convenient short abbreviations for specifying input.  Short names
    # somewhat resemble token type characters, but are often different
    # because they may only be alphanumeric, to allow command line
    # input.  Also, note that because of case insensitivity of html,
    # this table must be in a single case only (I've chosen to use all
    # lower case).
    # When adding NEW_TOKENS: update this hash table
    # short names => long names
    %short_to_long_names = (
      'n'  => 'numeric',
      'p'  => 'paren',
      'q'  => 'quote',
      's'  => 'structure',
      'c'  => 'comment',
      'ws' => 'whitespace',
      'v'  => 'v-string',
      'cm' => 'comma',
      'w'  => 'bareword',
      'co' => 'colon',
      'pu' => 'punctuation',
      'i'  => 'identifier',
      'j'  => 'label',
      'h'  => 'here-doc-target',
      'hh' => 'here-doc-text',
      'k'  => 'keyword',
      'sc' => 'semicolon',
    );

    # Now we have to map actual token types into one of the above short
    # names; any token types not mapped will get 'punctuation'
    # properties.
    # When adding NEW_TOKENS: update this hash table
    # $type => $short_name
    %token_short_names = (
      '#'  => 'c',
      'n'  => 'n',
      'v'  => 'v',
      'k'  => 'k',
      'F'  => 'k',
      'Q'  => 'q',
      'q'  => 'q',
      'J'  => 'j',
      'j'  => 'j',
      'h'  => 'h',
      'H'  => 'hh',
      'w'  => 'w',
      'b'  => 'ws',
      ','  => 'cm',
      '=>' => 'cm',
      ';'  => 'sc',
      ':'  => 'co',
      'f'  => 'sc',
      '('  => 'p',
      ')'  => 'p',
    );

    # These token types will all be called identifiers for now
    # FIXME: need to separate user defined modules as separate type
    my @identifier = qw" i t U C Y Z G :: ";
    @token_short_names{@identifier} = ('i') x scalar(@identifier);

    # These token types will be called 'structure'
    my @structure = qw" { } ";
    @token_short_names{@structure} = ('s') x scalar(@structure);

    # OLD NOTES: save for reference
    # Any of these could be added later if it would be useful.
    # For now, they will by default become punctuation
    # (Note: the hash %token_long_names named below is not declared in
    # this package's 'use vars' list; these lines are notes only.)
    #    my @list = qw" L R [ ] ";
    #    @token_long_names{@list} = ('non-structure') x scalar(@list);
    #
    #    my @list = qw"
    #      / /= * *= ** **= + += - -= % %= = ++ -- << <<= >> >>= pp p m mm
    #      ";
    #    @token_long_names{@list} = ('math') x scalar(@list);
    #
    #    my @list = qw" & &= ~ ~= ^ ^= | |= ";
    #    @token_long_names{@list} = ('bit') x scalar(@list);
    #
    #    my @list = qw" == != < > <= <=> ";
    #    @token_long_names{@list} = ('numerical-comparison') x scalar(@list);
    #
    #    my @list = qw" && || ! &&= ||= ";
    #    @token_long_names{@list} = ('logical') x scalar(@list);
    #
    #    my @list = qw" . .= =~ !~ x x= ";
    #    @token_long_names{@list} = ('string-operators') x scalar(@list);
    #
    #    # Incomplete..
    #    my @list = qw" .. -> <> ... \ ? ";
    #    @token_long_names{@list} = ('misc-operators') x scalar(@list);

}

sub make_getopt_long_names {

    # Append the Getopt::Long specifications of all html options to the
    # array referenced by $rgetopt_names: a color (string), italic and
    # bold (optional integer) entry for every token class, followed by
    # the fixed html options.
    my ( $class, $rgetopt_names ) = @_;

    foreach my $long_name ( values %short_to_long_names ) {
        push @$rgetopt_names,
          "html-color-$long_name=s",
          "html-italic-$long_name:i",
          "html-bold-$long_name:i";
    }

    push @$rgetopt_names,
      "html-color-background=s",
      "html-linked-style-sheet=s",
      "nohtml-style-sheets",
      "html-pre-only",
      "html-line-numbers",
      "stylesheet";
}

sub make_abbreviated_names {

    # Add the short command line abbreviations of the html options to the
    # expansion hash referenced by $rexpansion.  Each value is a
    # one-element array ref holding the long option name, for example:
    #      'hcc'    => [qw(html-color-comment)],
    #      'hck'    => [qw(html-color-keyword)],
    my ( $class, $rexpansion ) = @_;

    # color/bold/italic abbreviations: 'hc', 'hb', 'hi' + short token name
    foreach my $short ( keys %short_to_long_names ) {
        my $long = $short_to_long_names{$short};
        $rexpansion->{"hc$short"} = ["html-color-$long"];
        $rexpansion->{"hb$short"} = ["html-bold-$long"];
        $rexpansion->{"hi$short"} = ["html-italic-$long"];
    }

    # the remaining html options
    $rexpansion->{"hcbg"} = ["html-color-background"];
    $rexpansion->{"pre"}  = ["html-pre-only"];
    $rexpansion->{"nnn"}  = ["html-line-numbers"];
    $rexpansion->{"css"}  = ["html-linked-style-sheet"];
    $rexpansion->{"nss"}  = ["nohtml-style-sheets"];
    $rexpansion->{"ss"}   = ["stylesheet"];
}

sub check_options {

    # This will be called once after options have been parsed.
    #
    # It stores the options hash reference in the package variable $rOpts,
    # normalizes the bold/italic flags, fills in default colors and
    # properties, builds the per-token lookup tables (%html_color,
    # %html_bold, %html_italic), and handles the style sheet options.
    #
    # Side effects: may write a style sheet to STDOUT and exit (option
    # 'stylesheet'), may die on a bad -css argument, and may create the
    # linked style sheet file if it does not exist yet.
    my $class = shift;
    $rOpts = shift;

    # X11 color names for default settings that seemed to look ok
    use constant ForestGreen   => "#228B22";
    use constant SaddleBrown   => "#8B4513";
    use constant IndianRed3    => "#CD5555";
    use constant DeepSkyBlue4  => "#00688B";
    use constant MediumOrchid3 => "#B452CD";
    use constant black         => "#000000";
    use constant white         => "#FFFFFF";

    # Fix input parameters because of how getopts works:
    # 0 to become a 1; negative to become a 0
    while ( my ( $short_name, $name ) = each %short_to_long_names ) {
        for my $prop ( 'bold', 'italic' ) {
            my $key = "html-$prop-$name";
            if ( defined( $rOpts->{$key} ) ) {
                $rOpts->{$key} = ( $rOpts->{$key} >= 0 ) ? 1 : 0;
            }
        }
    }

    # set default color, bold, italic properties
    # anything not listed here will be given the default (punctuation) color --
    # these types currently not listed and get default: ws pu s sc cm co p
    # When adding NEW_TOKENS: add an entry here if you don't want defaults

    # set_default_properties( $short_name, default_color, bold?, italic? );
    set_default_properties( 'c',  ForestGreen,   0, 0 );
    set_default_properties( 'k',  SaddleBrown,   1, 0 );
    set_default_properties( 'q',  IndianRed3,    0, 0 );
    set_default_properties( 'hh', IndianRed3,    0, 1 );
    set_default_properties( 'h',  IndianRed3,    1, 0 );
    set_default_properties( 'i',  DeepSkyBlue4,  0, 0 );
    set_default_properties( 'w',  black,         0, 0 );
    set_default_properties( 'n',  MediumOrchid3, 0, 0 );
    set_default_properties( 'v',  MediumOrchid3, 0, 0 );
    set_default_properties( 'j',  black,         1, 0 );

    set_default_color( 'html-color-background',  white );
    set_default_color( 'html-color-punctuation', black );

    # setup property lookup tables for tokens based on their short names
    # every token type has a short name, and will use these tables
    # to do the html markup
    while ( my ( $short_name, $long_name ) = each %short_to_long_names ) {
        $html_color{$short_name}  = $rOpts->{"html-color-$long_name"};
        $html_bold{$short_name}   = $rOpts->{"html-bold-$long_name"};
        $html_italic{$short_name} = $rOpts->{"html-italic-$long_name"};
    }

    # write style sheet to STDOUT and exit if requested
    if ( defined( $rOpts->{'stylesheet'} ) ) {
        write_style_sheet_file('-');
        exit;
    }

    # make sure user gives a file name after -css
    if ( defined( $rOpts->{'html-linked-style-sheet'} ) ) {
        $css_linkname = $rOpts->{'html-linked-style-sheet'};
        if ( $css_linkname =~ /^-/ ) {
            die "You must specify a valid filename after -css\n";
        }
    }

    # check for conflict
    # Note: this package defines no warning() routine, so the message is
    # sent to STDERR with the builtin warn.
    if ( $css_linkname && $rOpts->{'nohtml-style-sheets'} ) {
        $rOpts->{'nohtml-style-sheets'} = 0;
        warn "You can't specify both -css and -nss; -nss ignored\n";
    }

    # Write a style sheet file if necessary.
    # If the selected filename exists, don't write, because the user may
    # have done some work by hand to create it.
    # Also, this will avoid a potential disaster in which the user
    # forgets to specify the style sheet, like this:
    #    perltidy -html -css myfile1.pl myfile2.pl
    # This would cause myfile1.pl to be parsed as the style sheet name by
    # GetOpts
    if ( $css_linkname && !-e $css_linkname ) {
        write_style_sheet_file($css_linkname);
    }
}

sub write_style_sheet_file {

    # Write the default style sheet to the file named $css_filename.
    # The special name '-' writes to STDOUT.
    # Dies if the file cannot be opened or if closing it reports an error.
    my ($css_filename) = @_;

    # keep this use of the CSS handle from leaking out of this routine
    local *CSS;

    # NOTE: the two-argument form of open is intentional here; it is what
    # makes the name '-' refer to STDOUT
    unless ( open CSS, "> $css_filename" ) {
        die "can't open $css_filename: $!\n";
    }
    write_style_sheet_data(*CSS);

    # buffered write errors only show up when the handle is closed
    unless ( close CSS ) {
        die "can't close $css_filename: $!\n";
    }
}

sub write_style_sheet_data {

    # Print the style sheet to the open file handle $fh: a fixed header
    # using the background and punctuation colors from $rOpts, followed
    # by one rule per entry of %short_to_long_names.
    my ($fh) = @_;

    my $text_color = $rOpts->{'html-color-punctuation'};
    my $bg_color   = $rOpts->{'html-color-background'};

    print $fh <<"EOM";
/* default style sheet generated by perltidy */
body {background: $bg_color; color: $text_color}
pre { color: $text_color; 
      background: $bg_color;
      font-family: courier;
    } 

EOM

    foreach my $short_name ( keys %short_to_long_names ) {
        my $long_name = $short_to_long_names{$short_name};

        # a class without its own color gets the default text color
        my $color =
          defined( $html_color{$short_name} ) 
          ? $html_color{$short_name}
          : $text_color;

        my $rule = ".$short_name \{ color: $color;";
        $rule .= " font-weight:bold;"  if ( $html_bold{$short_name} );
        $rule .= " font-style:italic;" if ( $html_italic{$short_name} );
        print $fh $rule . "} /* $long_name */\n";
    }
}

sub set_default_color {

    # Store a color under $rOpts->{$key}: a true value already present
    # there is kept, otherwise $color is used.  The chosen value is passed
    # through check_RGB before it is stored.
    my ( $key, $color ) = @_;
    my $chosen = $rOpts->{$key} ? $rOpts->{$key} : $color;
    $rOpts->{$key} = check_RGB($chosen);
}

sub check_RGB {

    # If $color is a 6 digit hex RGB value, prepend a '#'; otherwise
    # assume that it is a valid ascii color name and return it unchanged.
    #
    # Only the characters 0-9, a-f and A-F count as hex digits; an
    # underscore is not a hex digit, so a string such as 'ab_cde' is
    # treated as a color name.
    my ($color) = @_;
    if ( $color =~ /^[0-9a-fA-F]{6}$/ ) { $color = "#$color" }
    return $color;
}

sub set_default_properties {

    # Install the default color, bold and italic settings in $rOpts for
    # the token class with short name $short_name.  A bold or italic
    # setting which is already defined in $rOpts is left alone.
    my ( $short_name, $color, $bold, $italic ) = @_;

    my $long_name = $short_to_long_names{$short_name};
    set_default_color( "html-color-$long_name", $color );

    foreach my $pair ( [ 'bold', $bold ], [ 'italic', $italic ] ) {
        my ( $property, $default ) = @$pair;
        my $key = "html-$property-$long_name";
        $rOpts->{$key} = $default unless ( defined $rOpts->{$key} );
    }
}

sub new {

    # Open the html output file $html_file (on the package file handle
    # HTML), write the html header unless 'html-pre-only' is set, and
    # open the <PRE> section.
    #
    # Returns the new object, or undef (after a warning) if the output
    # file cannot be opened.
    my ( $class, $input_file, $html_file ) = @_;

    my $html_file_opened = 0;
    unless ( open( HTML, "> $html_file" ) ) {
        warn("can't open $html_file: $!\n");
        return undef;
    }
    $html_file_opened = 1;

    # The file name is used as the document title, so the characters
    # which are special in html text must be escaped ('&' first).
    my $title = $input_file;
    $title =~ s/\&/&amp;/g;
    $title =~ s/\</&lt;/g;
    $title =~ s/\>/&gt;/g;

    unless ( $rOpts->{'html-pre-only'} ) {
        print HTML <<"HTML_START";
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<HTML>
<HEAD>
HTML_START

        # use css linked to another file
        if ( $rOpts->{'html-linked-style-sheet'} ) {
            print HTML
              qq(<link rel=stylesheet href="$css_linkname" type="text/css">);

            # this here-doc must interpolate to get the title text
            print HTML <<"ENDCSS";
<TITLE>$title</TITLE>
</HEAD>
<BODY> 
ENDCSS
        }

        # use css embedded in this file
        elsif ( !$rOpts->{'nohtml-style-sheets'} ) {
            print HTML <<'ENDCSS';
<STYLE TYPE="text/css">
<!--
ENDCSS
            write_style_sheet_data(*HTML);

            # this here-doc must interpolate to get the title text
            print HTML <<"ENDCSS";
-->
</STYLE>
<TITLE>$title</TITLE>
</HEAD>
<BODY> 
ENDCSS
        }

        # no css used
        else {

            print HTML <<"HTML_START";
<TITLE>$title</TITLE>
</HEAD>
<BODY BGCOLOR=\"$rOpts->{'html-color-background'}\" TEXT=\"$rOpts->{'html-color-punctuation'}\">
HTML_START
        }
    }

    print HTML <<"END_PRE";
<!-- filename: $input_file -->
<PRE>
END_PRE

    bless {
        _html_file        => $html_file,
        _html_file_opened => $html_file_opened,
    }, $class;
}

sub close_html_file {

    # Write the closing html tags and close the HTML file handle.
    # Does nothing if the html file was never opened.
    my ($self) = @_;
    return unless $self->{_html_file_opened};

    # the <PRE> section is always closed; the document tags are written
    # only when a complete document was started
    my $trailer = "</PRE>\n";
    $trailer .= "</BODY>\n</HTML>\n" unless ( $rOpts->{'html-pre-only'} );
    print HTML $trailer;

    close HTML;
}

sub markup_tokens {

    # Apply markup_html_element to each token of a line.
    #   $rtokens     - reference to the array of token strings
    #   $rtoken_type - reference to the array of token types (its length
    #                  determines how many tokens are processed)
    # Returns a reference to a new array of marked-up tokens.
    my ( $self, $rtokens, $rtoken_type ) = @_;

    my @marked_up;
    foreach my $i ( 0 .. $#{$rtoken_type} ) {
        my $html =
          $self->markup_html_element( $rtokens->[$i], $rtoken_type->[$i] );
        push @marked_up, $html;
    }
    return \@marked_up;
}

sub markup_html_element {

    # Return the html text for one token of the given type.  Blank tokens
    # are returned unchanged; all others get the html special characters
    # escaped and are then wrapped either in a <SPAN> (style sheets in
    # use) or in <FONT>, <I>, <B> tags (no style sheets).
    my ( $self, $token, $type ) = @_;

    # nothing to do for whitespace
    return $token if ( $type eq 'b' || $token =~ /^\s*$/ );

    # escape the html special characters; '&' has to be done first
    for ($token) {
        s/\&/&amp;/g;
        s/\</&lt;/g;
        s/\>/&gt;/g;
        s/\"/&quot;/g;
    }

    # get the short abbreviation for this token type;
    # punctuation is the default
    my $short_name = $token_short_names{$type};
    $short_name = "pu" unless ( defined($short_name) );

    # handle style sheets: punctuation needs no markup
    unless ( $rOpts->{'nohtml-style-sheets'} ) {
        return $token if ( $short_name eq 'pu' );
        return qq(<SPAN CLASS="$short_name">) . $token . "</SPAN>";
    }

    # handle no style sheets: a color tag is only needed if the color
    # differs from the default text color
    my $color = $html_color{$short_name};
    $token = qq(<FONT COLOR="$color">) . $token . "</FONT>"
      if ( $color && ( $color ne $rOpts->{'html-color-punctuation'} ) );
    $token = "<I>$token</I>" if ( $html_italic{$short_name} );
    $token = "<B>$token</B>" if ( $html_bold{$short_name} );
    return $token;
}

sub finish_up {

    # Called after the last line has been written: close the html file.
    # Returns nothing.
    my ($self) = @_;
    $self->close_html_file();
    return;
}

sub write_line {

    # Write one input line to the html file with markup.
    #   $line_of_tokens - hash ref; this routine reads the keys
    #     _line_type, _line_text, _line_number and, for lines of type
    #     'CODE', _rtoken_type and _rtokens
    # Does nothing if the html file was never opened.
    my $self = shift;
    return unless $self->{_html_file_opened};
    my ($line_of_tokens) = @_;
    my $line_type   = $line_of_tokens->{_line_type};
    my $input_line  = $line_of_tokens->{_line_text};
    my $line_number = $line_of_tokens->{_line_number};
    chomp $input_line;

    # markup line of code..
    my $html_line;
    if ( $line_type eq 'CODE' ) {
        my $rtoken_type = $line_of_tokens->{_rtoken_type};
        my $rtokens     = $line_of_tokens->{_rtokens};

        # Start with the leading whitespace of the input line.  This
        # pattern always matches, possibly with an empty string.
        ($html_line) = ( $input_line =~ /^(\s*)/ );

        my $rcolored_tokens = $self->markup_tokens( $rtokens, $rtoken_type );
        $html_line .= join '', @$rcolored_tokens;
    }

    # markup line of non-code..
    else {

        # here-doc text and here-doc targets have their own token types;
        # every other non-code line is marked up like a quote
        my $line_character;
        if ( $line_type eq 'HERE' )        { $line_character = 'H' }
        elsif ( $line_type eq 'HERE_END' ) { $line_character = 'h' }
        else { $line_character = 'Q' }
        $html_line = $self->markup_html_element( $input_line, $line_character );
    }

    # add the line number if requested, right justified in a field of at
    # least 4 characters and followed by one space
    if ( $rOpts->{'html-line-numbers'} ) {
        $html_line = sprintf( "%4d ", $line_number ) . $html_line;
    }

    # write the line
    print HTML "$html_line\n";
}

#####################################################################
#
# The Perltidy::Formatter package adds indentation, whitespace, and line breaks
# to the token stream
#
#####################################################################

package Perltidy::Formatter;
use vars qw{
  @tokens_to_go
  @types_to_go
  @levels_to_go
  @nesting_depth_to_go
  @block_type_to_go
  @nobreak_to_go
  @old_breakpoint_to_go
  @forced_breakpoint_to_go
  $last_nonblank_index_to_go
  $last_nonblank_type_to_go
  $last_nonblank_token_to_go
  $forced_breakpoint_count
  $forced_breakpoint_undo_count
  @forced_breakpoint_undo_stack
  @lengths_to_go
  @bond_strength_to_go
  @matching_token_to_go
  $max_index_to_go
  $tabbing
  $tabstr
  $embedded_tab_count
  $first_embedded_tab_at
  $last_embedded_tab_at
  $deleted_semicolon_count
  $first_deleted_semicolon_at
  $last_deleted_semicolon_at
  $added_semicolon_count
  $first_added_semicolon_at
  $last_added_semicolon_at
  $saw_negative_indentation
  $first_tabbing_disagreement
  $last_tabbing_disagreement
  $in_tabbing_disagreement
  $tabbing_disagreement_count
  $input_line_tabbing
  $last_line_leading_type
  $last_nonblank_token
  $last_nonblank_type
  $last_last_nonblank_token
  $last_last_nonblank_type
  $last_nonblank_block_type
  $last_output_level
  $continuation_flag
  $current_statement_length
  $do_follower_pattern
  $if_brace_follower_pattern
  %space_before_paren
  $brace_follower_pattern
  $looking_for_else
  @want_comma_break
  @dont_align
  @interrupted_list
  $before_pattern
  $index_start_one_line_block
  $level_one_line_block
  $ci_one_line_block
  $semicolons_before_block_self_destruct
  $index_max_forced_break
  $input_line_number
  $diagnostics_object
  $logger_object
  $file_writer_object
  $formatter_self

  %FORMATTER_DEBUG_FLAG
  %tightness
  %matching_token
  $rOpts
  %right_bond_strength
  %left_bond_strength
  %binary_ws_rules
  %want_left_space
  %want_right_space
  %is_digraph
  %is_trigraph
};

BEGIN {

    # Mark the two- and three-character operators in the lookup hashes
    # %is_digraph and %is_trigraph.

    foreach my $two_chars (
        qw(
        .. :: << >> ** && .. ||  -> => += -= .= %= &= |= ^= *= <>
        <= >= == =~ !~ != ++ -- /= x=
        )
      )
    {
        $is_digraph{$two_chars} = 1;
    }

    foreach my $three_chars (qw( ... **= <<= >>= &&= ||= <=> )) {
        $is_trigraph{$three_chars} = 1;
    }

}

# Token bond strengths.
# Named numeric levels, listed from largest to smallest value.  In this
# file WEAK and VERY_STRONG are assigned to the bond strengths of an
# opening brace in check_options.
# NOTE(review): presumably a larger value means a less attractive place
# to break a line -- confirm where the bond strengths are evaluated.
use constant NO_BREAK    => 10000;
use constant VERY_STRONG => 100;
use constant STRONG      => 2.1;
use constant NOMINAL     => 1.1;
use constant WEAK        => 0.8;
use constant VERY_WEAK   => 0.55;

# values for testing indexes in output array
# (used as the "nothing stored yet" value when the output batch indexes
# are reset in prepare_for_a_new_line and destroy_one_line_block)
use constant UNDEFINED_INDEX => -1;

# Maximum number of little messages; probably need not be changed.
use constant MAX_NAG_MESSAGES => 6;

sub get_tabstr {

    # Return the string used for one level of indentation.  It is set in
    # check_options to a tab character or to a string of spaces.
    return $tabstr;
}

# interface to Perltidy::Logger routines
sub warning {

    # pass a warning message on to the logger, if there is one
    $logger_object->warning(@_) if ($logger_object);
}

sub write_logfile_entry {

    # pass a logfile entry on to the logger, if there is one
    $logger_object->write_logfile_entry(@_) if ($logger_object);
}

sub black_box {

    # pass the arguments on to the logger's black_box, if there is a logger
    $logger_object->black_box(@_) if ($logger_object);
}

sub report_definite_bug {

    # tell the logger, if there is one, that a definite bug was seen;
    # any arguments given to this routine are not passed on
    $logger_object->report_definite_bug() if ($logger_object);
}

sub get_saw_brace_error {

    # return the logger's brace error value; without a logger the result
    # is false
    $logger_object->get_saw_brace_error() if ($logger_object);
}

sub we_are_at_the_last_line {

    # tell the logger, if there is one, that the last line was reached
    $logger_object->we_are_at_the_last_line() if ($logger_object);
}

# interface to Perltidy::Diagnostics routine
sub write_diagnostics {

    # pass a message on to the diagnostics object, if there is one
    $diagnostics_object->write_diagnostics(@_) if ($diagnostics_object);
}

sub new {

    # Create the formatter object and reset the formatter state, which is
    # kept in package variables rather than in the object.
    #
    # Named arguments (all default to undef):
    #   sink_object        - passed on to Perltidy::FileWriter->new
    #   diagnostics_object - stored for use by write_diagnostics
    #   logger_object      - stored for use by the logger interface routines
    #
    # Returns the new object, an empty blessed hash which is also stored
    # in the package variable $formatter_self.
    #
    # NOTE(review): $rOpts is used here but not set here; in the visible
    # code it is assigned only in check_options, so that routine
    # presumably has to be called first -- confirm against the caller.
    my $class = shift;

    # we are given an object with a write_line() method to take lines
    my %defaults = (
      sink_object        => undef,
      diagnostics_object => undef,
      logger_object      => undef,
    );
    my %args = ( %defaults, @_ );

    # these must be set before any of the logger/diagnostics interface
    # routines below can have an effect
    $logger_object      = $args{logger_object};
    $diagnostics_object = $args{diagnostics_object};

    # FIXME: we create another object with a get_line() and peek_ahead() method
    my $sink_object = $args{sink_object};
    $file_writer_object =
      Perltidy::FileWriter->new( $sink_object, $rOpts, $logger_object );

    # reset the package-level state
    prepare_for_a_new_line();
    @tokens_to_go               = ();
    @types_to_go                = ();
    @levels_to_go               = ();
    @nobreak_to_go              = ();
    @old_breakpoint_to_go       = ();
    @forced_breakpoint_to_go    = ();
    @lengths_to_go              = ();
    @bond_strength_to_go        = ();
    @matching_token_to_go       = ();
    @nesting_depth_to_go        = (0);
    @want_comma_break           = (0);
    @dont_align                 = (0);
    @interrupted_list           = (0);
    $saw_negative_indentation   = 0;
    $first_tabbing_disagreement = 0;
    $last_tabbing_disagreement  = 0;
    $tabbing_disagreement_count = 0;
    $in_tabbing_disagreement    = 0;
    $input_line_tabbing         = undef;
    $last_line_leading_type     = '#';
    $last_nonblank_token        = ';';
    $last_nonblank_type         = ';';
    $last_last_nonblank_token   = ';';
    $last_last_nonblank_type    = ';';
    $last_nonblank_block_type   = "";
    $last_output_level          = 0;
    $continuation_flag          = 0;
    $looking_for_else           = 0;
    $embedded_tab_count         = 0;
    $first_embedded_tab_at      = 0;
    $last_embedded_tab_at       = 0;
    $deleted_semicolon_count    = 0;
    $first_deleted_semicolon_at = 0;
    $last_deleted_semicolon_at  = 0;
    $added_semicolon_count      = 0;
    $first_added_semicolon_at   = 0;
    $last_added_semicolon_at    = 0;
    Perltidy::VerticalAligner::initialize( $rOpts, $file_writer_object,
      $logger_object, $diagnostics_object );

    # note the indentation scheme in the logfile
    if ( $rOpts->{'tabs'} ) {
        write_logfile_entry("Indentation will be with a tab character\n");
    }
    else {
        write_logfile_entry(
          "Indentation will be with $rOpts->{'indent-columns'} spaces\n");
    }

    # This is the start of a formatter referent
    $formatter_self = {};

    bless $formatter_self, $class;
}

sub prepare_for_a_new_line {

    # Reset the package variables which describe the batch of tokens
    # being collected, and forget any one-line block.

    # counters and lengths start at zero
    $forced_breakpoint_count = $forced_breakpoint_undo_count =
      $current_statement_length = 0;

    # nothing has been seen yet
    $last_nonblank_type_to_go = $last_nonblank_token_to_go = '';
    $brace_follower_pattern   = "";

    # no indexes are in use
    $index_max_forced_break = $max_index_to_go = $last_nonblank_index_to_go =
      UNDEFINED_INDEX;

    destroy_one_line_block();
}

sub write_line {

    # Receive one line from the tokenizer.  Lines of type 'CODE' are
    # shown to the logger (unless blank) and formatted; all other lines
    # are written out unindented, except that pod lines are dropped when
    # 'delete-pod' is set.
    my ( $self, $line_of_tokens ) = @_;

    my $input_line = $line_of_tokens->{_line_text};
    my $line_type  = $line_of_tokens->{_line_type};

    # handle line of code..
    if ( $line_type eq 'CODE' ) {

        # let logger see all non-blank lines of code
        if ( $input_line =~ /\S/ ) {
            my $out_line_number = $file_writer_object->get_output_line_number();
            black_box( $line_of_tokens, $out_line_number );
        }
        print_line_of_tokens($line_of_tokens);
    }

    # handle line of non-code..
    else {

        # set special flags; only pod lines can be deleted or tee'd
        my $is_pod    = ( $line_type =~ /^POD/ );
        my $skip_line = ( $is_pod && $rOpts->{'delete-pod'} ) ? 1 : 0;
        my $tee_line  = ( $is_pod && $rOpts->{'tee-pod'} ) ? 1 : 0;

        # pod docs should have a preceding blank line
        want_blank_line()
          if ( $is_pod && !$skip_line && $line_type eq 'POD_START' );

        # write unindented non-code line
        if ( !$skip_line ) {
            $file_writer_object->tee_on() if ($tee_line);
            write_unindented_line($input_line);
            $file_writer_object->tee_off() if ($tee_line);
        }
    }
}

sub destroy_one_line_block {

    # Forget the current one-line block: mark its starting index as
    # undefined and clear its semicolon counter.
    $index_start_one_line_block = UNDEFINED_INDEX;
    $semicolons_before_block_self_destruct = 0;
}

sub create_one_line_block {

    # Record the parameters of a one-line block in package variables.
    # The four positional arguments are, in order: the starting index,
    # the level, the ci value, and the semicolon count.
    (
        $index_start_one_line_block,
        $level_one_line_block,
        $ci_one_line_block
      )
      = @_[ 0 .. 2 ];
    $semicolons_before_block_self_destruct = $_[3];
}

sub length_from_index {

    # Return $current_statement_length, reduced by the value stored in
    # @lengths_to_go for the token before $index when $index is positive.
    my ($index) = @_;
    return ( $index > 0 )
      ? $current_statement_length - $lengths_to_go[ $index - 1 ]
      : $current_statement_length;
}

sub is_one_line_block_too_long {

    # Return true if the current one-line block, indented for $level,
    # would exceed the maximum line length.  A $level which differs from
    # the recorded level of the block produces a warning and a true
    # result.
    # FIXME: need to check levels to get ci
    my ($level) = @_;

    # shouldn't happen:
    if ( $level != $level_one_line_block ) {
        warning("level=$level but should be $level_one_line_block\n");
        return 1;
    }

    my $indentation = $level * $rOpts->{'indent-columns'};

    # Note: add 1 to account for this token
    my $spaces_needed =
      1 + length_from_index($index_start_one_line_block) +
      $ci_one_line_block + $indentation;

    return ( $spaces_needed > $rOpts->{'maximum-line-length'} );
}

sub finish_up {

    # Flush the output buffer and write the closing summary to the
    # logfile: semicolons added and deleted, embedded tabs, and
    # indentation disagreements.  Finally let the vertical aligner and
    # the file writer make their own reports.
    my $self = shift;

    flush();

    # fix up line number since it was incremented
    $file_writer_object->decrement_output_line_number();
    we_are_at_the_last_line();

    # Local helper: report the first, and if there was more than one,
    # the last input line of some event.
    my $log_first_and_last = sub {
        my ( $count, $first_at, $last_at ) = @_;
        my $first = ( $count > 1 ) ? "First" : "";
        write_logfile_entry("  $first at input line $first_at\n");
        write_logfile_entry("   Last at input line $last_at\n")
          if ( $count > 1 );
    };

    if ( $added_semicolon_count > 0 ) {
        my $what =
          ( $added_semicolon_count > 1 ) ? "semicolons were" : "semicolon was";
        write_logfile_entry("$added_semicolon_count $what added:\n");
        $log_first_and_last->(
            $added_semicolon_count, $first_added_semicolon_at,
            $last_added_semicolon_at
        );
        write_logfile_entry("  (Use -nasc to prevent semicolon addition)\n");
        write_logfile_entry("\n");
    }

    if ( $deleted_semicolon_count > 0 ) {
        my $what =
          ( $deleted_semicolon_count > 1 )
          ? "semicolons were"
          : "semicolon was";
        write_logfile_entry(
            "$deleted_semicolon_count unnecessary $what deleted:\n");
        $log_first_and_last->(
            $deleted_semicolon_count, $first_deleted_semicolon_at,
            $last_deleted_semicolon_at
        );
        write_logfile_entry("  (Use -ndsc to prevent semicolon deletion)\n");
        write_logfile_entry("\n");
    }

    if ( $embedded_tab_count > 0 ) {
        my $what =
          ( $embedded_tab_count > 1 )
          ? "quotes or patterns"
          : "quote or pattern";
        write_logfile_entry("$embedded_tab_count $what had embedded tabs:\n");
        write_logfile_entry(
"This means the display of this script could vary with device or software\n"
        );
        $log_first_and_last->(
            $embedded_tab_count, $first_embedded_tab_at,
            $last_embedded_tab_at
        );
        write_logfile_entry("\n");
    }

    # indentation disagreements between input and output
    write_logfile_entry(
"First indentation disagreement seen at input line $first_tabbing_disagreement\n"
    ) if ($first_tabbing_disagreement);

    if ($in_tabbing_disagreement) {
        write_logfile_entry(
"Ending with indentation disagreement which started at input line $in_tabbing_disagreement\n"
        );
    }
    elsif ($last_tabbing_disagreement) {
        write_logfile_entry(
"Last indentation disagreement seen at input line $last_tabbing_disagreement\n"
        );
    }
    else {
        write_logfile_entry("No indentation disagreement seen\n");
    }
    write_logfile_entry("\n");

    Perltidy::VerticalAligner::report_anything_unusual();

    $file_writer_object->report_line_length_errors();
}

sub check_options {

    # This routine is called to check the Opts hash after it is defined.
    #
    # It takes one argument, a reference to the options hash, which is
    # stored in the package variable $rOpts.  From the options it sets up
    # the indentation string, the whitespace preference hashes, the
    # patterns for tokens which may follow a closing brace, the bond
    # strengths of an opening brace, and the tightness and matching token
    # tables.
    #
    # If 'dump-want-left-space' or 'dump-want-right-space' is set, the
    # corresponding hash is written to STDOUT and the program exits.

    ($rOpts) = @_;

    if ( $rOpts->{'tabs'} ) {
        $tabstr = "\t";
    }
    else {
        $tabstr = " " x $rOpts->{'indent-columns'};
    }

    if ( !$rOpts->{'space-for-semicolon'} ) {
        $want_left_space{'f'} = -1;
    }

    if ( $rOpts->{'space-terminal-semicolon'} ) {
        $want_left_space{';'} = 1;
    }

    # Implement user whitespace preferences.  Each option holds a list of
    # token types separated by whitespace.  The list is split with the
    # special pattern ' ' so that leading or repeated whitespace cannot
    # produce empty fields (and with them a bogus '' key in the hash).
    # The order of this table matters: a 'nowant' option overrides the
    # corresponding 'want' option for the same token type.
    my @preferences = (
        [ 'want-left-space',    \%want_left_space,  1 ],
        [ 'want-right-space',   \%want_right_space, 1 ],
        [ 'nowant-left-space',  \%want_left_space,  -1 ],
        [ 'nowant-right-space', \%want_right_space, -1 ],
    );
    foreach my $preference (@preferences) {
        my ( $option, $rhash, $value ) = @$preference;
        next unless ( $rOpts->{$option} );
        my @list = split ' ', $rOpts->{$option};
        @{$rhash}{@list} = ($value) x scalar(@list);
    }

    if ( $rOpts->{'dump-want-left-space'} ) {
        dump_want_left_space(*STDOUT);
        exit 1;
    }

    if ( $rOpts->{'dump-want-right-space'} ) {
        dump_want_right_space(*STDOUT);
        exit 1;
    }

    # Define here tokens which may follow the closing brace of a do statement
    # on the same line, as in:
    #   } while ( $something);
    $do_follower_pattern = '^(until|while|unless|if|;|,)$';

    # These tokens may follow the closing brace of an if or elsif block.
    # In other words, for cuddled else we want code to look like:
    #   } elsif ( $something) {
    #   } else {
    if ( $rOpts->{'cuddled-else'} ) {
        $if_brace_follower_pattern = '^(else|elsif)$';
    }
    else { $if_brace_follower_pattern = ""; }

    if ( $rOpts->{'opening-brace-on-new-line'} ) {
        $left_bond_strength{'{'}  = WEAK;
        $right_bond_strength{'{'} = VERY_STRONG;
    }
    else {
        $right_bond_strength{'{'} = WEAK;
        $left_bond_strength{'{'}  = VERY_STRONG;
    }

    # make -l=0  equal to -l=infinite
    if ( !$rOpts->{'maximum-line-length'} ) {
        $rOpts->{'maximum-line-length'} = 1000000;
    }

    # hashes used to simplify setting whitespace
    %tightness = (
      '{' => $rOpts->{'brace-tightness'},
      '}' => $rOpts->{'brace-tightness'},
      '(' => $rOpts->{'paren-tightness'},
      ')' => $rOpts->{'paren-tightness'},
      '[' => $rOpts->{'square-bracket-tightness'},
      ']' => $rOpts->{'square-bracket-tightness'},
    );
    %matching_token = (
      '{' => '}',
      '(' => ')',
      '[' => ']',
    );
}

sub dump_want_left_space {

    # Write the %want_left_space hash to file handle $fh: a title line,
    # then one "key<TAB>value" line per entry, sorted by key.
    my ($fh) = @_;
    my $report = "Contents of want_left_space hash\n";
    foreach my $token_type ( sort keys %want_left_space ) {
        $report .= "$token_type\t$want_left_space{$token_type}\n";
    }
    print $fh $report;
}

sub dump_want_right_space {

    # Write the %want_right_space hash to file handle $fh: a title line,
    # then one "key<TAB>value" line per entry, sorted by key.
    my ($fh) = @_;
    my $report = "Contents of want_right_space hash\n";
    foreach my $token_type ( sort keys %want_right_space ) {
        $report .= "$token_type\t$want_right_space{$token_type}\n";
    }
    print $fh $report;
}

sub is_essential_whitespace {

    # Essential whitespace means whitespace which cannot be safely deleted.
    # We are given three tokens and their types:
    # ($tokenl, $typel) is the token to the left of the space in question
    # ($tokenr, $typer) is the token to the right of the space in question
    # ($tokenll, $typell) is previous nonblank token to the left of $tokenl
    #
    # The result is the value of the single long 'or' expression below:
    # it is true as soon as one of the rules applies and false if none
    # of them does.
    my ( $tokenll, $typell, $tokenl, $typel, $tokenr, $typer ) = @_;

    # never combine two bare words or numbers
    ( ( $tokenr =~ /^[\'\w]/ ) && ( $tokenl =~ /[\'\w]$/ ) )

      # do not combine a number with a concatenation dot
      # example: pom.caputo:
      # $vt100_compatible ? "\e[0;0H" : ('-' x 78 . "\n");
      || ( ( $typel eq 'n' ) && ( $tokenr eq '.' ) )
      || ( ( $typer eq 'n' ) && ( $tokenl eq '.' ) )

      # do not join a minus with a bare word, because you might form
      # a file test operator.  Example from Complex.pm:
      # if (CORE::abs($z - i) < $eps); "z-i" would be taken as a file test.
      # (the active rule below only applies to a single-letter right token;
      # the commented-out line is an older, broader version)
      #|| ( ( $tokenr =~ /^[\w]/ ) && ( $tokenl =~ /\-$/ ) )
      || ( ( $tokenr =~ /^[A-Za-z]$/ ) && ( $tokenl =~ /\-$/ ) )

      # keep a space between a quote and a bareword to prevent the
      # bareword from becoming a quote modifier.
      || ( ( $tokenr =~ /^[a-zA-Z_]/ ) && ( $typel eq 'Q' ) )

      # perl is very fussy about spaces before <<
      || ( $tokenr =~ /^\<\</ )

      # avoid combining tokens to create new meanings. Example:
      #     $a+ +$b must not become $a++$b
      || ( $is_digraph{ $tokenl . $tokenr } )
      || ( $is_trigraph{ $tokenl . $tokenr } )

      # another example: do not combine these two &'s:
      #     allow_options & &OPT_EXECCGI
      || ( $is_digraph{ $tokenl . substr( $tokenr, 0, 1 ) } )

      # don't combine $$ or $# with any alphanumeric
      # (testfile mangle.t with --mangle)
      || ( ( $tokenl =~ /^\$[\$\#]$/ ) && ( $tokenr =~ /^\w/ ) )

      # retain any space after possible filehandle
      # (testfiles prnterr1.t with --extrude and mangle.t with --mangle)
      || ( $typel eq 'Z' || $typell eq 'Z' )

      # retain any space after here doc operator ( hereerr.t)
      || ( $typel eq 'h' )

      # be careful with a space around ++ and --, to avoid ambiguity as to
      # which token it applies
      || ( ( $typer =~ /^(pp|mm)$/ ) && ( $tokenl !~ /^[\;\{\(\[]/ ) )
      || ( ( $typel =~ /^(\+\+|\-\-)$/ ) && ( $tokenr !~ /^[\;\{\(\[]/ ) )

      # need space after foreach my; for example, this will fail:
      # foreach my$ft(@filetypes)...
      || ( $tokenll =~ /^(for|foreach)$/ && ( $tokenl eq 'my' )
      && ( $tokenr =~ /^\$/ ) )

      # must have space between grep and left paren; "grep(" will fail
      || ( $tokenl =~ /^(sort|grep|map)$/ && ( $tokenr eq '(' ) )

      # don't stick numbers next to left parens, as in:
      #use Mail::Internet 1.28 (); (see Entity.pm, Head.pm, Test.pm)
      || ( ( $typel eq 'n' ) && ( $tokenr eq '(' ) )

      # don't join something like: for bla::bla:: abc
      # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
      || ( $tokenl =~ /\:\:$/ && ( $tokenr =~ /^[\'\w]/ ) )
      ;    # the value of this long logic sequence is the result we want
}

sub set_white_space_flag {

=pod
	 This routine examines each pair of nonblank tokens and
	 sets values for array @white_space_flag. 
	
	 $white_space_flag[$j] is a flag indicating whether a white space 
	 BEFORE token $j is needed, with the following values:

			 -1 do not want a space before token $j
			  0 optional space or $j is a whitespace
			  1 want a space before token $j
	

	The values for the first token will be defined based
	upon the contents of the "to_go" output array.  

    Note: retain debug print statements because they are usually
    required after adding new token types.

=cut

    # Arguments:
    #   $jmax        - index of the last token to examine
    #   $rtokens     - reference to the array of token strings
    #   $rtoken_type - reference to the parallel array of token types
    #
    # Section 1 looks at $$rtoken_type[$j + 1] and possibly index $j + 2,
    # so both arrays must be readable up to two entries beyond $jmax.
    #
    # Returns a reference to the array of flags, indexed like the tokens.
    #
    # The flag for each nonblank token is decided by the first of these
    # stages which produces a defined value (section 4 may also override
    # an earlier value):
    #   section 1: inside of an opening brace, paren or bracket
    #   section 2: inside of a closing brace, paren or bracket
    #   section 3: the %binary_ws_rules table (left type, right type)
    #   section 4: a few special cases
    #   section 5: the %want_left_space / %want_right_space defaults
    BEGIN {

        # whitespace codes
        use constant WS_YES      => 1;
        use constant WS_OPTIONAL => 0;
        use constant WS_NO       => -1;

        # initialize these global hashes, which control the use of
        # whitespace around tokens:
        #
        # %binary_ws_rules
        # %want_left_space
        # %want_right_space
        # %space_before_paren
        #
        # Many token types are identical to the tokens themselves.
        # See the tokenizer for a complete list. Here are some special types:
        # 	k = perl keyword
        # 	f = semicolon in for statement
        # 	m = unary minus
        # 	p = unary plus
        # Note that :: is excluded since it should be contained in an identifier
        # Note that '->' is excluded because it never gets space
        # parentheses and brackets are excluded since they are handled specially
        # curly braces are included but may be overridden by logic, such as
        # newline logic.

        # NEW_TOKENS: create a whitespace rule here.  This can be as
        # simple as adding your new letter to @spaces_both_sides, for
        # example.

        my @spaces_both_sides = qw"
          + - * / % ? = . : x < > | & ^ .. << >> ** && ||  => += -=
          .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... **= <<= >>=
          &&= ||= <=> k f w F n C Y U G v
          ";

        my @spaces_left_side = qw"
          t ! ~ m p { \ h pp mm Z j
          ";
        push ( @spaces_left_side, '#' );

        my @spaces_right_side = qw"
          ; } ) ] R J ++ --
          ";
        push ( @spaces_right_side, ',' );
        my @space_before_paren = qw(
          my local and or eq ne if else elsif until unless while
          for foreach push return shift unshift pop join split die
        );
        @want_left_space{@spaces_both_sides} = (1) x scalar(@spaces_both_sides);
        @want_right_space{@spaces_both_sides} =
          (1) x scalar(@spaces_both_sides);
        @want_left_space{@spaces_left_side}  = (1) x scalar(@spaces_left_side);
        @want_right_space{@spaces_left_side} = (-1) x scalar(@spaces_left_side);
        @want_left_space{@spaces_right_side} =
          (-1) x scalar(@spaces_right_side);
        @want_right_space{@spaces_right_side} =
          (1) x scalar(@spaces_right_side);
        @space_before_paren{@space_before_paren} =
          (1) x scalar(@space_before_paren);

        # these individual settings override the list-based defaults above
        $want_left_space{'L'}   = WS_NO;
        $want_left_space{'->'}  = WS_NO;
        $want_right_space{'->'} = WS_NO;
        $want_left_space{'**'}  = WS_NO;
        $want_right_space{'**'} = WS_NO;

        # hash type information must stay tightly bound
        # as in :  ${xxxx}
        $binary_ws_rules{'i'}{'L'} = WS_NO;
        $binary_ws_rules{'i'}{'{'} = WS_YES;
        $binary_ws_rules{'k'}{'{'} = WS_YES;
        $binary_ws_rules{'U'}{'{'} = WS_YES;
        $binary_ws_rules{'i'}{'['} = WS_NO;
        $binary_ws_rules{'R'}{'L'} = WS_NO;
        $binary_ws_rules{'R'}{'{'} = WS_NO;
        $binary_ws_rules{'t'}{'L'} = WS_NO;
        $binary_ws_rules{'t'}{'{'} = WS_NO;
        $binary_ws_rules{'}'}{'L'} = WS_NO;
        $binary_ws_rules{'}'}{'{'} = WS_NO;
        $binary_ws_rules{'$'}{'L'} = WS_NO;
        $binary_ws_rules{'$'}{'{'} = WS_NO;
        $binary_ws_rules{'@'}{'L'} = WS_NO;
        $binary_ws_rules{'@'}{'{'} = WS_NO;
        $binary_ws_rules{'='}{'L'} = WS_YES;

        # the following includes ') {'
        # as in :    if ( xxx ) { yyy }
        $binary_ws_rules{']'}{'L'} = WS_NO;
        $binary_ws_rules{']'}{'{'} = WS_NO;
        $binary_ws_rules{')'}{'{'} = WS_YES;
        $binary_ws_rules{')'}{'['} = WS_NO;
        $binary_ws_rules{']'}{'['} = WS_NO;
        $binary_ws_rules{'}'}{'['} = WS_NO;
        $binary_ws_rules{'R'}{'['} = WS_NO;

        $binary_ws_rules{']'}{'++'} = WS_NO;
        $binary_ws_rules{']'}{'--'} = WS_NO;
        $binary_ws_rules{')'}{'++'} = WS_NO;
        $binary_ws_rules{')'}{'--'} = WS_NO;

        $binary_ws_rules{'R'}{'++'} = WS_NO;
        $binary_ws_rules{'R'}{'--'} = WS_NO;

        $binary_ws_rules{'k'}{':'} = WS_NO;     # keep colon with label
        $binary_ws_rules{'w'}{':'} = WS_NO;
        $binary_ws_rules{'i'}{'Q'} = WS_YES;
        $binary_ws_rules{'n'}{'('} = WS_YES;    # occurs in 'use package n ()'

        # FIXME: we need to split 'i' into variables and functions
        # and have no space for functions but space for variables.  For now,
        # I have a special patch in the special rules below 
        $binary_ws_rules{'i'}{'('} = WS_NO;

        $binary_ws_rules{'w'}{'('} = WS_NO;
        $binary_ws_rules{'w'}{'{'} = WS_YES;
    }
    my ( $jmax, $rtokens, $rtoken_type ) = @_;
    my ( $last_token, $last_type, $token, $type );
    my (@white_space_flag);

    # index of a closing token which must be kept tight because its
    # opening partner was kept tight (tightness = 1 case); -1 if none
    my $j_tight_closing_paren = -1;

    # The "previous" token for the first token of this line is the last
    # token already stored in the output batch, or a blank if the batch
    # is empty.
    if ( $max_index_to_go >= 0 ) {
        $token = $tokens_to_go[$max_index_to_go];
        $type  = $types_to_go[$max_index_to_go];
    }
    else {
        $token = ' ';
        $type  = 'b';
    }

    # loop over all tokens
    my ( $j, $ws );

    for ( $j = 0 ; $j <= $jmax ; $j++ ) {

        # blanks get the optional flag and do not become the "last" token
        if ( $$rtoken_type[$j] eq 'b' ) {
            $white_space_flag[$j] = WS_OPTIONAL;
            next;
        }

        # set a default value, to be changed as needed
        $ws         = undef;
        $last_token = $token;
        $last_type  = $type;
        $token      = $$rtokens[$j];
        $type       = $$rtoken_type[$j];

        #---------------------------------------------------------------
        # section 1:
        # handle space on the inside of opening braces
        #---------------------------------------------------------------
        if ( ( $last_type =~ /^[L\{\(\[]$/ ) ) {

            $j_tight_closing_paren = -1;

            # let's keep empty matched braces together: () {} []
            if ( $token eq $matching_token{$last_token} ) {
                $ws = WS_NO;
            }
            else {

                # we're considering the right of an opening brace
                # tightness = 0 means always pad inside with space
                # tightness = 1 means pad inside if "complex"
                # tightness = 2 means never pad inside with space
                if ( $tightness{$last_token} <= 0 ) {
                    $ws = WS_YES;
                }
                elsif ( $tightness{$last_token} > 1 ) {
                    $ws = WS_NO;
                }
                else {

                    # look at the next nonblank token after this one
                    my $j_next =
                      ( $$rtoken_type[ $j + 1 ] eq 'b' ) ? $j + 2 : $j + 1;
                    my $tok_next  = $$rtokens[$j_next];
                    my $type_next = $$rtoken_type[$j_next];

                    # for tightness = 1, if there is just one token
                    # within the matching pair, we will keep it tight
                    if ( $tok_next eq $matching_token{$last_token}

                      # but watch out for this: [ [ ]    (misc.t)
                      #&& $tok_next ne $matching_token{$token}) {
                      && $last_token ne $token )
                    {

                        # remember where to put the space for the closing paren
                        $j_tight_closing_paren = $j_next;
                        $ws = WS_NO;
                    }
                    else {
                        $ws = WS_YES;
                    }
                }
            }
        }    # done with opening braces and brackets
        my $ws_1 = $ws;    # for debugging

        #---------------------------------------------------------------
        # section 2:
        # handle space on inside of closing brace pairs
        #---------------------------------------------------------------
        # Note: the closing type 'R' is written as a plain letter in the
        # class below.  It must not be escaped: '\R' is the generic
        # newline escape in newer perls (5.10 and later) and is not
        # valid inside a bracketed character class.
        if ( $type =~ /[\}\)\]R]/ ) {

            if ( $j == $j_tight_closing_paren ) {

                $j_tight_closing_paren = -1;
                $ws = WS_NO;
            }
            else {

                if ( !defined($ws) ) {
                    $ws = ( $tightness{$token} > 1 ) ? WS_NO : WS_YES;
                }
            }
        }

        my $ws_2 = $ws;    # for debugging

        #---------------------------------------------------------------
        # section 3:
        # use the binary table
        #---------------------------------------------------------------
        if ( !defined($ws) ) {
            $ws = $binary_ws_rules{$last_type}{$type};
        }
        my $ws_3 = $ws;    # for debugging

        #---------------------------------------------------------------
        # section 4:
        # some special cases
        #---------------------------------------------------------------
        if ( $token eq '(' ) {

            if ( $last_type =~ /^[kU]$/ ) {

                # Do not introduce new space between keyword or function
                # and ( except in special cases) because this can
                # introduce errors in some cases ( prnterr1.t )
                unless ( $space_before_paren{$last_token} ) {
                    $ws = WS_NO;
                }
            }

            # space between something like $i and ( in
            # for $i ( 0 .. 20 ) {
            # FIXME: eventually, type 'i' needs to be split into multiple
            # token types so this can be a hardwired rule.
            elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) {
                $ws = WS_YES;
            }
        }

        # keep space between 'sub' and '{' for anonymous sub definition
        # NOTE(review): the two rules below are chained to this test with
        # 'elsif', so neither is applied when the current type is '{'.
        # That is the existing behavior; confirm that it is intended.
        if ( $type eq '{' ) {
            if ( $last_token eq 'sub' ) {
                $ws = WS_YES;
            }
        }

        # always preserve whatever space was used after a possible
        # filehandle or here doc operator
        elsif ( $last_type =~ /^[Zh]$/ ) {
            $ws = WS_OPTIONAL;
        }
        elsif ( $type eq 'i' ) {

            # never a space before ->
            if ( $token =~ /^\-\>/ ) {
                $ws = WS_NO;
            }
        }
        my $ws_4 = $ws;    # for debugging

        #---------------------------------------------------------------
        # section 5:
        # default rules not covered above
        #---------------------------------------------------------------
        # if we fall through to here,
        # look at the pre-defined hash tables for the two tokens, and
        # if (they are equal) use the common value
        # if (either is zero or undef) use the other
        # if (either is -1) use it
        # That is,
        # left  vs right
        # 1		vs	1 	--> 1
        # 0		vs	0 	--> 0
        # -1	vs	-1 	--> -1
        # 0		vs	-1 	--> -1
        # 0		vs	1 	--> 1
        # 1		vs	0 	--> 1
        # -1	vs	0 	--> -1
        # -1	vs	1 	--> -1
        # 1		vs	-1 	--> -1
        if ( !defined($ws) ) {
            my $wl = $want_left_space{$type};
            my $wr = $want_right_space{$last_type};
            if ( !defined($wl) ) { $wl = 0 }
            if ( !defined($wr) ) { $wr = 0 }
            $ws = ( ( $wl == $wr ) || ( $wl == -1 ) || !$wr ) ? $wl : $wr;
        }

        # defensive check; section 5 always leaves a defined value
        if ( !defined($ws) ) {
            $ws = 0;
            write_diagnostics(
              "WS flag is undefined for tokens $last_token $token\n");
        }

        if ( ( $last_type !~ /^[Zh]$/ ) && $j > 0 && $j < $jmax
          && ( $ws == 0 ) )
        {

            # If this happens, we have a non-fatal but undesirable 
            # hole in the above rules which should be patched.
            write_diagnostics( "WS flag is zero for tokens $last_token $token\n"
            );
        }
        $white_space_flag[$j] = $ws;

        # optional trace showing the value of the flag after each section
        # ("*" means the section left it undefined)
        if ( $FORMATTER_DEBUG_FLAG{WHITE} ) {
            my $str = substr( $last_token, 0, 15 );
            $str .= ' ' x ( 16 - length($str) );
            if ( !defined($ws_1) ) { $ws_1 = "*" }
            if ( !defined($ws_2) ) { $ws_2 = "*" }
            if ( !defined($ws_3) ) { $ws_3 = "*" }
            if ( !defined($ws_4) ) { $ws_4 = "*" }
            print
"WHITE:  i=$j $str $last_type $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n";
        }
    }
    return \@white_space_flag;
}

sub print_line_of_tokens {

    my ($line_of_tokens) = @_;

    # extract input line number for error messages
    $input_line_number = $line_of_tokens->{_line_number};

    my $rtoken_type = $line_of_tokens->{_rtoken_type};
    my $rtokens     = $line_of_tokens->{_rtokens};
    my $rlevels     = $line_of_tokens->{_rlevels};
    my $rslevels    = $line_of_tokens->{_rslevels};
    my $rblock_type = $line_of_tokens->{_rblock_type};
    my $input_line  = $line_of_tokens->{_line_text};

    my $in_continued_quote = $line_of_tokens->{_starting_in_quote};
    my $in_quote = $line_of_tokens->{_ending_in_quote};
    my $python_indentation_level = $line_of_tokens->{_python_indentation_level};
    my ( $token, $type, $j, $j_next );
    my ( $next_nonblank_token, $next_nonblank_token_type );
    my $jmax = @$rtokens - 1;
    my ($rwhite_space_flag);
    my $block_type = "";

    # Handle a continued quote..
    if ($in_continued_quote) {

        # A line which is entirely a quote or pattern must go out
        # verbatim.  Note: the \n is contained in $input_line.
        if ( $jmax <= 0 ) {
            if ( ( $input_line =~ "\t" ) ) {
                note_embedded_tab();
            }
            write_unindented_line("$input_line");
            return;
        }

        # A cheat: for quotes and patterns followed by other
        # tokens, we can set the token level to zero to force the line
        # to be left adjusted, so that the leading part of the line is
        # identical to the input line, whitespace and all. 
        $$rlevels[0] = 0;
        $nesting_depth_to_go[0] = 0;
        $continuation_flag = 0;
    }

    # delete trailing blank tokens
    if ( $jmax > 0 && $$rtoken_type[$jmax] eq 'b' ) { $jmax-- }

    # Handle a blank line..
    if ( $jmax < 0 ) {

        # For the 'swallow-optional-blank-lines' option, we delete all
        # old blank lines and let the blank line rules generate any
        # needed blanks.
        if ( !$rOpts->{'swallow-optional-blank-lines'} ) {
            flush();
            $file_writer_object->write_blank_code_line();
            $last_line_leading_type = 'b';
        }
        return;
    }

    # Handle a block (full-line) comment..
    if ( ( $jmax == 0 ) && ( $$rtoken_type[0] eq '#' ) ) {
        if ( $rOpts->{'delete-block-comments'} ) {return}

        if ( $rOpts->{'tee-block-comments'} ) {
            $file_writer_object->tee_on();
        }
        flush();

        # output a blank line before block comments
        if ( ( $last_line_leading_type ne '#' )
          && ( $last_line_leading_type ne 'b' )
          && $rOpts->{'blanks-before-comments'} )
        {
            $file_writer_object->write_blank_code_line();
            $last_line_leading_type = 'b';
        }

        if ( $rOpts->{'indent-block-comments'} ) {
            store_token_to_go( $$rtokens[0], $$rtoken_type[0], $$rblock_type[0],
              0, $$rlevels[0], $$rslevels[0] );
            flush();
        }
        else {
            $file_writer_object->write_code_line( $$rtokens[0] . "\n" );
            $last_line_leading_type = '#';
        }
        if ( $rOpts->{'tee-block-comments'} ) { $file_writer_object->tee_off() }
        return;
    }

    # compare input/output indentation except for continuation lines
    # (because they have an unknown amount of initial blank space)
    # and lines which are quotes (because they may have been outdented)
    # Note: this test is placed here because we know the continuation flag
    # at this point, which allows us to avoid non-meaningful checks.
    my $structural_indentation_level = $$rlevels[0];
    compare_indentation_levels( $python_indentation_level,
      $structural_indentation_level )
      unless ( $continuation_flag
      || ( ( $python_indentation_level == 0 ) && $$rtoken_type[0] eq 'Q' ) );

    # take care of indentation-only
    # also write a line which is entirely a 'qw' list
    if ( $rOpts->{'indent-only'}
      || ( ( $jmax == 0 ) && ( $$rtoken_type[0] eq 'q' ) ) )
    {
        flush();
        $input_line =~ s/^\s*(.*?)\s*$/$1/;    # trim both ends
        my $terminal_type =
          terminal_type( $rtoken_type, $rblock_type, 0, $jmax );
        my @fields   = ($input_line);
        my @tokens   = ();
        my @patterns = ('q');
        output_indented_line( 0, 0, \@fields, \@tokens, \@patterns,
          $$rlevels[0], $$rslevels[0], $terminal_type, 0 );
        return;
    }
    push ( @$rtokens, ' ', ' ' );       # making $j+2 valid simplifies coding
    push ( @$rtoken_type, 'b', 'b' );
    ($rwhite_space_flag) =
      set_white_space_flag( $jmax, $rtokens, $rtoken_type );

    # find input tabbing to allow checks for tabbing disagreement
    $input_line_tabbing = "";
    if ( $input_line =~ /^(\s*)/ ) { $input_line_tabbing = $1; }
    my $no_internal_newlines = 1 - $rOpts->{'add-newlines'};

    # if the buffer hasn't been flushed, add a leading space if
    # necessary to keep essential whitespace. This is really only
    # necessary if we are squeezing out all ws.
    if ( $max_index_to_go >= 0 ) {

        if (
          is_essential_whitespace(
          $last_last_nonblank_token,       $last_last_nonblank_type,
          $tokens_to_go[$max_index_to_go], $types_to_go[$max_index_to_go],
          $$rtokens[0], $$rtoken_type[0] ) )
        {
            my $level = $levels_to_go[$max_index_to_go];
            my $slevel = $nesting_depth_to_go[$max_index_to_go];
            store_token_to_go( ' ', 'b', ' ', $no_internal_newlines, $level,
              $slevel );
        }
    }

    # If we just saw the end of an elsif block, write nag message
    # if we do not see another elsif or an else.
    if ($looking_for_else) {

        unless ( $$rtokens[0] =~ /^(elsif|else)$/ ) {
            write_logfile_entry("(No else block)\n");
        }
        $looking_for_else = 0;
    }

    # This is a good place to kill incomplete one-line blocks
    if ( ( $semicolons_before_block_self_destruct == 0 )
      && ( $max_index_to_go >= 0 ) && ( $tokens_to_go[$max_index_to_go] eq ';' )
      && ( $$rtokens[0] ne '}' ) )
    {
        destroy_one_line_block();
        output_line_to_go();
    }

    # loop to process the tokens one-by-one
    $type  = 'b';
    $token = "";

    for ( $j = 0 ; $j <= $jmax ; $j++ ) {
        my $last_type  = $type;
        my $last_token = $token;
        $token      = $$rtokens[$j];
        $type       = $$rtoken_type[$j];
        $block_type = $$rblock_type[$j];
        my $level  = $$rlevels[$j];
        my $slevel = $$rslevels[$j];
        last if ( ( $type eq '#' ) && ( $rOpts->{'delete-side-comments'} ) );

        # If we are continuing after seeing a right curly brace, flush buffer 
        # unless we see what we are looking for, as in
        # 	} else ...
        if ( $brace_follower_pattern && $type ne 'b' ) {

            unless ( $token =~ /$brace_follower_pattern/ ) {
                output_line_to_go();
            }
            $brace_follower_pattern = "";
        }

        # Modify certain tokens here for whitespace
        # The following is not yet done, but could be:
        # 	sub (x x x)
        # These become type 'i', space and all.
        if ( $type =~ /^[it]$/ ) {

            # change "$  var"  to "$var" etc
            if ( $token =~ /^([\$\&\%\*\@]|\-\>)\s/ ) {
                $token =~ s/\s*//g;
            }

            if ( $token =~ /^sub/ ) { $token =~ s/\s+/ /g }
        }

        # patch to add space to something like "x10"
        # This avoids having to split this token in the pre-tokenizer
        if ( ( $type eq 'n' ) && ( $token =~ /^x\d+/ ) ) { $token =~ s/x/x / }

        # warn about tabs
        if ( ( $type =~ /^[qQ]$/ ) && ( $token =~ "\t" ) ) {
            note_embedded_tab();
        }

        # insert any needed whitespace
        if ( ( $max_index_to_go >= 0 ) && ( $type ne 'b' )
          && ( $types_to_go[$max_index_to_go] ne 'b' )
          && ( $rOpts->{'add-whitespace'} ) )
        {
            my $ws = $$rwhite_space_flag[$j];

            if ( $ws == 1 ) {
                store_token_to_go( ' ', 'b', "", $no_internal_newlines, $level,
                  $slevel );
            }
        }
        $j_next = ( $$rtoken_type[ $j + 1 ] eq 'b' ) ? $j + 2 : $j + 1;
        $next_nonblank_token      = $$rtokens[$j_next];
        $next_nonblank_token_type = $$rtoken_type[$j_next];

        if ( ( $next_nonblank_token_type eq '#' ) and ( $token ne '}' ) ) {
            $no_internal_newlines = 1;
        }

        # We're only going to handle breaking for code blocks at this
        # (top) level.  Other indentation breaks will be handled by
        # sub scan_list, which is better suited to dealing with them.
        if ( $type eq '{' && $token eq '{' && $block_type ) {

            # Look ahead to see if we might form a one-line block
            my $too_long =
              starting_one_line_block( $j, $jmax, $level, $slevel, $rtokens,
              $rtoken_type, $rblock_type );
            clear_breakpoint_undo_stack();

            # to simplify the logic below, set a flag to indicate if 
            # this opening brace is far from the keyword which introduces it
            my $keyword_on_same_line = 1;
            if ( ( $max_index_to_go >= 0 ) && ( $last_nonblank_type eq ')' ) ) {
                if ( $block_type =~ /^(if|else|elsif)$/
                  && ( $tokens_to_go[0] eq '}' )
                  && ( $rOpts->{'cuddled-else'} ) )
                {
                    $keyword_on_same_line = 1;
                }
                elsif ( ( $slevel < $nesting_depth_to_go[0] ) || $too_long ) {
                    $keyword_on_same_line = 0;
                }
            }

            # Break before an opening '{' ...
            if (

              # if requested 
              $rOpts->{'opening-brace-on-new-line'}

              # and we were unable to start looking for a block,
              && ( $index_start_one_line_block == UNDEFINED_INDEX )

              # or if it will not be on same line as its keyword, so that
              # it will be outdented (eval.t, overload.t) 
              || !$keyword_on_same_line

              )
            {

                # but only if allowed
                unless ($no_internal_newlines) {
                    output_line_to_go();
                }
            }

            store_token_to_go( $token, $type, $block_type,
              $no_internal_newlines, $level, $slevel );

            unless ($no_internal_newlines) {
                output_line_to_go();
            }
        }

        elsif ( $type eq '}' && $token eq '}' && $block_type ) {

            # If there is a pending one-line block, we have to check
            # that it is short enough, because the final length may have
            # changed.
            if ( $index_start_one_line_block != UNDEFINED_INDEX ) {

                if ( ( $semicolons_before_block_self_destruct == 0
                  && $last_nonblank_type ne ';' )
                  || is_one_line_block_too_long($level) )
                {
                    destroy_one_line_block();
                }
            }

            # put a break before this closing curly brace if appropriate
            unless ( $no_internal_newlines
              || $index_start_one_line_block != UNDEFINED_INDEX )
            {

                # add missing semicolon if appropriate
                if ( ( $max_index_to_go > 0 ) && ( $last_nonblank_token ne ';' )
                  && $rOpts->{'add-semicolons'} )
                {

                    my $lev  = $levels_to_go[$max_index_to_go];
                    my $slev = $nesting_depth_to_go[$max_index_to_go];
                    if ( $types_to_go[$max_index_to_go] eq 'b' ) {
                        $max_index_to_go--;
                    }
                    store_token_to_go( ';', ';', ' ', $no_internal_newlines,
                      $lev, $slev );
                    note_added_semicolon();
                }

                # then write out everything before this closing curly brace
                output_line_to_go();
            }

            # If a side comment follows this '}', set a flag to prevent
            # a line break between the two.  Otherwise, the side comment
            # would be promoted to a full line comment.
            if ( $next_nonblank_token_type eq '#' ) {
                $no_internal_newlines = 1;
            }

            # store the closing curly brace
            store_token_to_go( $token, $type, $block_type,
              $no_internal_newlines, $level, $slevel );

            # ok, we just stored a closing curly brace.  Often, but
            # not always, we want to end the line immediately.
            # So now we have to check for special cases.

            # if this '}' successfully ends a one-line block..
            if ( $index_start_one_line_block != UNDEFINED_INDEX ) {

                # we have to actually make it by removing tentative
                # breaks that were set within it
                undo_forced_breakpoint_stack(0);
                set_nobreaks( $index_start_one_line_block,
                  $max_index_to_go - 1 );

                # then re-initialize for the next one-line block
                destroy_one_line_block();

                # then decide if we want to break after the '}' ..
                # We will keep going to allow certain brace followers as in:
                #   do { $ifclosed = 1; last } unless $losing;
                #
                # But make a line break if the curly ends a significant block:
                if ( $block_type =~ /^(until|while|for|if|elsif|else)$/ ) {
                    output_line_to_go() unless ($no_internal_newlines);
                }
            }

            # set string indicating what we need to look for brace follower
            # tokens
            if ( $block_type =~ /^(do)$/ ) {
                $brace_follower_pattern = $do_follower_pattern;
            }
            elsif ( $block_type =~ /^(if|elsif)$/ ) {
                $brace_follower_pattern = $if_brace_follower_pattern;
            }
            elsif ( $block_type =~ /^(else)$/ ) {
                $brace_follower_pattern = '^$';
            }
            elsif ( $block_type =~ /^(sort|map|grep)$/ ) {
                $brace_follower_pattern = "";
            }

            # None of the above:
            # include here everything you would allow to follow a short block
            # which is not an if/elsif/else/do/sort/map/grep
            # Testfiles: 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl'
            # break1.t
            else {
                $brace_follower_pattern = '^(\,|\;|:|=>|or|and|\&\&|\|\}|\))$';
            }

            # See if an elsif block is followed by another elsif or else;
            # complain if not.
            if ( $block_type eq 'elsif' ) {

                if ( $next_nonblank_token_type eq 'b' ) {    # end of line?
                    $looking_for_else = 1;    # ok, check on next line
                }
                else {

                    unless ( $next_nonblank_token =~ /^(elsif|else)$/ ) {
                        write_logfile_entry("No else block :(\n");
                    }
                }
            }

            # Note: continue blocks are always un-cuddled for now, but
            # this is the place to allow cuddled continues

            # keep going after these block types: map,sort,grep
            if ( $block_type =~ /^(sort|grep|map)$/ ) {
            }

            # if no more tokens, postpone decision until re-entering
            elsif ( ( $next_nonblank_token_type eq 'b' )
              && $rOpts->{'add-newlines'} )
            {
                unless ($brace_follower_pattern) {
                    output_line_to_go() unless ($no_internal_newlines);
                }
            }

            elsif ($brace_follower_pattern) {

                unless ( $next_nonblank_token =~ /$brace_follower_pattern/ ) {
                    output_line_to_go() unless ($no_internal_newlines);
                }
                $brace_follower_pattern = "";
            }

            else {
                output_line_to_go() unless ($no_internal_newlines);
            }

        }
        elsif ( $type eq ';' ) {

            $semicolons_before_block_self_destruct--;
            if ( ( $semicolons_before_block_self_destruct < 0 )
              || ( $semicolons_before_block_self_destruct == 0
              && $next_nonblank_token_type !~ /^[b\}]$/ ) )
            {
                destroy_one_line_block();
            }

            if ( ( $last_nonblank_token eq '}' )
              && ( $last_nonblank_block_type =~
              /^(if|else|elsif|unless|while|for|foreach)$/ ) )
            {

                if ( $rOpts->{'delete-semicolons'}

                  # don't delete ; before a # because it would promote it
                  # to a block comment
                  && ( $next_nonblank_token_type ne '#' ) )
                {
                    note_deleted_semicolon();
                    output_line_to_go()
                      unless ( $no_internal_newlines
                      || $index_start_one_line_block != UNDEFINED_INDEX );
                    next;
                }
                else {
                    write_logfile_entry("Extra ';'\n");
                }
            }
            store_token_to_go( $token, $type, $block_type,
              $no_internal_newlines, $level, $slevel );

            output_line_to_go()
              unless ( $no_internal_newlines
              || ( $next_nonblank_token eq '}' ) );

        }
        elsif ( $type eq 'h' ) {    # here_doc target string...
            $no_internal_newlines = 1;    # no newlines after seeing here-target
            destroy_one_line_block();
            store_token_to_go( $token, $type, $block_type,
              $no_internal_newlines, $level, $slevel );
        }
        else {

            if ( $type eq 'b' ) {
                $token = ' ' if $rOpts->{'add-whitespace'};

                if ( $rOpts->{'delete-old-whitespace'} ) {
                    next unless is_essential_whitespace(
                      $last_last_nonblank_token,
                      $last_last_nonblank_type,
                      $tokens_to_go[$max_index_to_go],
                      $types_to_go[$max_index_to_go],
                      $$rtokens[ $j + 1 ],
                      $$rtoken_type[ $j + 1 ]
                    );
                }
                my $ws = $$rwhite_space_flag[ $j + 1 ];

                if ( defined($ws) && $ws == -1 ) {

                    next unless is_essential_whitespace(
                      $last_last_nonblank_token,
                      $last_last_nonblank_type,
                      $tokens_to_go[$max_index_to_go],
                      $types_to_go[$max_index_to_go],
                      $$rtokens[ $j + 1 ],
                      $$rtoken_type[ $j + 1 ]
                    );
                }
            }

            store_token_to_go( $token, $type, $block_type,
              $no_internal_newlines, $level, $slevel );
        }

        # remember two previous nonblank OUTPUT tokens
        if ( $type !~ /^[#b]/ ) {
            $last_last_nonblank_token = $last_nonblank_token;
            $last_last_nonblank_type  = $last_nonblank_type;
            $last_nonblank_token      = $token;
            $last_nonblank_type       = $type;
            $last_nonblank_block_type = $block_type;

        }    # end of loop to output tokens

    }    # end of loop over all tokens in this 'line_of_tokens'

    # we have to flush ..
    if (

      # if there is a side comment
      ( ( $type eq '#' ) && !$rOpts->{'delete-side-comments'} )

      # if this line which ends in a quote
      || $in_quote

      # to keep a label on one line if that is how it is now
      || ( ( $type eq 'J' ) && ( $max_index_to_go == 0 ) )

      # if we are instructed to keep all old line breaks
      || !$rOpts->{'delete-old-newlines'} )
    {
        destroy_one_line_block();
        output_line_to_go();
    }

    # mark old line breakpoints in current output stream
    if ( $max_index_to_go >= 0 ) {
        $old_breakpoint_to_go[$max_index_to_go] = 1;
    }
}

sub note_added_semicolon {

    # Bookkeeping for a ';' added to the output: remember the input line
    # of the first and of the most recent addition, count it, and leave
    # a note in the log file.
    my $here = $input_line_number;

    $first_added_semicolon_at = $here if ( $added_semicolon_count == 0 );
    $last_added_semicolon_at  = $here;
    $added_semicolon_count++;
    write_logfile_entry("Added ';' here\n");
}

sub note_deleted_semicolon {

    # Bookkeeping for a ';' dropped from the output: remember the input
    # line of the first and of the most recent deletion, count it, and
    # leave a note in the log file.
    my $here = $input_line_number;

    $first_deleted_semicolon_at = $here if ( $deleted_semicolon_count == 0 );
    $last_deleted_semicolon_at  = $here;
    $deleted_semicolon_count++;
    write_logfile_entry("Deleted unnecessary ';'\n");
}

sub note_embedded_tab {

    # Bookkeeping for a tab found inside a quote or pattern: count it and
    # remember the input line of the first and of the most recent one.
    my $here = $input_line_number;

    $embedded_tab_count++;
    $last_embedded_tab_at  = $here;
    $first_embedded_tab_at = $here unless ($first_embedded_tab_at);

    # only the first MAX_NAG_MESSAGES occurrences are written to the log
    write_logfile_entry("Embedded tabs in quote or pattern\n")
      if ( $embedded_tab_count <= MAX_NAG_MESSAGES );
}

sub starting_one_line_block {

    # after seeing an opening curly brace, look for the closing brace
    # and see if the entire block will fit on a line.  This routine is
    # not always right because it uses the old whitespace, so a check
    # is made later (at the closing brace) to make sure we really
    # have a one-line block.  We have to do this preliminary check,
    # though, because otherwise we would always break at a semicolon
    # within a one-line block if the block contains multiple statements.
    #
    # Arguments:
    #   $j       index, in the current input line, of the token whose
    #            block type is examined; the scan for the closing brace
    #            starts at $j+1
    #   $jmax    last index scanned in the current input line
    #   $level   indentation level used to estimate the leading space
    #   $slevel  accepted for the caller's convenience; not used here
    #   $rtokens, $rtoken_type, $rblock_type
    #            references to the token text, token type and block
    #            type arrays of the current input line
    #
    # Side effects: any one-line block in progress is destroyed, and a
    # new one is created if the block looks like it fits.

    my ( $j, $jmax, $level, $slevel, $rtokens, $rtoken_type, $rblock_type ) =
      @_;

    # kill any current block
    destroy_one_line_block();

    # return value:
    #  1=distance from start of block to opening brace exceeds line length
    #    (also returned when the keyword starting the block is not found)
    #  0=otherwise

    my $i_start = UNDEFINED_INDEX;

    # cannot use one-line blocks with cuddled else/elsif lines
    if ( $max_index_to_go >= 0
      && ( $tokens_to_go[0] eq '}' )
      && ( $rOpts->{'cuddled-else'} ) )
    {
        return 0;
    }

    my $block_type = $$rblock_type[$j];

    # find the starting keyword for this block (such as 'if', 'else', ...)

    if ( $block_type =~ /^[\{\}\;\:]$/ ) {
        $i_start =
          ( $max_index_to_go == UNDEFINED_INDEX ) ? 0 : $max_index_to_go + 1;
    }

    elsif ( $last_nonblank_token_to_go eq ')' ) {

        # For something like "if (xxx) {", the keyword "if" will be
        # just after the most recent break. This will be 0 unless
        # we have just killed a one-line block and are starting another.
        # (doif.t)
        $i_start = $index_max_forced_break + 1;
        if ( $types_to_go[$i_start] eq 'b' ) {
            $i_start++;
        }

        unless ( $tokens_to_go[$i_start] eq $block_type ) {
            return 0;
        }
    }

    # the previous nonblank token should start these block types
    elsif ( ( $last_nonblank_token_to_go eq $block_type )
      || ( $block_type =~ /^sub/ && $last_nonblank_token_to_go =~ /^sub/ ) )
    {
        $i_start = $last_nonblank_index_to_go;
    }

    else {
        return 1;
    }

    my $length = length_from_index($i_start);

    # FIXME: Need to correct the ci value here (ci.t)
    # (not critical though)
    my $ci = ( $i_start == 0 ) ? 0 : $rOpts->{'continuation-indentation'};

    # running estimate of the output column reached so far
    my $pos = $length + $level * $rOpts->{'indent-columns'};
    my $i;

    # see if length is too long to even start
    if ( $pos > $rOpts->{'maximum-line-length'} ) {
        return 1;
    }

    for ( $i = $j + 1 ; $i <= $jmax ; $i++ ) {

        # old whitespace could be arbitrarily large, so don't use it
        if ( $$rtoken_type[$i] eq 'b' ) { $pos += 1 }
        else { $pos += length( $$rtokens[$i] ) }

        # Return false result if we exceed the maximum line length,
        if ( $pos > $rOpts->{'maximum-line-length'} ) {
            return 0;
        }

        # or encounter another opening brace before finding the closing brace.
        elsif ( $$rtokens[$i] eq '{' && $$rtoken_type[$i] eq '{'
          && $$rblock_type[$i] )
        {
            return 0;
        }

        # if we find our closing brace..
        elsif ( $$rtokens[$i] eq '}' && $$rtoken_type[$i] eq '}'
          && $$rblock_type[$i] )
        {

            # be sure we have the correct block type .. shouldn't happen
            #            if ( $$rblock_type[$i] ne $block_type ) {
            #                unless ( $::warning_count > 0 ) {
            #                    warning(
#"Code bug? uncaught nesting error detected at block types: $block_type : $$rblock_type[$i]\n"
            #                    );
            #                    ::report_possible_bug();
            #                }
            #                return 0;
            #            }

            # be sure any trailing comment also fits on the line
            my $i_nonblank =
              ( $$rtoken_type[ $i + 1 ] eq 'b' ) ? $i + 2 : $i + 1;

            if ( $$rtoken_type[$i_nonblank] eq '#' ) {
                $pos += length( $$rtokens[$i_nonblank] );

                # A blank separates the brace from the comment.  As in
                # the scan above, old whitespace could be arbitrarily
                # large, so count a single space for it rather than its
                # length in the input.
                if ( $i_nonblank > $i + 1 ) {
                    $pos += 1;
                }

                if ( $pos > $rOpts->{'maximum-line-length'} ) {
                    return 0;
                }
            }

            # ok, it's a one-line block
            create_one_line_block( $i_start, $level, $ci, 20 );
            return 0;
        }

        # just keep going for other characters
    }

    # Allow certain types of new one-line blocks to form by joining old
    # input lines.  These can be safely done, but for other block types,
    # we keep old one-line blocks but do not form new ones. It is not
    # always a good idea to make as many one-line blocks as possible,
    # so other types are not done.  The user can always use -mangle.
    # TODO: experiment with an option to combine any blocks
    if ( $block_type =~ /^(eval|map|grep|sort)/ ) {
        create_one_line_block( $i_start, $level, $ci, 1 );
    }

    return 0;
}

sub store_token_to_go {

    # Append one token to the batch held in the parallel "to_go" arrays
    # and record what is stored alongside it: type, block type, break
    # restriction, indentation levels, cumulative statement length and
    # a vertical alignment flag.
    # Called once per output token.

    BEGIN {

        # token types whose alignment column is the added space BEFORE
        # the token:
        $before_pattern = '^(\{|#|=>|=|\+=|\-=|=~|\*=|\/=)$';
    }

    my ( $token, $type, $block_type, $no_internal_newlines, $level, $slevel ) =
      @_;

    # claim the next slot of the batch
    my $k = ++$max_index_to_go;

    $tokens_to_go[$k]            = $token;
    $types_to_go[$k]             = $type;
    $nobreak_to_go[$k]           = $no_internal_newlines;
    $old_breakpoint_to_go[$k]    = 0;
    $forced_breakpoint_to_go[$k] = 0;
    $block_type_to_go[$k]        = $block_type;

    unless ( $type eq 'b' ) {
        $last_nonblank_index_to_go = $k;
        $last_nonblank_type_to_go  = $type;
        $last_nonblank_token_to_go = $token;
    }

    # Work out the flag for tokens which might be vertically aligned.
    # A blank flag means no alignment; otherwise the flag is the token
    # type being aligned.
    my $alignment = ' ';

    if ( $rOpts->{'add-whitespace'} ) {

        if ( $type =~ /$before_pattern/ ) {

            # the token must follow a blank ..
            if ( $k > 1 && $types_to_go[ $k - 1 ] eq 'b' ) {

                # .. but do not create blank field: ( align1.t and undoci.t )
                if ( $type eq '#'
                  || $matching_token_to_go[ $k - 2 ] !~ /$before_pattern/ )
                {
                    $alignment = $type;
                }
            }
        }

        # We want to line up ',' and interior ';' tokens, with the added
        # space AFTER these tokens.  (Note: interior ';' is included
        # because it may occur in short blocks).
        elsif ( ( $type !~ /^[b\#\)\]\}]$/ ) && ( $k > 0 )
          && ( $last_nonblank_type =~ /^[\,\;]$/ )
          && $types_to_go[ $k - 1 ] eq 'b' )
        {
            $alignment = $last_nonblank_type;
        }
    }
    $matching_token_to_go[$k] = $alignment;

    $levels_to_go[$k] = $level;

    # a negative nesting depth is stored as zero
    $nesting_depth_to_go[$k] = ( $slevel < 0 ) ? 0 : $slevel;

    $current_statement_length += length($token);
    $lengths_to_go[$k] = $current_statement_length;

    if ( $FORMATTER_DEBUG_FLAG{STORE} ) {
        print
"STORE: storing token $token type $type lev=$level slev=$slevel at $max_index_to_go\n";
    }
}

sub want_blank_line {

    # Forward a blank-line request to the file writer.  The pipeline is
    # flushed first (see flush) so that any tokens still waiting are
    # written ahead of the request.
    flush();
    $file_writer_object->want_blank_line();
}

sub write_unindented_line {

    # Hand the single argument straight to the file writer's write_line
    # method, bypassing the formatter's own token pipeline.  The pipeline
    # is flushed first (see flush) so that any tokens still waiting are
    # written ahead of this line.
    flush();
    $file_writer_object->write_line( $_[0] );
}

=pod
flush() is called to output any tokens in the pipeline, so that
an alternate source of lines can be written in the correct order
=cut
sub flush {

    # first push the tokens waiting in the "to_go" arrays on to the
    # vertical aligner, then make the aligner flush what it is holding
    output_line_to_go();
    Perltidy::VerticalAligner::flush();
}

=pod
 output_line_to_go() sends one logical line of tokens on down the
 pipeline to the VerticalAligner package, breaking the line into continuation
 lines as necessary.  The line of tokens is ready to go in the "to_go"
 arrays.
=cut
sub output_line_to_go {

    # Ship the batch of tokens waiting in the "to_go" arrays on to
    # output_indented_line as one or more lines, then start a new batch.
    # While a one-line block is under construction nothing is written;
    # only a forced breakpoint is set at the current end of the batch.

    # debug stuff; this routine can be called from many points
    if ( $FORMATTER_DEBUG_FLAG{OUTPUT} ) {
        my ( $a, $b, $c ) = caller;
        write_diagnostics(
"OUTPUT: output_line_to_go called: $a $c $last_nonblank_type $last_nonblank_token, one_line=$index_start_one_line_block, tokens to write=$max_index_to_go\n"
        );
        my $batch_text = join "", @tokens_to_go[ 0 .. $max_index_to_go ];
        write_diagnostics( $batch_text . "\n" );
    }

    # just set a tentative breakpoint if we might be in a one-line block
    unless ( $index_start_one_line_block == UNDEFINED_INDEX ) {
        set_forced_breakpoint($max_index_to_go);
        return;
    }

    # index range of the batch, without a leading or trailing blank token
    my ( $imin, $imax ) = ( 0, $max_index_to_go );

    if ( $max_index_to_go >= 0 ) {
        $imin++ if ( $types_to_go[$imin] eq 'b' );
        $imax-- if ( $types_to_go[$imax] eq 'b' );
    }

    # anything left to write?
    if ( $imin <= $imax ) {

        # A blank line may be wanted before certain leading tokens, but
        # never right after a blank line or a comment line.
        my $previous_lead = $last_line_leading_type;

        unless ( $previous_lead eq 'b' || $previous_lead eq '#' ) {
            my $lead_token = $tokens_to_go[$imin];
            my $want_blank;

            if ( $lead_token =~ /^(unless|if|while|until)$/ ) {

                # We do not want to add a blank line above a short block.
                # If a line doesn't end in a '}', it's probably a long block
                $want_blank = $rOpts->{'blanks-before-blocks'}
                  && (
                    '}' ne terminal_type(
                        \@types_to_go, \@block_type_to_go, $imin, $imax
                    )
                  )
                  && ( $file_writer_object->get_consecutive_nonblank_lines()
                    >= 5 );
            }
            elsif ( $lead_token =~ /^(sub\s)/ ) {

                # no blank lines before sub declarations and one-liners
                $want_blank = $rOpts->{'blanks-before-subs'}
                  && (
                    terminal_type(
                        \@types_to_go, \@block_type_to_go, $imin, $imax
                    ) !~ /^[\;\}]$/
                  );
            }
            else {

                # a package line always qualifies; END, BEGIN, for and
                # foreach only after a run of nonblank lines
                $want_blank = $rOpts->{'blanks-before-blocks'}
                  && (
                    ( $lead_token =~ /^(package\s)/ )
                    || (
                        (
                            $file_writer_object
                            ->get_consecutive_nonblank_lines() >= 5
                        )
                        && ( $lead_token =~ /^(END|BEGIN|for|foreach)$/ )
                    )
                  );
            }

            if ($want_blank) {

                # future: send blank line down normal path to VerticalAligner
                Perltidy::VerticalAligner::flush();
                $file_writer_object->write_blank_code_line();
            }
        }
        $last_line_leading_type = $types_to_go[$imin];

        if ( $FORMATTER_DEBUG_FLAG{FLUSH} ) {
            my ( $package, $file, $line ) = caller;
            print
"FLUSH: flushing from $package $file $line, types= $types_to_go[$imin] to $types_to_go[$imax]\n";
        }

        # estimate the leading whitespace to decide if one line is enough
        my $leader_length = $levels_to_go[$imin] * $rOpts->{'indent-columns'};

        $leader_length += $rOpts->{'continuation-indentation'}
          if ( $nesting_depth_to_go[$imin] > $levels_to_go[$imin]
          || $continuation_flag );

        my $is_short_line =
          ( $current_statement_length + $leader_length <=
          $rOpts->{'maximum-line-length'} );

        pad_array_to_go();
        scan_list();

        # first and last token index of each output line
        my ( $ri_first, $ri_last );

        # use multiple lines when allowed and needed
        if ( $rOpts->{'add-newlines'}
          && ( $forced_breakpoint_count || !$is_short_line ) )
        {
            ( $ri_first, $ri_last ) = set_continuation_breaks();

            ( $ri_first, $ri_last ) =
              recombine_breakpoints( $ri_first, $ri_last )
              if ( $rOpts->{'recombine'} );
        }

        # otherwise write a single line
        else {
            $ri_first = [$imin];
            $ri_last  = [$imax];
        }

        # loop to dump out the line(s)
        for my $n ( 0 .. $#{$ri_first} ) {
            my ( $ibeg, $iend ) = ( $$ri_first[$n], $$ri_last[$n] );

            # the line is cut into fields at the alignment tokens; each
            # field has a text and a pattern of the types within it
            my ( @patterns, @tokens, @fields );
            my $i_start = $ibeg;
            my $j       = 0;       # field index

            $patterns[0] = "";
            for my $i ( $ibeg .. $iend ) {

                # an alignment token after the start of a field begins
                # a new field
                if ( ( $i > $i_start ) && ( $matching_token_to_go[$i] ne ' ' ) )
                {

                    # make separators in different nesting depths unique
                    # by appending the nesting depth digit.
                    my $tok = $matching_token_to_go[$i];
                    $tok .= $nesting_depth_to_go[$i] if ( $tok ne '#' );

                    push ( @fields,
                      join ( '', @tokens_to_go[ $i_start .. $i - 1 ] ) );
                    push ( @tokens, $tok );

                    $i_start = $i;
                    $patterns[ ++$j ] = "";
                }

                my $type = $types_to_go[$i];

                if ( $type eq 'k' ) {

                    # Keywords go into the pattern as their text.
                    # a little patch to keep one-line blocks lined up
                    # in long if/elsif sequences. my testfile: elsif.pl
                    my $word = $tokens_to_go[$i];
                    $patterns[$j] .= ( $word eq 'elsif' ) ? 'if' : $word;
                }
                else {

                    # Mark most things before arrows as a quote to
                    # get them to line up. Testfile: mixed.pl.
                    if ( ( $i < $iend - 1 ) && ( $type =~ /^[wnC]$/ ) ) {
                        my $i_next_nonblank =
                          ( $types_to_go[ $i + 1 ] eq 'b' ) ? $i + 2 : $i + 1;

                        $type = 'Q'
                          if ( $types_to_go[$i_next_nonblank] eq '=>' );
                    }
                    $patterns[$j] .= $type;
                }
            }

            # whatever follows the last alignment token is the last field
            push ( @fields, join ( '', @tokens_to_go[ $i_start .. $iend ] ) );

            my $terminal_type =
              terminal_type( \@types_to_go, \@block_type_to_go, $ibeg, $iend );

            output_indented_line(
              $ibeg, $iend, \@fields, \@tokens, \@patterns,
              $levels_to_go[$ibeg], $nesting_depth_to_go[$ibeg], $terminal_type,
              $forced_breakpoint_to_go[$iend]
            );
        }
    }
    prepare_for_a_new_line();
}

sub terminal_type {

=pod
    Report how a line of tokens ends.  The line is the index range
    $ibeg..$iend of the array of token types referenced by $rtype;
    $rblock_type references the parallel array of block types.

    returns '#' if the first token of the range is a comment
    returns ' ' if nothing but blanks and comments is found
    returns 'b' if the last real token is the closing brace of a
            sort, grep, map, do or eval block
    otherwise returns the type of the last token which is neither a
    blank nor a comment
=cut
    my ( $rtype, $rblock_type, $ibeg, $iend ) = @_;

    # a line which starts with a comment is a full-line comment
    return '#' if ( $$rtype[$ibeg] eq '#' );

    # look at the tokens from right to left
    for my $i ( reverse $ibeg .. $iend ) {
        my $candidate = $rtype->[$i];

        # step over any side comment and blanks
        next if ( $candidate =~ /^[\#b]$/ );

        # This is the terminal token.  A '}' which closes one of these
        # blocks is reported as a blank instead, because it is not
        # necessarily the end of the line.  (terminal.t)
        my $is_hidden_brace = $candidate eq '}'
          && $rblock_type->[$i] =~ /^(sort|grep|map|do|eval)$/;

        return $is_hidden_brace ? 'b' : $candidate;
    }

    # empty line
    return ' ';
}

sub output_indented_line {

=pod
 This routine completes one indented line, except for any adjustable
 whitespace to be added by the Perltidy::VerticalAligner package, and
 hands it to that package.
 The input parameters are:

    $ibeg, $iend = first and last "to_go" index of the line
    $rfields, $rtokens, $rpatterns = references to the field, alignment
        token and pattern arrays; passed through to the vertical aligner
    $level is the indentation level of this line
    $slevel is the total nesting depth, including parens
    $terminal_type = last token type (before any side comment)
    $is_forced_break is passed through to the vertical aligner

 These global variables are used:

    $last_output_level = level of previously written line
    $continuation_flag = 1 if next line is a continuation, = 0 if not
    $rOpts->{'continuation-indentation'} = how many extra continuation spaces

    Note: retain debug print statements because they are usually needed
    after new token types are added.

=cut
    my ( $ibeg, $iend, $rfields, $rtokens, $rpatterns, $level, $slevel,
      $terminal_type, $is_forced_break ) = @_;

#print "INDENT: type=$$rpatterns[0],  tok=$$rfields[0], terminal_type = $terminal_type\n";

    # patch for handling bad scripts
    $level = 0 if ( $level < 0 );

    #----------------------------------------------------------------
    # update continuation flag when level changes
    #----------------------------------------------------------------
    if ( $level != $last_output_level ) {

        # give '}' continuation indentation if necessary ( overload.t, bless.t )
        # Note: added '\{' to version 20010304 to terminal_type test
        # to fully outdent something like '} else {'
        my $brace_is_continued =
          ( $$rpatterns[0] =~ /^[\}]/ && $terminal_type !~ /[\{\}\;]/ );

        # added to 20010328: but not if this is a cuddled-else
        if ( $brace_is_continued && $rOpts->{'cuddled-else'} ) {
            my $line = join ( "", @$rpatterns );
            $brace_is_continued = 0
              if ( $line =~ /^\}b*(if|else|elsif)b*\(/ );
        }

        # in every other case the flag is turned off when the level changes
        $continuation_flag = $brace_is_continued ? 1 : 0;
    }

    #----------------------------------------------------------------
    # set the continuation indentation to be used for this line
    #----------------------------------------------------------------
    my $ci = '';

    # Continuation indentation is undone at an opening brace, so we get
    # this effect:
    #
    #  if ( xxx
    #    && yyyyy )
    #  {                # <-- no continuation indentation here
    #    bla bla
    #  }
    #
    # But it is not undone if we are in a list, which can be determined
    # from variables $slevel (total level, including parens) and $level
    # indentation (excluding parens).  From here on $slevel is the
    # difference between the two.
    $slevel -= $level;

    # correct for different centering of the two level variables
    $slevel -= 1
      if ( $slevel > 0
      && $$rpatterns[0] =~ /^[\}\)\]]/
      && !$rOpts->{'indent-closing-brace'} );

    if ( $slevel || $continuation_flag ) {
        my $use_ci = 1;

        # both exceptions below apply only outside of a list
        if ( !$slevel ) {

            # temporary outdent of code block '{'  ( foreach.t )
            if ( $terminal_type =~ /^[\{]/ ) {
                my $line = join ( "", @$rpatterns );
                $use_ci = 0
                  if ( $line =~ /^b*\{b*#?$/ && $block_type_to_go[$ibeg] );
            }

            # undo continuation indentation (unless told not to) ..
            if ( !$rOpts->{'indent-closing-brace'} ) {

                # at ');'   ( slevel.t )
                if ( $$rpatterns[0] =~ /^\)b*;/ ) {
                    $use_ci = 0;
                }

                # at ');', '};', '>;', and '];' of a terminal qw quote
                elsif ( $$rpatterns[0] =~ /^qb*;$/
                  && $$rfields[0] =~ /^[\)\}\]\>];$/ )
                {
                    $use_ci = 0;
                }
            }
        }

        $ci = ' ' x $rOpts->{'continuation-indentation'} if ($use_ci);
    }

    if ( $FORMATTER_DEBUG_FLAG{CI} ) {
        print
"CI: level=$level slevel=$slevel terminal=$terminal_type ci=$continuation_flag\n";
    }

    # EXPERIMENTAL new option - looks ugly
    #if ($rOpts->{'indent-closing-brace'} && $$rfields[0] =~ /^[\}]/) {
    #  $level++;
    #}

    #----------------------------------------------------------------
    # write the line
    #----------------------------------------------------------------
    Perltidy::VerticalAligner::append( $level, $ci, $rfields, $rtokens,
      $rpatterns, $is_forced_break );
    $last_output_level = $level;

    #----------------------------------------------------------------
    # update the continuation flag unless this is a full-line comment
    #----------------------------------------------------------------
    if ( $terminal_type ne '#' ) {

        # do not add extra indentation to a line following a single-line label
        my $follows_label = ( !$continuation_flag && $terminal_type eq 'J' );

        # the next line is a continuation unless this one ends in one
        # of ';', '}' or '{'
        unless ($follows_label) {
            $continuation_flag = ( $terminal_type !~ /^[\;\}\{]$/ );
        }
    }
}

sub set_bond_strengths {

    BEGIN {

        ###############################################################
        # NOTE: NO_BREAK's set here are HINTS which may not be honored; 
        # essential NO_BREAKS's must be enforced in section 2, below.
        ###############################################################

        # adding NEW_TOKENS: add a left and right bond strength by
        # mimicking what is done for an existing token type.  You
        # can skip this step at first and take the default, then
        # tweak later to get desired results.

        # The bond strengths should roughly follow precedence order where
        # possible.  If you make changes, please check the results very
        # carefully on a variety of scripts.

        # no break around possible filehandle
        $left_bond_strength{'Z'}  = NO_BREAK;
        $right_bond_strength{'Z'} = NO_BREAK;

        # never put a bare word on a new line:
        # example print (STDERR, "bla"); will fail with break after (
        $left_bond_strength{'w'} = NO_BREAK;

        # blanks always have infinite strength to force breaks after real tokens
        $right_bond_strength{'b'} = NO_BREAK;

        # try not to break on exponentiation
        my @strong = qw" ** .. ... <=> ";
        @left_bond_strength{@strong}  = (STRONG) x scalar(@strong);
        @right_bond_strength{@strong} = (STRONG) x scalar(@strong);

        # The comma-arrow has very low precedence but not a good break point
        $left_bond_strength{'=>'}  = NO_BREAK;
        $right_bond_strength{'=>'} = NOMINAL;

        # ok to break after label
        $left_bond_strength{'J'}  = NO_BREAK;
        $right_bond_strength{'J'} = NOMINAL;
        $left_bond_strength{'j'}  = STRONG;
        $right_bond_strength{'j'} = STRONG;

        $left_bond_strength{'->'}  = STRONG;
        $right_bond_strength{'->'} = VERY_STRONG;

        # breaking AFTER these is just ok:
        my @rbrk = qw" % + - * / x  ";
        @left_bond_strength{@rbrk}  = (STRONG) x scalar(@rbrk);
        @right_bond_strength{@rbrk} = (NOMINAL) x scalar(@rbrk);

        # breaking BEFORE these is just ok:
        my @rbrk = qw" >> << ";
        @right_bond_strength{@rbrk} = (STRONG) x scalar(@rbrk);
        @left_bond_strength{@rbrk}  = (NOMINAL) x scalar(@rbrk);

        # I prefer breaking before the string concatenation operator
        # because it can be hard to see at the end of a line
        # swap these to break after a '.' 
        # this could be a future option
        $right_bond_strength{'.'} = STRONG;
        $left_bond_strength{'.'}  = 0.9 * NOMINAL + 0.1 * WEAK;

        my @misc = qw"} ] ) ";
        @left_bond_strength{@misc}  = (STRONG) x scalar(@misc);
        @right_bond_strength{@misc} = (NOMINAL) x scalar(@misc);

        # make these a little weaker than nominal so that they get
        # favored for end-of-line characters
        my @misc = qw"!= == >= <= =~ !~";
        @left_bond_strength{@misc}  = (STRONG) x scalar(@misc);
        @right_bond_strength{@misc} =
          ( 0.9 * NOMINAL + 0.1 * WEAK ) x scalar(@misc);

        # break AFTER these 
        my @rbrk = qw" < >  | & >= <=";
        @left_bond_strength{@rbrk}  = (VERY_STRONG) x scalar(@rbrk);
        @right_bond_strength{@rbrk} =
          ( 0.8 * NOMINAL + 0.2 * WEAK ) x scalar(@rbrk);

        # breaking either before or after a quote is ok
        $left_bond_strength{'Q'}  = NOMINAL;
        $right_bond_strength{'Q'} = NOMINAL;
        $left_bond_strength{'q'}  = NOMINAL;
        $right_bond_strength{'q'} = NOMINAL;

        # starting a line with a keyword is usually ok
        $left_bond_strength{'k'} = NOMINAL;

        # we usually want to bond a keyword strongly to what immediately
        # follows, rather than leaving it stranded at the end of a line
        $right_bond_strength{'k'} = STRONG;

        $left_bond_strength{'G'}  = NOMINAL;
        $right_bond_strength{'G'} = STRONG;

        # it is very good to break AFTER various assignment operators
        my @assignment = qw(
          = **= += *= &= <<= &&=
          -= /= |= >>= ||=
          .= %= ^=
          x=
        );
        @left_bond_strength{@assignment}  = (STRONG) x scalar(@assignment);
        @right_bond_strength{@assignment} =
          ( 0.4 * WEAK + 0.6 * VERY_WEAK ) x scalar(@assignment);

        $left_bond_strength{';'}  = VERY_STRONG;
        $right_bond_strength{';'} = VERY_WEAK;
        $left_bond_strength{'f'}  = VERY_STRONG;

        # make right strength of for ';' a little less than '='
        # to make for contents break after the ';' to avoid this:
        # 	for ( $j = $number_of_fields - 1 ; $j < $item_count ; $j +=
        # 	  $number_of_fields )
        $right_bond_strength{'f'} = 0.3 * WEAK + 0.7 * VERY_WEAK;

        # break BEFORE these so that they start a continuation line:
        $right_bond_strength{'&&'} = NOMINAL;
        $left_bond_strength{'&&'}  = 0.5 * WEAK + 0.5 * VERY_WEAK;
        $right_bond_strength{'||'} = NOMINAL;
        $left_bond_strength{'||'}  = 0.4 * WEAK + 0.6 * VERY_WEAK;

        # break after a colon
        # The strengths of these should be somewhere between 
        # an '=' and a quote (NOMINAL),
        # make strength of ':' slightly less than '?' to help
        # break long chains of ? : after the colons
        $right_bond_strength{':'} = 0.6 * WEAK + 0.4 * NOMINAL;
        $left_bond_strength{':'}  = NO_BREAK;
        $right_bond_strength{'?'} = 0.4 * WEAK + 0.6 * NOMINAL;
        $left_bond_strength{'?'}  = NO_BREAK;

        $left_bond_strength{','}  = VERY_STRONG;
        $right_bond_strength{','} = VERY_WEAK;
    }

    # patch-its always ok to break at end of line
    $nobreak_to_go[$max_index_to_go] = 0;

    # adding a small 'bias' to strengths is a simple way to make a line
    # break at the first of a sequence of identical terms.  For example,
    # to force long string of conditional operators to break with 
    # each line ending in a ':', we can add a small number to the bond
    # strength of each ':'
    my $colon_bias = 0;
    my $code_bias  = -.01;

    # preliminary loop to compute bond strengths
    for ( my $i = 0 ; $i <= $max_index_to_go ; $i++ ) {
        my $type       = $types_to_go[$i];
        my $token      = $tokens_to_go[$i];
        my $block_type = $block_type_to_go[$i];

        my $i_next     = $i + 1;
        my $next_type  = $types_to_go[$i_next];
        my $next_token = $tokens_to_go[$i_next];
        my $total_nesting_depth = $nesting_depth_to_go[$i_next];

        my $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );

        my $next_nonblank_type  = $types_to_go[$i_next_nonblank];
        my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];

        # Some token chemistry...  The decision about where to break a
        # line depends upon a "bond strength" between tokens.  The LOWER
        # the bond strength, the MORE likely a break.  The strength
        # values are based on trial-and-error, and need to be tweaked
        # occasionally to get desired results.  Things to keep in mind
        # are:
        # 	1. relative strengths are important
        #	2. each indentation level adds one unit of bond strength
        #	3. a value of NO_BREAK makes an unbreakable bond
        # We are computing the strength of the bond between the current
        # token and the NEXT token.
        my $bond_str = VERY_STRONG;    # a default, high strength

        #---------------------------------------------------------------
        # section 1:
        # use minimum of left and right bond strengths if defined;
        # digraphs and trigraphs like to break on their left
        #---------------------------------------------------------------
        my $bsr = $right_bond_strength{$type};

        if ( !defined($bsr) ) {

            if ( $is_digraph{$type} || $is_trigraph{$type} ) {
                $bsr = STRONG;
            }
            else {
                $bsr = VERY_STRONG;
            }
        }

        if ( $token =~ /^(and|or)$/ ) {
            $bsr = NOMINAL;
        }
        elsif ( $token =~ /^(ne|eq)$/ ) {
            $bsr = NOMINAL;
        }
        my $bsl = $left_bond_strength{$next_nonblank_type};

        if ( !defined($bsl) ) {

            if ( $is_digraph{$next_nonblank_type}
              || $is_trigraph{$next_nonblank_type} )
            {
                $bsl = WEAK;
            }
            else {
                $bsl = VERY_STRONG;
            }
        }

        if ( $next_nonblank_token =~ /^(and|or)$/ ) {
            $bsl = VERY_WEAK;
        }
        elsif ( $next_nonblank_token =~ /^(ne|eq)$/ ) {

            #$bsl = WEAK;
            $bsl = NOMINAL;
        }
        elsif ( $next_nonblank_token =~ /^(lt|gt|le|ge)$/ ) {
            $bsl = 0.9 * NOMINAL + 0.1 * STRONG;
        }

        # Note: it might seem that we would want to keep a NO_BREAK if
        # either token has this value.  This didn't work, because in an
        # arrow list, it prevents the comma from separating from the
        # following bare word (which is probably quoted by its arrow).
        # So necessary NO_BREAK's have to be handled as special cases
        # in the final section.
        $bond_str = ( $bsr < $bsl ) ? $bsr : $bsl;
        my $bond_str_1 = $bond_str;

        #---------------------------------------------------------------
        # section 2:
        # special cases
        #---------------------------------------------------------------
        # good to break before 'if', 'unless', etc
        if ( $if_brace_follower_pattern
          && ( $next_nonblank_token =~ /$if_brace_follower_pattern/ ) )
        {
            $bond_str = VERY_WEAK;
        }

        # keywords like 'unless' 'if' make good breaks
        if ( $do_follower_pattern && ( $next_nonblank_type eq 'k' )
          && ( $next_nonblank_token =~ /$do_follower_pattern/ ) )
        {
            $bond_str = VERY_WEAK / 1.05;
        }

        # allow long lines before final { in an if statement, as in:
        #    if (..........
        #      ..........)
        #    {
        #
        # Otherwise, the line before the { tends to be too short.
        if ( ( $type eq ')' ) && ( $next_nonblank_type eq '{' ) ) {
            $bond_str = VERY_WEAK;
        }

        if ( ( $type eq '(' ) && ( $next_nonblank_type eq '{' ) ) {
            $bond_str = NOMINAL;
        }

        # break on something like '} (', but keep this stronger than a ','
        # example is in 'howe.pl'
        if ( ( $type =~ /^[R\}]$/ ) && ( $next_nonblank_type eq '(' ) ) {
            $bond_str = 0.8 * VERY_WEAK + 0.2 * WEAK;
        }

        # in long ?: conditionals, bias toward just one set per line (colon.t)
        if ( $type eq ':' ) {
            $bond_str   += $colon_bias;
            $colon_bias += 0.0001;
        }

        # keep matrix and hash indices together
        # but make them a little below STRONG to allow breaking open
        # something like {'some-word'}{'some-very-long-word'} at the }{
        # (bracebrk.t)
        if ( ( $type =~ /^[\]R]$/ ) && ( $next_nonblank_type =~ /^[\[L]$/ ) ) {
            $bond_str = 0.9 * STRONG + 0.1 * NOMINAL;
        }

        # increase strength to the point where a break in the following
        # will be after the opening paren rather than at the arrow:
        #    $a->$b($c);
        if ( ( $type eq 'i' ) && ( $next_nonblank_type eq 'i' )
          && ( $next_nonblank_token =~ /^->/ ) )
        {
            $bond_str = 1.5 * STRONG;
        }

        # good to break after end of code blocks
        if ( $type eq '}' && $block_type ) {

            $bond_str = 0.5 * WEAK + 0.5 * VERY_WEAK + $code_bias;
            $code_bias += 0.0001;
        }

        if ( $type eq 'k' ) {

            # allow certain control keywords to stand out
            if ( ( $next_nonblank_type eq 'k' )
              && ( $token =~ /^(last|next|redo|return)$/ ) )
            {
                $bond_str = 0.45 * WEAK + 0.55 * VERY_WEAK;
            }

            # Don't break after keyword my.  This is a quick fix for a
            # rare problem. An example is this line from file Container.pm:
# foreach my $question( Debian::DebConf::ConfigDb::gettree( $this->{'question'} ) )

            if ( $token eq 'my' ) {
                $bond_str = NO_BREAK;
            }
        }

        # try not to break before a comma-arrow
        if ( $next_nonblank_type eq '=>' ) {
            if ( $bond_str < STRONG ) { $bond_str = STRONG }
        }

        if ( $type =~ /^[wCU]$/ ) {

            # use strict requires that bare word and => not be separated
            if ( $next_nonblank_type eq '=>' ) {
                $bond_str = NO_BREAK;
            }

            # keep label and colon together
            #elsif ( $next_nonblank_type eq ':' ) {
            #    $bond_str = NO_BREAK;
            #}
        }

        # use strict requires that bare word within braces not start new line
        if ( $type eq 'L' ) {

            if ( $next_nonblank_type eq 'w' ) {
                $bond_str = NO_BREAK;
            }
        }

        if ( $type eq 'w' ) {

            if ( $next_nonblank_type eq 'R' ) {
                $bond_str = NO_BREAK;
            }
        }

        # in fact, use strict hates bare words on any new line.  For example,
        # a break before the underscore here provokes the wrath of use strict:
        # 	if ( -r $fn && ( -s _ || $AllowZeroFilesize)) {
        if ( $type eq 'F' ) {
            $bond_str = NO_BREAK;
        }

        # use strict does not allow separating type info from trailing { }
        # testfile is readmail.pl
        if ( $type =~ /^[ti]$/ ) {

            if ( $next_nonblank_type eq 'L' ) {
                $bond_str = NO_BREAK;
            }
        }

        # Never break before a ? or / because perl can mistake it for
        # the start of a pattern in some cases.
        # ( prnterr3.t and print2.t )
        if ( $next_nonblank_type =~ /^[\/\?]$/ ) {
            $bond_str = NO_BREAK;
        }

        # Never break around a possible file handle
        if ( ( $type eq 'Z' ) || ( $next_nonblank_type eq 'Z' ) ) {
            $bond_str = NO_BREAK;
        }

        # patch to put cuddled elses back together when on multiple
        # lines, as in:	} \n else \n { \n
        if ( $rOpts->{'cuddled-else'} ) {

            if ( ( $token eq 'else' ) && ( $next_nonblank_type eq '{' )
              || ( $type eq '}' ) && ( $next_nonblank_token eq 'else' ) )
            {
                $bond_str = NO_BREAK;
            }
        }

        # keep '}' together with ';'
        if ( ( $token eq '}' ) && ( $next_nonblank_type eq ';' ) ) {
            $bond_str = NO_BREAK;
        }

        # impossible break
        if ( ( $type eq 'w' ) && ( $next_nonblank_type eq '(' ) ) {
            $bond_str = NO_BREAK;
        }

        #---------------------------------------------------------------
        # section 3:
        # now take nesting depth into account
        #---------------------------------------------------------------
        # final strength incorporates the bond strength and nesting depth
        my $strength;

        if ( defined($bond_str) && !$nobreak_to_go[$i] ) {
            if ( $total_nesting_depth > 0 ) {
                $strength = $bond_str + $total_nesting_depth;
            }
            else {
                $strength = $bond_str;
            }
        }
        else {
            $strength = NO_BREAK;
        }

        # always break after side comment
        if ( $type eq '#' ) { $strength = 0 }

        $bond_strength_to_go[$i] = $strength;

        if ( $FORMATTER_DEBUG_FLAG{BOND} ) {
            my $str = substr( $token, 0, 15 );
            $str .= ' ' x ( 16 - length($str) );
            print
"BOND:  i=$i $str $type $next_nonblank_type depth=$total_nesting_depth strength=$bond_str_1 -> $bond_str -> $strength \n";
        }
    }
}

sub pad_array_to_go {

    # Extend the token arrays of the current batch by two empty tokens
    # of type 'b' (blank), and give the first of them a nesting depth.
    # This lets scan_list and set_bond_strengths look one or two tokens
    # past $max_index_to_go without special end-of-batch code.
    my $i_last = $max_index_to_go;
    my $i_pad  = $i_last + 1;

    @tokens_to_go[ $i_pad, $i_pad + 1 ] = ( '', '' );
    @types_to_go[ $i_pad, $i_pad + 1 ]  = ( 'b', 'b' );

    # start from the depth recorded for the last real token ...
    $nesting_depth_to_go[$i_pad] = $nesting_depth_to_go[$i_last];

    # ... then step it by one if that token opens or closes a container
    my $last_type = $types_to_go[$i_last];

    if ( $last_type =~ /^[L\{\(\[]$/ ) {
        $nesting_depth_to_go[$i_pad] += 1;
    }
    elsif ( $last_type =~ /^[R\}\)\]]$/ ) {

        if ( $nesting_depth_to_go[$i_last] > 0 ) {
            $nesting_depth_to_go[$i_pad] -= 1;
        }
        elsif ( !get_saw_brace_error() ) {

            # shouldn't happen: a closing token at depth <= 0 while
            # get_saw_brace_error() is still false
            warning(
"Program bug in scan_list: hit nesting error which should have been caught\n"
            );
            report_definite_bug();
        }
    }
}

sub scan_list {

    # Mark breakpoints for any lists that can be vertically aligned.
    # The output of this routine is stored in the array
    # @forced_breakpoint_to_go, which is used to set final breakpoints.
    #
    # Takes no arguments; it works on the current batch of tokens
    # (indexes 0 .. $max_index_to_go of the *_to_go arrays).  The tokens
    # are scanned once, left to right, and the nesting depth following
    # each nonblank token is compared with the depth following the
    # previous nonblank token:
    #   - a deeper value starts fresh bookkeeping for the new depth;
    #   - a shallower value closes the list at the old depth and passes
    #     its commas to set_list_breakpoints (unless alignment has been
    #     ruled out for that depth);
    #   - commas seen in between are recorded, per depth, in @comma_index.
    # Breaks are requested with set_forced_breakpoint() and may be
    # withdrawn again with undo_forced_breakpoint_stack().
    #
    # @dont_align, @interrupted_list and @want_comma_break are not
    # declared in this routine; the entries for the starting depth are
    # only initialized below if they are still undefined.
    #
    # NOTE: the loop looks ahead at $types_to_go[$i+1] and [$i+2] and at
    # $nesting_depth_to_go[$i+1], so the arrays presumably must already
    # have been extended past $max_index_to_go (see pad_array_to_go)
    # -- verify at the caller.

    my $starting_depth = $nesting_depth_to_go[0];
    my $current_depth  = $starting_depth;

    # $comma_index[$depth][$n] is the token index of comma number $n of
    # the list currently open at $depth
    my @comma_index = ();

    # token index which opened the container at each depth; -1 marks the
    # starting depth, whose opening token is not in this batch
    my @opening_structure_index_stack = ();

    # value of $forced_breakpoint_count when each depth was entered
    my @breakpoint_stack = ();
    my $starting_breakpoint_count = $forced_breakpoint_count;

    my $depth;
    my $last_nonblank_type = ';';

    # $max_length[$depth][0] and [1] hold the longest item seen so far at
    # even and odd item positions, respectively
    my @max_length = ();

    # number of commas recorded so far for the list at the current depth
    my $item_count = 0;

    # per-depth saved values, indexed by nesting depth
    my @item_count_stack           = ();
    my @identifier_count_stack     = ();
    my @old_breakpoint_count_stack = ();
    my @breakpoint_undo_stack      = ();

    # running count of tokens flagged in @old_breakpoint_to_go
    my $old_breakpoint_count = 0;

    # set when any ',' or '=>' has been seen in this batch
    my $saw_comma = 0;
    $item_count_stack[$current_depth]       = 0;
    $identifier_count_stack[$current_depth] = 0;

    if ( !defined( $dont_align[$current_depth] ) ) {
        $dont_align[$current_depth] = 0;
    }
    if ( !defined( $interrupted_list[$current_depth] ) ) {
        $interrupted_list[$current_depth] = 0;
    }

    $opening_structure_index_stack[$current_depth] = -1;
    $breakpoint_stack[$current_depth] = $forced_breakpoint_count;

    my $block_type = ' ';
    my $type       = ';';
    my $token      = ';';
    my $i = -1;

    # loop over all tokens in this batch
    while ( ++$i <= $max_index_to_go ) {
        $old_breakpoint_count++ if ( $old_breakpoint_to_go[$i] );

        # $type still holds the previous token's type at this point
        if ( $type ne 'b' ) {
            $last_nonblank_type = $type;
        }
        $type       = $types_to_go[$i];
        $block_type = $block_type_to_go[$i];
        $token      = $tokens_to_go[$i];
        my $next_type          = $types_to_go[ $i + 1 ];
        my $i_next_nonblank    = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
        my $next_nonblank_type = $types_to_go[$i_next_nonblank];
        next if ( $type eq 'b' );

        # depth in effect just after token $i
        $depth = $nesting_depth_to_go[ $i + 1 ];

        #print "LIST: i=$i type=$type  tok=$token  depth=$depth\n";

        # prepare for a new list when depth increases
        # token $i is a '(','{', or '['
        if ( $depth > $current_depth ) {

            # park the outer list's comma count until we return to it
            $item_count_stack[$current_depth] = $item_count;
            $item_count = 0;
            $item_count_stack[$depth]              = 0;
            $identifier_count_stack[$depth]        = 0;
            $want_comma_break[$depth]              = 0;
            $opening_structure_index_stack[$depth] = $i;
            $interrupted_list[$depth]              = 0;
            $breakpoint_stack[$depth]              = $forced_breakpoint_count;
            $old_breakpoint_count_stack[$depth]    = $old_breakpoint_count;
            $breakpoint_undo_stack[$depth] = $forced_breakpoint_undo_count;

            # Not all lists of values should be vertically aligned..
            $dont_align[$depth] =

              # code blocks are handled at a higher level
              ( $block_type ne "" )

              # certain paren lists
              || ( $type eq '(' ) && (

              # it does not usually look good to align a list of
              # identifiers in a parameter list, as in:
              #    my($var1, $var2, ...)
              # (This test should probably be refined, for now I'm just
              # testing for any keyword)
              ( $last_nonblank_type =~ /^[k]$/ )

              # a trailing '(' usually indicates a non-list
              || ( $next_nonblank_type =~ /^[\(]$/ ) );
        }

        # finish off any old list when depth decreases
        # token $i is a ')','}', or ']'
        elsif ( $depth < $current_depth ) {

            my $i_opening = $opening_structure_index_stack[$current_depth];
            my $saw_opening_structure =
              ( defined($i_opening) && $i_opening >= 0 );

            # We've set breaks after all comma-arrows.  Now we have to
            # undo them if this can be a one-line block
            # (the only breakpoints set will be due to comma-arrows)
            if ( !$rOpts->{'break-after-comma-arrows'}

              # if the opening structure is in this batch
              && $saw_opening_structure

              # and on the same old line
              && ( $old_breakpoint_count_stack[$current_depth] ==
              $old_breakpoint_count )

              # and we made some breakpoints between the opening and closing
              && ( $breakpoint_undo_stack[$current_depth] <
              $forced_breakpoint_undo_count )

              # and this block is short enough to fit on one line
              && ( $current_depth * $rOpts->{'indent-columns'} +
              $rOpts->{'continuation-indentation'} + $lengths_to_go[$i] -
              $lengths_to_go[$i_opening] + 1 < $rOpts->{'maximum-line-length'} )

              #
              )
            {
                undo_forced_breakpoint_stack(
                  $breakpoint_undo_stack[$current_depth] );
            }

            if ( $item_count > 0 ) {
                set_list_breakpoints(
                  $i_opening,
                  $i,
                  $item_count,
                  $identifier_count_stack[$current_depth],
                  $comma_index[$current_depth],
                  $interrupted_list[$current_depth],
                  $max_length[$current_depth] )
                  unless $dont_align[$current_depth];
            }

            # if we created breaks between the opening and closing
            # brace, then put breaks within them to expose their contents
            if ( !$block_type && ( ( $breakpoint_stack[$current_depth] !=
              $forced_breakpoint_count ) || !$saw_opening_structure ) )
            {

                # break after opening structure
                if ( $starting_depth <= $current_depth ) {
                    set_forced_breakpoint($i_opening);

                    # break before opening structure if preceded by another
                    # closing structure and a comma.  This is normally
                    # done by the previous closing brace, but not
                    # if it was a one-line block.
                    if ( $i_opening > 2 ) {
                        my $i_prev =
                          ( $types_to_go[ $i_opening - 1 ] eq 'b' ) ?
                          $i_opening - 2 : $i_opening - 1;

                        if ( $types_to_go[$i_prev] eq ','
                          && $types_to_go[ $i_prev - 1 ] =~ /^[\)\}]$/ )
                        {
                            set_forced_breakpoint($i_prev);
                        }
                    }
                }

                # break before } and ]
                set_forced_breakpoint( $i - 1 )

                  # but not a )
                  if ( ( $type ne ')' )

                  # unless preceded by a ','
                  || ( $last_nonblank_type eq ',' )

                  # or followed by a ';' and there are some breaks
                  || ( ( $next_nonblank_type eq ';' )
                  && ( $breakpoint_stack[$current_depth] !=
                  $forced_breakpoint_count ) ) );

                # break after comma following closing structure
                if ( $next_type eq ',' ) {
                    set_forced_breakpoint( $i + 1 );
                }

                # break before an '=' following closing structure
                if ( $next_nonblank_type eq '='
                  && ( $breakpoint_stack[$current_depth] !=
                  $forced_breakpoint_count ) )
                {
                    set_forced_breakpoint($i);
                }
            }

            # we have dropped below the depth at which this batch began,
            # so there is no saved state for this depth; start it fresh
            if ( $depth < $starting_depth ) {
                $item_count_stack[$depth]       = 0;
                $identifier_count_stack[$depth] = 0;
                $breakpoint_stack[$depth]       = $starting_breakpoint_count;
            }

            # resume counting commas of the enclosing list
            $item_count = $item_count_stack[$depth];
        }

        $current_depth = $depth;

        # handle comma-arrow
        if ( $type eq '=>' ) {
            $want_comma_break[$depth] = 1;
            $saw_comma = 1;
            next;
        }

        # we aren't in a list if we see any of these
        # note that '=' could be in any of the = operators  ( lextest.t )
        if ( $type =~ /(^[\?\:\;\<\>\~]$)|[=]/ || ( !$want_comma_break[$depth]
          && ( $next_nonblank_type !~ /^[L\[\{]$/ ) && ( $token =~ /^->/ ) )

          #|| ( !$want_comma_break[$depth] && $type eq 'i' && $token =~ /^->/ )
          #|| ( !$want_comma_break[$depth] && $type eq '->' )
          )
        {
            $dont_align[$depth]       = 1;
            $want_comma_break[$depth] = 0;
        }

        # any side comment in a list forces use of old breakpoints
        if ( $type eq '#' ) {
            $interrupted_list[$depth] = 1;
        }

        # now just handle any commas
        next unless ( $type eq ',' );
        $saw_comma = 1;

        # break here if this comma follows a '=>'
        # but not if there is a side comment after the comma
        if ( $want_comma_break[$depth] ) {
            set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' );
            $want_comma_break[$depth] = 0;

            # hack to handle list which mixes '=>'s and ','s:
            # treat any list items so far as a broken list

            # EXPERIMENTAL .. deactivated
            #for ( my $dd = $depth ; $dd >= $starting_depth ; $dd-- ) {
            my $dd = $depth;

            # note: this uses the count saved on the stack for this
            # depth, not the running count
            $item_count = $item_count_stack[$dd];
            if ( $item_count > 0 ) {
                if ( $interrupted_list[$dd] == 0 ) {
                    set_forced_breakpoint( $opening_structure_index_stack[$dd]
                    );
                    $interrupted_list[$dd] = 1;
                }

                my $i_opening = $opening_structure_index_stack[$dd];
                set_list_breakpoints( $i_opening, $i, $item_count,
                  $identifier_count_stack[$dd], $comma_index[$dd],
                  $interrupted_list[$dd],       $max_length[$dd] )
                  unless $dont_align[$dd];
                $item_count_stack[$dd]       = 0;
                $identifier_count_stack[$dd] = 0;
            }

            #}
            $item_count = 0;
            next;
        }

        # skip past these commas if we are not supposed to format them
        next if ( $dont_align[$depth] );

        # break after all commas above starting depth
        if ( $depth < $starting_depth ) {
            set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' );
            next;
        }

        # add this comma to the list..
        if ( $item_count == 0 ) {

            # but do not form a list with no opening structure
            # for example:
            # print $a, $b, $c;
            if ( ( $opening_structure_index_stack[$depth] < 0 )
              && ( $interrupted_list[$depth] == 0 ) )
            {
                $dont_align[$depth] = 1;
                next;
            }

            $max_length[$depth][0] = 0;
            $max_length[$depth][1] = 0;
        }

        # save max length of list items to calculate page layout
        # (an item is measured from the previous comma, or from the
        # opening token for the first item, through this comma)
        my $i_prev =
          ( $item_count > 0 ) ? $comma_index[$depth][ $item_count - 1 ] :
          $opening_structure_index_stack[$depth];
        my $length = $lengths_to_go[$i];
        if ( defined($i_prev) && $i_prev >= 0 ) {
            $length -= $lengths_to_go[$i_prev];
        }

        if ( $length > $max_length[$depth][ $item_count % 2 ] ) {
            $max_length[$depth][ $item_count % 2 ] = $length;
        }

        $comma_index[$depth][ $item_count++ ] = $i;

        # count items whose last token is of type 'i', 'R' or ']'
        if ( $last_nonblank_type =~ /^[iR\]]$/ ) {
            $identifier_count_stack[$depth]++;
        }
    }

    # end of loop over all tokens in this batch

    # Mark any pending list as having been interrupted
    $interrupted_list[$current_depth] = 1;

    # note: $i has run off the end of the batch here; it equals
    # $max_index_to_go + 1
    my $i_opening = $opening_structure_index_stack[$current_depth];
    set_list_breakpoints(
      $i_opening,  $i,
      $item_count, $identifier_count_stack[$current_depth],
      $comma_index[$current_depth], $interrupted_list[$current_depth],
      $max_length[$current_depth] )
      unless $dont_align[$current_depth];

    # make any needed breaks at opening braces
    for ( my $dd = $depth ; $dd >= $starting_depth ; $dd-- ) {
        my $i_opening = $opening_structure_index_stack[$dd];
        if ( defined($i_opening) && $dont_align[$dd] == 0 ) {

#print "SCANLIST: dd=$dd iopen=$i_opening tok=$tokens_to_go[$i_opening] items=$item_count_stack[$dd] want=$want_comma_break[$dd] bpstk=$breakpoint_stack[$dd]
            #count=$forced_breakpoint_count\n";
            set_forced_breakpoint($i_opening)

              # don't break after opening paren if it doesn't look like a list
              # and try not to leave an isolated '('
              #unless ( ( $tokens_to_go[$i_opening] eq '(' ) && (!$saw_comma ));
              unless ( ( $tokens_to_go[$i_opening] eq '(' )
              && ( !$saw_comma || $i_opening == 0 ) );
        }
    }
}

sub set_list_breakpoints {

    my ( $i_opening_paren, $i_closing_paren, $item_count, $identifier_count,
      $rcomma_index, $interrupted, $rmax_length ) = @_;

    return if ( $item_count < 1 );

    my $i_first_comma = $$rcomma_index[0];
    my $i_last_comma  = $$rcomma_index[ $item_count - 1 ];

    #my ( $a, $b, $c ) = caller();
    #print "LIST: in set_list $a $c interupt=$interrupted count=$item_count
    #i_first = $i_first_comma  i_last=$i_last_comma max=$max_index_to_go\n";

    # handle list which is forced to use old breakpoints because it
    # was interrupted by side comments or blank lines.
    if ( $interrupted || !defined($i_opening_paren) ) {
        write_logfile_entry("list broken: using old breakpoints\n")
          unless ( $item_count < 6 );
        copy_old_breakpoints( $i_first_comma, $i_last_comma );
        return;
    }

    return if ( $i_first_comma < 1 );
    if ( $i_last_comma >= $max_index_to_go ) {
        $i_last_comma = $$rcomma_index[ --$item_count - 1 ];
        return if ( $item_count <= 2 );
    }

    # now we have to make a distinction between the comma count and item count,
    # because the item count will be one greater than the comma count if
    # the last item is not terminated with a comma
    my $comma_count = $item_count;
    my $i_b         =
      ( $types_to_go[ $i_last_comma + 1 ] eq 'b' ) ? $i_last_comma + 1 :
      $i_last_comma;
    my $i_e =
      ( $types_to_go[ $i_closing_paren - 1 ] eq 'b' ) ? $i_closing_paren - 2 :
      $i_closing_paren - 1;
    my $i_effective_last_comma = $i_last_comma;

    my $last_item_length = $lengths_to_go[$i_e] - $lengths_to_go[$i_b];
    if ( $last_item_length > 0 ) {

        # add 2 to length because other lengths include a comma and a blank
        $last_item_length += 2;
        my $i_odd = $item_count % 2;
        if ( $last_item_length > $$rmax_length[$i_odd] ) {
            $$rmax_length[$i_odd] = $last_item_length;
        }
        $item_count++;
        $i_effective_last_comma = $i_e + 1;
    }

    # leading whitespace before line starts
    my $leader_length =
      $levels_to_go[$i_first_comma] * $rOpts->{'indent-columns'} +
      $rOpts->{'continuation-indentation'};

    # Number of free columns across the page width for laying out tables
    my $columns = $rOpts->{'maximum-line-length'} - $leader_length;

    # Patch: the vertical formatter does not line up lines whose lengths
    # exactly equal the available line length because of allowances
    # that must be made for side comments.  Therefore, the number of
    # available columns is reduced by 1 character.
    $columns -= 1;

    # the default is to use an even number of fields because
    # it's impossible to know which lists are really hash data
    my $pair_width       = ( $$rmax_length[0] + $$rmax_length[1] );
    my $max_pairs        = int( $columns / $pair_width );
    my $number_of_fields = $max_pairs * 2;

    # try for one column if two won't work
    my $max_width =
      ( $$rmax_length[0] > $$rmax_length[1] ) ? $$rmax_length[0] :
      $$rmax_length[1];
    if ( $number_of_fields <= 0 ) {
        $number_of_fields = int( $columns / $max_width );
    }

#print "LIST: max_pairs=$max_pairs  w0=${$rmax_length}[0]  w1=${$rmax_length}[1] width=$pair_width\n";

    # The user can place an upper bound on the number of fields,
    # which can be useful for doing maintenance on tables
    if ( $number_of_fields > $rOpts->{'maximum-fields-per-table'} ) {
        $number_of_fields = int $rOpts->{'maximum-fields-per-table'};
    }

    return if ( $number_of_fields <= 0 );

    my $formatted_lines = $item_count / ($number_of_fields);
    if ( $formatted_lines != int $formatted_lines ) {
        $formatted_lines = 1 + int $formatted_lines;
    }
    my $columns_per_line =
      ( int $number_of_fields / 2 ) * $pair_width + ( $number_of_fields % 2 ) *
      $$rmax_length[1];

    my $formatted_columns;

    if ( $number_of_fields > 1 ) {
        $formatted_columns =
          ( $pair_width * ( int( $item_count / 2 ) ) + ( $item_count % 2 ) *
          $$rmax_length[0] );
    }
    else {
        $formatted_columns = $max_width * $item_count;
    }

    my $packed_columns =
      ( $lengths_to_go[ $i_effective_last_comma + 1 ] -
      $lengths_to_go[$i_opening_paren] );

    my $packed_lines   = 1 + int( $packed_columns / $columns );
    my $unused_columns = $formatted_columns - $packed_columns;

    # Give up and use old breakpoints if this is a tiny table or it would
    # be too spaced.  It seems that the more packed lines we have, the
    # sparser the list that can be allowed and still look ok.
    my $max_allowed_sparsity = ( $item_count < 4 ) ? 0.1 :

      ( $packed_lines == 1 ) ? 0.15 : ( $packed_lines == 2 ) ? 0.25 : 0.7;

    # Require dense lists if mostly identifiers or few items
    if ( ( $identifier_count > 0.4 * $item_count ) || $item_count < 6 ) {
        $max_allowed_sparsity /= 2;
    }

    if ( $FORMATTER_DEBUG_FLAG{SPARSE} ) {
        my $sparsity = ($unused_columns) / ($formatted_columns);
        print
"SPARSE:cols=$columns commas=$comma_count items:$item_count ids=$identifier_count max_pairs=$max_pairs  pairwidth=$pair_width fields=$number_of_fields lines packed: $packed_lines packed_cols=$packed_columns fmtd:$formatted_lines cols /line:$columns_per_line  unused:$unused_columns fmtd:$formatted_columns sparsity=$sparsity allow=$max_allowed_sparsity\n";

    }

    if ( ( $formatted_lines < 3 && $packed_lines < $formatted_lines )
      || ( $formatted_lines < 2 )
      || ( $unused_columns > $max_allowed_sparsity * $formatted_columns ) )
    {

        if ( $packed_lines > 2 ) {
            write_logfile_entry("List sparse: using old breakpoints\n");
            copy_old_breakpoints( $i_first_comma, $i_last_comma );
        }
        return;
    }

    # looks ok, so go ahead and format the table
    write_logfile_entry(
      "List: auto formatting with $number_of_fields fields/row\n");
    my $j;
    for ( $j = $number_of_fields - 1 ; $j < $comma_count ;
      $j += $number_of_fields )
    {
        my $i = $$rcomma_index[$j];
        set_forced_breakpoint($i);
    }

    # Save list diagnostics during development
    if ( $FORMATTER_DEBUG_FLAG{LIST} ) {
        my $pkl = sprintf( "%.1f", $packed_lines );
        my $fml = sprintf( "%.1f", $formatted_lines );
        write_diagnostics(<<"EOM");
List:items=$item_count commas=$comma_count ids=$identifier_count cols=$columns pairs=$max_pairs fmt_lines=$fml pkd_lines=$pkl brks=$forced_breakpoint_count
  fmt_cols=$formatted_columns pk_cols=$packed_columns unusd=$unused_columns 
EOM
    }

    return;
}

sub copy_old_breakpoints {

    # Set a forced breakpoint at every token index in the inclusive range
    # $i_first_comma .. $i_last_comma whose @old_breakpoint_to_go flag
    # is true.
    my ( $i_first_comma, $i_last_comma ) = @_;
    foreach my $index ( $i_first_comma .. $i_last_comma ) {
        next unless ( $old_breakpoint_to_go[$index] );
        set_forced_breakpoint($index);
    }
}

sub set_nobreaks {

    # Set the @nobreak_to_go flag for every token index from $i through
    # $j, inclusive.  The request is honored only if
    # 0 <= $i <= $j <= $max_index_to_go; otherwise nothing is changed.
    my ( $i, $j ) = @_;
    my $range_is_valid = ( $i >= 0 && $i <= $j && $j <= $max_index_to_go );

    # shouldn't happen; non-critical error which is only reported when
    # the NOBREAK debug flag is on
    if ( !$range_is_valid ) {
        if ( $FORMATTER_DEBUG_FLAG{NOBREAK} ) {
            my ( $a, $b, $c ) = caller();
            print(
              "NOBREAK ERROR: from $a $c with i=$i j=$j max=$max_index_to_go\n"
            );
        }
        return;
    }

    if ( $FORMATTER_DEBUG_FLAG{NOBREAK} ) {
        my ( $a, $b, $c ) = caller();
        print(
"NOBREAK: forced_breakpoint $forced_breakpoint_count from $a $c with i=$i max=$max_index_to_go type=$types_to_go[$i]\n"
        );
    }

    foreach my $index ( $i .. $j ) {
        $nobreak_to_go[$index] = 1;
    }
    return;
}

sub set_forced_breakpoint {

    # Request a forced line break at token index $i.  Indexes outside
    # 0 .. $max_index_to_go are silently ignored.  Each break which is
    # actually set is also pushed onto the undo stack.
    my ($i) = @_;
    return unless ( $i >= 0 && $i <= $max_index_to_go );

    # a break requested at a blank is recorded on the index before it
    my $i_nonblank = $i;
    if ( $types_to_go[$i] eq 'b' ) { $i_nonblank = $i - 1 }

    if ( $FORMATTER_DEBUG_FLAG{FORCE} ) {
        my ( $a, $b, $c ) = caller();
        print
"FORCE forced_breakpoint $forced_breakpoint_count from $a $c with i=$i max=$max_index_to_go type=$types_to_go[$i] nobr=$nobreak_to_go[$i_nonblank]\n";
    }

    # The break is set only if the target index exists and its nobreak
    # flag is numerically zero.
    # NOTE(review): an earlier comment here said that results looked
    # better without the nobreak test (see break.t), which does not match
    # what the code does -- confirm which behavior is intended.
    return if ( $i_nonblank < 0 );
    return unless ( $nobreak_to_go[$i_nonblank] == 0 );

    $forced_breakpoint_to_go[$i_nonblank] = 1;
    $forced_breakpoint_count++;

    # keep track of the highest index holding a forced break
    $index_max_forced_break = $i_nonblank
      if ( $i_nonblank > $index_max_forced_break );

    # remember this break so that it can be undone later
    $forced_breakpoint_undo_stack[$forced_breakpoint_undo_count] = $i_nonblank;
    $forced_breakpoint_undo_count++;
    return;
}

sub clear_breakpoint_undo_stack {

    # Empty the forced-breakpoint undo stack by resetting its entry
    # count.  The entries in @forced_breakpoint_undo_stack themselves are
    # not erased; breakpoint flags which have been set are not changed.
    $forced_breakpoint_undo_count = 0;
}

sub undo_forced_breakpoint_stack {

    # Undo forced breakpoints in the reverse of the order in which they
    # were set, popping the undo stack until only $i_start entries remain.
    # For each popped index the breakpoint flag is cleared and the count
    # of forced breakpoints is decremented.
    #
    # Input:
    #   $i_start - the stack depth to unwind to.  A negative value is a
    #              program error; it is reported and then treated as 0.
    my ($i_start) = @_;
    if ( $i_start < 0 ) {

        # Issue the warning BEFORE clamping the value; otherwise the
        # message would always show i=0 instead of the bad value received.
        my ( $package, $line ) = ( caller() )[ 0, 2 ];
        warning(
"Program Bug: undo_forced_breakpoint_stack from $package $line has i=$i_start "
        );
        $i_start = 0;
    }

    while ( $forced_breakpoint_undo_count > $i_start ) {
        my $i =
          $forced_breakpoint_undo_stack[ --$forced_breakpoint_undo_count ];
        if ( $i >= 0 && $i <= $max_index_to_go ) {
            $forced_breakpoint_to_go[$i] = 0;
            $forced_breakpoint_count--;

            if ( $FORMATTER_DEBUG_FLAG{UNDOBP} ) {
                my ( $package, $line ) = ( caller() )[ 0, 2 ];
                print(
"UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $package $line max=$max_index_to_go\n"
                );
            }

        }

        # shouldn't happen, but not a critical error; the stale entry is
        # simply discarded
        else {
            if ( $FORMATTER_DEBUG_FLAG{UNDOBP} ) {
                my ( $package, $line ) = ( caller() )[ 0, 2 ];

                # terminated with a newline like the other debug messages
                print(
"Program Bug: undo_forced_breakpoint from $package $line has i=$i but max=$max_index_to_go\n"
                );
            }
        }
    }
}

sub recombine_breakpoints {

    # Occasionally small line fragments are produced which would look
    # better if they were combined.
    #
    # Input:
    #   $ri_first - reference to the array of first token indexes, one
    #               per line
    #   $ri_last  - reference to the array of last token indexes, one
    #               per line
    # Both arrays are modified in place, and the same two references are
    # returned.
    #
    # Method: on each pass every adjacent pair of lines is tested against
    # the rules below.  Of the pairs which survive, the one with the
    # greatest bond strength at the join is merged (the first one found
    # wins a tie).  Another pass is made only if more than one pair
    # survived on the current pass.
    my ( $ri_first, $ri_last ) = @_;
    my $more_to_do = 1;

    my $nmax_last = @$ri_last;
    while ($more_to_do) {
        my $n_best = 0;
        my $bs_best;
        my $nmax = @$ri_last - 1;

        if ( $nmax >= $nmax_last ) {

            # shouldn't happen because splice below decreases nmax on each pass:
            # but i get paranoid sometimes
            die "Program bug-infinite loop in recombine breakpoints\n";
        }
        $nmax_last  = $nmax;
        $more_to_do = 0;

        for my $n ( 1 .. $nmax ) {
            my $if    = $$ri_first[ $n - 1 ];
            my $il    = $$ri_last[$n];
            my $imid  = $$ri_last[ $n - 1 ];
            my $imidr = $$ri_first[$n];

            #----------------------------------------------------------
            # Start of special recombination rules
            # These are ad-hoc rules which have been found to work ok.
            # Skip to next pair to avoid re-combination.  Indexes are:
            #  ---left---- | ---right---
            #  $if   $imid | $imidr   $il
            #----------------------------------------------------------

            # a terminal '{' should stay where it is
            next if ( ( $n == $nmax ) && ( $types_to_go[$imidr] eq '{' ) );

            # an isolated '}' may join with a ';' terminated segment
            if ( $types_to_go[$imid] eq '}' ) {
                next
                  unless ( ( $if == $imid ) && ( $types_to_go[$il] eq ';' ) );
            }

            # an isolated '},' may join with an identifier + ';'
            # this is useful for the class of a 'bless' statement (bless.t)
            if ( $types_to_go[$imid] eq ',' && $types_to_go[$if] eq '}'
              && $types_to_go[$imidr] eq 'i' )
            {
                next
                  unless ( ( $if == ( $imid - 1 ) ) && ( $il == ( $imidr + 1 ) )
                  && ( $types_to_go[$il] eq ';' ) );

                # override breakpoint
                $forced_breakpoint_to_go[$imid] = 0;
            }

            # isolated paren looks bad
            if ( $types_to_go[$imid] eq '(' ) {

                next unless ( $imid == $if );

                # but do not combine with a comma list
                my $commas = grep { $_ eq ',' } @types_to_go[ $imidr .. $il ];

                # $commas is a count, so compare it numerically
                next unless ( $commas == 0 );

                # override breakpoints if no commas
                $forced_breakpoint_to_go[$imid] = 0;
            }

            if ( $types_to_go[$imid] eq ')' ) {

                next unless ( $imid == $if );

                # override breakpoints if no commas
                $forced_breakpoint_to_go[$imid] = 0;
            }

            # keep a terminal colon
            next if ( $types_to_go[$imid] eq ':' );

            # keywords look best at start of lines,
            # but combine things like "1 while"  and "or die"
            next
              if ( ( $types_to_go[$imid] ne 'k' )
              && ( $types_to_go[$imidr] eq 'k' )
              && ( $tokens_to_go[$imidr] ne 'while' ) );

            # make major control keywords stand out, like this one here:
            # (recombine.t)
            next
              if ( ( $types_to_go[$imid] eq 'k' )
              && ( $tokens_to_go[$imid] =~ /^(last|next|redo|return)$/ ) );

            # honor hard breakpoints
            next if ( $forced_breakpoint_to_go[$imid] > 0 );

            # retain '=' at end of line unless the right side is simple
            if ( $types_to_go[$imid] eq '=' ) {

                # The right line "looks like math" if it ends in one of
                # + - / * ) and contains no '->', '=>' or ','.  The '-'
                # must be escaped inside the character class: written
                # as [+-\/...] it forms the range '+' through '/', which
                # also accepts ',' and '.'.
                my $is_math = ( ( $types_to_go[$il] =~ /^[+\-\/\*\)]$/ )

                  # note no '$' in pattern because -> can start long identifier
                  && !grep { $_ =~ /^(->|=>|[\,])/ }
                  @types_to_go[ $imidr .. $il ] );
                next unless (

                  # '=' is followed by a number and looks like math
                  ( $types_to_go[$imidr] eq 'n' && $is_math )

                  # or followed by a scalar and looks like math
                  || ( ( $types_to_go[$imidr] eq 'i' )
                  && ( $tokens_to_go[$imidr] =~ /^\$/ ) && $is_math )

                  # or followed by a single "short" token
                  || ( $il == $imidr
                  && $lengths_to_go[$imidr] - $lengths_to_go[ $imidr - 1 ] <
                  12 )

                  #
                );
            }

            #----------------------------------------------------------
            # end of special recombination rules
            #----------------------------------------------------------

            my $level = $levels_to_go[$if];
            my $leading_spaces =
              $level * $rOpts->{'indent-columns'} +
              $rOpts->{'continuation-indentation'};
            my $lenf   = ( $if == 0 ) ? 0 : $lengths_to_go[ $if - 1 ];
            my $lenl   = $lengths_to_go[$il];
            my $length = $lenl - $lenf;
            my $bs     = $bond_strength_to_go[$imid];

            # combined line cannot be too long
            next
              if ( ( $length + $leading_spaces ) >
              $rOpts->{'maximum-line-length'} );

            # honor no-break's
            next if ( $bs == NO_BREAK );

            # first acceptable pair found on this pass
            if ( !$n_best ) {
                $n_best  = $n;
                $bs_best = $bs;
            }

            # additional acceptable pair: ask for another pass, and
            # prefer this pair only if its bond is strictly stronger
            else {
                $more_to_do++;

                if ( $bs > $bs_best ) {
                    $n_best  = $n;
                    $bs_best = $bs;
                }
            }
        }

        # merge lines $n_best-1 and $n_best by removing the boundary
        # between them from both index arrays
        if ($n_best) {
            splice @$ri_first, $n_best, 1;
            splice @$ri_last, $n_best - 1, 1;
        }
    }
    return ( $ri_first, $ri_last );
}

sub set_continuation_breaks {

    # Define an array of indexes for inserting newline characters to
    # keep the line lengths below the maximum desired length.  There is
    # an implied break after the last token, so it need not be included.
    # We'll break at points where the bond strength is lowest.
    #
    # Takes no arguments; it works on the tokens currently stored in the
    # '..._to_go' arrays.  Returns two array references: the first token
    # index of each output line and the last token index of each output
    # line.
    my @i_first = ();    # the first index to output
    my @i_last  = ();    # the last index to output

    my $level = $levels_to_go[0];
    my $in_continuation = ( $nesting_depth_to_go[0] - $level ) > 0;
    my $leading_spaces  = $level * $rOpts->{'indent-columns'};
    if ($in_continuation) {
        $leading_spaces += $rOpts->{'continuation-indentation'};
    }

    set_bond_strengths();

    # ignore a blank at either end of the token list
    my $imin = 0;
    my $imax = $max_index_to_go;
    if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
    if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
    my $i_begin = $imin;

    my $line_count = 0;

    # each pass of this loop defines one output line starting at $i_begin
    while ( $i_begin <= $imax ) {
        my $lowest_strength = NO_BREAK;

        # cumulative length of everything before this line
        my $starting_sum =
          ( $i_begin == 0 ) ? 0.0 : $lengths_to_go[ $i_begin - 1 ];

        my $i_lowest = -1;
        my $i_test   = -1;

        # loop to find next break point
        for ( $i_test = $i_begin ; $i_test <= $imax ; $i_test++ ) {
            my $type            = $types_to_go[$i_test];
            my $token           = $tokens_to_go[$i_test];
            my $next_type       = $types_to_go[ $i_test + 1 ];
            my $i_next_nonblank =
              ( ( $next_type eq 'b' ) ? $i_test + 2 : $i_test + 1 );
            my $next_nonblank_type = $types_to_go[$i_next_nonblank];
            my $strength           = $bond_strength_to_go[$i_test];
            my $must_break         = 0;

            if (

              # Try to put a break where requested by scan_list
              $forced_breakpoint_to_go[$i_test]

              # break between ) { in a continued line so that the '{' can
              # be outdented
              || ( $line_count && ( $type eq ')' )
              && ( $next_nonblank_type eq '{' ) )

              # There is an implied forced break at a terminal opening brace
              || ( ( $type eq '{' ) && ( $i_test == $imax ) ) )
            {

                # Forced breakpoints must sometimes be overridden because of a
                # side comment causing a NO_BREAK.  It is easier to catch this
                # here than when they are set.
                if ( $strength < NO_BREAK ) {

                    # half of the lowest strength seen so far on this line
                    $strength   = $lowest_strength / 2;
                    $must_break = 1;
                }
            }

            # quit if a break here would put a good terminal token on
            # the next line and we already have a possible break
            if ( !$must_break && ( $next_nonblank_type =~ /^[\;\,]$/ )
              && ( ( $leading_spaces + $lengths_to_go[$i_next_nonblank] -
              $starting_sum ) > $rOpts->{'maximum-line-length'} ) )
            {
                last if ( $i_lowest >= 0 );
            }

            # Avoid a break which would strand a single punctuation
            # token.  For example, we do not want to strand a leading
            # '.' which is followed by a long quoted string.
            if ( !$must_break && ( $i_test == $i_begin ) && ( $i_test < $imax )
              && ( $token eq $type )
              && ( ( $leading_spaces + $lengths_to_go[$i_test] - $starting_sum )
              <= $rOpts->{'maximum-line-length'} ) )
            {
                $i_test++;

                # also step over a blank following the punctuation token
                if ( ( $i_test < $imax ) && ( $next_type eq 'b' ) ) {
                    $i_test++;
                }

                # restart the loop body at the new index; 'redo' does not
                # re-evaluate the loop condition or run the increment
                redo;
            }

            # remember the weakest bond seen so far; on a tie the later
            # index wins
            if ( ( $strength <= $lowest_strength ) && ( $strength < NO_BREAK ) )
            {
                $lowest_strength = $strength;
                $i_lowest        = $i_test;
                last if $must_break;
            }

            my $too_long =
              ( $i_test >= $imax ) ? 1 :
              ( ( $leading_spaces + $lengths_to_go[ $i_test + 1 ] -
              $starting_sum ) > $rOpts->{'maximum-line-length'} );

            if ( $FORMATTER_DEBUG_FLAG{BREAK} ) {
                print
"BREAK: testing i = $i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type leading sp=($leading_spaces) next length = $lengths_to_go[$i_test+1] too_long=$too_long str=$strength\n";
            }

            # allow one extra terminal token after exceeding line length
            # if it would strand this token.
            if ( $rOpts->{'fuzzy-line-length'} && $too_long
              && ( $i_lowest == $i_test ) && ( length($token) > 1 )
              && ( $next_nonblank_type =~ /^[\;\,]$/ ) )
            {
                $too_long = 0;
            }

            last if ( ( $i_test == $imax )    # we're done if no more tokens,
              || ( ( $i_lowest >= 0 )    # or no more space and we have a break
              && $too_long ) );
        }

        if ( $FORMATTER_DEBUG_FLAG{BREAK} ) {
            print "BREAK: best is i = $i_lowest strength = $lowest_strength\n";
        }
        $line_count++;

        # it's always ok to break at imax if no other break was found
        if ( $i_lowest < 0 ) { $i_lowest = $imax }

        # save this line segment, after trimming blanks at the ends
        push( @i_first,
          ( $types_to_go[$i_begin] eq 'b' ) ? $i_begin + 1 : $i_begin );
        push( @i_last,
          ( $types_to_go[$i_lowest] eq 'b' ) ? $i_lowest - 1 : $i_lowest );

        # the next line starts just after the break, skipping a blank
        $i_begin = $i_lowest + 1;

        if ( ( $i_begin <= $imax ) && ( $types_to_go[$i_begin] eq 'b' ) ) {
            $i_begin++;
        }

        # update indentation size; every line after the first gets the
        # continuation indentation
        if ( $i_begin <= $imax ) {
            $leading_spaces =
              $levels_to_go[$i_begin] * $rOpts->{'indent-columns'} +
              $rOpts->{'continuation-indentation'};
        }
    }

    return \@i_first, \@i_last;
}

# Check whether the indentation level of the output line agrees with that
# of the input line, and write the start and end of each run of
# disagreeing lines to the log file.  This can be very useful for
# debugging a script which has an extra or missing brace.
sub compare_indentation_levels {

    my ( $python_indentation_level, $structural_indentation_level ) = @_;

    # the tokenizer is only consulted when the two levels differ
    my $levels_disagree =
      ( ( $python_indentation_level ne $structural_indentation_level )
      && ( Perltidy::Tokenizer::know_input_tabstr() ) );

    if ($levels_disagree) {
        $last_tabbing_disagreement = $input_line_number;

        # only the first line of a run of disagreements is counted
        unless ($in_tabbing_disagreement) {
            $tabbing_disagreement_count++;

            write_logfile_entry(
"Start indentation disagreement: input=$python_indentation_level; output=$structural_indentation_level\n"
              )
              if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES );

            $in_tabbing_disagreement = $input_line_number;
            $first_tabbing_disagreement ||= $in_tabbing_disagreement;
        }
    }

    # agreement again: close out any run which is in progress
    elsif ($in_tabbing_disagreement) {

        if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) {
            write_logfile_entry(
"End indentation disagreement from input line $in_tabbing_disagreement\n"
            );

            # say so when the message limit has just been reached
            write_logfile_entry(
              "No further tabbing disagreements will be noted\n")
              if ( $tabbing_disagreement_count == MAX_NAG_MESSAGES );
        }
        $in_tabbing_disagreement = 0;
    }

    $input_line_tabbing = undef;    # deactivate test for this input line
}

=pod

The Perltidy::VerticalAligner package collects output lines and attempts to line
up certain common tokens, such as => and #, which are identified
by the calling routine.

There are two main routines: append and flush.  Append acts as a storage
buffer, collecting lines into a group which can be vertically aligned.
When alignment is no longer possible or desirable, it dumps the group
to flush.

	append ---------> flush

	collects          writes
	vertical          one
	groups            group

=cut
package Perltidy::VerticalAligner;

# Package variables shared by the routines of this package.  They are
# declared here; initialize() stores the four objects passed to it
# ($rOpts, $file_writer_object, $logger_object, $diagnostics_object) and
# resets the remaining scalars and arrays.
use vars qw(
  $starting_jmax
  $maximum_jmax_seen
  $minimum_jmax_seen
  $maximum_line_index
  $maximum_field_index
  $group_level
  $group_list_type
  $zero_count
  @columns
  @starting_columns
  @matching_tokens
  @matching_patterns
  @levels
  @ci
  @fields
  $last_comment_column
  $lines_since_last_side_comment
  $outdented_line_count
  $first_outdented_line_at
  $last_outdented_line_at
  $diagnostics_object
  $logger_object
  $file_writer_object
  $rOpts
  %VALIGN_DEBUG_FLAG
);

sub initialize {

    # Store the objects supplied by the caller and reset the package
    # state to that of an empty group.  The arguments are, in order: the
    # options hash reference, the file writer object, the logger object
    # and the diagnostics object.
    $rOpts              = shift;
    $file_writer_object = shift;
    $logger_object      = shift;
    $diagnostics_object = shift;

    # variables describing the entire group: there are no lines and no
    # fields in the current group yet
    $maximum_line_index = $maximum_field_index = -1;

    $group_level     = 0;
    $group_list_type = "";

    # count of consecutive lines without tokens
    $zero_count = 0;

    # side comment tracking
    $last_comment_column           = 0;
    $lines_since_last_side_comment = 0;

    # field count statistics
    $starting_jmax = $maximum_jmax_seen = $minimum_jmax_seen = 0;

    # Empty the tables for the whole group (columns of matching tokens,
    # starting columns, tokens being matched, patterns being matched) and
    # the tables for each line of the group (levels, continuation
    # indentation, references to lists of fields).
    foreach my $rtable (
        \@columns,           \@starting_columns, \@matching_tokens,
        \@matching_patterns, \@levels,           \@ci,
        \@fields
      )
    {
        @$rtable = ();
    }

    # outdented line statistics
    $outdented_line_count = $first_outdented_line_at =
      $last_outdented_line_at = 0;
}

# Interface to Perltidy::Diagnostics routines: pass all arguments on to
# the diagnostics object, or do nothing if there is no such object.
sub write_diagnostics {
    $diagnostics_object->write_diagnostics(@_) if ($diagnostics_object);
}

# Interface to Perltidy::Logger routines: pass all arguments on to the
# warning method of the logger object, or do nothing if there is no
# logger object.
sub warning {
    $logger_object->warning(@_) if ($logger_object);
}

sub write_logfile_entry {

    # pass all arguments on to the logger object, if there is one
    $logger_object->write_logfile_entry(@_) if ($logger_object);
}

sub report_definite_bug {

    # notify the logger object, if there is one; no arguments are passed on
    $logger_object->report_definite_bug() if ($logger_object);
}

sub append {

=pod

sub append is called to place one line in the current vertical group.

The input parameters are, in calling order:
	$level = indentation level of this line
	$ci	   = continuation indentation string
	$rfields = reference to array of fields
	$rtokens	= reference to array of tokens starting fields 1,2,..
	$rpatterns	= reference to array of patterns, one per field
	$is_forced_break	= greater than zero if this line has a forced break

Here is an example of what this package does.  In this example,
we are trying to line up both the '=>' and the '#'.  

        '18' => 'grave',    #   \`
        '19' => 'acute',    #   `'
        '20' => 'caron',    #   \v
<-tabs-><f1-><--field 2 ---><-f3->
|            |              |    |
|            |              |    |
col1        col2         col3 col4 

The calling routine has already broken the entire line into 3 fields as
indicated.  (So the work of identifying promising common tokens has
already been done).

In this example, there will be 2 tokens being matched: '=>' and '#'.
They are the leading parts of fields 2 and 3, but we do need to know
what they are so that we can dump a group of lines when these tokens
change.

The fields contain the actual characters of each field.  The patterns
are like the fields, but they contain mainly token types instead
of tokens, so they have fewer characters.  They are used to be
sure we are matching fields of similar type.

In this example, there will be 4 column indexes being adjusted.  The
first one is always at zero.  The interior columns are at the start of
the matching tokens, and the last one tracks the maximum line length.

Basically, each time a new line comes in, it joins the current vertical
group if possible.  Otherwise it causes the current group to be dumped
and a new group is started.

For each new group member, the column locations are increased, as
necessary, to make room for the new fields.  When the group is finally
output, these column numbers are used to compute the amount of spaces of
padding needed for each field.

Programming note: the fields are assumed not to have any tab characters.
Tabs have been previously removed except for tabs in quoted strings and
side comments.  Tabs in these fields can mess up the column counting.
The log file warns the user if there are any such tabs.

=cut
    my ( $level, $ci, $rfields, $rtokens, $rpatterns, $is_forced_break ) = @_;

    # number of fields is $jmax
    # number of tokens between fields is $jmax-1
    my $jmax = $#{$rfields};

    if ( $VALIGN_DEBUG_FLAG{APPEND0} ) {
        print
"APPEND0: entering lines=$maximum_line_index new #fields= $jmax, ci=$ci last_cmt=$last_comment_column force=$is_forced_break\n";
    }

    # don't remember old side comment location for very long
    if ( $lines_since_last_side_comment > 12 ) {
        $last_comment_column = 0;
    }

    # do not align code across indentation level changes
    if ( $level != $group_level ) {
        flush();
        $last_comment_column = 0;
    }

    $group_level = $level;

    # --------------------------------------------------------------------
    # Step 1. Handle simple line of code with no fields to match.
    # --------------------------------------------------------------------
    if ( $jmax <= 0 ) {
        $zero_count++;

        if ( $maximum_line_index >= 0 ) {
            if ( $maximum_field_index > 1 ) { flush() }

            # flush current group unless we are just collecting side
            # comments, and we saw one recently
            elsif ( ( $zero_count > 3 )
              || ( ( $level * $rOpts->{'indent-columns'} + length($ci) +
              length( $$rfields[0] ) ) > $columns[1] ) )
            {
                flush();
            }
        }

        # write directly if no current group and no side comment
        if ( $maximum_line_index < 0 ) {
            write_leader_and_string( $ci, $$rfields[0], 0 );
            $lines_since_last_side_comment++;
            return;
        }
    }
    else {
        $zero_count = 0;
    }

    # programming check: (shouldn't happen)
    # an error here implies an incorrect call was made
    if ( $jmax > 0 && ( $#{$rtokens} != ( $jmax - 1 ) ) ) {
        warning(
"Program bug in Perltidy::VerticalAligner - number of tokens = $#{$rtokens} should be one less than number of fields: $#{$rfields})\n"
        );
        report_definite_bug();
    }

    # --------------------------------------------------------------------
    # Step 2. It simplifies things to create a zero length side comment
    # if none exists.
    # --------------------------------------------------------------------
    my $jmax_original_line = $jmax;
    if ( ( $jmax == 0 ) || ( $$rtokens[ $jmax - 1 ] ne '#' ) ) {
        $$rtokens[$jmax] = '#';
        $$rfields[ ++$jmax ] = '';
        $$rpatterns[$jmax] = '#';
        $lines_since_last_side_comment++;
    }
    else {
        $lines_since_last_side_comment = 0;
    }

    # --------------------------------------------------------------------
    # Step 2a. Decide if this is a simple list of items
    # Select from 3 list types: none, comma, comma-arrow
    # We use this below to be less restrictive in deciding what to align
    # --------------------------------------------------------------------
    my $list_type  = "";
    my $test_token = $$rtokens[0];

    # A list will be taken to be a line with a forced break in which all
    # of the field separators are commas or comma-arrows (except for the
    # trailing #)
    if ( $is_forced_break > 0 ) {

        # List separator tokens are things like ',3'   or '=>2',
        # where the trailing digit is the nesting depth.  Allow braces
        # to allow nested list items.
        if ( $test_token =~ /^(\,|=>)/ ) {
            $list_type = $test_token;
            foreach my $i ( 1 .. $jmax - 2 ) {

                if ( $$rtokens[$i] !~ /^(\,|=>|\{)/ ) {
                    $list_type = "";
                    last;
                }
            }
        }
    }

    # --------------------------------------------------------------------
    # Step 3. If there is just one previous line, and it has more fields
    # than the new line, try to join fields together to get a match with
    # the new line.  At the present time, only a single leading '=' is
    # allowed to be compressed out.  This is useful in rare cases where
    # a table is forced to use old breakpoints because of side comments,
    # and the table starts out something like this:
    #   my %MonthChars = ('0', 'Jan',	# side comment
    #                     '1', 'Feb',
    #                     '2', 'Mar',
    # Eliminating the '=' field will allow the remaining fields to line up.
    # This situation does not occur if there are no side comments
    # because scan_list would put a break after the opening '('.
    # --------------------------------------------------------------------
    my $hid_equals = 0;

    my $previous_minimum_jmax_seen = $minimum_jmax_seen;
    my $previous_maximum_jmax_seen = $maximum_jmax_seen;
    if ( $jmax > $maximum_jmax_seen ) { $maximum_jmax_seen = $jmax }
    if ( $jmax < $minimum_jmax_seen ) { $minimum_jmax_seen = $jmax }

    if ( ( $maximum_field_index > $jmax )         # this line has fewer fields
      && ( $maximum_field_index - 2 <= $jmax )    # be reasonable, not too few
      && ( $maximum_line_index == 0 )             # there is one previous line
      && ( length( $fields[0][$maximum_field_index] ) > 0 )  # with side comment
      )
    {
        my @new_columns = ();
        my @new_starting_columns = ();
        my @new_fields = ();
        my @new_matching_patterns = ();
        my @new_matching_tokens   = ();
        $new_columns[0] = $columns[0];
        $new_starting_columns[0] = $starting_columns[0];
        my $j = 0;
        my $k;
        my $current_field   = '';
        my $current_pattern = '';

        # loop over all old tokens
        my $in_match = 0;
        for ( $k = 0 ; $k < $maximum_field_index ; $k++ ) {
            $current_field .= $fields[0][$k];
            $current_pattern .= $matching_patterns[$k];
            last if ( $j > $jmax - 1 );

            if ( $matching_tokens[$k] eq $$rtokens[$j] ) {
                $in_match = 1;
                $new_matching_tokens[$j] = $matching_tokens[$k];
                $new_fields[$j] = $current_field;
                $new_matching_patterns[$j] = $current_pattern;
                $current_field   = '';
                $current_pattern = '';
                $new_columns[ $j + 1 ] = $columns[ $k + 1 ];
                $new_starting_columns[ $j + 1 ] = $starting_columns[ $k + 1 ];
                $j++;
            }
            else {

                if ( $matching_tokens[$k] =~ /^\=\d*$/ ) {
                    $hid_equals = 1;
                }
                last if $in_match;    # disallow gaps in matching field types
            }
        }

        # Modify the current state if we are successful.
        # We must exactly reach the ends of both lists for success.
        if ( ( $j == $jmax ) && ( $current_field eq '' ) && $hid_equals ) {
            $k = $maximum_field_index;
            $current_field .= $fields[0][$k];
            $current_pattern .= $matching_patterns[$k];
            $new_fields[$j] = $current_field;
            $new_matching_patterns[$j] = $current_pattern;
            $new_columns[ $j + 1 ] = $columns[ $k + 1 ];
            $new_starting_columns[ $j + 1 ] = $starting_columns[ $k + 1 ];
            $maximum_field_index = $j;
            $fields[$maximum_line_index] = \@new_fields;
            @columns = @new_columns;
            @starting_columns  = @new_starting_columns;
            @matching_tokens   = @new_matching_tokens;
            @matching_patterns = @$rpatterns;
        }
    }

    # --------------------------------------------------------------------
    # Step 4. If the new line has more fields than the current group,
    # see if we can match the first fields and combine the remaining
    # fields of the new line.  
    # --------------------------------------------------------------------
    if ( ( $maximum_line_index >= 0 ) && ( $maximum_field_index < $jmax )
      && ( $starting_jmax == $minimum_jmax_seen )    # only if monotonic
      && ( $previous_maximum_jmax_seen <= $jmax )    # only if monotonic
      && ( $maximum_field_index > 1 )
      && ( $list_type !~ /^,/ ) )    # never combine fields of a comma list
    {

        # loop over all old tokens except comment
        my $match = 1;
        my $k;
        for ( $k = 0 ; $k < $maximum_field_index - 1 ; $k++ ) {
            if ( ( $matching_tokens[$k] ne $$rtokens[$k] )
              || ( $matching_patterns[$k] ne $$rpatterns[$k] ) )
            {
                $match = 0;
                last;
            }
        }

        # first tokens agree, so combine new tokens
        if ($match) {
            for $k ( $maximum_field_index .. $jmax - 1 ) {

                $$rfields[ $maximum_field_index - 1 ] .= $$rfields[$k];
                $$rfields[$k] = "";
                $$rpatterns[ $maximum_field_index - 1 ] .= $$rpatterns[$k];
                $$rpatterns[$k] = "";
            }

            $$rtokens[ $maximum_field_index - 1 ] = '#';
            $$rfields[$maximum_field_index]   = $$rfields[$jmax];
            $$rpatterns[$maximum_field_index] = $$rpatterns[$jmax];
            $jmax = $maximum_field_index;

        }
    }

    # --------------------------------------------------------------------
    # Step 5. Flush previous group unless all common tokens and patterns match..
    # --------------------------------------------------------------------

    # flush if this line has too many fields
    if ( $jmax > $maximum_field_index ) { flush(); }

    # flush if adding this line would make a non-monotonic field count
    elsif ( ( $maximum_field_index > $jmax )         # this has too few fields
      && ( ( $previous_minimum_jmax_seen < $jmax )   # and wouldn't be monotonic
      || ( $starting_jmax != $maximum_jmax_seen ) ) )
    {
        flush();
    }

    # otherwise append this line if everything matches
    else {

        my $jlimit = $jmax - 1;
        if ( $maximum_field_index > $jmax ) {
            $jlimit = $jmax_original_line;
        }

        my $everything_matches = 1;

        # common list types always match
        unless ( $group_list_type && ( $list_type eq $group_list_type ) ) {

            for my $j ( 0 .. $jlimit ) {
                my $match = 1;
                if ( ( $j < $jlimit )
                  && ( ( $matching_tokens[$j] ne $$rtokens[$j] )
                  || ( $matching_patterns[$j] ne $$rpatterns[$j] ) ) )
                {
                    $match = 0;
                }

                # Don't let line with fewer fields increase column widths
                # ( align3.t )
                if ( $maximum_field_index > $jmax ) {
                    my $pad =
                      length( $$rfields[$j] ) -
                      ( $columns[ $j + 1 ] - $columns[$j] );

                    if ( $j == 0 ) {
                        $pad +=
                          $level * $rOpts->{'indent-columns'} + length($ci);
                    }
                    if ( $pad > 0 ) { $match = 0 }
                }

                unless ($match) {
                    $everything_matches = 0;
                    last;
                }
            }
        }

        if ( $maximum_field_index > $jmax ) {

            if ($everything_matches) {

                my $comment = $$rfields[$jmax];
                for $jmax ( $jlimit .. $maximum_field_index ) {
                    $$rtokens[$jmax] = $matching_tokens[$jmax];
                    $$rfields[ ++$jmax ] = '';
                    $$rpatterns[$jmax] = $matching_patterns[$jmax];
                }
                $$rfields[$jmax] = $comment;
            }
        }

        flush() unless ($everything_matches);
    }

    # --------------------------------------------------------------------
    # Step 6. See if there is space for this line in the current group
    # --------------------------------------------------------------------
    if ( $maximum_line_index >= 0 ) {
        my $padding_so_far    = 0;
        my $padding_available =
          $rOpts->{'maximum-line-length'} -
          $columns[ $maximum_field_index + 1 ];

        # save current columns in case this doesn't work
        my @old_columns = @columns;

        for my $j ( 0 .. $jmax ) {
            my $pad =
              length( $$rfields[$j] ) - ( $columns[ $j + 1 ] - $columns[$j] );

            if ( $j == 0 ) {
                $pad += $level * $rOpts->{'indent-columns'} + length($ci);
            }

            # sudden increase or decrease in whitespace space looks bad 
            # (unless this is a list or side comment).  The number '8' 
            # is empirical, and could be yet another parameter...
            # ( eight.t )
            my $big_jump = 0;
            my $eight    = $group_list_type ? 12 : 8;
            if ( $j < $jmax - 1 ) {    #&& !$group_list_type ) {

                # a sudden increase of over 8 spaces is too much
                if ( $pad > $eight ) { $big_jump = 1 }

                # a sudden decrease of over 8 spaces to a new low is too much
                elsif ( $pad < 0 ) {
                    my $test_pad = $pad;
                    my $test_pad =
                      length( $$rfields[$j] ) -
                      ( $starting_columns[ $j + 1 ] - $starting_columns[$j] );

                    if ( $j == 0 ) {
                        $test_pad +=
                          $level * $rOpts->{'indent-columns'} + length($ci);
                    }
                    if ( $test_pad < -$eight ) { $big_jump = 1 }
                }
            }

            next if !$big_jump && $pad < 0;

            # This line will need space; lets see if we want to accept it..
            if (

              # not if padding increases too much
              $big_jump

              # not if this won't fit
              || ( $pad > $padding_available )

              # or, with the exception of space to side comments, ..
              || ( $j < $jmax - 1

              # causes too many consecutive columns of whitespace 
              && ( ( $pad + $columns[ $j + 1 ] - $starting_columns[ $j + 1 ] >
              $rOpts->{'maximum-whitespace-columns'} )

              ) ) )
            {

                # revert to starting state then flush; things didn't work out
                @columns = @old_columns;
                flush();
                last;
            }

            # looks ok, squeeze this field in
            $padding_available -= $pad;
            my $k;

            for $k ( $j .. $maximum_field_index ) {
                $columns[ $k + 1 ] += $pad;
            }
        }
    }

    # --------------------------------------------------------------------
    # Step 7. Append this line to the current group (or start new group)
    # --------------------------------------------------------------------
    $maximum_line_index++;
    $levels[$maximum_line_index] = $level;
    $ci[$maximum_line_index]     = $ci;
    $fields[$maximum_line_index] = $rfields;

    # initialize field lengths if starting new group
    if ( $maximum_line_index == 0 ) {
        $group_list_type     = $list_type;
        $maximum_field_index = $jmax;
        $columns[0] = $level * $rOpts->{'indent-columns'} + length($ci);
        my $j;

        for $j ( 0 .. $maximum_field_index ) {
            $columns[ $j + 1 ] = $columns[$j] + length( $$rfields[$j] );
        }
        $columns[0] = 0;
        @starting_columns  = @columns;
        $starting_jmax     = $jmax;
        $maximum_jmax_seen = $jmax;
        $minimum_jmax_seen = $jmax;

        for $j ( 0 .. $jmax - 1 ) {
            $matching_tokens[$j] = $$rtokens[$j];
        }
        for $j ( 0 .. $jmax ) {
            $matching_patterns[$j] = $$rpatterns[$j];
        }
    }

    # --------------------------------------------------------------------
    # Step 8. Some old debugging stuff
    # --------------------------------------------------------------------
    if ( $VALIGN_DEBUG_FLAG{APPEND} ) {
        print "APPEND fields:";
        dump_array(@$rfields);
        print "APPEND tokens:";
        dump_array(@$rtokens);
        print "APPEND patterns:";
        dump_array(@$rpatterns);
        print "APPEND common columns:";
        dump_array(@columns);
    }
}

sub dump_array {

    # Debugging aid: print the argument list on a single line of the
    # currently selected output handle, with every element enclosed in
    # its own pair of parentheses, like this: (a)(b)(c)
    # An empty argument list is printed as ()
    print '(' . join( ')(', @_ ) . ")\n";
}

=pod====================================================================

flush() sends the current Perltidy::VerticalAligner group down the pipeline to Perltidy::FileWriter.

=cut====================================================================
sub flush {

    # Write out every line of the current alignment group, then reset the
    # group so that a new one can be started.
    #
    # The steps are:
    #  - decide whether the stored lines should be aligned at all
    #  - try to move the side comment column out a little
    #  - measure the gaps after the first field so that continuation
    #    indentation can be increased (or removed in a special case)
    #  - build each output line from its fields plus padding and hand it
    #    to write_leader_and_string()
    #
    # On return $maximum_line_index and $maximum_field_index are -1 and
    # $zero_count is 0.

    if ( $VALIGN_DEBUG_FLAG{APPEND0} ) {
        my ( $a, $b, $c ) = caller();
        print
"APPEND0: Flush called from $a $b $c fields=$maximum_field_index list=$group_list_type\n";
    }

    # Do not try to align two lines which are not really similar:
    # the group holds at most two lines, it is not a list, and the lines
    # seen did not all have the same number of fields.
    my $do_not_align =
      ( $maximum_line_index < 2 && !$group_list_type
      && ( $maximum_jmax_seen != $minimum_jmax_seen ) );

    # But try to convert them into a simple comment group if the first line
    # has a side comment
    if ( $do_not_align && ( $maximum_line_index > 0 )
      && ( length( $fields[0][$maximum_field_index] ) > 0 ) )
    {
        combine_fields();
        $do_not_align = 0;
    }

    # let's see if we can move the side comment field out a little
    # to improve readability (the last field is always a side comment field)
    # $columns[ $kmax - 1 ] is where the last field starts and
    # $columns[$kmax] is where it ends.
    my $kmax = $maximum_field_index + 1;

    if ( $kmax > 0 && !$do_not_align ) {

        # the maximum space without exceeding the line length:
        my $avail = $rOpts->{'maximum-line-length'} - $columns[$kmax];

        # try to use the previous comment column
        my $move = $last_comment_column - $columns[ $kmax - 1 ];

        # but if this doesn't work, give up and use the minimum space
        if ( $move > $avail ) {
            $move = $rOpts->{'minimum-space-to-comment'} - 1;
        }

        # but we want some minimum space to the comment
        if ( $move < $rOpts->{'minimum-space-to-comment'} - 1 ) {
            $move = $rOpts->{'minimum-space-to-comment'} - 1;
        }

        # if this causes too much space, give up and use the minimum space
        if ( $move > $rOpts->{'maximum-space-to-comment'} - 1 ) {
            $move = $rOpts->{'minimum-space-to-comment'} - 1;
        }

        # don't exceed the available space
        if ( $move > $avail ) { $move = $avail }

        # we can only increase space, never decrease
        if ( $move > 0 ) {
            $columns[ $kmax - 1 ] += $move;
            $columns[$kmax] += $move;
        }

        # remember this column for the next group
        $last_comment_column = $columns[ $kmax - 1 ];
    }

    # See if we can increase the continuation indentation
    # to move all continuation lines closer to the next field
    # (unless it is a comment).
    #
    # The two minimum gaps start at the maximum line length, which acts
    # as a 'nothing seen yet' value, and are lowered in the loop below.
    my $ci_count       = 0;
    my $non_ci_count   = 0;
    my $min_ci_gap     = $rOpts->{'maximum-line-length'};
    my $min_non_ci_gap = $rOpts->{'maximum-line-length'};
    my $ci_length      = 0;
    my $is_comma_list  = 1;

    if ( $maximum_field_index > 1 && !$do_not_align ) {

        for my $i ( 0 .. $maximum_line_index ) {

            # unused space between the end of the first field of this
            # line (including its indentation) and the start of column 1
            my $gap =
              $columns[1] - $levels[$i] * $rOpts->{'indent-columns'} -
              length( $ci[$i] ) - length( $fields[$i][0] );

            # see if the next-to-last field of every line but the last
            # line ends in a ','
            if ( $is_comma_list && ( $i < $maximum_line_index )
              && ( $fields[$i][ $maximum_field_index - 1 ] !~ /\,\s*$/ ) )
            {
                $is_comma_list = 0;
            }

            if ( length( $ci[$i] ) > 0 ) {

                # $ci_length keeps the common length of the continuation
                # indentation strings; it is zeroed when a line differs
                # from the value being tracked
                if ( !$ci_count ) {
                    $ci_length = length( $ci[$i] );
                }
                else {
                    if ( $ci_length != length( $ci[$i] ) ) { $ci_length = 0 }
                }
                $ci_count++;

                if ( $gap < $min_ci_gap ) { $min_ci_gap = $gap }
            }
            else {
                $non_ci_count++;
                if ( $gap < $min_non_ci_gap ) { $min_non_ci_gap = $gap }
            }
        }

        # still at the starting value: no line lowered it, so there is
        # no extra indentation to add
        if ( $min_ci_gap >= $rOpts->{'maximum-line-length'} ) {
            $min_ci_gap = 0;
        }
    }
    else {
        $min_ci_gap     = 0;
        $min_non_ci_gap = 0;
    }

    # Undo continuation indentation in special cases. We want to avoid
    # this kind of situation, where the continuation indentation is not
    # helpful (undoci.t).
    #
    #   $root{$vol} = {
    #       path       => $vol . $SEPARATOR,
    #         prefix   => $vol . $SEPARATOR,
    #         srcpath  => $vol . $SEPARATOR,
    #         'exists' => 1
    #   };
    #
    # When it is undone, every continuation indentation string is emptied
    # and all columns after the first are moved left by the same amount.

    if ( $is_comma_list && !$min_ci_gap && $ci_length
      && ( $min_non_ci_gap >= $ci_length ) && ( $non_ci_count == 1 ) )
    {
        for my $i ( 0 .. $maximum_line_index ) {
            $ci[$i] = '';
        }

        for my $j ( 1 .. $maximum_field_index + 1 ) {
            $columns[$j] -= $ci_length;
        }
    }

    # loop over all lines
    my $group_leader_length = $group_level * $rOpts->{'indent-columns'};
    for my $i ( 0 .. $maximum_line_index ) {

        # calculate correction needed because tab char, if used, is one
        # space but will expand to 'indent-columns' when the line is
        # displayed:
        # NOTE(review): the statement below only copies the continuation
        # indentation string; no tab correction is visible here - confirm
        # whether the comment above is out of date.
        my $spaces = $ci[$i];

        # lines with continuation indentation get the extra gap found above
        if ( length( $ci[$i] ) > 0 ) { $spaces .= ' ' x $min_ci_gap; }
        my $str = $fields[$i][0];

        # loop to concatenate all fields of this line and needed padding
        for my $j ( 1 .. $maximum_field_index ) {

            # skip zero-length side comments
            last

              if ( ( $j == $maximum_field_index )
              && ( !defined( $fields[$i][$j] )
              || ( length( $fields[$i][$j] ) == 0 ) ) );

            # pad with spaces to start of this field
            my $pad = $columns[$j] -
              ( length($str) + $group_leader_length + length($spaces) );

            # unaligned lines get no padding between fields, and just the
            # minimum space before a side comment
            if ($do_not_align) {
                $pad =
                  ( $j < $maximum_field_index ) ? 0 :
                  $rOpts->{'minimum-space-to-comment'} - 1;
            }
            if ( $pad > 0 ) { $str .= ' ' x $pad; }

            # add this field
            if ( !defined $fields[$i][$j] ) {
                write_diagnostics("UNDEFined field at i=$i j=$j\n");
            }
            $str .= $fields[$i][$j];
        }

        # trim right side because we may have had empty fields
        $str =~ s/\s*$//;
        my $side_comment_length =
          ( length( $fields[$i][$maximum_field_index] ) );

        # ship it off
        write_leader_and_string( $spaces, $str, $side_comment_length );
    }

    # the group is now empty
    $maximum_line_index  = -1;
    $maximum_field_index = -1;
    $zero_count          = 0;
}

sub combine_fields {

    # Reduce the current group to two fields per line  ( sidecmt.t ):
    #   field 0 - the old field 0 with all fields before the last one
    #             appended to it
    #   field 1 - the old last field (the side comment)
    # and then rebuild the column positions for these two fields.
    my $comment_index = $maximum_field_index;
    foreach my $line ( 0 .. $maximum_line_index ) {
        foreach my $field ( 1 .. $comment_index - 1 ) {
            $fields[$line][0] .= $fields[$line][$field];
        }
        $fields[$line][1] = $fields[$line][$comment_index];
    }
    $maximum_field_index = 1;

    # start with zero-width columns and widen them to hold every line
    @columns[ 0 .. 2 ] = ( 0, 0, 0 );

    foreach my $line ( 0 .. $maximum_line_index ) {
        foreach my $field ( 0 .. $maximum_field_index ) {

            # extra width needed for this field to fit in its column
            my $needed =
              length( $fields[$line][$field] ) + $columns[$field] -
              $columns[ $field + 1 ];

            # the first field also carries the line's indentation
            if ( $field == 0 ) {
                $needed +=
                  $group_level * $rOpts->{'indent-columns'} +
                  length( $ci[$line] );
            }
            next unless ( $needed > 0 );

            # widen this column by shifting its end and all later columns
            foreach my $column ( $field + 1 .. $maximum_field_index + 1 ) {
                $columns[$column] += $needed;
            }
        }
    }
}

sub write_leader_and_string {

    # Send one line of the group to the file writer.
    #   $spaces              - continuation indentation string
    #   $str                 - text of the line, without leading indentation
    #   $side_comment_length - length of the side comment at the end of $str
    #
    # If the line, not counting its side comment, is too long, then the
    # continuation indentation is shortened when that is enough to make it
    # fit; otherwise the line is outdented completely if the
    # 'outdent-long-lines' option is set.
    my ( $spaces, $str, $side_comment_length ) = @_;

    my $level       = $group_level;
    my $code_length =
      $level * $rOpts->{'indent-columns'} + length($spaces) + length($str) -
      $side_comment_length;
    my $excess = $code_length - $rOpts->{'maximum-line-length'};

    # Reduce continuation indentation if that solves the problem
    if ( $excess > 0 && length($spaces) >= $excess ) {
        $spaces = substr( $spaces, 0, length($spaces) - $excess );
    }

    # Otherwise, outdent if permitted, and remember where it happened
    elsif ( $excess > 0 && $rOpts->{'outdent-long-lines'} ) {
        $level  = 0;
        $spaces = "";
        $last_outdented_line_at =
          $file_writer_object->get_output_line_number();
        $first_outdented_line_at = $last_outdented_line_at
          unless ($outdented_line_count);
        $outdented_line_count++;
    }

    # one tab string for each indentation level, then the line itself
    my $leader = Perltidy::Formatter->get_tabstr() x $level;
    $file_writer_object->write_code_line( $leader . $spaces . $str . "\n" );
}

sub report_anything_unusual {

    # Write a short summary to the logfile if any long lines were
    # outdented: how many there were, where the first one was, and (when
    # there was more than one) where the last one was.
    if ( $outdented_line_count > 0 ) {
        my @report = (
            "$outdented_line_count long lines were outdented:\n",
            "  First at output line $first_outdented_line_at\n",
        );

        if ( $outdented_line_count > 1 ) {
            push @report, "   Last at output line $last_outdented_line_at\n";
        }
        push @report, "\n";

        foreach my $entry (@report) {
            write_logfile_entry($entry);
        }
    }
}

#####################################################################
#
# the Perltidy::FileWriter class writes the output file
#
#####################################################################

package Perltidy::FileWriter;

# Maximum number of little messages; probably need not be changed.
use constant MAX_NAG_MESSAGES => 6;

sub write_logfile_entry {

    # Pass the remaining arguments on to the logger object, if this
    # writer was given one; otherwise do nothing.
    my $self = shift;
    $self->{_logger_object}->write_logfile_entry(@_)
      if ( $self->{_logger_object} );
}

sub new {

    # Create a file writer.
    #   $line_sink_object - object which receives the output lines
    #   $rOpts            - reference to the hash of option values
    #   $logger_object    - object for logfile messages (may be undefined)
    # Output line numbering starts at 1; all other counters start at 0.
    my ( $class, $line_sink_object, $rOpts, $logger_object ) = @_;

    my $self = {
        _line_sink_object   => $line_sink_object,
        _logger_object      => $logger_object,
        _rOpts              => $rOpts,
        _output_line_number => 1,
    };

    foreach my $counter (
        qw(
        _consecutive_blank_lines
        _consecutive_nonblank_lines
        _first_line_length_error
        _max_line_length_error
        _last_line_length_error
        _first_line_length_error_at
        _max_line_length_error_at
        _last_line_length_error_at
        _line_length_error_count
        _max_output_line_length
        _max_output_line_length_at
        )
      )
    {
        $self->{$counter} = 0;
    }

    return bless $self, $class;
}

sub tee_on {

    # Forward a tee_on request to the line sink object
    my ($self) = @_;
    my $line_sink_object = $self->{_line_sink_object};
    $line_sink_object->tee_on();
}

sub tee_off {

    # Forward a tee_off request to the line sink object
    my ($self) = @_;
    my $line_sink_object = $self->{_line_sink_object};
    $line_sink_object->tee_off();
}

sub get_output_line_number {

    # Return the current value of the output line counter
    my ($self) = @_;
    return $self->{_output_line_number};
}

sub decrement_output_line_number {

    # Move the output line counter back by one
    my ($self) = @_;
    $self->{_output_line_number}--;
}

sub get_consecutive_nonblank_lines {

    # Return the current count of consecutive non-blank lines
    my ($self) = @_;
    return $self->{_consecutive_nonblank_lines};
}

sub want_blank_line {

    # Request a blank line, but only if the blank line counter is zero
    my ($self) = @_;
    $self->write_blank_code_line()
      if ( !$self->{_consecutive_blank_lines} );
}

sub write_blank_code_line {

    # Write an empty line, unless the number of consecutive blank lines
    # has already reached the 'maximum-consecutive-blank-lines' option.
    my ($self) = @_;
    my $limit = $self->{_rOpts}->{'maximum-consecutive-blank-lines'};
    return unless ( $self->{_consecutive_blank_lines} < $limit );

    $self->{_consecutive_nonblank_lines} = 0;
    $self->{_consecutive_blank_lines}++;
    $self->write_line("\n");
}

sub write_code_line {

    # Write one line of code and keep the blank / non-blank line counters
    # up to date.  A line holding only whitespace counts as a blank line
    # and is dropped when the 'maximum-consecutive-blank-lines' option has
    # already been reached.
    my ( $self, $line ) = @_;

    if ( $line !~ /^\s*$/ ) {
        $self->{_consecutive_blank_lines} = 0;
        $self->{_consecutive_nonblank_lines}++;
    }
    else {
        my $limit = $self->{_rOpts}->{'maximum-consecutive-blank-lines'};
        return unless ( $self->{_consecutive_blank_lines} < $limit );
        $self->{_consecutive_blank_lines}++;
        $self->{_consecutive_nonblank_lines} = 0;
    }
    $self->write_line($line);
}

sub write_line {

    # Send one line to the line sink and update the line statistics:
    # the output line counter, the longest line written so far, and the
    # record of lines which exceed the 'maximum-line-length' option.
    my ( $self, $line ) = @_;
    my $rOpts = $self->{_rOpts};

    $self->{_line_sink_object}->write_line($line);
    if ( $line =~ /\n$/ ) { $self->{_output_line_number}++; }

    # The positions recorded below are one less than the line counter,
    # which was just advanced if the line ended in a newline.
    my $recorded_line_number = $self->{_output_line_number} - 1;
    my $line_length          = length($line);

    # keep track of the longest line (one character is not counted)
    if ( $line_length > 1 + $self->{_max_output_line_length} ) {
        $self->{_max_output_line_length}    = $line_length - 1;
        $self->{_max_output_line_length_at} = $recorded_line_number;
    }

    # This calculation of excess line length ignores any internal tabs;
    # each leading tab is counted as 'indent-columns' characters.
    my $exceed = $line_length - $rOpts->{'maximum-line-length'} - 1;
    if ( $line =~ /^(\t+)/ ) {
        $exceed += length($1) * ( $rOpts->{'indent-columns'} - 1 );
    }

    if ( $exceed > 0 ) {
        $self->{_last_line_length_error}    = $exceed;
        $self->{_last_line_length_error_at} = $recorded_line_number;

        if ( $self->{_line_length_error_count} == 0 ) {
            $self->{_first_line_length_error}    = $exceed;
            $self->{_first_line_length_error_at} = $recorded_line_number;
        }

        if ( $exceed > $self->{_max_line_length_error} ) {
            $self->{_max_line_length_error}    = $exceed;
            $self->{_max_line_length_error_at} = $recorded_line_number;
        }

        # only the first few long lines are reported individually
        if ( $self->{_line_length_error_count} < MAX_NAG_MESSAGES ) {
            $self->write_logfile_entry(
              "Line length exceeded by $exceed characters\n");
        }
        $self->{_line_length_error_count}++;
    }

}

sub report_line_length_errors {

    # Write a summary of line lengths to the logfile.  If no line was too
    # long, report the maximum output line length and where it occurred.
    # Otherwise report the number of long lines and the position and size
    # of the first one and, when there were several, also of the largest
    # and the last ones.
    my ($self) = @_;
    my $rOpts = $self->{_rOpts};
    my $line_length_error_count = $self->{_line_length_error_count};
    my @report;

    if ( $line_length_error_count == 0 ) {
        my ( $max_output_line_length, $max_output_line_length_at ) =
          @{$self}{qw(_max_output_line_length _max_output_line_length_at)};
        push @report,
          "No lines exceeded $rOpts->{'maximum-line-length'} characters\n",
          "  Maximum output line length was $max_output_line_length at line $max_output_line_length_at\n";
    }
    else {
        my $several = ( $line_length_error_count > 1 );

        my $word = $several ? "s" : "";
        push @report,
          "$line_length_error_count output line$word exceeded $rOpts->{'maximum-line-length'} characters:\n";

        $word = $several ? "First" : "";
        my ( $first_line_length_error, $first_line_length_error_at ) =
          @{$self}{qw(_first_line_length_error _first_line_length_error_at)};
        push @report,
          " $word at line $first_line_length_error_at by $first_line_length_error characters\n";

        if ($several) {
            my ( $max_line_length_error, $max_line_length_error_at ) =
              @{$self}{qw(_max_line_length_error _max_line_length_error_at)};
            my ( $last_line_length_error, $last_line_length_error_at ) =
              @{$self}{qw(_last_line_length_error _last_line_length_error_at)};
            push @report,
              " Maximum at line $max_line_length_error_at by $max_line_length_error characters\n",
              " Last at line $last_line_length_error_at by $last_line_length_error characters\n";
        }
    }

    foreach my $entry (@report) {
        $self->write_logfile_entry($entry);
    }
}

#####################################################################
#
# The Perltidy::Debugger class shows line tokenization
#
#####################################################################

package Perltidy::Debugger;

sub new {

    # Create a debugger object which remembers the name of the debug
    # file.  The file itself is not opened here.
    my ( $class, $filename ) = @_;

    my %state = (
        _debug_file        => $filename,
        _debug_file_opened => 0,
        _fh                => undef,
    );
    return bless \%state, $class;
}

sub really_open_debug_file {

    # Open the debug file for writing and write a one-line hint at the
    # top of it.  The file handle is saved in $self->{_fh}, and
    # $self->{_debug_file_opened} is set whether or not the open worked
    # (so that the open is attempted only once).  If the file cannot be
    # opened, a warning is issued and $self->{_fh} is left undefined.
    my $self       = shift;
    my $debug_file = $self->{_debug_file};

    # The mode is given as a separate argument so that the file name is
    # never scanned for mode characters.
    my $fh = IO::File->new( $debug_file, "w" );
    unless ($fh) {
        warn("can't open $debug_file: $!\n");
    }
    $self->{_debug_file_opened} = 1;
    $self->{_fh} = $fh;

    # do not try to print to a file handle which could not be opened
    if ($fh) {
        print $fh "Use -dump-tokens to get a list of token type codes\n";
    }
}

sub close_debug_file {

    # Close the debug file if it was opened.  Nothing is done if no open
    # was attempted, or if no file handle is stored (the stored handle is
    # undefined when the open failed).
    my $self = shift;
    my $fh   = $self->{_fh};
    if ( $self->{_debug_file_opened} && defined($fh) ) {

        close $fh;
    }
}

sub write_debug_entry {

    # This is a debug dump routine which may be modified as necessary
    # to dump tokens on a line-by-line basis.  The output will be written
    # to the .DEBUG file when the -D flag is entered.
    #
    # The argument is a reference to a 'line_of_tokens' hash.  As written,
    # two lines are printed for each input line, both starting with the
    # input line number:
    #   - the line as rebuilt by joining its tokens
    #   - a line in which each token is replaced by its type code; a
    #     one-character type code is repeated to the length of the token
    # Several of the variables below are used only by the optional,
    # commented-out print statements.
    my $self = shift;
    my ($line_of_tokens) = @_;

    my $input_line        = $line_of_tokens->{_line_text};
    my $rtoken_type       = $line_of_tokens->{_rtoken_type};
    my $rtokens           = $line_of_tokens->{_rtokens};
    my $rlevels           = $line_of_tokens->{_rlevels};
    my $rslevels          = $line_of_tokens->{_rslevels};
    my $rblock_type       = $line_of_tokens->{_rblock_type};
    my $input_line_number = $line_of_tokens->{_line_number};
    my $line_type         = $line_of_tokens->{_line_type};

    my ( $j, $num );

    my $token_str = "$input_line_number: ";
    my $reconstructed_original = "$input_line_number: ";
    my $block_str = "$input_line_number: ";

    #$token_str .= "$line_type: ";
    #$reconstructed_original .= "$line_type: ";

    my $pattern   = "";
    my @next_char = ( '"', '"' );
    my $i_next    = 0;
    unless ( $self->{_debug_file_opened} ) { $self->really_open_debug_file() }
    my $fh = $self->{_fh};

    # There is nowhere to write if the debug file could not be opened
    return unless ( defined($fh) );

    for ( $j = 0 ; $j < @$rtoken_type ; $j++ ) {

        # testing patterns: keywords are shown by name, other tokens by
        # their type code
        if ( $$rtoken_type[$j] eq 'k' ) {
            $pattern .= $$rtokens[$j];
        }
        else {
            $pattern .= $$rtoken_type[$j];
        }
        $reconstructed_original .= $$rtokens[$j];
        $block_str .= "($$rblock_type[$j])";
        $num = length( $$rtokens[$j] );
        my $type_str = $$rtoken_type[$j];

        # be sure there are no blank tokens (shouldn't happen)
        # This can only happen if a programming error has been made
        # because all valid tokens are non-blank
        if ( $type_str eq ' ' ) {
            print $fh "BLANK TOKEN on the next line\n";

            #warning("BLANK TOKEN on line $input_line\n");
            #report_definite_bug();

            # show a visible character in place of the blank type code
            $type_str = $next_char[$i_next];
            $i_next   = 1 - $i_next;
        }

        # stretch a one-character type code to the width of its token
        if ( length($type_str) == 1 ) {
            $type_str = $type_str x $num;
        }
        $token_str .= $type_str;
    }

    # Write what you want here ...
    #print $fh "$input_line\n";
    #print $fh "$pattern\n";
    print $fh "$reconstructed_original\n";
    print $fh "$token_str\n";

    #print $fh "$block_str\n";
}

#####################################################################
#
# The Perltidy::LineBuffer class supplies a 'get_line()'
# method for returning the next line to be parsed, as well as a
# 'peek_ahead()' method
#
# The input parameter is an object with a 'get_line()' method
# which returns the next line to be parsed
#
#####################################################################

package Perltidy::LineBuffer;

sub new {

    # Create a line buffer around an object which supplies source lines
    # through its get_line() method.  The lookahead buffer starts empty.
    my ( $class, $line_source_object ) = @_;

    my $self = {
        _line_source_object => $line_source_object,
        _rlookahead_buffer  => [],
    };
    return bless $self, $class;
}

sub peek_ahead {

    # Return the line which is $buffer_index lines ahead of the next line
    # to be returned by get_line() (index 0 is that next line), without
    # removing it from the input stream.
    #
    # Lines are read from the source object and saved in the lookahead
    # buffer until the requested line is available.  If the source runs
    # out first, the undefined value it returned is saved in the buffer
    # and returned.
    my $self = shift;
    my ($buffer_index) = @_;
    my $line_source_object = $self->{_line_source_object};
    my $rlookahead_buffer  = $self->{_rlookahead_buffer};

    # Read as many lines as needed, not just one, so that the line
    # returned is always the one at the requested index.
    while ( $buffer_index >= scalar(@$rlookahead_buffer) ) {
        my $line = $line_source_object->get_line();
        push ( @$rlookahead_buffer, $line );
        return $line unless ( defined($line) );
    }
    return $$rlookahead_buffer[$buffer_index];
}

sub get_line {

    # Return the next line of input.  Lines which were saved in the
    # lookahead buffer are used up first, oldest first; after that,
    # lines come directly from the source object.
    my ($self) = @_;
    my $rlookahead_buffer = $self->{_rlookahead_buffer};

    my $line =
      @$rlookahead_buffer
      ? shift(@$rlookahead_buffer)
      : $self->{_line_source_object}->get_line();
    return $line;
}

########################################################################
#
# the Perltidy::Tokenizer package is essentially a filter which
# reads lines of perl source code from a source object and provides
# corresponding tokenized lines through its get_line() method.  Lines
# flow from the source_object to the caller like this:
#
# source_object --> LineBuffer_object --> Tokenizer -->  calling routine
#   get_line()         get_line()           get_line()     line_of_tokens
#
# The source object can be any object with a get_line() method which
# supplies one line (a character string) per call.
# The LineBuffer object is created by the Tokenizer.
# The Tokenizer returns a reference to a data structure 'line_of_tokens'
# containing one tokenized line for each call to its get_line() method.
#
########################################################################

package Perltidy::Tokenizer;

# Package-global variables shared by the tokenizer routines.  They are
# declared with 'use vars' so that they can be used unqualified under
# 'use strict'.
#
# $tokenizer_self holds the hash reference created by new(); the
# routines in this package read and update it directly rather than
# receiving it as an argument.
#
# Many of the variables in the first group (through $want_paren) are
# re-initialized for each file by prepare_for_a_new_file().  The second
# group (after the blank line) appears to consist mainly of lookup
# tables and debug flags set up elsewhere in the package -- TODO confirm
# against the BEGIN section.
use vars qw{
  $tokenizer_self
  $level_in_tokenizer
  $slevel_in_tokenizer
  $saw_negative_indentation
  $id_scan_state
  $last_nonblank_token
  $last_nonblank_type
  $identifier
  $in_quote
  $quote_type
  $quote_character
  $quote_pos
  $quote_depth
  $allowed_quote_modifiers
  $paren_depth
  @paren_type
  $brace_depth
  @brace_type
  @brace_structural_type
  @brace_package
  $square_bracket_depth
  @square_bracket_type
  @square_bracket_structural_type
  @depth_array
  @starting_line_of_current_depth
  @current_depth
  @lower_case_labels_at
  $saw_v_string
  %is_constant
  %is_user_function
  $max_token_index
  $peeked_ahead
  $current_package
  $unexpected_error_count
  $input_line
  $input_line_number
  $rpretokens
  $rpretoken_map
  $rpretoken_type
  $want_paren

  %TOKENIZER_DEBUG_FLAG
  $block_list_operator
  $block_operator
  %expecting_operator_token
  %expecting_operator_types
  %expecting_term_types
  %expecting_term_token
  %is_block_function
  %is_block_list_function
  %is_digraph
  %is_file_test_operator
  %is_trigraph
  %is_valid_token_type
  %is_keyword
  @opening_brace_names
  @closing_brace_names
};

# possible values of operator_expected()
#   TERM     - a term is expected next
#   UNKNOWN  - cannot tell whether a term or an operator comes next
#   OPERATOR - an operator is expected next
use constant TERM     => -1;
use constant UNKNOWN  => 0;
use constant OPERATOR => 1;

# Maximum number of little messages; probably need not be changed.
use constant MAX_NAG_MESSAGES => 6;

sub new {

    # Constructor.  Builds the tokenizer state hash, stores it in the
    # package variable $tokenizer_self, initializes the per-file state,
    # determines the starting indentation level, and returns the state
    # hash blessed into $class.
    #
    # Named arguments (all optional):
    #   source_object      - object with a get_line() method supplying
    #                        the source lines
    #   debugger_object    - if given, receives each CODE line through
    #                        write_debug_entry()
    #   diagnostics_object - if given, receives write_diagnostics() calls
    #   logger_object      - if given, receives warnings and logfile
    #                        entries
    #   starting_level     - indentation level of the first line, if the
    #                        caller knows it
    #   indent_columns     - number of columns per indentation level,
    #                        used when guessing the input tabbing
    #   tabs               - true to make a tab the first guess for the
    #                        input indentation string
    #   look_for_hash_bang - true to pass lines through untouched until a
    #                        hash-bang line has been seen
    #
    # Note: 'tabs' and 'indent_columns' are temporary and should be
    # removed asap
    my ( $class, @named_args ) = @_;

    # defaults first, so that anything passed by the caller overrides them
    my %args = (
        source_object      => undef,
        debugger_object    => undef,
        diagnostics_object => undef,
        logger_object      => undef,
        starting_level     => undef,
        indent_columns     => 4,
        tabs               => 0,
        look_for_hash_bang => 0,
        @named_args,
    );

    # The caller's source object only has to provide get_line(); wrap it
    # in a buffer object which provides both get_line() and peek_ahead().
    my $line_buffer_object =
      Perltidy::LineBuffer->new( $args{source_object} );

    my $starting_level = $args{starting_level};

    # Tokenizer state data is as follows:
    # _rhere_target_list   reference to list of here-doc targets
    # _here_doc_target     the target string for a here document
    # _quote_target        character we seek if chasing a quote
    # _line_start_quote    line where we started looking for a long quote
    # _in_here_doc         flag indicating if we are in a here-doc
    # _in_pod              flag set if we are in pod documentation
    # _in_error            flag set if we saw severe error (binary in script)
    # _in_data             flag set if we are in __DATA__ or __END__ text
    # _in_format           flag set if we are in a format description
    # _in_quote            flag telling if we are chasing a quote
    # _starting_level      indentation level of first line
    # _input_tabstr        string denoting one indentation level of input file
    # _know_input_tabstr   flag indicating if we know _input_tabstr
    # _line_buffer_object  object with get_line() method to supply source code
    # _diagnostics_object  place to write debugging information
    $tokenizer_self = {

        # collaborating objects
        _line_buffer_object => $line_buffer_object,
        _debugger_object    => $args{debugger_object},
        _diagnostics_object => $args{diagnostics_object},
        _logger_object      => $args{logger_object},

        # settings taken from the caller
        _starting_level      => $starting_level,
        _know_starting_level => defined($starting_level),
        _tabs                => $args{tabs},
        _indent_columns      => $args{indent_columns},
        _look_for_hash_bang  => $args{look_for_hash_bang},

        # what kind of text we are currently in
        _in_here_doc => 0,
        _in_data     => 0,
        _in_format   => 0,
        _in_error    => 0,
        _in_pod      => 0,
        _in_quote    => 0,

        # here-document and multi-line quote bookkeeping
        _rhere_target_list => undef,
        _here_doc_target   => "",
        _quote_target      => "",
        _line_start_quote  => -1,

        # input tabbing scheme; -1 means no guess has been made yet
        _input_tabstr      => "",
        _know_input_tabstr => -1,

        # things noticed while reading the file
        _last_line_number  => 0,
        _saw_perl_dash_P   => 0,
        _saw_perl_dash_w   => 0,
        _saw_use_strict    => 0,
        _saw_hash_bang     => 0,
        _saw_lc_filehandle => 0,
    };

    prepare_for_a_new_file();
    find_starting_indentation_level();

    return bless $tokenizer_self, $class;
}

# interface to Perltidy::Logger routines
sub warning {

    # Pass a warning message (all arguments) on to the logger object.
    # This is a no-op if no logger object was given to new().
    my $logger = $tokenizer_self->{_logger_object};
    $logger->warning(@_) if $logger;
}

sub write_logfile_entry {

    # Pass a logfile entry (all arguments) on to the logger object.
    # This is a no-op if no logger object was given to new().
    my $logger = $tokenizer_self->{_logger_object};
    $logger->write_logfile_entry(@_) if $logger;
}

sub interrupt_logfile {

    # Ask the logger object, if there is one, to interrupt the logfile.
    # Any arguments are ignored.
    my $logger = $tokenizer_self->{_logger_object};
    $logger->interrupt_logfile() if $logger;
}

sub resume_logfile {

    # Ask the logger object, if there is one, to resume the logfile.
    # Any arguments are ignored.
    my $logger = $tokenizer_self->{_logger_object};
    $logger->resume_logfile() if $logger;
}

sub increment_brace_error {

    # Tell the logger object, if there is one, that another brace error
    # was seen.  Any arguments are ignored.
    my $logger = $tokenizer_self->{_logger_object};
    $logger->increment_brace_error() if $logger;
}

sub report_definite_bug {

    # Tell the logger object, if there is one, that a definite bug was
    # found.  Any arguments are ignored.
    my $logger = $tokenizer_self->{_logger_object};
    $logger->report_definite_bug() if $logger;
}

sub brace_warning {

    # Pass a brace-related warning (all arguments) on to the logger
    # object.  This is a no-op if no logger object was given to new().
    my $logger = $tokenizer_self->{_logger_object};
    $logger->brace_warning(@_) if $logger;
}

sub get_saw_brace_error {

    # Return whatever the logger object reports from its
    # get_saw_brace_error() method, or 0 if there is no logger object.
    my $logger = $tokenizer_self->{_logger_object};
    return $logger ? $logger->get_saw_brace_error() : 0;
}

# interface to Perltidy::Diagnostics routines
sub write_diagnostics {

    # Pass a diagnostic message (all arguments) on to the diagnostics
    # object.  This is a no-op if no diagnostics object was given to new().
    my $diagnostics = $tokenizer_self->{_diagnostics_object};
    if ($diagnostics) {
        $diagnostics->write_diagnostics(@_);
    }
}

sub report_errors {

    # End-of-file report.  Called as a method, but all state is read
    # from $tokenizer_self.  Issues warnings for anything which was
    # still open when the input ran out (indentation level, nesting,
    # format, pod, here-doc, quote) and writes some style suggestions
    # to the logfile.
    my ($self) = @_;

    # the indentation level should be back where it started
    my $final_level = get_indentation_level();
    if ( $final_level != $tokenizer_self->{_starting_level} ) {
        warning("final indentation level: $final_level\n");
    }

    check_final_nesting_depths();

    # the user asked us to wait for a hash-bang line which never came
    my $missed_hash_bang = $tokenizer_self->{_look_for_hash_bang}
      && !$tokenizer_self->{_saw_hash_bang};
    if ($missed_hash_bang) {
        warning( "hit EOF without seeing hash-bang line; maybe don't need -x?\n"
        );
    }

    warning("hit EOF while in format description\n")
      if ( $tokenizer_self->{_in_format} );

    # this check may be removed after a year or so
    if ( $tokenizer_self->{_saw_lc_filehandle} ) {
        warning( <<'EOM' );
PLEASE NOTE: Prior to perltidy release 20010328, perltidy incorrectly
parsed a function call after a print/printf, with the result that a
space got added before the opening paren, with the result that something
like this:

  print usage() and exit;

became this:

  print usage () and exit;

thereby converting the function name to a filehandle according to perl's
weird parsing rules.  If this has happened to your script, you will 
need to remove the space before the opening paren by hand.
EOM
    }

    if ( $tokenizer_self->{_in_pod} ) {
        warning(
"hit EOF while in POD documentation (no =cut seen)\n\tit is ok to end in a POD doc, but it could indicate an error\n"
        );
    }

    if ( $tokenizer_self->{_in_here_doc} ) {
        warning(
"hit EOF while in here document $tokenizer_self->{_here_doc_target}\n"
        );
    }

    if ( $tokenizer_self->{_in_quote} ) {
        my $first_line = $tokenizer_self->{_line_start_quote};
        my $terminator = $tokenizer_self->{_quote_target};
        warning(
"hit EOF seeking end of quote/pattern starting at line $first_line ending in $terminator\n"
        );
    }

    # style suggestions go to the logfile only
    if ( !$tokenizer_self->{_saw_perl_dash_w} ) {

        # 'use warnings' is only suggested if the perl running
        # perltidy is at least 5.006
        write_logfile_entry(
            ( $] < 5.006 )
            ? "Suggest including '-w parameter'\n"
            : "Suggest including 'use warnings;'\n"
        );
    }

    write_logfile_entry("Use of -P parameter for defines is discouraged\n")
      if ( $tokenizer_self->{_saw_perl_dash_P} );

    write_logfile_entry("Suggest including 'use strict;'\n")
      if ( !$tokenizer_self->{_saw_use_strict} );

    # it is suggested that labels have at least one upper case character
    # for legibility and to avoid code breakage as new keywords are introduced
    if (@lower_case_labels_at) {
        write_logfile_entry( "Suggest using upper case characters in label(s)\n"
        );

        # the list separator makes the line numbers come out as (1)(2)(3)
        local $" = ')(';
        write_logfile_entry("  defined at line(s): (@lower_case_labels_at)\n");
    }
}

sub report_v_string {

    # Record the line number at which a v-string token was found, and
    # warn if the perl running perltidy is older than 5.006, which does
    # not implement v-strings.
    #   $tok - the text of the v-string token (used in the warning only)
    my $v_string = $_[0];

    $saw_v_string = $input_line_number;

    warning(
"Found v-string '$v_string' but v-strings are not implemented in your version of perl; see Camel 3 book ch 2\n"
    ) if ( $] < 5.006 );
}

sub know_input_tabstr {

    # True only if the input indentation string has been determined;
    # the flag is -1 before any guess has been made and 0 while it is
    # still a first guess.
    my $flag = $tokenizer_self->{_know_input_tabstr};
    return ( $flag == 1 );
}

sub get_input_line_number {

    # Number of the line most recently returned by get_line()
    # (0 if no line has been read yet).
    my $line_number = $tokenizer_self->{_last_line_number};
    return $line_number;
}

# returns the next tokenized line
sub get_line {

    # Read the next line from the line buffer, classify it, tokenize it
    # if it is perl code, and return a reference to a 'line_of_tokens'
    # hash describing it.  Returns undef when the input is exhausted.
    #
    # Called as a method, but all state is kept in $tokenizer_self.
    #
    # Lines inside a here-document, format, pod section, __DATA__/__END__
    # section, or following a severe error are returned untokenized with
    # the appropriate _line_type.  Everything else is handed to
    # tokenize_this_line(), after which the state flags it may have set
    # decide the final _line_type.

    my $self = shift;

    my $input_line = $tokenizer_self->{_line_buffer_object}->get_line();

    # End of input.  Test for definedness and length rather than for
    # truth, so that a final line consisting of just "0" (with no
    # newline) is processed instead of being mistaken for end of file.
    return undef unless ( defined($input_line) && length($input_line) );

    $tokenizer_self->{_last_line_number}++;

    # remove any control m; otherwise here-target's may not match;
    # trimming trailing white space would work too, but that would
    # change the original line
    $input_line =~ s/(\r|\035)*$//gi;

    my $input_line_number = $tokenizer_self->{_last_line_number};

    # create a data structure describing this line which will be
    # returned to the caller.

    # _line_type codes are: 
    #   SYSTEM         - system-specific code before hash-bang line
    #   CODE           - line of perl code (including comments)
    #   POD_START      - line starting pod, such as '=head'
    #   POD            - pod documentation text 
    #   POD_END        - last line of pod section, '=cut'
    #   HERE           - text of here-document 
    #   HERE_END       - last line of here-doc (target word)
    #   FORMAT         - format section
    #   FORMAT_END     - last line of format section, '.'
    #   DATA_START     - line containing __END__ or __DATA__
    #   DATA           - line following __END__ or __DATA__ 
    #   ERROR          - we are in big trouble, probably not a perl script

    # Other variables:
    #   _curly_brace_depth     - depth of curly braces at start of line
    #   _square_bracket_depth  - depth of square brackets at start of line
    #   _paren_depth           - depth of parens at start of line
    #   _starting_in_quote     - this line continues a multi-line quote
    #                            (so don't trim leading blanks!)
    #   _ending_in_quote       - this line ends in a multi-line quote
    #                            (so don't trim trailing blanks!)

    # The type 'EOF' is only a placeholder; every return below replaces it.
    my $line_of_tokens = {
        _line_type                => 'EOF',
        _line_text                => $input_line,
        _line_number              => $input_line_number,
        _rtoken_type              => undef,
        _rtokens                  => undef,
        _rlevels                  => undef,
        _rslevels                 => undef,
        _rblock_type              => undef,
        _python_indentation_level => 0,
        _starting_in_quote        =>
          ( $tokenizer_self->{_in_quote} && ( $quote_type eq 'Q' ) ),
        _ending_in_quote      => 0,
        _curly_brace_depth    => $brace_depth,
        _square_bracket_depth => $square_bracket_depth,
        _paren_depth          => $paren_depth,
    };

    # must print line unchanged if we are in a here document
    if ( $tokenizer_self->{_in_here_doc} ) {

        $line_of_tokens->{_line_type} = 'HERE';
        my $here_doc_target = $tokenizer_self->{_here_doc_target};

        # note that this match will work even if $here_doc_target is a quote 
        # containing backslashed characters
        # NOTE(review): the target is interpolated as a regular expression,
        # so a target containing other regex metacharacters may not match
        # literally -- confirm against the code which stores the target.
        if ( $input_line =~ /^$here_doc_target$/ ) {
            $line_of_tokens->{_line_type} = 'HERE_END';
            write_logfile_entry("Exiting HERE document $here_doc_target\n");

            my $rhere_target_list = $tokenizer_self->{_rhere_target_list};
            if (@$rhere_target_list) {    # there can be multiple here targets
                $here_doc_target = shift @$rhere_target_list;
                $tokenizer_self->{_here_doc_target} = $here_doc_target;
                write_logfile_entry( "Entering HERE document $here_doc_target\n"
                );
            }
            else {
                $tokenizer_self->{_in_here_doc}     = 0;
                $tokenizer_self->{_here_doc_target} = "";
            }
        }
        return $line_of_tokens;
    }

    # must print line unchanged if we are in a format section
    elsif ( $tokenizer_self->{_in_format} ) {

        # a format is terminated by a line containing a lone '.'
        if ( $input_line =~ /^\.[\s#]*$/ ) {
            write_logfile_entry("Exiting format section\n");
            $tokenizer_self->{_in_format} = 0;
            $line_of_tokens->{_line_type} = 'FORMAT_END';
        }
        else {
            $line_of_tokens->{_line_type} = 'FORMAT';
        }
        return $line_of_tokens;
    }

    # must print line unchanged if we are in pod documentation
    elsif ( $tokenizer_self->{_in_pod} ) {

        $line_of_tokens->{_line_type} = 'POD';
        if ( $input_line =~ /^=cut/ ) {
            $line_of_tokens->{_line_type} = 'POD_END';
            write_logfile_entry("Exiting POD section\n");
            $tokenizer_self->{_in_pod} = 0;
        }
        return $line_of_tokens;
    }

    # must print line unchanged if we have seen a severe error (i.e., we
    # are seeing illegal tokens and cannot continue.  Syntax errors do
    # not pass this route).  Calling routine can decide what to do, but
    # the default can be to just pass all lines as if they were after __END__ 
    elsif ( $tokenizer_self->{_in_error} ) {
        $line_of_tokens->{_line_type} = 'ERROR';
        return $line_of_tokens;
    }

    # must print line unchanged if we are __DATA__ or __END__
    # (once we enter this mode, we stay in it to the end of the file)
    elsif ( $tokenizer_self->{_in_data} ) {
        $line_of_tokens->{_line_type} = 'DATA';
        return $line_of_tokens;
    }

    # check for a hash-bang line if we haven't seen one
    if ( !$tokenizer_self->{_saw_hash_bang} ) {
        if ( $input_line =~ /^\#\!.*perl\b/ ) {

            # the flag records the line number of the hash-bang line
            $tokenizer_self->{_saw_hash_bang} = $input_line_number;
            if ( ( $input_line_number > 1 )
              && ( !$tokenizer_self->{_look_for_hash_bang} ) )
            {
                warning(
"There seems to be a hash-bang after line 1; do you need to run with -x ?\n"
                );
            }

            # check for -w and -P flags
            if ( $input_line =~ /^\#\!.*perl\s.*-.*P/ ) {
                $tokenizer_self->{_saw_perl_dash_P} = 1;
            }

            if ( $input_line =~ /^\#\!.*perl\s.*-.*w/ ) {
                $tokenizer_self->{_saw_perl_dash_w} = 1;
            }
        }
    }

    # wait for a hash-bang before parsing if the user invoked us with -x
    if ( $tokenizer_self->{_look_for_hash_bang}
      && !$tokenizer_self->{_saw_hash_bang} )
    {
        $line_of_tokens->{_line_type} = 'SYSTEM';
        return $line_of_tokens;
    }

    # now we know that it is ok to tokenize the line...
    # the line tokenizer will modify any of these private variables:
    #        _rhere_target_list
    #        _in_data 
    #        _in_format
    #        _in_error
    #        _in_pod 
    #        _in_quote
    tokenize_this_line($line_of_tokens);

    # Now finish defining the return structure and return it
    $line_of_tokens->{_ending_in_quote} = $tokenizer_self->{_in_quote};

    # handle severe error (binary data in script)
    if ( $tokenizer_self->{_in_error} ) {
        $tokenizer_self->{_in_quote} = 0;    # to avoid any more messages
        warning("Giving up after error\n");
        $line_of_tokens->{_line_type} = 'ERROR';
        reset_indentation_level(0);          # avoid error messages
        return $line_of_tokens;
    }

    # handle start of pod documentation
    if ( $tokenizer_self->{_in_pod} ) {
        $line_of_tokens->{_line_type} = 'POD_START';
        write_logfile_entry("Entering POD section\n");
        return $line_of_tokens;
    }

    # update indentation levels for log messages
    if ( $input_line !~ /^\s*$/ ) {
        my $rlevels = $line_of_tokens->{_rlevels};
        my $structural_indentation_level = $$rlevels[0];
        my ( $python_indentation_level, $msg ) =
          find_indentation_level( $input_line, $structural_indentation_level );
        if ($msg) { write_logfile_entry("$msg") }
        $line_of_tokens->{_python_indentation_level} =
          $python_indentation_level;
    }

    # see if this line contains here doc targets
    my $rhere_target_list = $tokenizer_self->{_rhere_target_list};
    if (@$rhere_target_list) {
        my $here_doc_target = shift @$rhere_target_list;
        $tokenizer_self->{_in_here_doc}     = 1;
        $tokenizer_self->{_here_doc_target} = $here_doc_target;
        write_logfile_entry("Entering HERE document $here_doc_target\n");
    }

    # NOTE: __END__ and __DATA__ statements are written unformatted
    # because they can theoretically contain additional characters
    # which are not tokenized (and cannot be read with <DATA> either!).
    if ( $tokenizer_self->{_in_data} ) {
        $line_of_tokens->{_line_type} = 'DATA_START';
        write_logfile_entry("Starting $tokenizer_self->{_in_data} section\n");
        return $line_of_tokens;
    }

    # now, finally, we know that this line is type 'CODE'
    $line_of_tokens->{_line_type} = 'CODE';

    if ( $tokenizer_self->{_debugger_object} ) {
        $tokenizer_self->{_debugger_object}->write_debug_entry($line_of_tokens);
    }

    # Note: if keyword 'format' occurs in this line code, it is still CODE
    # (keyword 'format' need not start a line)
    if ( $tokenizer_self->{_in_format} ) {
        write_logfile_entry("Entering format section\n");
    }

    # keep track of where a multi-line quote started, for error messages;
    # _line_start_quote is -1 when we are not inside one
    if ( $tokenizer_self->{_in_quote}
      and ( $tokenizer_self->{_line_start_quote} < 0 ) )
    {

        if ( ( my $quote_target = get_quote_target() ) !~ /^\s*$/ ) {
            $tokenizer_self->{_line_start_quote} = $input_line_number;
            $tokenizer_self->{_quote_target}     = $quote_target;
            write_logfile_entry(
              "Start multi-line quote or pattern ending in $quote_target\n");
        }
    }
    elsif ( ( $tokenizer_self->{_line_start_quote} >= 0 )
      and !$tokenizer_self->{_in_quote} )
    {
        $tokenizer_self->{_line_start_quote} = -1;
        write_logfile_entry("End of multi-line quote or pattern\n");
    }

    # we are returning a line of CODE
    return $line_of_tokens;
}

sub find_starting_indentation_level {

    # Establish the indentation level of the first line of the file,
    # store it in $tokenizer_self->{_starting_level}, and initialize the
    # running level counters with reset_indentation_level().
    #
    # The level is taken from, in order of preference:
    #   1. the starting level supplied by the caller, if known;
    #   2. zero, if we have been told to look for a hash-bang line;
    #   3. a guess based on the leading whitespace of the first line
    #      which is neither blank nor a comment (or zero if the very
    #      first line starts with '#!').

    my $starting_level = 0;

    # use value if given as parameter
    if ( $tokenizer_self->{_know_starting_level} ) {
        $starting_level = $tokenizer_self->{_starting_level};
    }

    # if we know there is a hash_bang line, the level must be zero
    elsif ( $tokenizer_self->{_look_for_hash_bang} ) {
        $tokenizer_self->{_know_starting_level} = 1;
    }

    # otherwise figure it out from the input file
    else {
        my $buffer     = $tokenizer_self->{_line_buffer_object};
        my $peek_count = 0;
        my $note       = "";

        # look ahead without consuming any input
        while ( my $text = $buffer->peek_ahead( $peek_count++ ) ) {

            # if first line is #! then assume starting level is zero
            if ( $peek_count == 1 && $text =~ /^\#\!/ ) {
                $starting_level = 0;
                last;
            }

            # skip comments and blank lines
            next if ( $text =~ /^\s*#/ || $text =~ /^\s*$/ );

            # a negative structural level tells find_indentation_level
            # that it has to guess the tabbing scheme
            ( $starting_level, $note ) = find_indentation_level( $text, -1 );
            write_logfile_entry("$note") if ($note);
            last;
        }

        $note =
          "Line $peek_count implies starting-indentation-level = $starting_level\n";

        # say which tabbing scheme the guess was based on
        if ( $starting_level > 0 ) {
            my $input_tabstr = $tokenizer_self->{_input_tabstr};
            if ( $input_tabstr ne "\t" ) {
                my $cols = length($input_tabstr);
                $note .=
                  "by guessing input tabbing uses $cols blanks per level\n";
            }
            else {
                $note .= "by guessing input tabbing uses 1 tab per level\n";
            }
        }
        write_logfile_entry("$note");
    }

    $tokenizer_self->{_starting_level} = $starting_level;
    reset_indentation_level($starting_level);
}

=pod

Find indentation level given an input line.  At the same time, try to
figure out the input tabbing scheme.

There are two types of calls:

Type 1: $structural_indentation_level < 0 
 In this case we have to guess $input_tabstr to figure out the level.

Type 2: $structural_indentation_level >= 0
 In this case the level of this line is known, and this routine can
 update the tabbing string, if still unknown, to make the level correct.

=cut
sub find_indentation_level {

    # Determine the indentation level implied by the leading whitespace
    # of a line, refining the guess at the input tabbing scheme as a
    # side effect.
    #
    # Parameters:
    #   $line                         - the input line
    #   $structural_indentation_level - the known level of this line, or
    #                                   a negative number if unknown
    #
    # Returns the list ( $level, $msg ):
    #   $level - number of whole copies of the input tab string at the
    #            start of the line
    #   $msg   - text for the logfile if the tabbing scheme was just
    #            determined, otherwise ""
    #
    # Reads and updates $tokenizer_self->{_know_input_tabstr}
    # (-1 = no guess yet, 0 = first guess only, 1 = determined) and
    # $tokenizer_self->{_input_tabstr}.
    my ( $line, $structural_indentation_level ) = @_;
    my $level = 0;
    my $msg   = "";

    my $know_input_tabstr = $tokenizer_self->{_know_input_tabstr};
    my $input_tabstr      = $tokenizer_self->{_input_tabstr};

    # find leading whitespace
    my $leading_whitespace = ( $line =~ /^(\s*)/ ) ? $1 : "";

    # make first guess at input tabbing scheme if necessary
    if ( $know_input_tabstr < 0 ) {

        $know_input_tabstr = 0;

        if ( $tokenizer_self->{_tabs} ) {

            # expect tabs, unless this line is indented with spaces only
            $input_tabstr = "\t";
            if ( length($leading_whitespace) > 0 ) {
                if ( $leading_whitespace !~ /\t/ ) {

                    my $cols = $tokenizer_self->{_indent_columns};

                    if ( length($leading_whitespace) < $cols ) {
                        $cols = length($leading_whitespace);
                    }
                    $input_tabstr = " " x $cols;
                }
            }
        }
        else {

            # expect spaces, unless this line starts with a tab
            $input_tabstr = " " x $tokenizer_self->{_indent_columns};

            if ( length($leading_whitespace) > 0 ) {
                if ( $leading_whitespace =~ /^\t/ ) {
                    $input_tabstr = "\t";
                }
            }
        }
        $tokenizer_self->{_know_input_tabstr} = $know_input_tabstr;
        $tokenizer_self->{_input_tabstr}      = $input_tabstr;
    }

    # determine the input tabbing scheme if possible
    if ( ( $know_input_tabstr == 0 ) && ( length($leading_whitespace) > 0 )
      && ( $structural_indentation_level > 0 ) )
    {
        my $saved_input_tabstr = $input_tabstr;

        # check for common case of one tab per indentation level
        if ( $leading_whitespace eq "\t" x $structural_indentation_level ) {
            $input_tabstr = "\t";
            $msg          = "Guessing old indentation was tab character\n";
        }

        else {

            # detab any tabs based on 8 blanks per tab; each leading tab
            # must contribute 8 columns, so the replacement is computed
            # from the number of tabs matched
            my $entabbed = "";
            if ( $leading_whitespace =~
                s/^(\t+)/' ' x ( 8 * length($1) )/e )
            {
                $entabbed = "entabbed";
            }

            # now compute tabbing from number of spaces
            my $columns =
              length($leading_whitespace) / $structural_indentation_level;
            if ( $columns == int $columns ) {
                $msg =
                  "Guessing old indentation was $columns $entabbed spaces\n";
            }
            else {
                $columns = int $columns;
                $msg     =
                "old indentation is unclear, using $columns $entabbed spaces\n";
            }
            $input_tabstr = " " x $columns;
        }
        $know_input_tabstr = 1;
        $tokenizer_self->{_know_input_tabstr} = $know_input_tabstr;
        $tokenizer_self->{_input_tabstr}      = $input_tabstr;

        # see if mistakes were made: the starting level was guessed
        # using a tab string which has now turned out to be different
        if ( ( $tokenizer_self->{_starting_level} > 0 )
          && !$tokenizer_self->{_know_starting_level} )
        {

            if ( $input_tabstr ne $saved_input_tabstr ) {
                warning(
"I made a bad starting level guess; rerun with a value for -sil \n"
                );
            }
        }
    }

    # use current guess at input tabbing to get input indentation level
    #
    # Patch to handle a common case of entabbed leading whitespace
    # If the leading whitespace equals 4 spaces and we also have
    # tabs, detab the input whitespace assuming 8 spaces per tab.  
    if ( length($input_tabstr) == 4 ) {
        $leading_whitespace =~ s/^(\t+)/' ' x ( 8 * length($1) )/e;
    }

    # count whole tab strings; the length test also guards against
    # looping forever on an empty tab string
    if ( ( my $len_tab = length($input_tabstr) ) > 0 ) {
        my $pos = 0;

        while ( substr( $leading_whitespace, $pos, $len_tab ) eq $input_tabstr )
        {
            $pos += $len_tab;
            $level++;
        }
    }
    return ( $level, $msg );
}

sub dump_token_types {

    # Print a description of all token types to a filehandle.
    # Called as a class method:
    #   $class - the class name (not used)
    #   $fh    - filehandle to print to
    my ( $class, $fh ) = @_;

    # This should be the latest list of token types in use
    # adding NEW_TOKENS: add a comment here
    my $token_type_listing = <<'END_OF_LIST';

Here is a list of the token types currently used.  
For the following tokens, the "type" of a token is just the token itself.  

.. :: << >> ** && .. ||  -> => += -= .= %= &= |= ^= *= <>
( ) <= >= == =~ !~ != ++ -- /= x=
... **= <<= >>= &&= ||= <=> 
, + - / * | % ! x ~ = \ ? : . < > ^ &

The following additional token types are defined:

 type    meaning
    b    blank (white space) 
    {    indent: left structural curly brace or square bracket
	     (code block, anonymous hash reference, or anonymous array reference)
    }    outdent: right structural curly brace or square bracket
	[    left non-structural square bracket (enclosing an array index)
	]    right non-structural square bracket
    L    left non-structural curly brace (enclosing a key)
    R    right non-structural curly brace 
    ;    terminal semicolon
    f    indicates a semicolon in a "for" statement
    h    here_doc operator <<
    #    a comment
    Q    indicates a quote or pattern
    q    indicates a qw quote block
    k    a perl keyword
    C    user-defined constant or constant function (with void prototype = ())
    U    user-defined function taking parameters
    G    user-defined function taking block parameter (like grep/map/eval)
    t    type indicater such as %,$,@,*,&,sub
    w    bare word (perhaps a subroutine call)
    i    identifier of some type (with leading %, $, @, *, &, sub )
    n    a number
    v    a v-string
    F    a file test operator (like -e)
    Y    File handle
    Z    identifier which may be file handle or object
    J    LABEL:  code block label
    j    LABEL after next, last, redo, goto
    p    unary +
    m    unary -
    pp   pre-increment operator ++
    mm   pre-decrement operator -- 
END_OF_LIST

    print $fh $token_type_listing;
}

# This is an unused debug routine
sub dump_functions {

    # Debug aid: list on STDOUT, package by package, the user-defined
    # subs and the constants recorded so far in %is_user_function and
    # %is_constant.  Subs also recorded in %is_block_list_function or
    # %is_block_function are tagged accordingly.

    my $fh = *STDOUT;

    foreach my $pkg ( keys %is_user_function ) {
        print $fh "\nnon-constant subs in package $pkg\n";

        foreach my $sub ( keys %{ $is_user_function{$pkg} } ) {
            my $takes_block_list = $is_block_list_function{$pkg}{$sub};
            my $takes_block      = $is_block_function{$pkg}{$sub};

            # 'block' takes precedence if a sub is recorded in both tables
            my $msg =
                $takes_block      ? 'block'
              : $takes_block_list ? 'block_list'
              : "";
            print $fh "$sub $msg\n";
        }
    }

    foreach my $pkg ( keys %is_constant ) {
        print $fh "\nconstants and constant subs in package $pkg\n";

        foreach my $sub ( keys %{ $is_constant{$pkg} } ) {
            print $fh "$sub\n";
        }
    }
}

sub prepare_for_a_new_file {

    # Reset the package-level tokenizer state to its initial values.
    # Called from new() before any line is read.  Takes no arguments.

    $saw_negative_indentation = 0;
    $id_scan_state       = '';
    $last_nonblank_token = ';';    # the only possible starting state which
    $last_nonblank_type  = ';';    # will make a leading brace a code block
    $identifier          = '';
    $in_quote   = 0;     # flag telling if we are chasing a quote, and what kind
    $quote_type = 'Q';
    $quote_character = "";    # character we seek if chasing a quote
    $quote_pos   = 0;  # next character index to check for case of alphanum char
    $quote_depth = 0;
    $allowed_quote_modifiers = "";
    $paren_depth = 0;
    $brace_depth = 0;
    $square_bracket_depth = 0;
    $current_package = "main";
    @current_depth[ 0 .. 2 ] = ( 0, 0, 0 );
    $paren_type[$paren_depth] = '';
    $brace_type[$brace_depth] = ';';    # identify opening brace as code block
    $brace_structural_type[$brace_depth]        = '';
    $square_bracket_type[$square_bracket_depth] = '';
    $square_bracket_structural_type[$square_bracket_depth] = '';

    # indexed by brace depth, like the other @brace_* arrays (both depths
    # are zero at this point, so the element set is the same as before)
    $brace_package[$brace_depth] = $current_package;
    %is_constant            = ();    # user-defined constants
    %is_user_function       = ();    # user-defined functions
    %is_block_function      = ();
    %is_block_list_function = ();
    $unexpected_error_count = 0;
    $want_paren             = "";
    @lower_case_labels_at   = ();
    $saw_v_string           = 0;     # for warning of v-strings on older perl
}

sub get_quote_target {

    # Return the result of matching_end_token() for the current quote
    # character, i.e. the character being sought to end the quote
    # (presumably the closing delimiter paired with an opening one --
    # confirm against matching_end_token).
    return matching_end_token($quote_character);
}

sub get_indentation_level {

    # Return the tokenizer's current indentation level counter
    # ($level_in_tokenizer), as last set by reset_indentation_level()
    # or updated elsewhere in the tokenizer.
    return $level_in_tokenizer;
}

sub reset_indentation_level {

    # Set both of the tokenizer's level counters, $level_in_tokenizer
    # and $slevel_in_tokenizer, to the given value.
    my ($new_level) = @_;
    $level_in_tokenizer = $slevel_in_tokenizer = $new_level;
}

sub tokenize_this_line {

=pod 

This routine breaks a line of perl code into tokens which are of use in
indentation and reformatting.  One of my goals has been to define tokens
such that a newline may be inserted between any pair of tokens without
changing or invalidating the program. This version comes close to this,
although there are necessarily a few exceptions which must be caught by
the formatter.  Many of these involve the treatment of bare words.

The tokens and their types are returned in arrays.  See previous
routine for their names.

See also the array "valid_token_types" in the BEGIN section for an
up-to-date list.

To simplify things, token types are either a single character, or they
are identical to the tokens themselves.

As a debugging aid, the -D flag creates a file containing a side-by-side
comparison of the input string and its tokenization for each line of a file.
This is an invaluable debugging aid.

In addition to tokens, and some associated quantities, the tokenizer
also returns flags indicating any special line types.  These include
quotes, here_docs, formats.

-----------------------------------------------------------------------

Hacker's guide to adding NEW_TOKENS:

New token types will undoubtedly be needed in the future both to keep up
with changes in perl and to help adapt the tokenizer to other applications.

Here are some notes on the minimal steps.  I wrote these notes while
adding the 'v' token type for v-strings, which are things like version
numbers 5.6.0, and ip addresses, and will use that as an example.  ( You
can use your editor to search for the string "NEW_TOKENS" to find the
appropriate sections to change):

*. Try to talk somebody else into doing it!  If not, ..

*. Make a backup of your current version in case things don't work out!

*. Think of a new, unused character for the token type, and add to
the array @valid_token_types in the BEGIN section of this package.
For example, I used 'v' for v-strings.

*. Implement coding to recognize the $type of the token in this routine.
This is the hardest part, and is best done by imitating or modifying
some of the existing coding.  For example, to recognize v-strings, I
patched 'sub scan_bare_identifier' to recognize v-strings beginning with
'v' and 'sub scan_number' to recognize v-strings without the leading 'v'.

*. Update sub operator_expected.  This update is critically important but
the coding is trivial.  Look at the comments in that routine for help.
For v-strings, which should behave like numbers, I just added 'v' to the
regex used to handle numbers and strings (types 'n' and 'Q').

*. Implement a 'bond strength' rule in sub set_bond_strengths in
Perltidy::Formatter for breaking lines around this token type.  You can
skip this step and take the default at first, then adjust later to get
desired results.  For adding type 'v', I looked at sub bond_strength and
saw that number type 'n' was using default strengths, so I didn't do
anything.  I may tune it up someday if I don't like the way line
breaks with v-strings look.

*. Implement a 'whitespace' rule in sub set_white_space_flag in
Perltidy::Formatter.  For adding type 'v', I looked at this routine
and saw that type 'n' used spaces on both sides, so I just added 'v'
to the array @spaces_both_sides. 

*. Update HtmlWriter package so that users can colorize the token as
desired.  This is quite easy; see comments identified by 'NEW_TOKENS' in
that package.  For v-strings, I initially chose to use a default color
equal to the default for numbers, but it might be nice to change that
eventually.

*. Update comments in Perltidy::Tokenizer::dump_token_types.  

*. Run lots and lots of debug tests.  Start with special files designed
to test the new token type.  Run with the -D flag to create a .DEBUG
file which shows the tokenization.  When these work ok, test as many old
scripts as possible.  Start with all of the '.t' files in the 'test'
directory of the distribution file.  Compare .tdy output with previous
version and updated version to see the differences.  Then include as
many more files as possible. My own technique has been to collect a huge
number of perl scripts (thousands!) into one directory, run
'perltidy *' there, and then run diff between the output of the
previous version and the current version.

-----------------------------------------------------------------------

=cut
    use constant BRACE => 0;
    use constant SQUARE_BRACKET => 1;
    use constant PAREN => 2;
    my ($line_of_tokens)       = @_;
    my ($untrimmed_input_line) = $line_of_tokens->{_line_text};

    # patch while coding change is underway
    # make caller's private data available to allow access
    # $tokenizer_self = $caller_tokenizer_self;

    # extract line number for use in error messages
    $input_line_number = $line_of_tokens->{_line_number};

    # check for pod documentation
    if ( ( $untrimmed_input_line =~ /^=[A-Za-z_]/ ) ) {

        # must not be in multi-line quote
        # and must not be in an eqn
        if ( !$in_quote and ( operator_expected( 'b', '=', 'b' ) == TERM ) ) {
            $tokenizer_self->{_in_pod} = 1;
            return;
        }
    }

    $input_line = $untrimmed_input_line;

    chomp $input_line;

    # trim start of this line unless we are continuing a quoted line
    # do not trim end because we might end in a quote (test: deken4.pl)
    # Perltidy::Formatter will delete needless trailing blanks
    unless ( $in_quote && ( $quote_type eq 'Q' ) ) {
        $input_line =~ s/^\s*//;    # trim left end
    }

    # initialize for the main loop
    my @output_token_list = ();    # stack of output token indexes
    my @output_token_type = ();    # token types
    my @output_block_type = ();    # types associated with ( ) { }

    my $tok  = $last_nonblank_token;
    my $type = $last_nonblank_type;
    my $last_nonblank_i = -1;
    my $block_type      = "";
    my $next_tok;
    my $next_type;
    my $expecting;
    my $here_doc_target;
    my @here_target_list = ();         # list of here-doc target strings

    $peeked_ahead = 0;

    # tokenization is done in two stages..
    # stage 1 is a very simple pre-tokenization
    my $max_tokens_wanted = 0;    # this signals pre_tokenize to get all tokens

    # a little optimization for a full-line comment
    if ( !$in_quote && ( $input_line =~ /^#/ ) ) {
        $max_tokens_wanted = 1    # no use tokenizing a comment
    }

    ( $rpretokens, $rpretoken_map, $rpretoken_type ) =
      pre_tokenize( $input_line, $max_tokens_wanted );

    $max_token_index = scalar(@$rpretokens) - 1;
    push ( @$rpretokens, ' ', ' ', ' ' );    # extra whitespace simplifies logic
    push ( @$rpretoken_map, 0, 0, 0 );       # shouldn't be referenced
    push ( @$rpretoken_type, 'b', 'b', 'b' );

    # temporary copies while coding change is underway
    my ( $rtokens, $rtoken_map, $rtoken_type ) =
      ( $rpretokens, $rpretoken_map, $rpretoken_type );

    for my $i ( 0 .. $max_token_index + 3 ) {
        $output_token_type[$i] = "";
        $output_block_type[$i] = "";
    }
    my $i     = -1;
    my $i_tok = -1;

    # main tokenization loop 
    while ( ++$i <= $max_token_index ) {

        if ($in_quote) {    # continue looking for end of a quote
            $type = $quote_type;

            unless (@output_token_list) {    # initialize if continuation line
                push ( @output_token_list, $i );
                $output_token_type[$i] = $type;

            }
            $tok = $quote_character unless ( $quote_character =~ /^\s*$/ );
            ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth ) =
              do_quote( $i, $in_quote, $quote_character, $quote_pos,
              $quote_depth, $rtokens, $rtoken_map );
            last if ($in_quote);
            $quote_character = '';    # re-initialize for next search
            $quote_pos       = 0;
            $quote_type      = 'Q';
            last if ( ++$i > $max_token_index );

            if ($allowed_quote_modifiers) {    # skip past any modifiers

                # check for exact quote modifiers
                if ( $$rtokens[$i] =~ /^[A-Za-z_]/ ) {
                    my $str = $$rtokens[$i];
                    while ( $str =~ /\G$allowed_quote_modifiers/gc ) {}

                    if ( defined( pos($str) ) ) {

                        # matched
                        if ( pos($str) == length($str) ) {
                            last if ( ++$i > $max_token_index );
                        }

                        # Looks like the turkey joined a quote modifier
                        # and keyword, maybe something like
                        # s/xxx/yyy/gefor @k=...  
                        # Example is "galgen.pl".  Would have to split
                        # the word and insert a new token in the
                        # pre-token list.  This is so rare that I haven't
                        # done it.  Will just issue a warning citation.

                        # This error might also be triggered if my quote
                        # modifier characters are incomplete
                        else {
                            warning(<<EOM);

Partial match to quote modifier $allowed_quote_modifiers at word: '$str'
Please put a space between quote modifiers and trailing keywords.
EOM

                            # print "token $$rtokens[$i]\n";
                            # my $num = length($str) - pos($str);
# $$rtokens[$i]=substr($$rtokens[$i],pos($str),$num);
                            # print "continuing with new token $$rtokens[$i]\n";

                            # skipping past this token does least damage
                            last if ( ++$i > $max_token_index );
                        }
                    }
                    else {

                        # example file: rokicki4.pl
                        # This error might also be triggered if my quote
                        # modifier characters are incomplete
                        write_logfile_entry(
                          "Note: found word $str at quote modifier location\n");
                    }
                }
                $allowed_quote_modifiers = "";
            }
        }

        unless ( $tok =~ /^\s*$/ ) {

            # try to catch some common errors
            if ( ( $type eq 'n' ) && ( $tok ne '0' ) ) {

                if ( $last_nonblank_token eq 'eq' ) {
                    write_logfile_entry("Should 'eq' be '==' here ?\n");
                }
                elsif ( $last_nonblank_token eq 'ne' ) {
                    write_logfile_entry("Should 'ne' be '!=' here ?\n");
                }
            }
            $last_nonblank_token = $tok;
            $last_nonblank_type  = $type;
            $last_nonblank_i     = $i_tok;
        }

        # store previous token type
        if ( $i_tok >= 0 ) {
            $output_token_type[$i_tok] = $type;
            $output_block_type[$i_tok] = $block_type;
        }
        my $pre_tok  = $$rtokens[$i];        # get the next pre-token
        my $pre_type = $$rtoken_type[$i];    # and type
        $tok        = $pre_tok;
        $type       = $pre_type; # to be modified as necessary
        $block_type = "";        # blank for all tokens except code block braces
        $i_tok      = $i;

        # this pre-token will start an output token
        push ( @output_token_list, $i_tok );

        # continue gathering identifier if necessary
        # but do not start on blanks and comments
        if ( $id_scan_state && $pre_type !~ /[b#]/ ) {

            if ( $id_scan_state =~ /(sub|package)/ ) {
                ( $i, $tok, $type, $id_scan_state ) =
                  scan_id( $input_line, $i, $tok, $rtokens, $rtoken_map,
                  $id_scan_state );
            }
            else {
                ( $i, $tok, $type, $id_scan_state, $identifier ) =
                  scan_identifier( $i, $id_scan_state, $identifier, $rtokens );
            }

            last if ($id_scan_state);
            next if ( ( $i > 0 ) || $type );

            # didn't find any token; start over
            $type = $pre_type;
            $tok  = $pre_tok;
        }

        # handle whitespace tokens..
        next if ( $type eq 'b' );
        my $prev_tok  = $i > 0 ? $$rtokens[ $i - 1 ] : ' ';
        my $prev_type = $i > 0 ? $$rtoken_type[ $i - 1 ] : 'b';

        # Build larger tokens where possible, since we are not in a quote.
        #
        # First try to assemble digraphs.  The following tokens are
        # excluded and handled specially: 
        # '/=' is excluded because the / might start a pattern.
        # 'x=' is excluded since it might be $x=, with $ on previous line  
        # '**' and *= might be typeglobs of punctuation variables
        # I have allowed tokens starting with <, such as <=,
        # because I don't think these could be valid angle operators.
        # test file: storrs4.pl
        my $test_tok = $tok . $$rtokens[ $i + 1 ];

        if ( $is_digraph{$test_tok} && ( $test_tok ne '/=' )  # might be pattern
          && ( $test_tok ne 'x=' )                            # might be $x
          && ( $test_tok ne '**' )                            # typeglob?
          && ( $test_tok ne '*=' ) )                          # typeglob?
        {
            $tok = $test_tok;
            $i++;

            # Now try to assemble trigraphs.  Note that all possible
            # perl trigraphs can be constructed by appending a character
            # to a digraph.
            $test_tok = $tok . $$rtokens[ $i + 1 ];

            if ( $is_trigraph{$test_tok} ) {
                $tok = $test_tok;
                $i++;
            }
        }
        $type      = $tok;
        $next_tok  = $$rtokens[ $i + 1 ];
        $next_type = $$rtoken_type[ $i + 1 ];
        $expecting = operator_expected( $prev_type, $tok, $next_type );

        # This debug mode forces perltidy to rely exclusively on its guessing
        # algorithms.  This is a simple way to test them.
        my $correct_expecting;
        if ( $TOKENIZER_DEBUG_FLAG{GUESS} ) {
            $correct_expecting = $expecting;
            $expecting         = UNKNOWN;
        }

        if ( $TOKENIZER_DEBUG_FLAG{TOKENIZE} ) {
            local $" = ')(';
            my @debug_list =
              ( $last_nonblank_token, $tok, $next_tok, $brace_depth,
              $brace_type[$brace_depth], $paren_depth,
              $paren_type[$paren_depth] );
            print "TOKENIZE:(@debug_list)\n";
        }

        ###############################################################
        # We have the next token, $tok.
        # Now we have to examine this token and decide what it is
        # and define its $type
        #
        # section 1: bare words
        ###############################################################
        if ( $pre_type eq 'w' ) {
            my ( $next_nonblank_token, $i_next ) =
              find_next_nonblank_token( $i, $rtokens );

            # quote a word followed by => operator
            if ( $next_nonblank_token eq '=' ) {

                if ( $$rtokens[ $i_next + 1 ] eq '>' ) {
                    if ( $is_constant{$current_package}{$tok} ) { $type = 'C' }
                    elsif ( $is_user_function{$current_package}{$tok} ) {
                        $type = 'U';
                    }
                    elsif ( $tok =~ /^v\d+$/ ) {
                        $type = 'v';
                        unless ($saw_v_string) { report_v_string($tok) }
                    }
                    else { $type = 'w' }

                    next;
                }
            }

            # quote a bare word within braces..like xxx->{s}
            # note that we must be sure this is not a structural brace,
            # to avoid mistaking {s} in the following for a quoted bare word:
            # 	for(@[){s}bla}BLA}
            if ( ( $last_nonblank_type eq 'L' )
              && ( $next_nonblank_token eq '}' ) )
            {
                $type = 'w';
                next;
            }

            # handle operator x (now we know it isn't $x=)
            if ( ( $tok =~ /^x\d*$/ ) && ( $expecting == OPERATOR ) ) {
                if ( $tok eq 'x' ) {

                    if ( $$rtokens[ $i + 1 ] eq '=' ) {    # x=
                        $tok  = 'x=';
                        $type = $tok;
                        $i++;
                    }
                    else {
                        $type = 'x';
                    }
                }

                # FIXME: Patch: mark something like x4 as an integer for now
                # It gets fixed downstream.  This is easier than
                # splitting the pretoken.
                else {
                    $type = 'n';
                }
            }

            elsif ( $tok eq 'sub' ) {
                if ( $expecting == OPERATOR ) {
                    unexpected( $tok, "operator", $i_tok, $last_nonblank_i );
                }

                ( $i, $tok, $type, $id_scan_state ) =
                  scan_id( $input_line, $i, $tok, $rtokens, $rtoken_map,
                  $id_scan_state );
            }

            elsif ( $tok eq 'package' ) {

                if ( $expecting == OPERATOR ) {
                    unexpected( $tok, "operator", $i_tok, $last_nonblank_i );
                }
                ( $i, $tok, $type, $id_scan_state ) =
                  scan_id( $input_line, $i, $tok, $rtokens, $rtoken_map,
                  $id_scan_state );
            }

            elsif ( ( $tok eq 'strict' ) and ( $last_nonblank_token eq 'use' ) )
            {
                $tokenizer_self->{_saw_use_strict} = 1;
                $type = 'k';
            }

            elsif ( ( $tok eq 'warnings' )
              and ( $last_nonblank_token eq 'use' ) )
            {
                $tokenizer_self->{_saw_perl_dash_w} = 1;
                $type = 'k';
            }

            elsif ( ( $tok eq 'constant' )
              and ( $last_nonblank_token eq 'use' ) )
            {
                $type = 'k';
                my ( $next_nonblank_token, $i_next ) =
                  find_next_nonblank_token( $i, $rtokens );

                if ($next_nonblank_token) {

                    if ( $is_keyword{$next_nonblank_token} ) {
                        warning(
"Attempting to define constant '$next_nonblank_token' which is a perl keyword\n"
                        );
                    }

                    # FIXME: could check for error in which next token is not
                    # a word (number, punctuation, ..)
                    else {
                        $is_constant{$current_package}{$next_nonblank_token} =
                          1;
                    }
                }
            }

            elsif ( $tok eq 's' ) {
                if ( $expecting == OPERATOR ) {
                    unexpected( $tok, "operator", $i_tok, $last_nonblank_i );
                }
                $in_quote = 2;    # starting first of two patterns/quotes

                # NOTE: camel 3 says egimosx, but 'c' is accepted by perl
                $allowed_quote_modifiers = '[cegimosx]';
                $type = 'Q';
            }

            elsif ( $tok =~ /^((y)|(tr))$/ ) {    # built-in functions
                if ( $expecting == OPERATOR ) {
                    unexpected( $tok, "operator", $i_tok, $last_nonblank_i );
                }
                $in_quote = 2;    # starting first of two patterns/quotes
                $allowed_quote_modifiers = '[cds]';
                $type = 'Q';
            }

            elsif ( $tok eq 'm' ) {    # match operator
                if ( $expecting == OPERATOR ) {
                    unexpected( $tok, "operator", $i_tok, $last_nonblank_i );
                }
                $in_quote = 1;
                $allowed_quote_modifiers = '[cgimosx]';
                $type = 'Q';
            }

            elsif ( $tok =~ /^(q|qq|qw|qx)$/ ) {    # various quote operators
                if ( $expecting == OPERATOR ) {
                    unexpected( $tok, "operator", $i_tok, $last_nonblank_i );
                }
                $in_quote = 1;
                $allowed_quote_modifiers = "";
                $type       = ( $tok eq 'qw' ) ? 'q' : 'Q';
                $quote_type = $type;
            }

            elsif ( $tok =~ /^(qr)$/ ) {
                if ( $expecting == OPERATOR ) {
                    unexpected( $tok, "operator", $i_tok, $last_nonblank_i );
                }
                $in_quote = 1;
                $allowed_quote_modifiers = '[imosx]';    # camel 3 p 147
                $type       = 'Q';
                $quote_type = $type;
            }

            # check for a statement label
            elsif ( ( $next_nonblank_token eq ':' )
              && ( $$rtokens[ $i_next + 1 ] ne ':' ) && is_label() )
            {
                if ( $tok !~ /A-Z/ ) {
                    push @lower_case_labels_at, $input_line_number;
                }
                $type = 'J';
                $tok .= ':';
                $i = $i_next;
                next;
            }

            elsif ( $tok eq 'format' ) {
                $type = 'k';
                $tokenizer_self->{_in_format} = 1;
                last;
            }

            elsif ( $tok =~ /((__DATA__)|(__END__))/ ) {
                $tokenizer_self->{_in_data} = $tok;
                $type = 'k';
                last;
            }

            elsif ( $is_keyword{$tok} ) {
                $type = 'k';

                # Since for and foreach may not be followed immediately
                # by an opening paren, we have to remember which keyword
                # is associated with the next '('
                if ( $tok =~ /^(for|foreach)$/ ) { $want_paren = $tok; }
            }

            # check for inline label 
            elsif ( ( $last_nonblank_type eq 'k' )
              && ( $last_nonblank_token =~ /^(redo|last|next|goto)$/ ) )
            {
                $type = 'j';
                next;
            }

            # something else -- 
            else {

                ( $i, $tok, $type ) =
                  scan_bare_identifier( $input_line, $i, $tok, $type,
                  $rtoken_map );

                # mark bare words following a file test operator as
                # something that will expect an operator next
                if ( ( $type eq 'w' ) && ( $last_nonblank_type eq 'F' ) ) {
                    $type = 'C';
                }
            }
        }

        ###############################################################
        # section 2: strings of digits
        ###############################################################
        elsif ( $pre_type eq 'd' ) {
            if ( $expecting == OPERATOR ) {
                unexpected( "Number", "operator", $i_tok, $last_nonblank_i );
            }
            ( $i, $type, my $number ) =
              scan_number( $input_line, $i, $rtoken_map, $type );

            if ( !defined($number) ) {

                # shouldn't happen - we should always get a number
                warning("non-number beginning with digit--program bug\n");
                report_definite_bug();
            }
        }

        ###############################################################
        # section 3: single character punctuation
        ###############################################################
        elsif ( length($tok) == 1 ) {

            # nothing to do for these tokens:
            if ( $tok =~ /^[\=\|\>\\\~\!]/ ) {

                # the rest of these are in order of approximate frequency
                # of occurrence in a typical program according to my
                # highly scientific study :)
            }
            elsif ( $tok eq '$' ) {    # start looking for a scalar
                if ( $expecting == OPERATOR ) {
                    unexpected( "Scalar", "operator", $i_tok,
                      $last_nonblank_i );
                }
                ( $i, $tok, $type, $id_scan_state, $identifier ) =
                  scan_identifier( $i, $id_scan_state, $identifier, $rtokens );

                if ( $identifier eq '$^W' ) {
                    $tokenizer_self->{_saw_perl_dash_w} = 1;
                }

                # Test problem is vorboard.pl
                if ( ( $last_nonblank_token =~ /^(print|printf)$/ )
                  || ( ( $last_nonblank_token eq '(' )
                  && ( $paren_type[$paren_depth] =~ /^(print|printf)$/ ) )
                  || ( $last_nonblank_type =~ /^[Uw]$/ )    # possible object
                  )
                {
                    $type = 'Z';
                }
            }
            elsif ( $tok eq '(' ) {

                ++$paren_depth;
                if ($want_paren) {
                    $paren_type[$paren_depth] = $want_paren;
                    $want_paren = "";
                }
                else {
                    $paren_type[$paren_depth] = $last_nonblank_token;
                }
                increase_nesting_depth( PAREN, $i_tok );

                if ( $last_nonblank_type =~ /^(\))$/ ) {
                    warning(
                    "Syntax error? found token '$last_nonblank_type' then '('\n"
                    );
                }

            }
            elsif ( $tok eq ')' ) {
                decrease_nesting_depth( PAREN, $i_tok );
                if ( $paren_depth > 0 ) { $paren_depth-- }
            }
            elsif ( $tok eq ',' ) {
            }
            elsif ( $tok eq ';' ) {

                if ( $paren_type[$paren_depth] eq 'for' ) { # mark ; in for loop
                    $type = 'f';
                }

            }
            elsif ( $tok =~ /^[\"\'\`]/ ) {
                if ( $expecting == OPERATOR ) {
                    unexpected( "String", "operator", $i_tok,
                      $last_nonblank_i );
                }
                $in_quote = 1;
                $type     = 'Q';
                $allowed_quote_modifiers = "";
                redo;
            }
            elsif ( $tok eq '/' ) {
                my $is_pattern;

                if ( $expecting == UNKNOWN ) {    # indeterminte, must guess..
                    my $msg;
                    ( $is_pattern, $msg ) =
                      guess_if_pattern_or_division( $i, $rtokens, $rtoken_map );

                    if ($msg) {
                        write_diagnostics("DIVIDE:$msg\n");
                        write_logfile_entry($msg);
                    }
                }
                else { $is_pattern = ( $expecting == TERM ) }

                if ($is_pattern) {
                    $in_quote = 1;
                    $type     = 'Q';
                    $allowed_quote_modifiers = '[cgimosx]';
                    redo;
                }
                else {    # not a pattern; check for a /= token

                    if ( $$rtokens[ $i + 1 ] eq '=' ) {    # form token /=
                        $i++;
                        $tok  = '/=';
                        $type = $tok;
                    }

                    #DEBUG - collecting info on what tokens follow a divide
                    # for development of guessing algorithm
                    #if ( numerator_expected( $i, $rtokens ) < 0 ) {
                    #	#write_diagnostics( "DIVIDE? $input_line\n" );
                    #}
                }
            }
            elsif ( $tok eq '#' ) {    # a comment
                $type = '#';
                my $pos = $$rtoken_map[$i];
                last;          # we're done with this line
            }
            elsif ( $tok eq '{' ) {

                # if we just saw a ')', we will label this block with
                # its type.  We need to do this to allow sub
                # code_block_type to determine if this brace starts a
                # code block or anonymous hash.  (The type of a paren
                # pair is the preceding token, such as 'if', 'else',
                # etc).
                if ( $last_nonblank_token eq ')' ) {
                    $last_nonblank_token = $paren_type[ $paren_depth + 1 ];

                    # defensive move in case of a nesting error (pbug.t)
                    # in which this ')' had no previous '('
                    # this nesting error will have been caught
                    if ( !defined($last_nonblank_token) ) {
                        $last_nonblank_token = 'if';
                    }

                    # check for syntax error here;
                    # expecting: (if|elsif|while|until|for|foreach) 
                    # Delete this if it is too redundant
                    #unless ( $is_keyword{$last_nonblank_token} ) {
                    unless ( $last_nonblank_token =~
                      /^(if|elsif|unless|while|until|for|foreach)$/ )
                    {
                        warning(
"syntax error at ') {', didn't see (if|elsif|unless|while|until|for|foreach)\n"
                        );
                    }
                }

                # now identify which of the three possible types of
                # curly braces we have: hash index container, anonymous
                # hash reference, or code block.

                # non-structural (hash index) curly brace pair 
                # get marked 'L' and 'R'
                if ( is_non_structural_brace() ) {
                    $type = 'L';
                }

                # code and anonymous hash have the same type, '{', but are 
                # distinguished by 'block_type', 
                # which will be blank for an anonymous hash
                else {
                    $block_type = code_block_type();
                }
                $brace_type[ ++$brace_depth ] = $block_type;
                $brace_package[$paren_depth] = $current_package;
                increase_nesting_depth( BRACE, $i_tok );
                $brace_structural_type[$brace_depth] = $type;
            }
            elsif ( $tok eq '}' ) {
                $block_type = $brace_type[$brace_depth];

                if ( defined( $brace_package[$paren_depth] ) ) {
                    $current_package = $brace_package[$paren_depth];
                }

                # can happen on brace error (caught elsewhere)
                else {
                }
                decrease_nesting_depth( BRACE, $i_tok );

                if ( $brace_structural_type[$brace_depth] eq 'L' ) {
                    $type = 'R';
                }

                # propagate type information for 'do' and 'eval' blocks.  
                # This is necessary to enable us to know if an operator 
                # or term is expected next
                if ( $brace_type[$brace_depth] =~ /$block_operator/ ) {
                    $tok = $brace_type[$brace_depth];
                }

                if ( $brace_depth > 0 ) { $brace_depth--; }
            }
            elsif ( $tok eq '&' ) {    # maybe sub call? start looking

                # We have to check for sub call unless we are sure we
                # are expecting an operator.  This example from s2p
                # got mistaken as a q operator in an early version:
                #   print BODY &q(<<'EOT');
                if ( $expecting != OPERATOR ) {
                    ( $i, $tok, $type, $id_scan_state, $identifier ) =
                      scan_identifier( $i, $id_scan_state, $identifier,
                      $rtokens );
                }
                else {
                }
            }
            elsif ( $tok eq '<' ) {    # angle operator or less than?

                if ( $expecting != OPERATOR ) {
                    my $ibeg = $i;
                    ( $i, $type ) =
                      find_angle_operator_termination( $input_line, $i,
                      $rtoken_map, $expecting );

                }
                else {
                }
            }
            elsif ( $tok eq '?' ) {    # ?: conditional or starting pattern?

                my $is_pattern;

                if ( $expecting == UNKNOWN ) {

                    my $msg;
                    ( $is_pattern, $msg ) =
                      guess_if_pattern_or_conditional( $i, $rtokens,
                      $rtoken_map );

                    if ($msg) { write_logfile_entry($msg) }
                }
                else { $is_pattern = ( $expecting == TERM ) }

                if ($is_pattern) {
                    $in_quote = 1;
                    $type     = 'Q';
                    $allowed_quote_modifiers = '[cgimosx]';    # TBD:check this
                    redo;
                }
            }
            elsif ( $tok eq '*' ) {    # typeglob, or multiply?

                if ( $expecting == TERM ) {
                    ( $i, $tok, $type, $id_scan_state, $identifier ) =
                      scan_identifier( $i, $id_scan_state, $identifier,
                      $rtokens );
                }
                else {

                    if ( $$rtokens[ $i + 1 ] eq '=' ) {
                        $tok  = '*=';
                        $type = $tok;
                        $i++;
                    }
                    elsif ( $$rtokens[ $i + 1 ] eq '*' ) {
                        $tok  = '**';
                        $type = $tok;
                        $i++;
                    }
                }
            }
            elsif ( $tok eq '.' ) {    # what kind of . ?

                if ( $expecting != OPERATOR ) {
                    ( $i, $type, my $number ) =
                      scan_number( $input_line, $i, $rtoken_map, $type );
                }
                else {
                }
            }
            elsif ( $tok eq ':' ) {
            }
            elsif ( $tok eq '+' ) {    # what kind of plus?

                if ( $expecting == TERM ) {
                    ( $i, $type, my $number ) =
                      scan_number( $input_line, $i, $rtoken_map, $type );

                    # unary plus is safest assumption if not a number
                    if ( !defined($number) ) { $type = 'p'; }
                }
                elsif ( $expecting == OPERATOR ) {
                }
                else {
                    if ( $next_type eq 'w' ) { $type = 'p' }
                }
            }
            elsif ( $tok eq '@' ) {

                if ( $expecting == OPERATOR ) {
                    unexpected( "Array", "operator", $i_tok, $last_nonblank_i );
                }
                ( $i, $tok, $type, $id_scan_state, $identifier ) =
                  scan_identifier( $i, $id_scan_state, $identifier, $rtokens );
            }
            elsif ( $tok eq '%' ) {    # hash or modulo?

                # first guess is hash if no following blank
                if ( $expecting == UNKNOWN ) {
                    if ( $next_type ne 'b' ) { $expecting = TERM }
                }
                if ( $expecting == TERM ) {
                    ( $i, $tok, $type, $id_scan_state, $identifier ) =
                      scan_identifier( $i, $id_scan_state, $identifier,
                      $rtokens );
                }
            }
            elsif ( $tok eq '[' ) {
                $square_bracket_type[ ++$square_bracket_depth ] =
                  $last_nonblank_token;
                increase_nesting_depth( SQUARE_BRACKET, $i_tok );

                # It may seem odd, but structural square brackets have
                # type '{' and '}'.  This simplifies the indentation logic.
                if ( !is_non_structural_brace() ) {
                    $type = '{';
                }
                $square_bracket_structural_type[$square_bracket_depth] = $type;
            }
            elsif ( $tok eq ']' ) {
                decrease_nesting_depth( SQUARE_BRACKET, $i_tok );

                if ( $square_bracket_structural_type[$square_bracket_depth] eq
                  '{' )
                {
                    $type = '}';
                }
                if ( $square_bracket_depth > 0 ) { $square_bracket_depth--; }
            }
            elsif ( $tok eq '-' ) {    # what kind of minus?

                if ( ( $expecting != OPERATOR )
                  && $is_file_test_operator{$next_tok} )
                {
                    $i++;
                    $tok .= $next_tok;
                    $type = 'F';
                }
                elsif ( $expecting == TERM ) {

                    ( $i, $type, my $number ) =
                      scan_number( $input_line, $i, $rtoken_map, $type );

                    # maybe part of bareword token? unary is safest
                    if ( !defined($number) ) { $type = 'm'; }

                }
                elsif ( $expecting == OPERATOR ) {
                }
                else {

                    if ( $next_type eq 'w' ) {
                        $type = 'm';
                    }
                }
            }

            elsif ( $tok eq '^' ) {

                # check for special variables like ${^WARNING_BITS}
                if ( $expecting == TERM ) {

                    # FIXME: this should work but will not catch errors
                    # because we also have to be sure that previous token is
                    # a type character ($,@,%).  
                    if ( $last_nonblank_token eq '{'
                      && ( $next_tok =~ /^[A-Za-z_]/ ) )
                    {

                        if ( $next_tok eq 'W' ) {
                            $tokenizer_self->{_saw_perl_dash_w} = 1;
                        }
                        $tok  = $tok . $next_tok;
                        $i    = $i + 1;
                        $type = 'w';
                    }
                    else {
                        unexpected( "carat", "term", $i_tok, $last_nonblank_i );
                    }
                }
            }
        }

        ###############################################################
        # section 4: digraphs
        ###############################################################
        elsif ( length($tok) == 2 ) {

            if ( $tok eq '::' ) {    # probably a sub call

                ( $i, $tok, $type ) =
                  scan_bare_identifier( $input_line, $i, $tok, $type,
                  $rtoken_map );
            }
            elsif ( $tok eq '<<' ) {    # maybe a here-doc?
                next
                  unless ( $i < $max_token_index )
                  ;    # here doc not possible if end of line

                if ( $expecting != OPERATOR ) {
                    my $found_target;
                    ( $found_target, $here_doc_target, $i ) =
                      find_here_doc( $expecting, $i, $rtokens, $rtoken_map );

                    if ($found_target) {
                        push ( @here_target_list, $here_doc_target );
                        $type = 'h';
                    }
                    elsif ( $expecting == TERM ) {

                        # shouldn't happen..
                        warning("Program bug; didn't find here doc target\n");
                    }
                }
                else {
                }
            }
            elsif ( $tok eq '->' ) {

                # if -> points to a bare word, we must scan for an identifier,
                # otherwise something like ->y would look like the y operator
                ( $i, $tok, $type, $id_scan_state, $identifier ) =
                  scan_identifier( $i, $id_scan_state, $identifier, $rtokens );
            }

            # type = 'pp' for pre-increment, '++' for post-increment
            elsif ( $tok eq '++' ) {

                if ( $expecting == TERM ) { $type = 'pp' }
            }

            # type = 'mm' for pre-decrement, '--' for post-decrement
            elsif ( $tok eq '--' ) {

                if ( $expecting == TERM ) { $type = 'mm' }
            }
            next;
        }

        ###############################################################
        # section 5: trigraphs
        ###############################################################
        elsif ( length($tok) == 3 ) {
        }
    }    # end of main loop to process all preliminary tokens

    if ( $i_tok >= 0 ) {
        $output_token_type[$i_tok] = $type;
        $output_block_type[$i_tok] = $block_type;
    }

    unless ( ( $type eq 'b' ) || ( $type eq '#' ) ) {
        $last_nonblank_token = $tok;
        $last_nonblank_type  = $type;
    }

    # reset indentation level if necessary at a sub or package
    # in an attempt to recover from a nesting error
    if ( $level_in_tokenizer < 0 ) {
        if ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ ) {
            reset_indentation_level(0);
            brace_warning("resetting level to 0 at $1 $2\n");
        }
    }

    # all done tokenizing this line ... 
    # now prepare the final list of tokens and types

    my @token_type = ();    # stack of output token types
    my @block_type = ();    # stack of output code block types
    my @tokens     = ();    # output tokens
    my @levels     = ();    # structural brace levels of output tokens
    my @slevels    = ();    # secondary nesting levels of output tokens
    my $im         = -1;    # previous $i value
    my $num;

    foreach $i (@output_token_list) {    # scan the list of pre-tokens indexes

        # self-checking for valid token types
        my $type = $output_token_type[$i];

        # This can happen by running perltidy on non-scripts
        # although it could also be bug introduced by programming change.
        # Perl silently accepts a 032 (^Z) and takes it as the end
        if ( !$is_valid_token_type{$type} ) {
            my $val = ord($type);
            warning("unexpected character decimal $val ($type) in script\n");
            $tokenizer_self->{_in_error} = 1;

        }
        push ( @token_type, $output_token_type[$i] );

        push ( @block_type, $output_block_type[$i] );

        # set primary indentation levels based on structural braces
        # Note: these are set so that the leading braces have a HIGHER
        # level than their CONTENTS, which is convenient for indentation
        if ( $type eq '{' ) {
            push ( @levels, $level_in_tokenizer++ );
        }
        elsif ( $type eq '}' ) {
            push ( @levels, --$level_in_tokenizer );
        }
        else {
            push ( @levels, $level_in_tokenizer );
        }

        if ( $level_in_tokenizer < 0 ) {
            unless ($saw_negative_indentation) {
                $saw_negative_indentation = 1;
                warning("Starting negative indentation\n");
            }
        }

        # set secondary nesting levels based on all continment token types
        # Note: these are set so that the nesting depth is the depth
        # of the PREVIOUS TOKEN, which is convenient for setting
        # the stength of token bonds
        if ( $type =~ /^[L\{\(\[]$/ ) {
            push ( @slevels, $slevel_in_tokenizer++ );
        }
        elsif ( $type =~ /^[R\}\)\]]$/ ) {
            push ( @slevels, $slevel_in_tokenizer-- );
        }
        else {
            push ( @slevels, $slevel_in_tokenizer );
        }

        # now form the previous token
        if ( $im >= 0 ) {
            $num = $$rtoken_map[$i] - $$rtoken_map[$im];   # how many characters

            if ( $num > 0 ) {
                push ( @tokens,
                  substr( $input_line, $$rtoken_map[$im], $num ) );
            }
        }
        $im = $i;
    }
    $num = length($input_line) - $$rtoken_map[$im];    # make the last token

    if ( $num > 0 ) {
        push ( @tokens, substr( $input_line, $$rtoken_map[$im], $num ) );
    }

    $tokenizer_self->{_in_quote} = $in_quote;
    $tokenizer_self->{_rhere_target_list} = \@here_target_list;

    $line_of_tokens->{_rtoken_type} = \@token_type;
    $line_of_tokens->{_rtokens}     = \@tokens;
    $line_of_tokens->{_rblock_type} = \@block_type;
    $line_of_tokens->{_rlevels}     = \@levels;
    $line_of_tokens->{_rslevels}    = \@slevels;

    return;
}

sub is_label {

    # Decide if a bare word followed by a colon is a label, based on the
    # previous nonblank token and its type.
    #
    # Returns a true value if the word should be treated as a label.

    # True when the previous token is a '{' or '}' whose type equals
    # its own text (an opening or closing code block curly brace).
    my $follows_curly =
      ( $last_nonblank_token =~ /^[\{\}]$/
          && $last_nonblank_type eq $last_nonblank_token );

    # Not after a curly: it is a label if and only if it follows a ';'
    return ( $last_nonblank_token eq ';' ) unless ($follows_curly);

    # After a curly: it is a label if and only if the curly at the
    # current brace depth encloses a code block
    return $brace_type[$brace_depth];
}

sub code_block_type {

    # Decide if an opening curly brace starts a block of code, and if so
    # what type of block it is.  Must be called only when
    # $type = $token = '{'.
    #
    # The problem is to tell the start of a code block apart from the
    # start of an anonymous hash reference.
    #
    # Returns "" if this is not a code block.  Otherwise returns the
    # previous nonblank token, which names the kind of block (for example
    # 'if' for an if block, 'else' for an else block, and so on).

    # true if the previous token has a type identical to its own text
    my $token_is_own_type = ( $last_nonblank_type eq $last_nonblank_token );

    # Multiple '{'s: the status of the first one (code or anonymous
    # hash ref) propagates down the string of braces
    if ( $last_nonblank_token =~ /^[\{]$/ && $token_is_own_type ) {
        return $brace_type[$brace_depth] ? $last_nonblank_token : "";
    }

    my $is_code_block = (

        # a '} {' situation is code because we can't have adjacent
        # hash references
        ( $last_nonblank_token =~ /^[\}]$/ && $token_is_own_type )

          # brace after a label
          || $last_nonblank_type eq 'J'

          # brace after any of these tokens must start a code block
          || $last_nonblank_token =~
/^(BEGIN|END|CHECK|INIT|AUTOLOAD|continue|;|if|elsif|else|unless|do|while|until|eval|for|foreach|map|grep|sort)$/

          # a sub definition
          || ( $last_nonblank_type =~ /^[ti]$/
            && $last_nonblank_token =~ /^sub\b/ )

          # user-defined subs with block parameters (like grep/map/eval)
          || $last_nonblank_type eq 'G'
    );

    # anything that is not a code block must be an anonymous hash reference
    return $is_code_block ? $last_nonblank_token : "";
}

sub unexpected {

    # Report an unexpected token type and show where it is.
    #
    # Parameters:
    #   $found           - description of what was found (e.g. "Array")
    #   $expecting       - description of what was expected
    #   $i_tok           - pretoken index of the offending token
    #   $last_nonblank_i - pretoken index of the previous nonblank token
    #
    # The offending position is marked with '^' and the previous token is
    # underlined with '-'.  Nothing is written once the error count
    # exceeds MAX_NAG_MESSAGES, but the count is always incremented.
    my ( $found, $expecting, $i_tok, $last_nonblank_i ) = @_;

    $unexpected_error_count++;
    if ( $unexpected_error_count <= MAX_NAG_MESSAGES ) {
        my $pos = $rpretoken_map->[$i_tok];
        interrupt_logfile();

        my ( $offset, $numbered_line, $underline ) =
          make_numbered_line( $input_line_number, $input_line, $pos );
        $underline = write_on_underline( $underline, $pos - $offset, '^' );

        if ( ( $i_tok > 0 ) && ( $last_nonblank_i >= 0 ) ) {
            my $pos_prev = $rpretoken_map->[$last_nonblank_i];

            # stop the dashes before an intervening blank, if any
            my $pos_end =
              ( $rpretoken_type->[ $i_tok - 1 ] eq 'b' )
              ? $rpretoken_map->[ $i_tok - 1 ]
              : $pos;
            $underline =
              write_on_underline( $underline, $pos_prev - $offset,
                '-' x ( $pos_end - $pos_prev ) );
        }

        warning("$numbered_line\n");
        $underline =~ s/\s*$//;
        warning("$underline\n");
        warning(
"$found found where $expecting expected (previous token underlined)\n"
        );
        resume_logfile();
    }
}

sub indicate_error {

    # Write a warning message followed by the offending line and a marker
    # line beneath it, with the logfile interrupted while this is done.
    #
    # Parameters:
    #   $msg         - text of the warning
    #   $line_number - number of the line in question
    #   $line_text   - text of that line
    #   $pos         - character position to mark (0 = first character)
    #   $carrat      - marker string to write at that position
    my $msg = shift;
    my ( $line_number, $line_text, $pos, $carrat ) = @_;

    interrupt_logfile();
    warning($msg);
    write_error_indicator_pair( $line_number, $line_text, $pos, $carrat );
    resume_logfile();
}

sub write_error_indicator_pair {

    # Write two warning lines: a numbered copy of the given line, and
    # below it a marker line with $carrat written at the position
    # corresponding to $pos.
    my ( $line_number, $line_text, $pos, $carrat ) = @_;

    my ( $offset, $numbered_line, $marker_line ) =
      make_numbered_line( $line_number, $line_text, $pos );
    $marker_line = write_on_underline( $marker_line, $pos - $offset, $carrat );

    warning("$numbered_line\n");

    # drop trailing blanks from the marker line before writing it
    $marker_line =~ s/\s*$//;
    warning("$marker_line\n");
}

sub make_numbered_line {

    # Given an input line, its line number, and a character position of
    # interest, create a string of the form
    #     $lineno: sub_string
    # where sub_string is a window of $str that contains the position of
    # interest.  Text cut from either end is replaced by '...'.
    #
    # Input parameters:
    #   $lineno = line number
    #   $str    = the text of the line
    #   $pos    = position of interest (the error) : 0 = first character
    #
    # Returns a list of three values:
    #   $offset        = correction such that $pos - $offset is the
    #                    position of interest within the numbered line
    #   $numbered_line = the numbered (and possibly trimmed) line
    #   $underline     = a string of blanks as long as $numbered_line
    my ( $lineno, $str, $pos ) = @_;

    my $window = 68;    # width of the text window

    # show the line from its start unless the position is far to the right
    my $offset = ( $pos < 60 ) ? 0 : $pos - 40;

    # true if text remains beyond the right edge of the window
    my $clipped = ( length($str) - $offset - $window ) > 0;

    if ( $offset == 0 ) {
        $str = substr( $str, 0, $window - 4 ) . " ..." if ($clipped);
    }
    else {

        # the leading '... ' takes the place of the first 4 characters
        my $shown =
          $clipped
          ? substr( $str, $offset + 4, $window - 4 ) . " ..."
          : substr( $str, $offset + 4 );
        $str = "... " . $shown;
    }

    # the line number prefix shifts everything to the right
    my $prefix        = sprintf( "%d: ", $lineno );
    my $numbered_line = $prefix . $str;
    return (
        $offset - length($prefix),
        $numbered_line, " " x length($numbered_line)
    );
}

sub write_on_underline {

    # Write a marker string onto an "underline" string and return the
    # result.
    #
    # Parameters:
    #   $underline - the current underline string
    #   $pos       - position in $underline at which to write
    #                (0 = first character)
    #   $pos_chr   - the string to write there (for example '^' or '---')
    #
    # Any part of $pos_chr that would extend past the end of $underline
    # is dropped, so the length of the underline never changes.
    #
    # If $pos lies outside of the underline, the underline is returned
    # unchanged.  Callers assign the result back to their underline
    # variable, so returning nothing here would replace it with undef.
    my ( $underline, $pos, $pos_chr ) = @_;

    if ( $pos < 0 || $pos > length($underline) ) {
        return ($underline);
    }

    # trim the marker if it would run off the end of the underline
    my $excess = length($pos_chr) + $pos - length($underline);
    if ( $excess > 0 ) {
        $pos_chr = substr( $pos_chr, 0, length($pos_chr) - $excess );
    }
    substr( $underline, $pos, length($pos_chr) ) = $pos_chr;
    return ($underline);
}

sub is_non_structural_brace {

    # Decide if a brace or bracket is structural or non-structural by
    # looking at the previous nonblank token and its type.
    # Returns true if it is non-structural.

    # It is non-structural if it is decorated by type information.
    # For example, the '{' here is non-structural:   ${xxx}
    my $is_decorated =
      ( $last_nonblank_token =~ /^([\$\@\*\&\%\)]|->|::)/ );

    # It is also non-structural if it follows a hash or array closing
    # curly brace or bracket.  For example, the second '{' in this is
    # non-structural: $a{'x'}{'y'}
    # because the first '}' would have been given type 'R'
    return ( $is_decorated || $last_nonblank_type =~ /^([R\]])$/ );
}

sub operator_expected {

    # Many perl symbols have two or more meanings.  For example, '<<'
    # can be a shift operator or a here-doc operator.  The interpretation
    # of these symbols depends on the current state of the tokenizer,
    # which may either be expecting a term or an operator.  For this
    # example, a << would be a shift if an operator is expected, and a
    # here-doc if a term is expected.  This routine is called to make
    # this decision for any current token.  It returns one of three
    # possible values:
    #
    #   OPERATOR - operator expected (or at least, not a term)
    #   UNKNOWN  - can't tell
    #   TERM     - a term is expected (or at least, not an operator)
    #
    # The decision is based on what has been seen so far.  This
    # information is stored in the "$last_nonblank_type" and
    # "$last_nonblank_token" variables.  For example, if the
    # $last_nonblank_type is '=~', then we are expecting a TERM, whereas
    # if $last_nonblank_type is 'n' (numeric), we are expecting an
    # OPERATOR.
    #
    # If UNKNOWN is returned, the calling routine must guess.  A major
    # goal of this tokenizer is to minimize the possibility of returning
    # UNKNOWN, because a wrong guess can spoil the formatting of a script.
    #
    # Adding NEW_TOKENS: it is critically important that this routine be
    # updated to allow it to determine if an operator or term is to be
    # expected after the new token.  Doing this simply involves adding
    # the new token character to one of the regexes in this routine or to
    # one of the hash lists that it uses, which are initialized in the
    # BEGIN section.
    #
    # Parameters:
    #   $prev_type - type of the token just before the current one
    #   $tok       - the current token
    #   $next_type - type of the token just after the current one
    #
    # NOTE: the tests below are order dependent; do not rearrange them.
    my ( $prev_type, $tok, $next_type ) = @_;
    my $op_expected = UNKNOWN;

    # A possible filehandle (or object) requires some care...
    if ( $last_nonblank_type eq 'Z' ) {

        # For possible file handle like "$a", Perl uses weird parsing rules.
        # For example:
        # print $a/2,"/hi";   - division
        # print $a / 2,"/hi"; - division
        # print $a/ 2,"/hi";  - division
        # print $a /2,"/hi";  - pattern (and error)!
        if ( $prev_type eq 'b' && $next_type ne 'b' ) {
            $op_expected = TERM;
        }

        # Note when an operation is being done where a filehandle might
        # be expected, since a change in whitespace could change the
        # interpretation of the statement.
        elsif ( $tok =~ /^([x\/\+\-\*\%\&\.\?\<]|\>\>)$/ ) {
            write_logfile_entry(
                "operator in print statement not recommended\n");
            $op_expected = OPERATOR;
        }
    }

    # Something after 'do' and 'eval':
    #   $a = eval "expression";   - keyword: expression or list mode follows
    #   $a = do { BLOCK } / 2;    - otherwise: block mode following '}'
    elsif ( $last_nonblank_token =~ /$block_operator/ ) {
        $op_expected = ( $last_nonblank_type eq 'k' ) ? TERM : OPERATOR;
    }

    # Bare word: unfortunately, we can't tell what type of token to
    # expect next after most bare words
    elsif ( $last_nonblank_type =~ /^[w]$/ ) {
        $op_expected = UNKNOWN;
    }

    # operator, but not term possible after these types
    elsif ( ( $last_nonblank_type =~ /^[\)\]RnviQh]$/ )
        || ( $last_nonblank_token =~ /^(\$|\-\>)/ ) )
    {
        $op_expected = OPERATOR;
    }

    # no operator after many keywords, such as "die", "warn", etc,
    # nor after things like + - **  (i.e., other operators)
    elsif ($expecting_term_token{$last_nonblank_token}
        || $expecting_term_types{$last_nonblank_type} )
    {
        $op_expected = TERM;
    }

    # A few operators, like "time", have an empty prototype () and so
    # take no parameters but produce a value to operate on.  Likewise,
    # post-increment and decrement produce values to be operated on.
    elsif ($expecting_operator_token{$last_nonblank_token}
        || $expecting_operator_types{$last_nonblank_type} )
    {
        $op_expected = OPERATOR;
    }

    # No value to operate on after a sub block, nor after the right brace
    # which ends a simple block.  All non-structural right braces have
    # type 'R', and all braces associated with block operator keywords
    # have been given those keywords as "last_nonblank_token" and were
    # caught above.  (This is why the test must come after the checks on
    # $last_nonblank_token.)
    elsif ( ( $last_nonblank_token =~ /^sub\s/ )
        || ( $last_nonblank_type =~ /^\}$/ ) )
    {
        $op_expected = TERM;
    }

    # something else..what did I forget?
    else {

        # collecting diagnostics on unknown operator types..see what was
        # missed
        $op_expected = UNKNOWN;
        write_diagnostics(
"OP: unknown after type=$last_nonblank_type  token=$last_nonblank_token\n"
        );
    }

    if ( $TOKENIZER_DEBUG_FLAG{EXPECT} ) {
        print
"EXPECT: returns $op_expected for last type $last_nonblank_type token $last_nonblank_token\n";
    }
    return $op_expected;
}

=pod

The following routines keep track of nesting depths of the three brace
types, ( [ and {.  This is necessary for determining the indentation
level, and also for debugging programs.  Not only do they keep track of
nesting depths of the individual brace types, but they check that each
of the other brace types is balanced within matching pairs.  For
example, if the program sees this sequence:

		{  ( ( ) }

then it can determine that there is an extra left paren somewhere
between the { and the }.  And so on with every other possible
combination of outer and inner brace types.  For another
example:

		( [ ..... ]  ] )

which has an extra ] within the parens.  

The brace types have indexes 0, 1, and 2, which are used as indexes into
the matrices.

The matrix 

		$depth_array[$a][$b][ $current_depth[$a] ] = $current_depth[$b];

saves the nesting depth of brace type $b (where $b is either of the other
two brace types) when brace type $a enters a new depth.  When this depth
decreases, a check is made that the current depth of brace types $b is
unchanged, or otherwise there must have been an error.  This can
be very useful for localizing errors, particularly when perl runs to
the end of a large file (such as this one) and announces that there
is a problem somewhere.

=cut
sub increase_nesting_depth {

    # Record that a brace of the given type has been opened.
    #
    # Parameters:
    #   $brace_kind - index (0, 1 or 2) of the brace type being opened
    #   $i_tok      - pretoken index of the opening token
    #
    # The line number, line text and character position of the opening
    # token are saved for later error messages, and the current depths of
    # the other two brace types are saved so that they can be checked
    # when this brace is closed.
    my ( $brace_kind, $i_tok ) = @_;

    my $new_depth = ++$current_depth[$brace_kind];
    $starting_line_of_current_depth[$brace_kind][$new_depth] =
      [ $input_line_number, $input_line, $rpretoken_map->[$i_tok] ];

    foreach my $other_kind ( grep { $_ != $brace_kind } 0 .. 2 ) {
        $depth_array[$brace_kind][$other_kind][$new_depth] =
          $current_depth[$other_kind];
    }
}

sub decrease_nesting_depth {

    # Record that a brace of the given type has been closed, and check
    # that the other two brace types are balanced within it.
    #
    # Parameters:
    #   $brace_kind - index (0, 1 or 2) of the brace type being closed
    #   $i_tok      - pretoken index of the closing token
    #
    # An error is reported (up to a limit of MAX_NAG_MESSAGES) if the
    # depth of another brace type differs from the depth saved when this
    # brace was opened, or if there is no open brace of this type at all.
    # Every such problem increments the brace error count.
    my ( $brace_kind, $i_tok ) = @_;
    my $pos = $rpretoken_map->[$i_tok];

    if ( $current_depth[$brace_kind] <= 0 ) {

        # a closing token with nothing to close
        my $saw_brace_error = get_saw_brace_error();
        if ( $saw_brace_error <= MAX_NAG_MESSAGES ) {
            my $msg =
"There is no previous $opening_brace_names[$brace_kind] to match a $closing_brace_names[$brace_kind] on line $input_line_number\n";
            indicate_error( $msg, $input_line_number, $input_line, $pos, '^' );
        }
        increment_brace_error();
    }
    else {

        # check that any other brace types contained within are balanced
        foreach my $other_kind ( grep { $_ != $brace_kind } 0 .. 2 ) {

            my $depth_at_open =
              $depth_array[$brace_kind][$other_kind]
              [ $current_depth[$brace_kind] ];
            next if ( $depth_at_open == $current_depth[$other_kind] );

            my $diff = $current_depth[$other_kind] - $depth_at_open;

            # don't whine too many times; and if too many closing types
            # have occured, we probably already caught this error
            my $saw_brace_error = get_saw_brace_error();
            if ( $saw_brace_error <= MAX_NAG_MESSAGES
                && ( $diff > 0 || $saw_brace_error <= 0 ) )
            {
                interrupt_logfile();

                # where the brace now being closed was opened
                my $rsl =
                  $starting_line_of_current_depth[$brace_kind]
                  [ $current_depth[$brace_kind] ];
                my $open_line_number = $rsl->[0];

                my $plural = ( abs($diff) == 1 ) ? '' : 's';
                my $extra_name =
                  ( $diff > 0 )
                  ? $opening_brace_names[$other_kind]
                  : $closing_brace_names[$other_kind];

                write_error_indicator_pair( @$rsl, '^' );
                my $msg =
"Found $diff extra ${extra_name}${plural} between $opening_brace_names[$brace_kind] on line $open_line_number and $closing_brace_names[$brace_kind] on line $input_line_number\n";

                if ( $diff > 0 ) {

                    # show where the unmatched opening token is
                    my $rml =
                      $starting_line_of_current_depth[$other_kind]
                      [ $current_depth[$other_kind] ];
                    my $unmatched_line_number = $rml->[0];
                    $msg .=
"    The most recent un-matched $extra_name is on line $unmatched_line_number\n";
                    write_error_indicator_pair( @$rml, '^' );
                }
                write_error_indicator_pair( $input_line_number, $input_line,
                    $pos, '^' );
                warning($msg);
                resume_logfile();
            }
            increment_brace_error();
        }
        $current_depth[$brace_kind]--;
    }
}

sub check_final_nesting_depths {

    # Report an error for each of the three brace types which still has
    # a nonzero nesting depth, showing the line on which the most recent
    # unmatched opening token was found.
    foreach my $brace_kind ( 0 .. 2 ) {

        next unless ( $current_depth[$brace_kind] );

        my $rsl =
          $starting_line_of_current_depth[$brace_kind]
          [ $current_depth[$brace_kind] ];
        my $open_line_number = $rsl->[0];
        my $brace_name       = $opening_brace_names[$brace_kind];

        my $msg =
            "Final nesting depth of ${brace_name}s is "
          . "$current_depth[$brace_kind]\n"
          . "The most recent un-matched $brace_name is on line "
          . "$open_line_number\n";

        indicate_error( $msg, @$rsl, '^' );
        increment_brace_error();
    }
}

sub numerator_expected {

    # This is a filter for a possible numerator, in support of guessing
    # for the / pattern delimiter token.  It looks at the next nonblank
    # token after pretoken index $i in the token list $rtokens.
    # Returns -
    #    1 - yes
    #    0 - can't tell
    #   -1 - no
    # Note: the convention is that variables ending in _expected have
    # these 3 possible values.
    my ( $i, $rtokens ) = @_;

    # step over the '=' of a possible '/='
    $i++ if ( $rtokens->[ $i + 1 ] eq '=' );

    my ($next_nonblank_token) = find_next_nonblank_token( $i, $rtokens );

    return 1 if ( $next_nonblank_token =~ /(\(|\$|\w|\.|\@)/ );

    # an empty or all-blank token tells us nothing
    return ( $next_nonblank_token =~ /^\s*$/ ) ? 0 : -1;
}

sub pattern_expected {

    # This is the start of a filter for a possible pattern.
    # It looks at the token after a possible pattern and tries to
    # determine if that token could end a pattern.
    # Returns -
    #    1 - yes
    #    0 - can't tell
    #   -1 - no
    my ( $i, $rtokens ) = @_;

    # skip a possible pattern modifier
    $i++ if ( $rtokens->[ $i + 1 ] =~ /^[cgimosx]/ );

    my ($next_nonblank_token) = find_next_nonblank_token( $i, $rtokens );

    # list of tokens which may follow a pattern
    # (can probably be expanded)
    return 1
      if ( $next_nonblank_token =~
        /(\)|\}|\;|\&\&|\|\||and|or|while|if|unless)/ );

    # an empty or all-blank token tells us nothing
    return ( $next_nonblank_token =~ /^\s*$/ ) ? 0 : -1;
}

sub find_next_nonblank_token_on_this_line {

    # Find the next nonblank pretoken after index $i without looking
    # beyond $max_token_index.
    #
    # Returns a list ( $token, $index ).  If $i is already at or beyond
    # $max_token_index, the token is "" and the index is unchanged.  At
    # most one blank pretoken is skipped.
    my ( $i, $rtokens ) = @_;

    return ( "", $i ) unless ( $i < $max_token_index );

    my $next_nonblank_token = $rtokens->[ ++$i ];

    # step over a blank, if there is still a token after it
    if ( $next_nonblank_token =~ /^\s*$/ && $i < $max_token_index ) {
        $next_nonblank_token = $rtokens->[ ++$i ];
    }
    return ( $next_nonblank_token, $i );
}

sub find_next_nonblank_token {

    # Find the next nonblank pretoken after index $i.
    #
    # If $i is at or beyond $max_token_index and no peek has been done
    # yet (according to $peeked_ahead), peek_ahead_for_nonblank_token is
    # called first to extend the token list.
    #
    # Returns a list ( $token, $index ).  At most one blank pretoken is
    # skipped.
    my ( $i, $rtokens ) = @_;

    if ( $i >= $max_token_index && !$peeked_ahead ) {
        $peeked_ahead = 1;
        $rtokens      = peek_ahead_for_nonblank_token($rtokens);
    }

    my $next_nonblank_token = $rtokens->[ ++$i ];
    $next_nonblank_token = $rtokens->[ ++$i ]
      if ( $next_nonblank_token =~ /^\s*$/ );

    return ( $next_nonblank_token, $i );
}

# Look ahead for the next non-blank, non-comment line of code, and
# store its first pretokens in the token list after the current line.
#
# Parameter: $rtokens - reference to the list of pretokens
# Returns:   the same reference
sub peek_ahead_for_nonblank_token {
    my ($rtokens) = @_;

    my $line_buffer = $tokenizer_self->{_line_buffer_object};
    my $lookahead   = 0;

    while ( my $line = $line_buffer->peek_ahead( $lookahead++ ) ) {
        $line =~ s/^\s*//;    # trim leading blanks
        next if ( length($line) <= 0 );    # skip blank
        next if ( $line =~ /^#/ );         # skip comment

        # only need 2 pre-tokens
        my ($rpeeked_tokens) = pre_tokenize( $line, 2 );

        #write_logfile_entry("peeking at line:$line")
        #  if $rOpts->{'DEBUG'};

        # the pre-increment below leaves a gap of one index after the
        # last token of the current line
        my $index = $max_token_index + 1;
        foreach my $peeked_token (@$rpeeked_tokens) {
            last if ( $peeked_token =~ "\n" );
            $rtokens->[ ++$index ] = $peeked_token;
        }

        # the first line of code is all we need
        last;
    }
    return $rtokens;
}

sub pre_tokenize {

    # Break a string, $str, into a sequence of preliminary tokens.  We
    # are only interested in these types of tokens: identifier strings,
    # digits, spaces, and other characters.  We cannot do better than
    # this yet because we might be in a quoted string or pattern.
    #
    # Parameters:
    #   $str               - the string to break up
    #   $max_tokens_wanted - stop after this many tokens; the caller sets
    #                        this to 0 to get all tokens
    #
    # Returns references to three lists:
    #   tokens    - the text of each pretoken
    #   token_map - the starting position in $str of each pretoken, plus
    #               one final entry for the position after the last one
    #   type      - 'b' for whitespace, 'd' for digits, 'w' for a word,
    #               or the character itself for anything else
    my ( $str, $max_tokens_wanted ) = @_;

    my @tokens    = ();
    my @type      = ();
    my @token_map = (0);

    while (1) {
        my ( $text, $kind );

        # whitespace
        if ( $str =~ /\G(\s+)/gc ) { ( $text, $kind ) = ( $1, 'b' ); }

        # numbers
        # note that this must come before identifiers
        elsif ( $str =~ /\G(\d+)/gc ) { ( $text, $kind ) = ( $1, 'd' ); }

        # identifiers
        elsif ( $str =~ /\G(\w+)/gc ) { ( $text, $kind ) = ( $1, 'w' ); }

        # punctuation: a single character which is its own type
        elsif ( $str =~ /\G(\W)/gc ) { ( $text, $kind ) = ( $1, $1 ); }

        # nothing left to match
        else { last; }

        push @type,      $kind;
        push @tokens,    $text;
        push @token_map, pos($str);
        last if ( scalar(@tokens) == $max_tokens_wanted );
    }
    return ( \@tokens, \@token_map, \@type );
}

sub show_tokens {

    # This is an old debug routine.  For each token it prints one line
    # of the form
    #     index:length:map_value:token:
    # where map_value is the entry of $rtoken_map with the same index.
    my ( $rtokens, $rtoken_map ) = @_;

    foreach my $index ( 0 .. $#{$rtokens} ) {
        my $token = $rtokens->[$index];
        print join( ':',
            $index, length($token), $rtoken_map->[$index], $token )
          . ":\n";
    }
}

sub find_angle_operator_termination {

    # We are looking at a '<' and want to know if it is an angle operator.
    #
    # input parameters:
    #   $input_line - the line of text being scanned
    #   $i_beg      - pretoken index of the '<'
    #   $rtoken_map - reference to the pretoken map giving the starting
    #                 character position in $input_line of each pretoken
    #   $expecting  - TERM or UNKNOWN; we should not be called if an
    #                 operator is expected
    # We are to return:
    #   $i = pretoken index of ending '>' if found, $i_beg otherwise
    #   $type = 'Q' if found, '<' otherwise
    my ( $input_line, $i_beg, $rtoken_map, $expecting ) = @_;
    my $i    = $i_beg;
    my $type = '<';

    # begin the search at the character just past the '<'
    pos($input_line) = 1 + $$rtoken_map[$i];

    my $filter;

    # we just have to find the next '>' if a term is expected
    if ( $expecting == TERM ) { $filter = '[\>]' }

    # we have to guess if we don't know what is expected
    elsif ( $expecting == UNKNOWN ) { $filter = '[\>\;\=\#\|\<]' }

    # shouldn't happen - we shouldn't be here if operator is expected.
    # No filter has been defined in this case, so report 'not an angle
    # operator' now instead of interpolating an undefined value into
    # the search pattern below.
    else {
        warning("Program Bug in find_angle_operator_termination\n");
        return ( $i, $type );
    }

    # To illustrate what we might be looking at, in case we are
    # guessing, here are some examples of valid angle operators
    # (or file globs):
    #  <tmp_imp/*>
    #  <FH>
    #  <$fh>
    #  <*.c *.h>
    #  <_>
    #  <jskdfjskdfj* op/* jskdjfjkosvk*> ( glob.t)
    #  <${PREFIX}*img*.$IMAGE_TYPE>
    #  <img*.$IMAGE_TYPE>
    #  <Timg*.$IMAGE_TYPE>
    #  <$LATEX2HTMLVERSIONS${dd}html[1-9].[0-9].pl>
    #
    # Here are some examples of lines which do not have angle operators:
    #  return undef unless $self->[2]++ < $#{$self->[1]};
    #  < 2  || @$t >
    #
    # the following line from dlister.pl caused trouble:
    #  print'~'x79,"\n",$D<1024?"0.$D":$D>>10,"K, $C files\n\n\n";
    #
    # If the '<' starts an angle operator, it must end on this line and
    # it must not have certain characters like ';' and '=' in it.  I use
    # this to limit the testing.  This filter should be improved if
    # possible.

    if ( $input_line =~ /($filter)/g ) {

        if ( $1 eq '>' ) {

            # We MAY have found an angle operator termination if we get
            # here, but we need to do more to be sure we haven't been
            # fooled.
            my $pos = pos($input_line);

            # the text from the '<' through the '>', used in the bracket
            # test and in the diagnostic messages below
            my $pos_beg = $$rtoken_map[$i];
            my $str = substr( $input_line, $pos_beg, ( $pos - $pos_beg ) );

            $type = 'Q';
            my $error;
            ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map );

            # It may be possible that a quote ends midway in a pretoken.
            # If this happens, it may be necessary to split the pretoken.
            if ($error) {
                warning( "Possible tokinization error..please check this line\n"
                );
                report_possible_bug();
            }

            # Now let's see where we stand....
            # OK if math op not possible
            if ( $expecting == TERM ) {
            }

            # OK if there are no more than 2 pre-tokens inside
            # (not possible to write 2 token math between < and >)
            # This catches most common cases
            elsif ( $i <= $i_beg + 3 ) {
                write_diagnostics("ANGLE(1 or 2 tokens): $str\n");
            }

            # Not sure..
            else {

                # Let's try a Brace Test: any braces inside must balance.
                # Each count is (number of openers) - (number of closers);
                # tr with an empty replacement list counts without
                # changing $str.
                my $br = ( $str =~ tr/{// ) - ( $str =~ tr/}// );
                my $sb = ( $str =~ tr/[// ) - ( $str =~ tr/]// );
                my $pr = ( $str =~ tr/(// ) - ( $str =~ tr/)// );

                # if braces do not balance - not angle operator
                if ( $br || $sb || $pr ) {
                    $i    = $i_beg;
                    $type = '<';
                    write_diagnostics("NOT ANGLE (BRACE={$br ($pr [$sb ):$str\n"
                    );
                }

                # we should keep doing more checks here...to be continued
                # Tentatively accepting this as a valid angle operator.
                # There are lots more things that can be checked.
                else {
                    write_diagnostics(
                      "ANGLE-Guessing yes: $str expecting=$expecting\n");
                    write_logfile_entry("Guessing angle operator here: $str\n");
                }
            }
        }

        # didn't find ending >
        # NOTE(review): when a TERM is expected the filter can only match
        # '>', so it looks like this warning can never be issued; confirm
        # whether it was meant to cover the 'no match at all' case.
        else {
            if ( $expecting == TERM ) {
                warning("No ending > for angle operator\n");
            }
        }
    }
    return ( $i, $type );
}

sub inverse_pretoken_map {

    # Convert a character position back into a pre_token index.
    #
    # Beginning just after pre_token index $i, walk forward through the
    # token map looking for the first pre_token which starts at or beyond
    # character position $pos, and return the index of the pre_token
    # just before it.
    #
    # input parameters:
    #   $i          - pre_token index at which to start
    #   $pos        - character position to be located
    #   $rtoken_map - reference to the map of pre_token starting positions
    # output parameters:
    #   $i     - index of the pre_token preceding the one found; if none
    #            is found this is one past the last index examined
    #   $error - 1 if $pos falls inside a pre_token instead of on a
    #            pre_token boundary, 0 otherwise
    my ( $i, $pos, $rtoken_map ) = @_;
    my $error = 0;

    for ( $i++ ; $i <= $max_token_index ; $i++ ) {
        my $token_start = $$rtoken_map[$i];

        # keep going until we reach or pass the requested position
        next if ( $pos > $token_start );

        # Let the calling routine handle errors in which we do not
        # land on a pre-token boundary.  It can happen by running
        # perltidy on some non-perl scripts, for example.
        $error = 1 if ( $pos < $token_start );

        # back up to the pre_token which ends at (or contains) $pos
        $i--;
        last;
    }
    return ( $i, $error );
}

sub guess_if_pattern_or_conditional {

    # This routine is called when we have encountered a ? following an
    # unknown bareword, and we must decide if it starts a pattern or is
    # the ? of a conditional expression.
    # input parameters:
    #   $i - token index of the ? starting possible pattern
    #   $rtokens - reference to the array of pretokens on this line
    #   $rtoken_map - not used here
    # output parameters:
    #   $is_pattern = 0 if probably conditional,  =1 if probably a pattern
    #   msg = a warning or diagnostic message
    my ( $i, $rtokens, $rtoken_map ) = @_;
    my $is_pattern = 0;
    my $msg        = "guessing that ? after $last_nonblank_token starts a ";

    # nothing follows the ? on this line, so it cannot close a pattern here
    if ( $i >= $max_token_index ) {
        $msg .= "conditional (no end to pattern found on the line)\n";
        return ( $is_pattern, $msg );
    }

    # look for a possible ending ? on this line..
    my $in_quote        = 1;
    my $quote_character = '';
    my $quote_pos       = 0;
    my $quote_depth     = 0;
    my $i_end;
    ( $i_end, $in_quote, $quote_character, $quote_pos, $quote_depth ) =
      follow_quoted_string( $i, $in_quote, $rtokens, $quote_character,
      $quote_pos, $quote_depth );

    # we didn't find an ending ? on this line,
    # so we bias towards conditional
    if ($in_quote) {
        $msg .= "conditional (no ending ? on this line)\n";
    }

    # we found an ending ?, so we bias towards a pattern
    elsif ( pattern_expected( $i_end, $rtokens ) >= 0 ) {
        $is_pattern = 1;
        $msg .= "pattern (found ending ? and pattern expected)\n";
    }

    # NOTE(review): the message says 'pattern' but $is_pattern is left
    # at 0 in this case; confirm which of the two is intended.
    else {
        $msg .= "pattern (uncertain, but found ending ?)\n";
    }
    return ( $is_pattern, $msg );
}

sub guess_if_pattern_or_division {

    # this routine is called when we have encountered a / following an
    # unknown bareword, and we must decide if it starts a pattern or is a
    # division
    # input parameters:
    #   $i - token index of the / starting possible pattern
    #   $rtokens - reference to the array of pretokens on this line
    #   $rtoken_map - not used here
    # output parameters:
    #   $is_pattern = 0 if probably division,  =1 if probably a pattern
    #   msg = a warning or diagnostic message
    my ( $i, $rtokens, $rtoken_map ) = @_;
    my $is_pattern = 0;
    my $msg        = "guessing that / after $last_nonblank_token starts a ";

    # the / is the last token on the line, so no pattern can end here
    if ( $i >= $max_token_index ) {
        $msg .= "division (no end to pattern found on the line)\n";
    }
    else {
        my $ibeg = $i;

        # a result >= 0 is taken below to mean that a division is possible
        my $divide_expected = numerator_expected( $i, $rtokens );

        # look for a possible ending / on this line..
        my $in_quote        = 1;
        my $quote_depth     = 0;
        my $quote_character = '';
        my $quote_pos       = 0;
        ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth ) =
          follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
          $quote_pos, $quote_depth );

        if ($in_quote) {

            # we didn't find an ending / on this line, 
            # so we bias towards division
            if ( $divide_expected >= 0 ) {
                $is_pattern = 0;
                $msg .= "division (no ending / on this line)\n";
            }

            # NOTE(review): this is the only case which replaces the
            # message instead of appending to it; confirm that dropping
            # the 'guessing that ...' prefix here is intentional.
            else {
                $msg        = "multi-line pattern (division not possible)\n";
                $is_pattern = 1;
            }

        }

        # we found an ending /, so we bias towards a pattern
        else {

            if ( pattern_expected( $i, $rtokens ) >= 0 ) {

                if ( $divide_expected >= 0 ) {

                    # both are possible; a closing / more than 60
                    # pretokens away is taken to be unrelated
                    if ( $i - $ibeg > 60 ) {
                        $msg .= "division (matching / too distant)\n";
                        $is_pattern = 0;
                    }
                    else {
                        $msg .= "pattern (but division possible too)\n";
                        $is_pattern = 1;
                    }
                }
                else {
                    $is_pattern = 1;
                    $msg .= "pattern (division not possible)\n";
                }
            }
            else {

                if ( $divide_expected >= 0 ) {
                    $is_pattern = 0;
                    $msg .= "division (pattern not possible)\n";
                }
                else {
                    $is_pattern = 1;
                    $msg .=
                      "pattern (uncertain, but division would not work here)\n";
                }
            }
        }
    }
    return ( $is_pattern, $msg );
}

sub find_here_doc {

    # find the target of a here document, if any
    # input parameters:
    #   $expecting - TERM or UNKNOWN, according to what the tokenizer
    #     expects at this point
    #   $i - token index of the second < of <<
    #     ($i must be less than the last token index if this is called)
    #   $rtokens - reference to the array of pretokens on this line
    #   $rtoken_map - not used here
    # output parameters:
    #   $found_target = 0 didn't find target; =1 found target
    #   HERE_TARGET - the target string (may be empty string)
    #   $i - unchanged if not here doc, 
    #    or index of the last token of the here target
    my ( $expecting, $i, $rtokens, $rtoken_map ) = @_;
    my $ibeg            = $i;
    my $found_target    = 0;
    my $here_doc_target = '';
    my ( $next_nonblank_token, $i_next_nonblank, $next_token );
    $next_token = $$rtokens[ $i + 1 ];

    # perl allows a backslash before the target string (heredoc.t)
    my $backslash = 0;
    if ( $next_token eq '\\' ) {
        $backslash  = 1;
        $next_token = $$rtokens[ $i + 2 ];
    }

    ( $next_nonblank_token, $i_next_nonblank ) =
      find_next_nonblank_token_on_this_line( $i, $rtokens );

    # quoted target: the target is everything between the quotes
    if ( $next_nonblank_token =~ /[\'\"\`]/ ) {

        my $in_quote        = 1;
        my $quote_depth     = 0;
        my $quote_character = '';
        my $quote_pos       = 0;

        ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth ) =
          follow_quoted_string( $i_next_nonblank, $in_quote, $rtokens,
          $quote_character, $quote_pos, $quote_depth );

        if ($in_quote) {    # didn't find end of quote, so no target found
            $i = $ibeg;
        }
        else {    # found ending quote
            my $j;
            $found_target = 1;

            for ( $j = $i_next_nonblank + 1 ; $j < $i ; $j++ ) {
                $here_doc_target .= $$rtokens[$j];
            }
        }
    }

    elsif ( ( $next_token =~ /^\s*$/ ) and ( $expecting == TERM ) ) {
        $found_target = 1;
        write_logfile_entry(
          "found blank here-target after <<; suggest using \"\"\n");
        $i = $ibeg;
    }
    elsif ( $next_token =~ /^\w/ ) {    # simple bareword or integer after <<

        my $here_doc_expected;
        if ( $expecting == UNKNOWN ) {
            $here_doc_expected = guess_if_here_doc($next_token);
        }
        else {
            $here_doc_expected = 1;
        }

        # guess_if_here_doc may return -1 to say that this must be a
        # shift operator (the target is not in the file).  That value is
        # true in a boolean test, so we must test for a positive result.
        if ( $here_doc_expected > 0 ) {
            $found_target    = 1;
            $here_doc_target = $next_token;
            $i = $ibeg + 1;
        }

    }
    else {

        if ( $expecting == TERM ) {
            $found_target = 1;
            write_logfile_entry("Note: bare here-doc operator <<\n");
        }
        else {
            $i = $ibeg;
        }
    }

    # patch to neglect any prepended backslash
    if ( $found_target && $backslash ) { $i++ }

    return ( $found_target, $here_doc_target, $i );
}

# try to resolve here-doc vs. shift by looking ahead for
# non-code or the end token (currently only looks for end token)
# input parameter:
#   $next_token - the bareword following the <<, i.e. the possible target
# returns:
#    1 if it is probably a here doc
#    0 if it is probably a shift (target not seen nearby and the bareword
#      is a known constant)
#   -1 if it must be a shift (hit end of file without seeing the target)
# Note that -1 is a true value, so callers should test for a result > 0
# rather than simply testing for truth.
sub guess_if_here_doc {

    # This is how many lines we will search for a target as part of the
    # guessing strategy.  It is a constant because there is probably
    # little reason to change it.
    use constant HERE_DOC_WINDOW => 40;

    my ($next_token) = @_;
    my $here_doc_expected = 0;
    my $line;
    my $k   = 0;
    my $msg = "checking <<";

    while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $k++ ) )
    {
        chomp $line;

        # The target must be alone on its line.  It is quoted with \Q..\E
        # so that it is always compared literally and never as a pattern.
        if ( $line =~ /^\Q$next_token\E$/ ) {
            $msg .= " -- found target $next_token ahead $k lines\n";
            $here_doc_expected = 1;    # got it
            last;
        }
        last if ( $k >= HERE_DOC_WINDOW );
    }

    unless ($here_doc_expected) {

        # the loop above leaves $line undefined only if peek_ahead
        # ran out of lines
        if ( !defined($line) ) {
            $here_doc_expected = -1;    # hit eof without seeing target
            $msg .= " -- must be shift; target $next_token not in file\n";

        }
        else {    # still unsure..taking a wild guess

            if ( !$is_constant{$current_package}{$next_token} ) {
                $here_doc_expected = 1;
                $msg .=
                  " -- guessing it's a here-doc ($next_token not a constant)\n";
            }
            else {
                $msg .=
                  " -- guessing it's a shift ($next_token is a constant)\n";
            }
        }
    }
    write_logfile_entry($msg);
    return $here_doc_expected;
}

sub do_quote {

    # Follow, or continue following, a quoted string or pattern, and
    # return the updated values of
    #   ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth )
    #
    # The value of $in_quote, both on entry and on return, means:
    #   0 - ok, found end
    #   1 - still must find end of quote whose target is $quote_character
    #   2 - still looking for end of first of two quotes
    #
    # $rtokens is a reference to the array of pretokens on this line;
    # $rtoken_map is not used here.
    my ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth, $rtokens,
      $rtoken_map ) = @_;

    # two quotes/patterns to follow: work on the first one
    if ( $in_quote == 2 ) {
        ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth ) =
          follow_quoted_string( $i, $in_quote, $rtokens, $quote_character,
          $quote_pos, $quote_depth );

        # The first one has ended, so get ready for the second one by
        # forgetting the quote character.  With bracketing delimiters we
        # also advance one token, presumably because the second part
        # brings its own opening delimiter.
        if ( $in_quote == 1 ) {
            $i++ if ( $quote_character =~ /[\{\[\<\(]/ );
            $quote_character = '';
        }
    }

    # one (more) quote to follow
    if ( $in_quote == 1 ) {
        my $i_start = $i;
        ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth ) =
          follow_quoted_string( $i_start, $in_quote, $rtokens,
          $quote_character, $quote_pos, $quote_depth );
    }

    return ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth );
}

sub scan_number {

=pod
  scan a number in any of the formats that Perl accepts
  Underbars (_) are allowed in decimal numbers.  
  input parameters -
    $input_line - the string to scan
    $i          - pre_token index to start scanning
    $rtoken_map - reference to the pre_token map giving starting
                  character position in $input_line of token $i
    $input_type - the token type to return if no number is found
  output parameters -
    $i          - last pre_token index of the number just scanned
                  (unchanged if no number is found)
    $type       - 'n' for a number, 'v' for a v-string, or $input_type
                  if no number is found
    number      - the number (characters); or undef if not a number
=cut
    my ( $input_line, $i, $rtoken_map, $input_type ) = @_;
    my $pos_beg = $$rtoken_map[$i];
    my $pos;
    my $number = undef;
    my $type   = $input_type;

    my $first_char = substr( $input_line, $pos_beg, 1 );

    # Look for bad starting characters; Shouldn't happen..
    if ( $first_char !~ /[\d\.\+\-Ee]/ ) {
        warning("Program bug - scan_number given character $first_char\n");
        report_definite_bug();
        return ( $i, $type, $number );
    }

    # handle v-string without leading 'v' character ('Two Dot' rule)
    # (vstring.t)
    pos($input_line) = $pos_beg;
    if ( $input_line =~ /\G((\d+)?\.\d+(\.\d+)+)/g ) {
        $pos = pos($input_line);
        my $numc = $pos - $pos_beg;
        $number = substr( $input_line, $pos_beg, $numc );
        $type = 'v';
        unless ($saw_v_string) { report_v_string($number) }
    }

    # handle octal, hex, binary
    # Underscores are accepted in all three forms, as in Perl itself.
    # FIXME: I think perl 5.6 may have added some extensions, like X, must check
    if ( !defined($number) ) {
        pos($input_line) = $pos_beg;
        if ( $input_line =~ /\G0((x[0-9a-fA-F_]+)|([0-7_]+)|(b[01_]+))/g ) {
            $pos = pos($input_line);
            my $numc = $pos - $pos_beg;
            $number = substr( $input_line, $pos_beg, $numc );
            $type = 'n';
        }
    }

    # handle decimal 
    # Every part of this pattern is optional, so it can match things
    # which are not numbers at all; they are filtered out below.
    if ( !defined($number) ) {
        pos($input_line) = $pos_beg;

        if ( $input_line =~ /\G([+-]?[\d_]*(\.[\d_]*)?([Ee][+-]?(\d+))?)/g ) {
            $pos = pos($input_line);

            # watch out for things like 0..40 which would give 0. by this;
            if ( ( substr( $input_line, $pos - 1, 1 ) eq '.' )
              && ( substr( $input_line, $pos, 1 ) eq '.' ) )
            {
                $pos--;
            }
            my $numc = $pos - $pos_beg;
            $number = substr( $input_line, $pos_beg, $numc );
            $type = 'n';
        }
    }

    # filter out non-numbers like e + - . e2  .e3 +e6
    # the rule: at least one digit, and any 'e' must be preceded by a digit
    if (
        !defined($number)
        || $number !~ /\d/    # no digits at all

        # or no digit anywhere before the 'e'
        || ( $number =~ /^(.*)[eE]/ && $1 !~ /\d/ )
      )
    {
        $number = undef;
        $type = $input_type;
        return ( $i, $type, $number );
    }

    # Found a number; now we must convert back from character position
    # to pre_token index. An error here implies user syntax error.
    # An example would be an invalid octal number like '009'.
    my $error;
    ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map );
    if ($error) { warning("Possibly invalid number\n") }

    return ( $i, $type, $number );
}

sub scan_bare_identifier {

    # this routine is called to scan a token starting with an alphanumeric
    # variable or package separator, :: or '.
    #
    # input parameters:
    #   $input_line - the line of text being scanned
    #   $i          - pretoken index at which the identifier starts
    #   $tok        - the starting token
    #   $type       - the current token type
    #   $rtoken_map - reference to the pretoken map giving the starting
    #                 character position in $input_line of each pretoken
    # output parameters:
    #   $i    - pretoken index at the end of the identifier, as given by
    #           inverse_pretoken_map (unchanged if nothing was found)
    #   $tok  - the text of the identifier found
    #   $type - one of the types assigned below:
    #             'w' bareword        'k' keyword     'v' v-string
    #             'C' constant        'G' block (list) function
    #             'U' user function   'Y' filehandle
    #             'Z' possible filehandle

    my ( $input_line, $i, $tok, $type, $rtoken_map ) = @_;
    my $package = undef;

    my $i_beg = $i;

    # we have to back up one pretoken at a :: since each : is one pretoken
    if ( $tok eq '::' ) { $i_beg-- }
    my $pos_beg = $$rtoken_map[$i_beg];
    pos($input_line) = $pos_beg;

    # The first capture group holds the complete package qualifier
    # (everything through the last ' or ::), and the second holds the
    # final name.  The '*' must be inside the first group; if it is
    # placed after the group then only the last component is captured,
    # so that A::B::c would be looked up in package B instead of A::B.
    if ( $input_line =~ m/\G\s*((?:\w*(?:'|::))*)(\w+)/gc ) {

        # save the captures before any other pattern match is made
        my $qualifier = $1;
        my $sub_name  = $2;

        my $pos = pos($input_line);
        my $numc = $pos - $pos_beg;
        $tok = substr( $input_line, $pos_beg, $numc );

        # type 'w' includes anything without leading type info
        # ($,%,@,*) including something like abc::def::ghi
        $type = 'w';

        # the qualifier is an empty string if there is no package part
        if ( defined($qualifier) && length($qualifier) ) {
            $package = $qualifier;
            $package =~ s/\'/::/g;
            if ( $package =~ /^\:/ ) { $package = 'main' . $package }
            $package =~ s/::$//;
        }
        else {
            $package = $current_package;

            if ( $is_keyword{$tok} ) {
                $type = 'k';
            }
        }

        if ( $type ne 'k' ) {

            # check for v-string with leading 'v' type character
            # (This seems to have precedence over filehandle, type 'Y')
            if ( $tok =~ /^v\d+$/ ) {

                # we only have the first part - something like 'v101' - 
                # look for more
                if ( $input_line =~ m/\G(\.\d+)+/gc ) {
                    $pos  = pos($input_line);
                    $numc = $pos - $pos_beg;
                    $tok  = substr( $input_line, $pos_beg, $numc );
                }
                $type = 'v';

                # warn if this version can't handle v-strings
                unless ($saw_v_string) { report_v_string($tok) }
            }

            elsif ( $is_constant{$package}{$sub_name} ) {
                $type = 'C';
            }

            # Note: strangely, perl does not seem to really let you create
            # functions which act like eval and do, in the sense that eval
            # and do may have operators following the final }, but any operators
            # that you create with prototype (&) apparently do not allow
            # trailing operators, only terms.  This seems strange.
            # If this ever changes, here is the update
            # to make perltidy behave accordingly:

            # elsif ( $is_block_function{$package}{$tok} ) {
            #    $tok='eval'; # patch to do braces like eval  - doesn't work
            #    $type = 'k';
            #}
            # FIXME: This should become a separate type to allow for different
            # future behavior:
            elsif ( $is_block_function{$package}{$sub_name} ) {
                $type = 'G';
            }

            elsif ( $is_block_list_function{$package}{$sub_name} ) {
                $type = 'G';
            }
            elsif ( $is_user_function{$package}{$sub_name} ) {
                $type = 'U';
            }

            # check for filehandle
            elsif (

              # added 2001-03-27: must not be followed immediately by '('
              # see fhandle.t
              ( $input_line !~ m/\G\(/gc )

              # and
              && (

              # preceded by 'print' or 'printf'
              ( $last_nonblank_token =~ /^(print|printf)$/ )

              # or preceded by 'print(' or 'printf('
              || ( ( $last_nonblank_token eq '(' )
              && ( $paren_type[$paren_depth] =~ /^(print|printf)$/ ) ) ) )
            {

                # may not be filehandle unless followed by a space
                if ( $input_line =~ m/\G\s+/gc ) {
                    $type = 'Y';

                    # Complain if a filehandle has any lower case
                    # letters.  This is suggested good practice, but the
                    # main reason for this warning is that prior to
                    # release 20010328, perltidy incorrectly parsed a
                    # function call after a print/printf, with the
                    # result that a space got added before the opening
                    # paren, thereby converting the function name to a
                    # filehandle according to perl's weird rules.  This
                    # will not usually generate a syntax error, so this
                    # is a potentially serious bug.  By warning loudly
                    # of filehandles with any lower case letters,
                    # followed by opening parens, we will help the user
                    # find almost all of these older errors.  This can
                    # be converted to a logfile entry after some time.
                    # use 'sub_name' because something like
                    # main::MYHANDLE is ok
                    if ( $sub_name =~ /[a-z]/ ) {

                        # could be bug caused by older perltidy if 
                        # followed by '('
                        if ( $input_line =~ m/\G\s*\(/gc ) {
                            warning(
"Filehandle '$tok' should be in caps; see NOTE below\n"
                            );
                            $tokenizer_self->{_saw_lc_filehandle} = 1;
                        }

                        # probably not bug caused by older perltidy, but note
                        else {
                            write_logfile_entry(
                              "Filehandle '$tok' should be in caps\n");
                        }
                    }
                }

                # bareword not followed by a space -- may not be filehandle
                # (may be function call defined in a 'use' statement)
                else {
                    $type = 'Z';
                }
            }
        }

        # Now we must convert back from character position
        # to pre_token index. 
        # I don't think an error flag can occur here ..but who knows
        my $error;
        ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map );
        if ($error) {
            warning("scan_bare_identifier: Possibly invalid tokenization\n");
        }
    }

    # no match but line not blank - could be syntax error
    # perl will take '::' alone without complaint
    else {
        $type = 'w';

        # change this warning to log message if it becomes annoying
        warning("didn't find identifier after leading ::\n");
    }
    return ( $i, $tok, $type );
}

sub scan_id {

=pod

This is the new identifier scanner, which is intended to replace
scan_identifier eventually.  At present it only handles the type
tokens 'sub' and 'package'; the type tokens $ * % @ & -> are not yet
implemented.

It scans the identifier which follows a type token.  The caller
indicates the kind of call with $id_scan_state.  On a starting call
$id_scan_state is '' and $tok must be the type token itself.

If nothing but blanks or a comment follows the type token on the
line, then $id_scan_state is returned with the value of the type
token, which means that another call must be made with the next line
in order to get the identifier.  Otherwise the identifier is scanned
and handled here, and '' is returned for $id_scan_state.

The return values are ( $i, $tok, $type, $id_scan_state ).

=cut
    my ( $input_line, $i, $tok, $rtokens, $rtoken_map, $id_scan_state ) = @_;
    my $type = '';
    my $i_beg;

    # on re-entry, start scanning at first token on the line
    if ($id_scan_state) {
        $i_beg = $i;
    }

    # on initial entry, start scanning just after type token
    else {
        $id_scan_state = $tok;
        $type          = 't';
        $i_beg         = $i + 1;
    }

    # find $i_beg = index of next nonblank token,
    # and handle empty lines
    my $first_token = $$rtokens[$i_beg];
    my $is_blank = ( $i_beg > $max_token_index ) ? 1 : 0;
    unless ($is_blank) {

        # only a '#' immediately after a '$' is not a comment
        $is_blank = 1 if ( $first_token eq '#' && $tok ne '$' );

        # skip over leading whitespace; the line is still considered
        # blank if only whitespace or a comment remains
        if ( $first_token =~ /^\s/ ) {
            ( $first_token, $i_beg ) =
              find_next_nonblank_token_on_this_line( $i_beg, $rtokens );
            $is_blank = 1 if ( $first_token =~ /^[#\s]/ );
        }
    }

    # handle non-blank line; identifier, if any, must follow
    if ( !$is_blank ) {

        # the scan finishes on this line whatever the outcome
        my $scan_state = $id_scan_state;
        $id_scan_state = '';

        if ( $scan_state eq 'sub' ) {
            ( $i, $tok, $type ) =
              do_scan_sub( $input_line, $i, $i_beg, $tok, $type, $rtoken_map );
        }
        elsif ( $scan_state eq 'package' ) {
            ( $i, $tok, $type ) =
              do_scan_package( $input_line, $i, $i_beg, $tok, $type,
              $rtoken_map );
        }
        else {
            warning("invalid token in scan_id: $tok\n");
        }
    }

    # shouldn't happen: still scanning but we have no type to return
    if ( $id_scan_state && ( !defined($type) || !$type ) ) {
        warning(
        "Program bug in scan_id: undefined type but scan_state=$id_scan_state\n"
        );
    }

    if ( $TOKENIZER_DEBUG_FLAG{NSCAN} ) {
        print
          "NSCAN: returns i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
    }
    return ( $i, $tok, $type, $id_scan_state );
}

sub do_scan_sub {

    # Parse the name and prototype which follow a 'sub' token, and
    # record what the prototype tells us about the sub.
    #
    # This is called with $i_beg equal to the index of the first nonblank
    # token following the 'sub' token.  The return values are
    # ( $i, $tok, $type ).  For a named sub, $tok becomes 'sub ' plus the
    # text that was scanned, and $type becomes 'i'.

    my ( $input_line, $i, $i_beg, $tok, $type, $rtoken_map ) = @_;

    my $pos_beg = $$rtoken_map[$i_beg];
    pos($input_line) = $pos_beg;

    # a named sub: optional package qualifier, name, optional prototype
    if ( $input_line =~ m/\G\s*((?:\w*(?:'|::))*)(\w+)(\s*\([^){]*\))?/gc ) {
        my ( $qualifier, $subname, $proto ) = ( $1, $2, $3 );
        my $pos = pos($input_line);

        $tok = 'sub ' . substr( $input_line, $pos_beg, $pos - $pos_beg );
        $type = 'i';

        # an unqualified name belongs to the current package; convert
        # any old style ' separators, and supply 'main' for a leading ::
        my $package = $qualifier ? $qualifier : $current_package;
        $package =~ s/\'/::/g;
        if ( $package =~ /^\:/ ) { $package = 'main' . $package }
        $package =~ s/::$//;

        # no prototype at all
        if ( !defined($proto) ) {
            $is_user_function{$package}{$subname} = 1;
        }
        else {

            # strip the parens and surrounding whitespace
            $proto =~ s/^\s*\(\s*//;
            $proto =~ s/\s*\)$//;

            # an empty prototype is taken to define a constant
            if ( !$proto ) {
                $is_constant{$package}{$subname} = 1;
            }
            else {
                $is_user_function{$package}{$subname} = 1;

                # prototypes containing '&' must be treated specially..

                # right curly braces of prototypes ending in 
                # '&' may be followed by an operator
                if ( $proto =~ /\&$/ ) {
                    $is_block_function{$package}{$subname} = 1;
                }

                # right curly braces of prototypes NOT ending in 
                # '&' may NOT be followed by an operator
                elsif ( $proto =~ /\&/ ) {
                    $is_block_list_function{$package}{$subname} = 1;
                }
            }
        }

        # Now we must convert back from character position
        # to pre_token index. 
        # I don't think an error flag can occur here ..but ?
        my $error;
        ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map );
        if ($error) { warning("Possibly invalid sub\n") }
    }

    # look for prototype following an anonymous sub so they don't get
    # stranded.  ( sub.t )  
    elsif ( $input_line =~ m/\G\s*\([^){]*\)/gc ) {
        my $error;
        ( $i, $error ) =
          inverse_pretoken_map( $i, pos($input_line), $rtoken_map );
        if ($error) { warning("Possibly invalid sub\n") }
    }

    # otherwise there was no match but the line is not blank; the input
    # values are returned unchanged
    return ( $i, $tok, $type );
}

sub do_scan_package {

    # do_scan_package parses a package name 
    # it is called with $i_beg equal to the index of the first nonblank
    # token following a 'package' token.
    #
    # input parameters:
    #   $input_line - the line of text being scanned
    #   $i          - the current pretoken index
    #   $i_beg      - index of the first nonblank pretoken after 'package'
    #   $tok        - the current token
    #   $type       - the current token type; it is always replaced
    #   $rtoken_map - reference to the pretoken map giving the starting
    #                 character position in $input_line of each pretoken
    # output parameters:
    #   $i    - updated pretoken index if a package name was found
    #   $tok  - 'package ' followed by the text scanned, if a name was found
    #   $type - 'i' if a package name was found, 'k' otherwise
    # As a side effect, $current_package is set to the package found.

    my ( $input_line, $i, $i_beg, $tok, $type, $rtoken_map ) = @_;
    my $pos_beg = $$rtoken_map[$i_beg];
    pos($input_line) = $pos_beg;

    # handle non-blank line; package name, if any, must follow
    if ( $input_line =~ m/\G\s*((?:\w*(?:'|::))*\w+)/gc ) {
        my $package = ( defined($1) && $1 ) ? $1 : 'main';

        # convert old style ' separators, and supply 'main' for a leading ::
        $package =~ s/\'/::/g;
        if ( $package =~ /^\:/ ) { $package = 'main' . $package }
        $package =~ s/::$//;
        my $pos = pos($input_line);
        my $numc = $pos - $pos_beg;
        $tok = 'package ' . substr( $input_line, $pos_beg, $numc );
        $type = 'i';

        # Now we must convert back from character position
        # to pre_token index. 
        # I don't think an error flag can occur here ..but ?
        my $error;
        ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map );
        if ($error) { warning("Possibly invalid package\n") }
        $current_package = $package;

    }

    # no match but line not blank --
    # could be a label with name package, like package:  , for example.
    else {
        $type = 'k';
    }

    return ( $i, $tok, $type );
}

sub scan_identifier {

    # Assemble an identifier from the pre-tokenized pieces of the current
    # line using a small state machine.
    #
    # Input parameters:
    #   $i             = index in @$rtokens of the token at which to start
    #   $id_scan_state = scan state carried over from an earlier call, or a
    #                    false value (such as '') to start a new identifier
    #   $identifier    = identifier text accumulated so far
    #   $rtokens       = reference to the array of tokens for this line
    #
    # Returns the list ( $i, $tok, $type, $id_scan_state, $identifier ):
    #   $i             = token index at which the scan stopped; reset to the
    #                    starting index if no identifier text was built
    #   $tok           = the assembled identifier, or the starting token if
    #                    no identifier text was built
    #   $type          = one of 'i', 't', 'w', '->', '&' or ''
    #   $id_scan_state = '' when finished, otherwise the state to resume in
    #   $identifier    = the accumulated identifier text
    #
    # Values of $id_scan_state:
    #   '$' = starting a variable name (also entered after 'sub', 'package'
    #         and '->')
    #   '&' = starting a possible sub call
    #   'A' = looking for alpha (after ::)
    #   ':' = looking for :: after alpha
    #   '(' = looking for the ( of a prototype
    #   ')' = looking for the ) which ends a prototype
    # Only '$' and '&' can be returned to the caller; the other states are
    # cleared at the end of the line (see below).
    #
    # File-level variables read here: $last_nonblank_token,
    # $max_token_index, %is_keyword and %TOKENIZER_DEBUG_FLAG.

=pod scan_identifier

NOTE: This developmental scanner WILL BE REPLACED by the newer version
"scan_id".  The reason is that scan_id will be regex based, which makes
maintenance much easier, and probably improves the speed.

This routine assembles tokens into identifiers.
It maintains a scan state, id_scan_state.  It updates
id_scan_state based upon current id_scan_state and token, and returns an
updated id_scan_state and the next index after the identifier.

=cut
    my ( $i, $id_scan_state, $identifier, $rtokens ) = @_;
    my $i_begin   = $i;
    my $type      = '';
    my $tok_begin = $$rtokens[$i_begin];

    # a starting ':' is treated as the package separator '::'
    # NOTE(review): presumably the caller has already checked that the
    # next token is also ':' -- confirm against the call sites
    if ( $tok_begin eq ':' ) { $tok_begin = '::' }

    # remember the entry values; they are used for the debug output and
    # for restoring $tok and $i if nothing is found
    my $id_scan_state_begin = $id_scan_state;
    my $identifier_begin    = $identifier;
    my $tok     = $tok_begin;
    my $message = "";

    # these flags will be used to help figure out the type:
    my $saw_alpha = ( $tok =~ /[A-Za-z_]/ );
    my $saw_type;

    # allow old package separator (') except in 'use' statement
    my $allow_tick = ( $last_nonblank_token ne 'use' );

    # get started by defining a type and a state if necessary
    unless ($id_scan_state) {

        # fixup for digraph
        if ( $tok eq '>' ) {
            $tok       = '->';
            $tok_begin = $tok;
        }
        $identifier = $tok;

        if ( $tok =~ /[\$\%\@\*]/ ) {
            $id_scan_state = '$';
        }
        elsif ( $tok eq '&' ) {
            $id_scan_state = '&';
        }
        elsif ( $tok =~ /^(sub|package)$/ ) {
            $saw_alpha = 0;         # 'sub' is considered type info here
            $id_scan_state = '$';
            $identifier .= ' ';     # need a space to separate sub from sub name
        }
        elsif ( $tok eq '::' ) {
            $id_scan_state = 'A';
        }
        elsif ( $tok =~ /[A-Za-z_]/ ) {
            $id_scan_state = ':';
        }
        elsif ( $tok eq '->' ) {
            $id_scan_state = '$';
        }
        else {

            # shouldn't happen
            my ( $a, $b, $c ) = caller;
            warning("Program Bug: scan_identifier given bad token = $tok \n");
            warning("   called from sub $a  line: $c\n");
            report_definite_bug();
        }

        # anything which did not start with a letter or underscore
        # counts as having leading type information
        $saw_type = !$saw_alpha;
    }
    else {

        # resuming an earlier scan: back up one token so that the
        # pre-increment in the loop below re-reads the token at $i_begin
        $i--;
        $saw_type = ( $tok =~ /([\$\%\@\*\&])/ );
    }

    # now loop to gather the identifier
    # $i_save tracks the index of the most recent non-blank token already
    # processed; branches which reject the current token restore $i to it
    my $i_save = $i;

    while ( $i < $max_token_index ) {
        $i_save = $i unless ( $tok =~ /^\s*$/ );
        $tok = $$rtokens[ ++$i ];

        # combine two adjacent ':' tokens into the single token '::'
        # NOTE(review): when $i is the last index, $$rtokens[ $i + 1 ] is
        # undefined and this comparison can emit an 'uninitialized'
        # warning under $^W -- confirm whether a line can end in ':' here
        if ( ( $tok eq ':' ) && ( $$rtokens[ $i + 1 ] eq ':' ) ) {
            $tok = '::';
            $i++;
        }

        if ( $id_scan_state eq '$' ) {    # starting variable name

            if ( $tok eq '$' ) {

                $identifier .= $tok;

                # we've got a punctuation variable if end of line (punct.t)
                if ( $i == $max_token_index ) {
                    $type = 'i';
                    $id_scan_state = '';
                    last;
                }
            }
            elsif ( $tok =~ /^[A-Za-z_]/ ) {    # alphanumeric ..
                $saw_alpha     = 1;
                $id_scan_state = ':';    # now need ::
                $identifier .= $tok;
            }
            elsif ( ( $tok =~ /^[\']$/ ) && $allow_tick ) {    # old package separator (')
                $saw_alpha     = 1;
                $id_scan_state = ':';    # now need ::
                $identifier .= $tok;
            }

            # Perl will accept leading digits in identifiers,
            # although they may not always produce useful results.
            # Something like $main::0 is ok.  But this also works:
            #
            #  sub howdy::123::bubba{ print "bubba $54321!\n" }
            #  howdy::123::bubba();
            #
            elsif ( $tok =~ /^[0-9]/ ) {    # numeric
                $saw_alpha     = 1;
                $id_scan_state = ':';    # now need ::
                $identifier .= $tok;
            }
            elsif ( $tok eq '::' ) {
                $id_scan_state = 'A';
                $identifier .= $tok;
            }
            elsif ( ( $tok eq '#' ) && ( $identifier eq '$' ) ) {    # $#array
                $identifier .= $tok;    # keep same state, a $ could follow
            }
            elsif ( $tok eq '{' ) {    # skip something like ${xxx} or ->{
                $id_scan_state = '';

                # if this is the first token of a line, any tokens for this
                # identifier have already been accumulated
                if ( $identifier eq '$' || $i == 0 ) { $identifier = ''; }
                $i = $i_save;
                last;
            }

            # space ok after leading $ % * & @ ->
            elsif ( $tok =~ /^\s*$/ ) {

                if ( $identifier =~ /^[\$\%\*\&\@]/ ) {

                    if ( length($identifier) > 1 ) {
                        $id_scan_state = '';
                        $i    = $i_save;
                        $type = 'i';       # probably punctuation variable
                        last;
                    }
                    else {

                        # spaces after $'s are common, and space after @
                        # is harmless, so only complain about space
                        # after other type characters. Space after $ and
                        # @ will be removed in formatting.  Report space
                        # after % and * because they might indicate a
                        # parsing error.  In other words '% ' might be a
                        # modulo operator.  Delete this warning if it
                        # gets annoying.
                        if ( $identifier !~ /^[\@\$]$/ ) {
                            $message =
                              "Space in identifier, following $identifier\n";
                        }
                    }
                }
                elsif ( $identifier =~ /^(->)$/ ) {

                    # NOTE(review): the pattern above only matches when
                    # $identifier is exactly '->' (length 2), so this test
                    # can never be true and the body is unreachable
                    if ( length($identifier) > 2 ) {
                        $id_scan_state = '';
                        last;
                    }
                }
            }
            elsif ( $tok eq '^' ) {

                # check for some special variables like $^W
                if ( $identifier =~ /^[\$\*\@\%]$/ ) {
                    $identifier .= $tok;
                    $id_scan_state = 'A';
                }
                else {
                    $id_scan_state = '';
                }
            }
            else {    # something else

                # check for various punctuation variables
                if ( $identifier =~ /^[\$\*\@\%]$/ ) {
                    $identifier .= $tok;
                }
                elsif ( $identifier eq '$#' ) {
                    $i = $i_save;
                    if ( $tok =~ /^[\{\(\[]$/ ) { $type = 'i' }
                }
                elsif ( $identifier eq '$$' ) {
                    $i = $i_save;
                    if ( $tok =~ /^[\{\(\[]$/ ) { $type = 'i' }
                }
                elsif ( $identifier eq '->' ) {
                    $i = $i_save;
                }
                else {
                    $i = $i_save;

                    # a lone one-character identifier is discarded; the
                    # starting token and index are restored at the end
                    if ( length($identifier) == 1 ) { $identifier = ''; }
                }
                $id_scan_state = '';
                last;
            }
        }
        elsif ( $id_scan_state eq '&' ) {    # starting sub call?

            if ( $tok =~ /^[\$A-Za-z_]/ ) {    # alphanumeric ..
                $id_scan_state = ':';    # now need ::
                $saw_alpha     = 1;
                $identifier .= $tok;
            }
            elsif ( ( $tok =~ /^[\']$/ ) && $allow_tick ) {    # old package separator (')
                $id_scan_state = ':';    # now need ::
                $saw_alpha     = 1;
                $identifier .= $tok;
            }
            elsif ( $tok =~ /^[0-9]/ ) {    # numeric..see comments above
                $id_scan_state = ':';    # now need ::
                $saw_alpha     = 1;
                $identifier .= $tok;
            }
            elsif ( $tok =~ /^\s*$/ ) {    # allow space
            }
            elsif ( $tok eq '::' ) {       # leading ::
                $id_scan_state = 'A';    # accept alpha next
                $identifier .= $tok;
            }
            elsif ( $tok eq '{' ) {
                if ( $identifier eq '&' || $i == 0 ) { $identifier = ''; }
                $i = $i_save;
                $id_scan_state = '';
                last;
            }
            else {

                # punctuation variable?
                # testfile: cunningham4.pl
                if ( $identifier eq '&' ) {
                    $identifier .= $tok;
                }
                else {

                    # give up on an identifier; the starting token is
                    # returned with type '&'
                    $identifier = '';
                    $i    = $i_save;
                    $type = '&';
                }
                $id_scan_state = '';
                last;
            }
        }
        elsif ( $id_scan_state eq 'A' ) {    # looking for alpha (after ::)

            if ( $tok =~ /^[A-Za-z_]/ ) {    # found it
                $identifier .= $tok;
                $id_scan_state = ':';    # now need ::
                $saw_alpha     = 1;
            }
            elsif ( ( $tok =~ /^[\']$/ ) && $allow_tick ) {
                $identifier .= $tok;
                $id_scan_state = ':';    # now need ::
                $saw_alpha     = 1;
            }
            elsif ( $tok =~ /^[0-9]/ ) {    # numeric..see comments above
                $identifier .= $tok;
                $id_scan_state = ':';    # now need ::
                $saw_alpha     = 1;
            }

            # for a sub definition, a blank or '(' starts the search
            # for a prototype
            elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) {
                $id_scan_state = '(';
                $identifier .= $tok;
            }
            elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) {
                $id_scan_state = ')';
                $identifier .= $tok;
            }
            else {
                $id_scan_state = '';
                $i = $i_save;
                last;
            }
        }
        elsif ( $id_scan_state eq ':' ) {    # looking for :: after alpha

            if ( $tok eq '::' ) {    # got it
                $identifier .= $tok;
                $id_scan_state = 'A';    # now require alpha
            }
            elsif ( $tok =~ /^[A-Za-z_]/ ) {    # more alphanumeric is ok here
                $identifier .= $tok;
                $id_scan_state = ':';    # now need ::
                $saw_alpha     = 1;
            }
            elsif ( $tok =~ /^[0-9]/ ) {    # numeric..see comments above
                $identifier .= $tok;
                $id_scan_state = ':';    # now need ::
                $saw_alpha     = 1;
            }
            elsif ( $tok =~ /^[\']/ && $allow_tick ) {    # tick

                # a tick following a keyword is not taken as a package
                # separator; the identifier ends at the keyword.  Clearing
                # the state makes the next pass fall through to the final
                # 'else' branch of this chain, which leaves the loop.
                if ( $is_keyword{$identifier} ) {
                    $id_scan_state = '';    # that's all
                    $i = $i_save;
                }
                else {
                    $identifier .= $tok;
                }
            }
            elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) {
                $id_scan_state = '(';
                $identifier .= $tok;
            }
            elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) {
                $id_scan_state = ')';
                $identifier .= $tok;
            }
            else {
                $id_scan_state = '';    # that's all
                $i = $i_save;
                last;
            }
        }
        elsif ( $id_scan_state eq '(' ) {    # looking for ( of prototype

            if ( $tok eq '(' ) {    # got it
                $identifier .= $tok;
                $id_scan_state = ')';    # now find the end of it
            }
            elsif ( $tok =~ /^\s*$/ ) {    # blank - keep going
                $identifier .= $tok;
            }
            else {
                $id_scan_state = '';    # that's all - no prototype
                $i = $i_save;
                last;
            }
        }
        elsif ( $id_scan_state eq ')' ) {    # looking for ) to end

            if ( $tok eq ')' ) {    # got it
                $identifier .= $tok;
                $id_scan_state = '';    # all done
                last;
            }

            # blanks and the characters $ % \ * @ & ; are accepted
            # silently inside a prototype
            elsif ( $tok =~ /^[\s\$\%\\\*\@\&\;]/ ) {
                $identifier .= $tok;
            }
            else {    # probable error in script, but keep going
                warning("Unexpected '$tok' while seeking end of prototype\n");
                $identifier .= $tok;
            }
        }
        else {    # can get here due to error in initialization
            $id_scan_state = '';
            $i = $i_save;
            last;
        }
    }

    if ( $id_scan_state eq ')' ) {
        warning("Hit end of line while seeking ) to end prototype\n");
    }

    # once we enter the actual identifier, it may not extend beyond
    # the end of the current line
    if ( $id_scan_state =~ /[A\:\(\)]/ ) {
        $id_scan_state = '';
    }

    # $i can be -1 if a resumed scan (which backs up one token) started
    # at index 0 and was then rejected
    if ( $i < 0 ) { $i = 0 }

    # assign a type if one of the branches above has not already done so
    unless ($type) {

        if ($saw_type) {

            if ($saw_alpha) {
                $type = 'i';
            }
            elsif ( $identifier eq '->' ) {
                $type = '->';
            }

            # more than one character, not ending in '$', and not just
            # 'sub ' or 'package ' with no name
            elsif ( ( length($identifier) > 1 ) && ( $identifier !~ /\$$/ )
              && ( $identifier !~ /^(sub |package )$/ ) )
            {
                $type = 'i';
            }

            # only type information so far, with no name
            else { $type = 't' }
        }
        elsif ($saw_alpha) {

            # type 'w' includes anything without leading type info
            # ($,%,@,*) including something like abc::def::ghi
            $type = 'w';
        }
        else {
            $type = '';
        }    # this can happen on a restart
    }

    if ($identifier) {
        $tok = $identifier;
        if ($message) { write_logfile_entry($message) }
    }
    else {

        # nothing was assembled: hand back the starting token and index
        $tok = $tok_begin;
        $i   = $i_begin;
    }

    if ( $TOKENIZER_DEBUG_FLAG{SCAN_ID} ) {
        my ( $a, $b, $c ) = caller;
        print
"SCANID: called from $a $b $c with tok, i, state, identifier =$tok_begin, $i_begin, $id_scan_state_begin, $identifier_begin\n";
        print
"SCANID: returned with tok, i, state, identifier =$tok, $i, $id_scan_state, $identifier\n";
    }
    return ( $i, $tok, $type, $id_scan_state, $identifier );
}

sub follow_quoted_string {

    # Scan the tokens of the current line for the closing delimiter of a
    # quoted string, skipping escaped characters.  If the starting quote
    # character is blank, the first non-blank character found is used as
    # the delimiter.
    #
    # Input parameters (in calling order):
    #   $i_beg         = the token index of the first token to search
    #   $in_quote      = number of quoted strings being followed
    #   $rtokens       = reference to the array of tokens
    #   $beginning_tok = the starting quote character (blank if not yet
    #                    known)
    #   $quote_pos     = for an alphanumeric delimiter, the character
    #                    position within the current token at which to
    #                    continue searching; 0 means start with the next
    #                    token
    #   $quote_depth   = nesting depth, since delimiters '{ ( [ <' can be
    #                    nested
    #
    # Returns the list
    #   ( $i, $in_quote, $beginning_tok, $quote_pos, $quote_depth ):
    #   $i             = the token index of the ending quote character, or
    #                    at most $max_token_index if it was not found
    #   $in_quote      = decremented if the end was found, else unchanged
    #   $beginning_tok = the starting quote character
    #   $quote_pos     = position to check next for alphanumeric delimiter
    #   $quote_depth   = updated nesting depth
    #
    # File-level names used here: $max_token_index, %TOKENIZER_DEBUG_FLAG,
    # matching_end_token() and write_logfile_entry().
    my ( $i_beg, $in_quote, $rtokens, $beginning_tok, $quote_pos, $quote_depth )
      = @_;
    my ( $tok, $end_tok );

    # start one token early because the loops below pre-increment $i
    my $i = $i_beg - 1;

    if ( $TOKENIZER_DEBUG_FLAG{QUOTE} ) {
        print
"QUOTE entering with quote_pos = $quote_pos i=$i beginning_tok =$beginning_tok\n";
    }

    # get the corresponding end token
    if ( $beginning_tok !~ /^\s*$/ ) {
        $end_tok = matching_end_token($beginning_tok);
    }

    # a blank token means we must find and use the first non-blank one
    else {

        # a '#' only starts a comment here at the beginning of a line or
        # after whitespace; otherwise it is the quote delimiter
        my $allow_quote_comments = ( $i < 0 ) ? 1 : 0; # i<0 means we saw a <cr>

        while ( $i < $max_token_index ) {
            $tok = $$rtokens[ ++$i ];

            if ( $tok !~ /^\s*$/ ) {

                if ( ( $tok eq '#' ) && ($allow_quote_comments) ) {

                    # the rest of the line is a comment; skip to its end
                    $i = $max_token_index;
                }
                else {

                    # within a multi-character token the delimiter is the
                    # single character at position $quote_pos (1-based)
                    if ( length($tok) > 1 ) {
                        if ( $quote_pos <= 0 ) { $quote_pos = 1 }
                        $beginning_tok = substr( $tok, $quote_pos - 1, 1 );
                    }
                    else {
                        $beginning_tok = $tok;
                        $quote_pos     = 0;
                    }
                    $end_tok     = matching_end_token($beginning_tok);
                    $quote_depth = 1;
                    last;
                }
            }
            else {
                $allow_quote_comments = 1;
            }
        }
    }

    # There are two different loops which search for the ending quote
    # character.  In the rare case of an alphanumeric quote delimiter, we
    # have to look through alphanumeric tokens character-by-character, since
    # the pre-tokenization process combines multiple alphanumeric
    # characters, whereas for a non-alphanumeric delimiter, only tokens of
    # length 1 can match.

    # loop for case of alphanumeric quote delimiter..
    # "quote_pos" is the position in the current word at which to begin
    # searching
    if ( $beginning_tok =~ /\w/ ) {

        # Note this because it is not recommended practice except
        # for obfuscated perl contests
        if ( $in_quote == 1 ) {
            write_logfile_entry(
              "Note: alphanumeric quote delimiter ($beginning_tok) \n");
        }

        while ( $i < $max_token_index ) {

            # move on to the next token unless part of the current one
            # still has to be searched
            # NOTE(review): if this sub is entered with $quote_pos > 0 and
            # $i_beg > 0, $tok has not been loaded when index() is called
            # below -- confirm that callers never resume in that way
            if ( $quote_pos == 0 || ( $i < 0 ) ) {
                $tok = $$rtokens[ ++$i ];

                if ( $tok eq '\\' ) {

                    # step over the backslash to the token after it
                    $quote_pos++;
                    last if ( $i >= $max_token_index );
                    $tok = $$rtokens[ ++$i ];

                }
            }

            # index() returns -1 when the delimiter is absent, which
            # leaves $quote_pos at 0 so the next token is fetched
            $quote_pos = 1 + index( $tok, $end_tok, $quote_pos );

            if ( $quote_pos > 0 ) {

                $quote_depth--;

                if ( $quote_depth == 0 ) {
                    $in_quote--;
                    last;
                }
            }
        }
    }

    # loop for case of a non-alphanumeric quote delimiter..
    else {

        while ( $i < $max_token_index ) {
            $tok = $$rtokens[ ++$i ];

            if ( $tok eq $end_tok ) {
                $quote_depth--;

                if ( $quote_depth == 0 ) {
                    $in_quote--;
                    last;
                }
            }

            # only reached when the opening and closing delimiters differ
            # (bracketing pairs), so this counts nested openers
            elsif ( $tok eq $beginning_tok ) {
                $quote_depth++;
            }

            # skip the token which follows a backslash
            elsif ( $tok eq '\\' ) {
                $i++;
            }
        }
    }

    # skipping an escaped token at the end of the line can overshoot
    if ( $i > $max_token_index ) { $i = $max_token_index }
    return ( $i, $in_quote, $beginning_tok, $quote_pos, $quote_depth );
}

sub matching_end_token {

    # Return the character which closes a quote or pattern opened with
    # $beginning_token.  The bracketing openers { [ < ( map to their
    # partners; any other character closes itself.
    my ($beginning_token) = @_;

    return '}' if ( $beginning_token eq '{' );
    return ']' if ( $beginning_token eq '[' );
    return '>' if ( $beginning_token eq '<' );
    return ')' if ( $beginning_token eq '(' );

    # not a bracketing delimiter
    return $beginning_token;
}

BEGIN {

    # Build, at compile time, the lookup tables used by the tokenizer:
    # brace names for error messages, the digraph and trigraph tables, the
    # set of valid token types, the file test letters, two patterns for
    # block operators, the tables saying whether a term or an operator is
    # expected after a given token or token type, and the keyword table.
    # The hashes are filled by assigning a list of 1's to a hash slice.

    # These names are used in error messages
    # ('#' is the qw delimiter because the lists contain brackets)
    @opening_brace_names = qw# '{' '[' '(' #;
    @closing_brace_names = qw# '}' ']' ')' #;

    # two-character operators
    # ('..' is listed twice; the repeat is harmless because these lists
    # are only used to set hash entries to 1)
    my @digraphs = qw(
      .. :: << >> ** && .. ||  -> => += -= .= %= &= |= ^= *= <>
      <= >= == =~ !~ != ++ -- /= x=
    );
    @is_digraph{@digraphs} = (1) x scalar(@digraphs);

    # three-character operators
    my @trigraphs = qw( ... **= <<= >>= &&= ||= <=> );
    @is_trigraph{@trigraphs} = (1) x scalar(@trigraphs);

    # make a hash of all valid token types for self-checking the tokenizer
    # (adding NEW_TOKENS : select a new character and add to this list)
    my @valid_token_types = qw#
      b C G L R f h Q k t w i q n p m F pp mm U j J Y Z v
      { } ( ) [ ] ; + - / * | % ! x ~ = \ ? : . < > ^ &
      #;
    push ( @valid_token_types, @digraphs );
    push ( @valid_token_types, @trigraphs );

    # '#' and ',' are pushed separately; '#' is the qw delimiter above
    push ( @valid_token_types, '#' );
    push ( @valid_token_types, ',' );
    @is_valid_token_type{@valid_token_types} = (1) x scalar(@valid_token_types);

    # a list of file test letters, as in -e (Table 3-4 of 'camel 3')
    my @file_test_operators =
      qw( A B C M O R S T W X b c d e f g k l o p r s t u w x z);
    @is_file_test_operator{@file_test_operators} =
      (1) x scalar(@file_test_operators);

    # these functions have prototypes of the form (&@), so when they are
    # followed by a block, that block MAY NOT be followed by an
    # operator.
    # (stored as a pattern string, anchored to match the whole word)
    $block_list_operator = '^(sort|grep|map)$';

    # these operators have prototypes of the form (&), so when they are
    # followed by a block, that block MAY BE followed by an operator.
    $block_operator = '^(do|eval)$';

    # I'll build the list of keywords incrementally
    my @Keywords = ();

    # keywords and tokens after which a value or pattern is expected,
    # but not an operator.  In other words, these should consume terms
    # to their right, or at least they are not expected to be followed
    # immediately by operators.
    # --added srand 20-mar-01
    my @value_requestor = qw(
      AUTOLOAD
      BEGIN
      END
      CHECK
      INIT
      EQ
      GE
      GT
      LE
      LT
      NE
      abs
      accept
      alarm
      and
      atan2
      bind
      binmode
      bless
      caller
      chdir
      chmod
      chomp
      chop
      chown
      chr
      chroot
      close
      closedir
      cmp
      connect
      continue
      cos
      crypt
      dbmclose
      dbmopen
      defined
      delete
      die
      dump
      each
      else
      elsif
      eq
      exec
      exists
      exit
      exp
      fcntl
      fileno
      flock
      for
      foreach
      formline
      ge
      getgrgid
      getgrnam
      gethostbyaddr
      gethostbyname
      getnetbyaddr
      getnetbyname
      getpeername
      getpgrp
      getpriority
      getprotobyname
      getprotobynumber
      getpwnam
      getpwuid
      getservbyname
      getservbyport
      glob
      gmtime
      goto
      grep
      gt
      hex
      if
      index
      int
      ioctl
      join
      keys
      kill
      last
      lc
      lcfirst
      le
      length
      link
      listen
      local
      localtime
      lock
      log
      lstat
      lt
      map
      mkdir
      msgctl
      msgget
      msgrcv
      msgsnd
      my
      ne
      next
      no
      not
      oct
      open
      opendir
      or
      ord
      our
      pack
      pipe
      pop
      pos
      print
      printf
      prototype
      push
      quotemeta
      rand
      read
      readdir
      readlink
      recv
      redo
      ref
      rename
      require
      reset
      return
      reverse
      rewinddir
      rindex
      rmdir
      scalar
      seek
      seekdir
      select
      semctl
      semget
      semop
      send
      sethostent
      setnetent
      setpgrp
      setpriority
      setprotoent
      setservent
      setsockopt
      shift
      shmctl
      shmget
      shmread
      shmwrite
      shutdown
      sin
      sleep
      socket
      socketpair
      sort
      splice
      split
      sprintf
      sqrt
      srand
      stat
      study
      substr
      symlink
      syscall
      sysopen
      sysread
      sysseek
      system
      syswrite
      tell
      telldir
      tie
      tied
      truncate
      uc
      ucfirst
      umask
      undef
      unless
      unlink
      unpack
      unshift
      untie
      until
      use
      utime
      values
      vec
      waitpid
      warn
      while
      write
      xor
    );

    # copy into the keyword list now, before the non-keyword extras
    # are appended below
    push ( @Keywords, @value_requestor );

    # These are treated the same but are not keywords:
    my @extra_vr = qw(
      constant
      switch
      vars
    );
    push ( @value_requestor, @extra_vr );

    @expecting_term_token{@value_requestor} = (1) x scalar(@value_requestor);

    # this list contains keywords which do not look for arguments,
    # so that they might be followed by an operator, or at least
    # not a term.
    my @operator_requestor = qw(
      endgrent
      endhostent
      endnetent
      endprotoent
      endpwent
      endservent
      fork
      getgrent
      gethostent
      getlogin
      getnetent
      getppid
      getprotoent
      getpwent
      getservent
      setgrent
      setpwent
      time
      times
      wait
      wantarray
    );

    # as above: copy into the keyword list before adding the extras
    push ( @Keywords, @operator_requestor );

    # These are treated the same but are not considered keywords:
    my @extra_or = qw(
      STDERR
      STDIN
      STDOUT
    );

    push ( @operator_requestor, @extra_or );

    @expecting_operator_token{@operator_requestor} =
      (1) x scalar(@operator_requestor);

    # these token TYPES expect trailing operator but not a term
    # note: ++ and -- are post-increment and decrement, 'C' = constant
    my @operator_requestor_types = qw( ++ -- C );
    @expecting_operator_types{@operator_requestor_types} =
      (1) x scalar(@operator_requestor_types);

    # these token TYPES consume values (terms)
    # note: pp and mm are pre-increment and decrement
    # f=semicolon in for,  F=file test operator
    my @value_requestor_type = qw#
      L { ( [ ~ !~ =~ ; . .. ...  : && ! || = + - x
      **= += -= .= /= *= %= x= &= |= ^= <<= >>= &&= ||=
      <= >= == != => \ > < % * / ? & | ** <=>
      f F pp mm Y p m U J G
      #;
    push ( @value_requestor_type, ',' )
      ;    # (perl doesn't like a ',' in a qw block)
    @expecting_term_types{@value_requestor_type} =
      (1) x scalar(@value_requestor_type);

    #These keywords are handled specially in the tokenizer code:
    # (they are keywords but appear in neither expectation table above)
    my @special_keywords = qw(
      q
      qq
      qr
      qw
      qx
      package
      sub
      tr
      format
      eval
      do
    );
    push ( @Keywords, @special_keywords );

    # These are not used in any way yet
    #	my @unused_keywords = qw(
    #	  CORE
    #	  DESTROY
    #	  __FILE__
    #	  __LINE__
    #	  __PACKAGE__
    #	  eof
    #	  getc
    #	  getsockname
    #	  getsockopt
    #	  readline
    #	  readpipe
    #	  );

=pod
	 The list of keywords was extracted from function 'keyword' in perl file
	 toke.c version 5.005.03, using this utility, plus a little editing:
	 (file getkwd.pl):
	 while (<>) { while (/\"(.*)\"/g) { print "$1\n"; } }
	 Add 'get' prefix where necessary, then split into the above lists.

	 This list should be updated as necessary.
	 The list should not contain these special variables:
	 ARGV DATA ENV SIG STDERR STDIN STDOUT
	 __DATA__ __END__
=cut

    # the keyword table is the union of the two requestor lists (without
    # their extras) and the special keywords
    @is_keyword{@Keywords} = (1) x scalar(@Keywords);
}

BEGIN {

    # Debug switches for the tokenizer, set at compile time.
    # Caution: a switch which is turned on produces a lot of output, so
    # leave them all at 0 except when debugging small scripts.
    %TOKENIZER_DEBUG_FLAG = (
        GUESS    => 0,
        TOKENIZE => 0,
        EXPECT   => 0,
        SCAN_ID  => 0,
        NSCAN    => 0,
        QUOTE    => 0,
    );

    # announce every switch which has been turned on
    my @active_flags =
      grep { $TOKENIZER_DEBUG_FLAG{$_} } keys %TOKENIZER_DEBUG_FLAG;
    foreach (@active_flags) {
        print "TOKENIZER DEBUGGING with key $_\n";
    }
}

package Perltidy::VerticalAligner;

BEGIN {

    # Debug switches for this package, set at compile time.
    # Caution: a switch which is turned on produces a lot of output, so
    # leave them all at 0 except when debugging small scripts.
    %VALIGN_DEBUG_FLAG = (
        APPEND  => 0,
        APPEND0 => 0,
    );

    # announce every switch which has been turned on
    my @active_flags =
      grep { $VALIGN_DEBUG_FLAG{$_} } keys %VALIGN_DEBUG_FLAG;
    foreach (@active_flags) {
        print "DEBUGGING with key $_\n";
    }
}

package Perltidy::Formatter;

BEGIN {

    # Debug switches for this package, set at compile time.
    # Caution: a switch which is turned on produces a lot of output, so
    # leave them all at 0 except when debugging small scripts.
    %FORMATTER_DEBUG_FLAG = (
        FLUSH   => 0,
        OUTPUT  => 0,
        STORE   => 0,
        BOND    => 0,
        BREAK   => 0,
        WHITE   => 0,
        FORCE   => 0,
        UNDOBP  => 0,
        NOBREAK => 0,
        LIST    => 0,
        SPARSE  => 0,
        CI      => 0,
    );

    # announce every switch which has been turned on
    my @active_flags =
      grep { $FORMATTER_DEBUG_FLAG{$_} } keys %FORMATTER_DEBUG_FLAG;
    foreach (@active_flags) {
        print "FORMATTER_DEBUGGING with key $_\n";
    }
}

# Switch back to package main and start the program.  The BEGIN blocks
# above have already run at compile time by the time this call executes.
# main() itself is not defined in this part of the file.
package main;
main();

# vi: set ts=4 sw=4 :
