Article 11100 of comp.lang.perl:
Path: feenix.metronet.com!news.utdallas.edu!wupost!howland.reston.ans.net!gatech!usenet.ufl.edu!travis.csd.harris.com!amber!brad
From: brad@amber.ssd.csd.harris.com (Brad Appleton)
Newsgroups: comp.lang.perl
Subject: texinfo preprocessor - need help optimizing
Date: 2 Mar 1994 15:21:24 GMT
Organization: Harris CSD, Ft. Lauderdale, FL
Lines: 430
Message-ID: <2l2atk$np1@travis.csd.harris.com>
NNTP-Posting-Host: amber.ssd.csd.harris.com
Summary: I need your help

I have a perl script/library function to preprocess a texinfo
file. What it does is read the texinfo file, expanding all
the @include's and reading all the @iftex/@ifinfo/@ignore
commands and spit out the actual commands that would NOT
be ignored by makeinfo (or by TeX).

The problem is, it seems to frequently dump core when I
get to somewhere around 25,000 lines of input and I really
dont know why. I have used "perl -d" but it really hasnt
given me any reason to suspect that I am doing something
wrong (as opposed to "correct but inefficient") in my script.
I have also debugged the perl executable (a stripped version
of it anyway) and have not found any evidence of infinite
recursion.

I am using perl4 patchlevel 36 on a Harris NightHawk running
CX/UX 7.X (a "real-time" variant of Unix).

I be much obliged if some of you would be willing to read the script
and point out some inefficiencies (speed-wise or memory-wise), or
things that you think could be done "better" (in your possibly
subjective opinion).

Here is the script - your help is appreciated!
-------------------------------------------------------------------------------

# texinfo.pl --

# ---------------------------------------------------------------------------
# Preprocess a texinfo file and let the user see only those lines that will
# actually appear in in the info file (@include'd files are expanded).
#
# Usage: 
#         require 'texinfo.pl';
#         &texinfo(name);
#
#    where <name> is the name of the users function (which takes an input
#    line as its parameter) that will process the input lines, one by one.
#    If no "name" parameter is given than the default action (which is to
#    print the line) is used. The "&texinfo" function will parse @ARGV
#    looking for 'makeinfo' options and then will preprocess the input files
#    specified by the rest of @ARGV.
#
# Other useful items:
#    &texinfo'where -- Subroutine (takes no parameters) to return the current
#                      input file name and line number (in a single string).
#
#    $texinfo'DEBUG -- Set this variable to non-zero if you want some simple
#                      debugging output.
# ---------------------------------------------------------------------------

# I have to do my own option processing cuz getopt.pl, getopts.pl, and
# newgetopt.pl wont do what I want.
#
# Need to accept the following makeinfo options:
# -I DIR              to add DIR to the directory search list for including
#                     files with the `@include' command.
# -D VAR              to define a variable, as with `@set'.
# -U VAR              to undefine a variable, as with `@clear'.
# --no-validate       to suppress node cross reference validation.
# --no-warn           to suppress warning messages (errors are still output).
# --no-split          to suppress the splitting of large files.
# --no-headers        to suppress the output of Node: Foo headers.
# --verbose           to print information about what is being done.
# --version           to print the version number of Makeinfo.
# --output FILE or -o FILE
#                     to specify the output file.  When you specify the
#                     output file in this way, any `@setfilename' in the
#                     input file is ignored.
# --paragraph-indent NUM
#                     to set the paragraph indent to NUM (default 3).
# --fill-column NUM   to set the filling column to NUM (default 72).
# --error-limit NUM   to set the error limit to NUM (default 100).
# --reference-limit NUM
#                     to set the reference warning limit to NUM (default 1000).
# --footnote-style STYLE
#                     to set the footnote style to STYLE.  STYLE should
#                     either be `separate' to place footnotes in their own
#                     node, or `end', to place the footnotes at the end of
#                     the node in which they are defined (the default).
#
# In addition, I will allow the following:
# -X FILE             dont expand @includes of the given FILE
# --debug             generate output for debugging
# --no-include        dont expand '@include'ed files
# --tex               generate output that TeX would see (instead of makeinfo).
#                     reverses the interpretation of @ifinfo, @iftex, and @tex
#                     directives.
#

package texinfo;

$DEBUG = 0;            ## Debugging?
$IS_INFO = 1;          ## Should @ifinfo be TRUE?
$IGNORE_INCLUDES = 0;  ## Ignore @include directives?
%IGNORE_INCS = ();     ## List of files to NEVER @include
$CMD='@';              ## Texinfo command prefix.
$USERSUB = "texinfo'default_usersub" ;  ## default line-processing action
@CONDS = ();           ## Stack of active texinfo conditions
@INCS = ();            ## Directory search path for @included files
@HANDLES = ();         ## Stack of input-file handles
%LINES = ();           ## Keep track of current line in each active handle
$LINES = 0;            ## Total # of input lines seen so far

# Default line-processing action
sub texinfo'default_usersub {
   # just print to standard output
   print  STDOUT @_ ;
}

# Get makeinfo-options from main'@ARGV
sub texinfo'getopts {
   local($opt, $_);
   ## get options from @main'ARGV
   while ($main'ARGV[0] =~ /^-/) {
      $_ = shift(@main'ARGV);
      last if ($_ eq "--");
      if (/^-([UDIXo])(.*)$/o) {
         $opt = $1;
         $_ = $2;
         $_ = shift(@main'ARGV) if (! $_);
         if ($opt eq "D") {
            if (/^([^=]+)=(.*)$/) {
               $DEFS{$1} = $2;
            } else {
               $DEFS{$_} = 1;
            }
         } elsif ($opt eq "U") {
            undef $DEFS{$_};
         } elsif ($opt eq "I") {
            unshift(@INCS, "$_");
         } elsif ($opt eq "X") {
            ++$IGNORE_INCS{$_};
         } elsif ($opt eq "o") {
            ;
         }
      } elsif (/^--o.*$/o) {
         $_ = shift(@main'ARGV);
      } elsif (/^--no-inc.*$/o) {
         $IGNORE_INCLUDES = 1;
      } elsif (/^--[Dd][Ee][Bb][Uu][Gg].*$/o) {
         $DEBUG =1;
      } elsif (/^--[Tt][Ee][Xx].*$/o) {
         $IS_INFO = 0;
      } elsif (/^--([^o].*)$/o) {
         $_ = $1;
         if (/^[^nv]/) {
            $_ = shift(@main'ARGV);
         }
      } else {
         print STDERR "$0: unrecognized 'makeinfo' option \"$_\" -- ignored.\n";
      }
   }
}

# Return the current filename and line-number in the format "file:line"
# (good for printing error-messages)
#
sub texinfo'where {
   local($fhandle) = $HANDLES[$#HANDLES];
   return "$LINES (${fhandle}:$LINES{$fhandle})" if ($DEBUG);
   return "${fhandle}:$LINES{$fhandle}";
}


# ------------------------------------------------------------------
# The array CONDS is a "stack" of conditions!!! Each time we see
# a statement that implies conditional inclusion/exclusion of text,
# we push the statement type on the stack (along with an indication
# of whether the condition was true or false).
# ------------------------------------------------------------------

# Push the active condition on the stack, remembering its type, result
# and where in the input it occurred.
#
sub texinfo'push_cond {
   local($type, $result) = @_;
   local($where) = &texinfo'where;
   local($_) = "${type}\t${result}\t${where}";
   push(@CONDS, $_);
}

# Get the current active condition. returns the list (type, result, where).
# If there is no active condition, returns the list ('', 1, 0).
#
sub texinfo'top_cond {
   return ('', 1, 0) if (0 == @CONDS);
   split(" ", $CONDS[$#CONDS]);
}

# Same as top_cond, but also removes the topmost condition from the stack
sub texinfo'pop_cond {
   return ('', 1, 0) if (0 == @CONDS);
   split(" ", pop(@CONDS));
}

# Complain about any conditions that are still active in the given input file
sub texinfo'extra_cond {
   local($fhandle) = @_;
   local($cond_type, $cond_result, $cond_where);
   local($fname, $line);
   ## Check for any un-terminated conditions in this file
   ## (they are not allowed to span across files)
   while (1) {
      ($cond_type, $cond_result, $cond_where) = &texinfo'top_cond;
      last if (! $cond_type);
      ($fname, $line) = split(':', $cond_where);
      last if ($fname ne $fhandle);
      print STDERR "$0: ($where) expecting \"${CMD}end $cond_type\" for ${CMD}$cond_type at $cond_where.\n";
      &texinfo'pop_cond;
   }
}

# Evaluate an inclusion/exclusion condition. The type should be one
# of 'ifset', 'ifclear', 'iftex', 'ifinfo', 'ignore', or 'tex'. The
# expression should only be present if the type is 'ifset' or 'ifclear',
# and should be the name of the corresponding value to test.
#
sub texinfo'eval_cond {
   local($type, $expr) = @_;
   local($result) = 0;

   ## trim whitespace
   $type =~ s/\s*(\S*)\s*/\1/;
   $expr =~ s/\s*(\S*)\s*/\1/;

   ## evaluate the expression
   if ($type eq "ifset") {
      $result = 1 if (defined($DEFS{$expr}));
   } elsif ($type eq "ifclear") {
      $result = 1 if (! defined($DEFS{$expr}));
   } elsif ($type eq "iftex") {
      $result = 1 - $IS_INFO;
   } elsif ($type eq "ifinfo") {
      $result = $IS_INFO;
   } elsif ($type eq "tex") {
      $result = 1 - $IS_INFO;
   } elsif ($type eq "ignore") {
      $result = 0;
   } else {
      local($where) = &texinfo'where;
      print STDERR "$0: ($where) unknown command \"${CMD}$type\"\n.";
   } 
   print STDERR "> $type $expr = $result\n"  if ($DEBUG);

   $result;
}

# Start conditional inclusion/exclusion command.
#
# We are given the type of the current condition, and its result.
# The result we "push" onto the condition stack will be TRUE *only* if the
# previous condition result and the given condition result are both TRUE.
# 
sub texinfo'start_cond {
   local($type, $expr) = @_;
   local($result, $_);
   local($prev_type, $prev_result, $prev_where) = &texinfo'top_cond;
   $result = $prev_result && &eval_cond($type, $expr);
   &texinfo'push_cond($type, $result);
}

# End conditional inclusion/exclusion command
#
# We are given the type of condition we expect to be in the middle of.
# If we are in the middle of that condition-type, then pop it from the
# stack, otherwise complain about an @end for the wrong type.
#
sub texinfo'end_cond {
   local($end_type) = @_;
   local($where) = &texinfo'where;
   local($cond_type, $cond_result, $cond_where) = &texinfo'top_cond;
   if ($cond_type eq $end_type) {
      &texinfo'pop_cond;
   } elsif ($cond_type) {
      print STDERR "$0: ($where) expecting \"${CMD}end $cond_type\" for ${CMD}$cond_type at $cond_where but saw \"${CMD}end $end_type\" instead.\n" ;
   } else {
      print STDERR "$0: ($where) \"${CMD}end $end_type\" has no corresponding ${CMD}$cond_type.\n";
   }
}

# Handle @set VALUE
sub texinfo'def_cmd {
   local($name, $value) = @_;
   $value = 1  if (! $value);
   $DEFS{$name} = $value ;
}

# Handle @clear VALUE
sub texinfo'undef_cmd {
   local($name) = @_;
   undef $DEFS{$name} ;
}

# Find an @included file
sub texinfo'find_file {
   local($fname) = @_;
   local($found) = 0;
   local($dir);
   local($result);

   ## look for file in current directory
   if (-f $fname) {
      ++$found;
      $result = $fname;
   } else {
      ## look for file in directory list
      foreach $dir (@INCS) {
         if (-f "$dir/$fname") {
            ++$found;
            $result = "$dir/$fname";
            last;
         }
      }
   }
   $result;
}

# Handle @include's
sub texinfo'include_cmd {
   local($included) = @_;
   local($result) = 0;
   local($where) = &texinfo'where;
   local($fname) = &texinfo'find_file($included);
   if ($IGNORE_INCLUDES ||
       defined($IGNORE_INCS{$fname}) ||
       defined($IGNORE_INCS{$included})) {
      print STDERR "> Ignoring @include $included\n" if ($DEBUG);
   } elsif ($fname) {
      &texinfo'preprocess_files($fname);
      $result = 1;
   } else {
      print STDERR "$0: ($where) file $included not found.\n";
   }
   $result;
}

# Process a texinfo input line
sub texinfo'preprocess_line {
   local($_) = @_;
   local($where) = &texinfo'where;
   print STDERR "$where\n" if ($DEBUG);

   if (/^\s*${CMD}(if\w*|tex|ignore)\s*(.*)\s*$/) {
      print STDERR "> $_\n" if ($DEBUG);
      &texinfo'start_cond($1, $2);
   } elsif (/^\s*${CMD}end\s+(if\w*|tex|ignore)\s*/) {
      print STDERR "> $_\n" if ($DEBUG);
      &texinfo'end_cond($1);
   } else {
      local($type, $result, $where) = &texinfo'top_cond;
      if ($result) {
         if (/^\s*${CMD}set\s+(\S*)\s*(\S*)/) {
            &texinfo'def_cmd($1, $2);
         } elsif (/^\s*${CMD}clear\s+(\S*)/) {
            &texinfo'undef_cmd($1);
         } elsif (/^\s*${CMD}include\s+(\S*)/) {
            s/^/@c / if (&texinfo'include_cmd($1));
         }
         ## Regular line - pass it to the user
         $_ .= "\n";
         eval "&${USERSUB}(\$_);" ;
      }
   }
}

# Handle a file of texinfo commands
sub texinfo'preprocess_files {
   local(@argv) = @_;
   local($fname, $fhandle);
   foreach $fname (@argv) {
      $fhandle = $fname;
      if (open($fhandle, $fname)) {
         &texinfo'preprocess_handle($fhandle);
         close($fhandle);
      } else {
         print STDERR "$0: unable to open $fname for reading: $!\n" ;
      }
   }
}

# Handle an open filehandle of texinfo commands
sub texinfo'preprocess_handle {
   local($_) = @_;
   local($fhandle) = $_;
   push(@HANDLES, $fhandle);
   while (<$fhandle>) {
      ++$LINES{$fhandle};       ## Update the line-count for this file
      ++$LINES;
      chop;
      &texinfo'preprocess_line("$_");
   }
   &texinfo'extra_cond($fhandle);
   pop(@HANDLES);
}

# If a list is given, preprocess the given files, otherwise preprocess STDIN
sub texinfo'preprocess {
   local(@argv) = @_;
   if (0 < @argv) {
      &texinfo'preprocess_files(@argv);
   } elsif (0 == @HANDLES) {
      &texinfo'preprocess_handle(STDIN);
   }
}


# Here is the main routine that parses @ARGV for options and then
# processes the remaining filenames in @ARGV ...

package  main;

sub main'texinfo {
   local($_) = @_;
   local($old_usersub) = $texinfo'USERSUB;
   if ($_) {
      s/^/main\'/ if (! /^\w+\'\w+$/);
      $texinfo'USERSUB = $_;
   }
   &texinfo'getopts();
   &texinfo'preprocess(@main'ARGV);
   $texinfo'USERSUB = $old_usersub;
}

1;

-- 
_______________________"And miles to go before I sleep."______________________
 Brad Appleton               Harris Computer Systems, Fort Lauderdale, FL USA
 brad@ssd.csd.harris.com     Disclaimer: I said it, not my employer! 


