@q Copyright 2012-2014, Alexander Shibakov@>
@q This file is part of SPLinT@>

@q SPLinT is free software: you can redistribute it and/or modify@>
@q it under the terms of the GNU General Public License as published by@>
@q the Free Software Foundation, either version 3 of the License, or@>
@q (at your option) any later version.@>

@q SPLinT is distributed in the hope that it will be useful,@>
@q but WITHOUT ANY WARRANTY; without even the implied warranty of@>
@q MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the@>
@q GNU General Public License for more details.@>

@q You should have received a copy of the GNU General Public License@>
@q along with SPLinT.  If not, see <http://www.gnu.org/licenses/>.@>

@*1\bison\ specific routines.
The placeholder code left blank in the common routines is filed in
with the code relevant to the output of parser tables in the following sections.

@*2Tables. \namedspot{bsfile}Here are all the parser table names. Some tables are not output but adding
one to the list in the future will be easy: it does not even have to
be done here.
@<Parser table names@>=
  _register_table_d(yytranslate)@;
  _register_table_d(yyr1)@;
  _register_table_d(yyr2)@;
  _register_table_d(yydefact)@;
  _register_table_d(yydefgoto)@;
  _register_table_d(yypact)@;
  _register_table_d(yypgoto)@;
  _register_table_d(yytable)@;
  _register_table_d(yycheck)@;
  _register_table_d(yyprhs)@;
  _register_table_d(yyrhs)@;
  _register_table_d(yytoknum)@;
  _register_table_d(yystos)@;
  _register_table_d(yytname)@;

@ One special table requires a little bit more preparation. This is a
table that lists the depth of the stack before an implicit terminal. It
is not one of the tables that is used by \bison\ itself but is needed
if the symbolic name processing is to be implemented (\bison\ has
access to this information `on the fly').

@<Variables and types local to the parser@>=
  unsigned int yyrthree[YYNRULES + 1] = { 0 };

@ We populate this table below $\ldots$
@<Parser defaults@>=
  assert( YYNRULES + 1 == sizeof(yyprhs)/sizeof(yyprhs[0]) );

  { int i, j;

      for ( i = 1; i <= YYNRULES; i++ ) {
  
          for ( j = 0; yyrhs[ yyprhs[i] + j ] != -1; j++ ) {

              assert( yyprhs[i] + j < sizeof(yyrhs) );
              assert( j < yyr1[i] );

              if ( @<This is an implicit term@> ) {

                  @<Find the rule that defines it and set |yyrthree|@>@;

              }
 
          }
      }
  }

@ @<This is an implicit term@>=
  ( strlen( yytname[ yyrhs[yyprhs[i]+j] ] ) > 1 ) &&
  ( yytname[ yyrhs[yyprhs[i]+j] ][0] == '$' ) &&
  ( yytname[ yyrhs[yyprhs[i]+j] ][1] == '@@' )

@ @<Find the rule that defines it...@>=
  int rule_number;

  for ( rule_number = 1; rule_number < YYNRULES; rule_number++ ) {

      if ( yyr1[rule_number] == yyrhs[yyprhs[i]+j] ) {

          yyrthree[rule_number] = j;
          break;

      }
  }

  assert( rule_number < YYNRULES );

@ $\ldots$ and add its name to the list.
@<Parser table names@>=
  _register_table_d(yyrthree)@;

@*2Actions. There are several ways of making |yyparse()| execute all portions of
the action code. The one chosen here makes sure that none of the
tables gets written past its last element. To see how it works, it
might be helpful to `walk through' \bison's output to see how each
change affects the generated parser.
@<Output parser semantic actions@>=
  if ( output_desc.output_actions ) {

      int i, j;

      fprintf( tables_out, "%s", action_desc.preamble );

      if ( !bare_actions ) {

          yypact[0] = YYPACT_NINF;
          yypgoto[0] = -1;
          yydefgoto[0] = YYFINAL;  

      }

      for ( i = 1; i < sizeof(yyr1)/sizeof(yyr1[0]); i++ ) {

          fprintf( tables_out, action_desc.act_setup, i, yyr2[i] - 1 );

          if ( action_desc.print_rule ) {

              action_desc.print_rule( i );

          }

          if ( yyr2[i] > 0 ) {

              if ( action_desc.action1 ) {

                  fprintf( tables_out, "%s", action_desc.action1 );

              }
          }

          for ( j = 2; j <= yyr2[i]; j++ ) {

              if ( action_desc.actionn ) {
 
                  fprintf( tables_out, action_desc.actionn, j );
              }

          }

          if ( !bare_actions ) {

              yyr1[i] = YYNTOKENS;
              yydefact[0] = i;
              yyr2[i] = 0;
              yyparse(YYPARSE_PARAMETERS);

          }

          fprintf( tables_out, action_desc.act_suffix, i, yyr2[i] - 1 );

      }

      fprintf( tables_out, "%s", action_desc.postamble );

      if ( action_desc.cleanup ) {

          action_desc.cleanup( &action_desc );

      }

  }

@*2Constants.
@<Parser constants@>=
  _register_const_d(YYEMPTY)@;
  _register_const_d(YYPACT_NINF)@;
  _register_const_d(YYEOF)@;
  _register_const_d(YYLAST)@;
  _register_const_d(YYNTOKENS)@;
  _register_const_d(YYNRULES)@;
  _register_const_d(YYNSTATES)@;
  _register_const_d(YYFINAL)@;

@*2Tokens.
Similar techniques are employed in token output. Tokens are parser
specific (the scanner only needs their numeric values) so we need {\it
some\/} flexibility to output them in a desired format. For special
purposes (say changing the way tokens are typeset) we can control the
format tokens are output in.
@<Variables and types local to the parser@>=
  char *token_format_char = NULL;
  char *token_format_affix = NULL;
  char *token_format_suffix = NULL;
  char *bootstrap_token_format = NULL;

@ @<Parser specific option list@>= 
  _register_option("token-format-char", required_argument, 0, TOKEN_FORMAT_CHAR, "")@;
  _register_option("token-format-affix", required_argument, 0, TOKEN_FORMAT_AFFIX, "")@;
  _register_option("token-format-suffix", required_argument, 0, TOKEN_FORMAT_SUFFIX, "")@;
  _register_option("bootstrap-token-format", required_argument, 0, BOOTSTRAP_TOKEN_FORMAT, "")@;

@ @<Higher index parser specific options@>=
  TOKEN_FORMAT_CHAR,@[@]
  TOKEN_FORMAT_AFFIX,@[@]
  TOKEN_FORMAT_SUFFIX,@[@]
  BOOTSTRAP_TOKEN_FORMAT,@[@]

@ @<Handle parser output options@>=
  case TOKEN_FORMAT_CHAR:@;
      token_format_char = (char *)malloc( (strlen(optarg) + 1)*sizeof(char) );
      strcpy(token_format_char, optarg);
      break;

  case TOKEN_FORMAT_AFFIX:@;
      token_format_affix = (char *)malloc( (strlen(optarg) + 1)*sizeof(char) );
      strcpy(token_format_affix, optarg);
      break;

  case TOKEN_FORMAT_SUFFIX:@;
      token_format_suffix = (char *)malloc( (strlen(optarg) + 1)*sizeof(char) );
      strcpy(token_format_suffix, optarg);
      break;

  case BOOTSTRAP_TOKEN_FORMAT:@;
      bootstrap_token_format = (char *)malloc( (strlen(optarg) + 1)*sizeof(char) );
      strcpy(bootstrap_token_format, optarg);
      break;

@ @<Parser specific output descriptor fields@>=
  bool output_tokens:1;

@ No tokens are output by default.
@<Parser specific default outputs@>=
  @[@].output_tokens = 0,@[@]

@ The only part of the code below that needs any explanation is the
`bootstrap' token output. In \bison\ every token has three attributes:
its `macro name' (say, \.{STRING}) that is used by the parse code
internally, its `print name' (\.{"string"} to continue the example)
that \bison\ uses to print the token names in its diagnostic messages,
and its numeric value (that can be assigned implicitly by \bison\
itself or explicitly by the user). Only the `print names' are kept in
the |yytname| array so to reuse the scanner used by \bison\ we either
have to extract the token `macro names' from the \Cee\ code ourselves
to pass them on to the lexer, or use a special `stripped down' version
of a \bison\ grammar parser to extract the names from the parser's
\bison\ grammar. To do this, some token names would still need to be
known to the scanner. These tokens are selected by hand to make the
`bootstrapping' parser operational. The token list for the \bison\
grammar parser can be examined as part of the appropriate
\locallink{bootstraptokens}driver file\endlink.
@<Output parser tokens@>=
  if ( output_desc.output_tokens ) {

      int i;
      int length;
      char token;
      char *token_name;
      bool too_creative = false;

      for ( i = 258; i < sizeof(yytranslate)/sizeof(yytranslate[0]) ; i++ ) {

          token_name = yytname[yytranslate[i]];

          if ( token_name ) {

              fprintf( tables_out, token_format_affix, yytranslate[i], i );

              length = 0;

              while ( (token = *token_name) ) {

                  if ( token_format_char ) {

                      length += fprintf( tables_out, token_format_char, (unsigned int)token );

                  }  

                  if ( token < 040 || token == 0177 ) {

                      too_creative = true;

                  }
 
                  token_name++;
              }

              fprintf( tables_out, token_format_suffix, too_creative ? ".unprintable." : yytname[yytranslate[i]] );

          }
      }
  }

#ifdef BISON_BOOTSTRAP_MODE
    fprintf( tables_out, "\\bootstrapmodetrue\n" );
    fprintf( tables_out, "%% token values needed to bootstrap the parser\n" );
    bootstrap_tokens( bootstrap_token_format );
#endif

@ The size of the token name table is useful to determine, say, how
many `named' tokens the parser uses.
@<Output parser constants@>=
  fprintf( tables_out, "\\constset{YYTRANSLATESIZE}{%d}%%\n", (int)(sizeof(yytranslate)/sizeof(yytranslate[0])) );

@*2Output modes.
The code below can be easily extended and modified to output parser
tables, actions, and constants in a language of one's choice. We are
only interested in \TeX, however, thus other modes are very
rudimentary or non-existent at this point.

@*3 Token only mode. 
Token only output mode does exactly what is expected: outputs token
names and values in the format of your choosing.

@<Parser specific output modes@>=
  TOKEN_ONLY_OUT,@[@]

@ @<Handle parser related output modes@>=
  case TOKEN_ONLY_OUT:@;
      @<Prepare token only output environment@>@;
      break;

@ @<Parser specific option list@>= 
  _register_option("token-only-mode", no_argument, 0, TOKEN_ONLY_MODE, "")@;

@ @<Higher index parser specific options@>=
  TOKEN_ONLY_MODE,@[@]

@ @<Configure parser output modes@>=
  case TOKEN_ONLY_MODE:@;
      mode = TOKEN_ONLY_OUT;
      break;

@ @<Prepare token only output environment@>=
  if ( !token_format_char ) {

      token_format_char = "{%u}";

  }

  if ( !token_format_affix ) {

      token_format_affix = "%% token: %d, token value: %d\n\\prettytoken@@{";

  }

  if ( !token_format_suffix ) {

      token_format_suffix = "}%% %s\n";

  }

  output_desc.output_tokens = 1;

@*3Generic output. Generic output is not programmed yet.

@<Parser specific output modes@>=
  GENERIC_OUT,@[@]

@ @<Handle parser related output modes@>=
  case GENERIC_OUT:@;
      printf( "This mode is not supported yet\n" );
      exit(0);
      break;

@*3\TeX\ output. The \TeX\ mode is the main reason for this software.

@<Parser specific output modes@>=
  TEX_OUT,@[@]

@ @<Handle parser related output modes@>=
  case TEX_OUT:@;
      @<Set up \TeX\ table output for parser tables@>@;
      @<Prepare \TeX\ format for semantic action output@>@;
      @<Prepare \TeX\ format for parser constants@>@;
      @<Prepare \TeX\ format for parser tokens@>@;
      break;

@ \TeX\ tables. We begin with a few macros to facilitate the output of tables in the
format that \TeX\ can understand. There is really no good way to represent an array in \TeX\ so 
a rather weak compromise was chosen. Further explanation of this choice is given in the \TeX\ file
that implements the \TeX\ parser for the \bison\ input grammar. 
Some tables require name adjustments due to \TeX's
reluctance to treat digits as part of a name.
 
@<Set up \TeX\ table output for parser tables@>=
#define _register_table_d(name) tex_table(name);
  @<Table names@>@;
#undef _register_table_d

  yyr1_desc.name = "yyrone";
  yyr2_desc.name = "yyrtwo";

@ The memory allocated for the |yytname| table is released at the end.
@<Helper functions declarations for for parser output@>=
  void yytname_cleanup( struct table_d *table );
  int yytname_formatter_tex( FILE *stream, int index );
  int yytname_formatter( FILE *stream, int index );

@ There are a number of helper functions to output complicated names
in \TeX. The safest way seems to be to output a name as a sequence of
its {\sc ASCII} codes to accommodate names like \.{\$}\.{end}
safely. \TeX's \.{\^\^}$\ldots$ convention is supported as well.
@<Helper functions for parser output@>=
  void yytname_cleanup( struct table_d *table ) {

      free( table->separator );
      free( table->null );

  }

  int yytname_formatter_tex( FILE *stream, int index ) {

      char *token_name = yytname[index];
      unsigned char token;
      int length = 0;

      fprintf( stream, "\\addname " );

      while ( (token = *token_name) ) {

          if ( token < 040 || token == 0177 ) { /* unprintable characters */

              fprintf( stream, "^^%c", token < 0100 ? (unsigned char)(token + 0100) : (unsigned char)(token - 100) );
              length += 3;

          } else {

              fprintf( stream, "%c", token );
              length++;

          }

          token_name++;

      }
      fprintf( stream, "\n" );

      return length;

  }

  int yytname_formatter( FILE *stream, int index ) {

      char *token_name;
      unsigned char token;
      int length = 0;
      bool too_creative = false; /* to indicate if the name is too dangerous to print */ 

      fprintf( stream, "\\addname" );

      if ( index >= 0 ) { /* this is not the last name */

          token_name = yytname[index];

          if ( token_name == NULL ) {

              token_name = "$impossible";

          }

          while ( (token = *token_name) ) {

              length += fprintf( stream, "{%u}", (unsigned int)token );

              if ( token < 040 || token == 0177 ) {

                  too_creative = true;

              }

              token_name++;

          }

          fprintf( stream, "%% %s\n", too_creative ? ".unprintable." : yytname[index] );

      } else { /* this is the last name */

          token_name = yytname[-index];

          if ( token_name == NULL ) {

              token_name = "$impossible";

          }

          while ( (token = *token_name) ) {

              length += fprintf( stream, "{%u}", (unsigned int)token );
              token_name++;

              if ( token < 040 || token == 0177 ) {

                  too_creative = true;

              }

          }

          fprintf( stream, "%% %s\n\\end\n%%\n", too_creative ? ".unprintable." : 
                                                              ( yytname[-index] ? yytname[-index] : "end of array" ) );

      }

      return length;

  }

@ @<Set up \TeX\ table output for parser tables@>=
  yytname_desc.preamble = "%%\n\\newtable{yytname}{}\\tempca0\\relax%% a robust way to add the yytname array\n";
  yytname_desc.separator = NULL;  
  yytname_desc.postamble = NULL;
  yytname_desc.null = NULL;
  yytname_desc.null_postamble = NULL;
  yytname_desc.prettify = false;
  yytname_desc.formatter = yytname_formatter;

  yytname_desc.cleanup = NULL;

  output_desc.output_yytname = 1;

@ @<Prepare \TeX\ format for semantic action output@>=

  if ( optimize_actions ) {

      action_desc.preamble  = "%\n% the big switch\n%\n"@/
                              "\\catcode`\\/=0\\relax % see the documentation for an explanation of this trick\n"@/
                              "\\def\\yybigswitch#1{%%\n"@/
                              "    \\csname dobisonaction\\number #1\\parsernamespace\\endcsname\n"@;
                              "}\\stashswitch{yybigswitch}%%\n";
      action_desc.act_setup = "\n\\expandafter\\def\\csname dobisonaction%d\\parsernamespace\\endcsname{%%\n%%";
      action_desc.act_suffix = "}%% end of rule %d\n";
      action_desc.action1   = NULL;
      action_desc.actionn   = NULL;
      action_desc.postamble = "\n\\catcode`\\/=12\\relax\n\n";
      action_desc.print_rule = print_rule;
      action_desc.cleanup = NULL;
      output_desc.output_actions = 1;

  } else {

      action_desc.preamble  = "%\n% the big switch\n%\n"@/
                              "\\catcode`\\/=0\\relax % see the documentation for an explanation of this trick\n"@/
                              "\\def\\yybigswitch#1{%%\n"@;
                              "  \\ifcase#1\\relax\n";
      action_desc.act_setup = "      \\or %% (rule %d) ";
      action_desc.act_suffix = "";
      action_desc.action1   = NULL;
      action_desc.actionn   = NULL;
      action_desc.postamble = "  \\else\n  \\fi\n}\\stashswitch{yybigswitch}%%\n\\catcode`\\/=12\\relax\n\n";
      action_desc.print_rule = print_rule;
      action_desc.cleanup = NULL;
      output_desc.output_actions = 1;

}

@ Grammar rules are listed in a readable form alongside the action
code to make it possible to quickly find an appropriate action.
@<Helper functions for parser output@>=
  void print_rule( int n ) {

      int i;

      fprintf( tables_out, "%s%s:  ", (n < 10 && !optimize_actions ? " " : ""), yytname[yyr1[n]] );

      i = yyprhs[n];

      if ( yyrhs[i] < 0 ) {

          fprintf( tables_out, "<empty>" );

      } else {

          while( yyrhs[i] > 0 ) {

              fprintf( tables_out, "%s ", yytname[yyrhs[i]] );
              i++;

          }

      }

      fprintf( tables_out, "\n" );

  }

@ \TeX\ constant output is another place where the techniques described above are applied.
As before, the macro handles the repetitive work of initialization, declaration, etc in 
each place where the corresponding constant is mentioned. The one exception is \.{YYPACT\_NINF}, 
which has to be handled separately because the underscore in its name makes it difficult to
use it as a command sequence name.
\def\YYPACTxNINFxdesc{\.{YYPACT\_NINF\_}\\{desc}}

@s YYPACT_NINF_desc TeX

@<Prepare \TeX\ format for parser constants@>=
#define _register_const_d(c_name) @[c_name##_desc.format = "\\constset{%s}{%d}%%\n"; \
                                    c_name##_desc.name = #c_name; \
                                    output_desc.output_##c_name = 1;@]
  @<Parser constants@>@;
#undef _register_const_d
YYPACT_NINF_desc.name = "YYPACTNINF";

@ Token definitions round off the \TeX\ output mode.

@<Prepare \TeX\ format for parser tokens@>=

    token_format_char = NULL; /* do not output individual characters */

    if ( !token_format_affix ) {

        token_format_affix = "\\tokenset{%d}{%d}";

    }

    if ( !token_format_suffix ) {

        token_format_suffix = "%% %s\n";

    }


    if ( !bootstrap_token_format ) {

        bootstrap_token_format = "\\expandafter\\def\\csname token\\parsernamespace %s\\endcsname{%d}%% %s\n";

    }

    /* |output_desc.output_tokens = 1;| is no longer necessary as it is done entirely in \TeX */

@*2 Command line options.
We start with the most obvious option, the one begging for help.

@<Higher index parser specific options@>=
  LONG_HELP,@[@]

@ @<Parser specific option list@>=
  _register_option("help", no_argument, 0, LONG_HELP, "")@;

@ @<Shortcuts for command line options affecting parser output@>=
  "h"

@ @<Handle parser output options@>=
  case 'h': /* short help */@;
    fprintf(stderr, "Usage: %s [options] output_file\n", argv[0]);
    exit(0);
    break; /* should not be needed */

  case LONG_HELP:@;
    fprintf(stderr, "%s [--mode=TeX:options] output_file outputs tables\n"
                    "    and constants for a TeX parser\n", argv[0]);
    exit(0);
    break; /* should not be needed */

@ @<Parser specific option list@>=
  _register_option("debug", optional_argument, 0, 'b', "")@;
  _register_option("mode", required_argument, 0, 'm', "")@;
  _register_option("table-separator", required_argument, 0, 'z', "")@;

  _register_option("format", required_argument, 0, 'f', "")@; /* name? */
  _register_option("table", required_argument, 0, 't', "")@; /* specific table */
  _register_option("constant", required_argument, 0, 'c', "")@; /* specific constant */
  _register_option("name-length", required_argument, 0, 'l', "")@; /* change |MAX_NAME_LENGTH| */
  _register_option("token", required_argument, 0, 'n', "")@; /* specific token */
  _register_option("run-parse", required_argument, 0, 'p', "")@; /* run the parser */
  _register_option("parse-file", required_argument, 0, 'i', "")@; /* input for the parser */

@ The string below is a list of short options.

@<Shortcuts for command line options affecting parser output@>=
  "z:m:f:t:"

@ A few options can be immediately discussed.

@<Variables and types local to the parser@>=
  char *table_separator = "%s ";

@ @<Handle parser output options@>=
  case 'm': /* output mode */@;
      switch( optarg[0] ) {

          case 'T':
          case 't':@;
              mode = TEX_OUT;
              break;

          case 'b':
          case 'B':
          case 'g':
          case 'G':@;
              mode = GENERIC_OUT;
              break;

          default:@;
              break;

      }
      break;

  case 'z':
      table_separator = (char *)malloc( (strlen(optarg) + 1)*sizeof(char) );
      strcpy(table_separator, optarg);
      break;
