@q Copyright 2012-2022, Alexander Shibakov@>
@q This file is part of SPLinT@>

@q SPLinT is free software: you can redistribute it and/or modify@>
@q it under the terms of the GNU General Public License as published by@>
@q the Free Software Foundation, either version 3 of the License, or@>
@q (at your option) any later version.@>

@q SPLinT is distributed in the hope that it will be useful,@>
@q but WITHOUT ANY WARRANTY; without even the implied warranty of@>
@q MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the@>
@q GNU General Public License for more details.@>

@q You should have received a copy of the GNU General Public License@>
@q along with SPLinT.  If not, see <http://www.gnu.org/licenses/>.@>

@*1\eatone{Flex}\flex\ specific routines. The output of the scanner automaton
follows steps similar to the ones taken during the parser output. 
The major difference is in the output of actions and constants. 
@*2 Tables.
As in the case of a parser we start with all the table names.
@<Scanner table names@>=
  /* the seven scanner tables known to the driver; each one is passed to
     |_register_table_d|, which is defined outside of this section */
  _register_table_d(yy_accept)@;
  _register_table_d(yy_ec)@;
  _register_table_d(yy_meta)@;
  _register_table_d(yy_base)@;
  _register_table_d(yy_def)@;
  _register_table_d(yy_nxt)@;
  _register_table_d(yy_chk)@;

@*2Actions. The scanner function, |yylex()|, has been reverse
engineered to execute all portions of
the action code. The method chosen here makes sure that none of the
tables gets written past its last element.
@<Variables and types local to the scanner driver@>=
  /* running maxima of the table entries; each one starts at 0 and is
     only ever raised, so a table without positive entries leaves it at 0 */
  int max_yybase_entry = 0; /* largest entry of |yy_base| */
  int max_yyaccept_entry = 0; /* largest entry of |yy_accept| */
  int max_yynxt_entry = 0; /* largest entry of |yy_nxt| */
  int max_yy_ec_entry = 0; /* largest entry of |yy_ec| */

@ The `exotic' scanner constants treated below are the constants used
to control the scanner code itself. Unfortunately, they are not given
any names that the `driver' could use to output them in a simple
way. 
@<Compute exotic scanner constants@>=

  {
      /* scan four of the scanner tables and raise the corresponding
         maximum whenever a larger entry is met */
      size_t k; /* position in the table being scanned */
      const size_t base_count = sizeof( yy_base )/sizeof( yy_base[0] );
      const size_t nxt_count = sizeof( yy_nxt )/sizeof( yy_nxt[0] );
      const size_t accept_count = sizeof( yy_accept )/sizeof( yy_accept[0] );
      const size_t ec_count = sizeof( yy_ec )/sizeof( yy_ec[0] );

      for ( k = 0; k < base_count; k++ ) {

          max_yybase_entry = ( yy_base[k] > max_yybase_entry ) ? yy_base[k] : max_yybase_entry;

      }

      for ( k = 0; k < nxt_count; k++ ) {

          max_yynxt_entry = ( yy_nxt[k] > max_yynxt_entry ) ? yy_nxt[k] : max_yynxt_entry;

      }

      for ( k = 0; k < accept_count; k++ ) {

          max_yyaccept_entry = ( yy_accept[k] > max_yyaccept_entry ) ? yy_accept[k] : max_yyaccept_entry;

      }

      for ( k = 0; k < ec_count; k++ ) {

          max_yy_ec_entry = ( yy_ec[k] > max_yy_ec_entry ) ? yy_ec[k] : max_yy_ec_entry;

      }

  }

@ @<Output scanner actions@>=
  /* Write the action switch: the preamble, one entry for each rule
     numbered from 1 to |max_yyaccept_entry|, one entry for each
     end of file state up to |max_eof_state|, and the postamble.
     Unless |bare_actions| is set, |yylex| is called once for every entry
     after the action number has been stored in |yy_accept[1]|. */
  if ( output_desc.output_actions ) {
  
      int i; /* number of the action being written */
      yyscan_t fake_scanner; /* initialized and used only when |bare_actions| is not set */

      fprintf( tables_out, "%s", action_desc.preamble );

      if ( !bare_actions ) {

          if ( yylex_init( &fake_scanner ) ) {

              printf( "Cannot initialize the scanner\n" );
              exit( BAD_SCANNER ); /* |fake_scanner| is unusable; going on would
                                      dereference an uninitialized pointer */

          }

          /* the table entries below are overwritten before any call to |yylex|;
             presumably this steers the scanner straight to the action
             whose number is stored in |yy_accept[1]| */
          yy_ec[0] = 0;
          yy_base[1] = max_yybase_entry;

          yy_base[2] = 0;
          yy_chk[0] = 2;

          yy_chk[max_yybase_entry] = 1;
          yy_nxt[max_yybase_entry] = 1;

          yy_nxt[0] = 1;

          fprintf( stderr, "max entry: %d\n", max_yybase_entry );

      }

      
      for ( i = 1; i <= max_yyaccept_entry; i++ ) {

          fprintf( tables_out, action_desc.act_setup, i );
      
          if ( i == YY_END_OF_BUFFER ) {

              fprintf( tables_out, " %% YY_END_OF_BUFFER\n%s\n", "          \\yylexeofaction" );

          } else {

              fprintf( tables_out, "\n" );

              if ( !bare_actions ) {

                  (( struct yyguts_t *)fake_scanner)->yy_hold_char = 0;
                  yy_accept[1] = i;
                  if ( i%10 == 0 ) { /* progress mark after every tenth rule */
                      fprintf( stderr, "." );
                  }
                  yylex( NULL, fake_scanner );
              }
          }

          fprintf( tables_out, action_desc.act_suffix, i );

      }
  
      fprintf( tables_out, "      %% end of file states:\n%s\n", 
                           "      %#define YY_STATE_EOF(state) (YY_END_OF_BUFFER + state + 1)"
      );

      if ( max_eof_state == 0 ) { /* in case the user has not declared any states */

          max_eof_state = YY_STATE_EOF( INITIAL );

      }

      for ( ; i <= max_eof_state; i++ ) { /* |i| carries on from the last rule number */

          fprintf( tables_out, action_desc.act_setup, i );

          if ( !bare_actions ) {

              fprintf( tables_out, "\n" );

              (( struct yyguts_t *)fake_scanner)->yy_hold_char = 0;
              yy_accept[1] = i;
              yylex( NULL, fake_scanner );

          }

          fprintf( tables_out, action_desc.act_suffix, i );

      }

      fprintf( tables_out, "%s", action_desc.postamble );

      if ( action_desc.cleanup ) {

          action_desc.cleanup( &action_desc );

      }

  }

  @<Compute magic constants@>@;
  @<Output states@>;
  fprintf( tables_out, "\\constset{YYECMAGIC}{%d}%%\n", yy_ec_magic );
  fprintf( tables_out, "\\constset{YYMAXEOFSTATE}{%d}%%\n", max_eof_state );

@ @<Error codes@>=
  BAD_SCANNER,@[@] /* exit status used when |yylex_init| reports a failure */

@ @<Variables and types local to the scanner driver@>=
  int yy_ec_magic; /* the value returned by |yy_get_previous_state| once the tables are rigged */

@ The `magic' constants are similar to the `exotic' ones mentioned
above, except that the methods used to compute them rely on reverse
engineering the scanner function. Since this changes the scanner
tables, it has to be done after the `driver' has finished going through all
the actions.
@<Compute magic constants@>=
  {
      /* Rig the scanner tables and a fake token buffer, then store
         whatever |yy_get_previous_state| returns in |yy_ec_magic|.
         The tables overwritten here are not restored in this section, and
         the scanner obtained from |yylex_init| is not released here. */
      int i, j; /* |j| is not used in this section */
      char fake_yytext[ YY_MORE_ADJ + 1 ]; /* fake token text; only its last element is set below */

      yyscan_t yyscanner; /* presumably named this way because macros such as
                             |yy_set_bol| refer to |yyscanner| or |yyg|; confirm
                             against the generated scanner */
      struct yyguts_t *yyg;

      if ( yylex_init( &yyscanner ) ) {

          printf( "Cannot initialize the scanner\n" );
          exit( BAD_SCANNER );

      }

      yyg = (struct yyguts_t *)yyscanner;
      yyg->yy_start = 0;
      yy_set_bol(0);
      yyg->yytext_ptr = fake_yytext;
      yyg->yy_c_buf_p = yyg->yytext_ptr + 1 + YY_MORE_ADJ; /* one past the last element of |fake_yytext| */

      fake_yytext[YY_MORE_ADJ] = 0; /* |*yy_cp = 0;| */

      yy_accept[0] = 0;
      yy_base[0] = 0;

      for ( i = 0; i < sizeof( yy_chk )/sizeof( yy_chk[0] ); i++ ) {

          yy_chk[i] = 0; /* every check entry now names state 0 */

      }

      for ( i = 0; i < sizeof( yy_nxt )/sizeof( yy_nxt[0] ); i++ ) {

          yy_nxt[i] = i; /* the next state table becomes the identity map */

      }

      /* presumably the state returned here equals the number the scanner
         substitutes for the null character held in the fake buffer; confirm
         against the generated |yy_get_previous_state| */
      yy_ec_magic = yy_get_previous_state( yyscanner );

  }

@*2State names. There is no easy way to output the symbolic names for
states, so this has to be done by hand while the actions are output. The
state names are accumulated in a list structure and are printed out
after the action output is complete.

Note that parsing the scanner file is only partially helpful (even though the
extended parser and scanner can recognize the \.{\%x} option). All that can
be done is to output the state {\it names\/} but not their numerical
values, since all such names are macros whose values are only
known to the \flex\ generated scanner.

@d Define_State( st_name, st_num ) do {

    /* Append a record for the state |st_name| with the value |st_num| to the
       list that starts at |state_list|, and raise |max_eof_state| when the
       end of file state of |st_num| exceeds it. The name pointer is stored
       as is, the string is not copied. Each argument is evaluated once. */
    struct lexer_state_d *this_state;

    this_state = malloc( sizeof(struct lexer_state_d) );

    if ( this_state == NULL ) { /* no record to fill in, give up */

        fprintf( stderr, "Cannot allocate a scanner state record\n" );
        exit( EXIT_FAILURE );

    }

    this_state->name = ( st_name );
    this_state->value = ( st_num );
    this_state->next = NULL;

    if ( last_state ) {

        last_state->next = this_state;
        last_state = this_state;

    } else { /* the very first state starts the list */

        last_state = state_list = this_state;

    }

    if ( YY_STATE_EOF( this_state->value ) > max_eof_state ) {

        max_eof_state = YY_STATE_EOF( this_state->value );

    }

} while (0); /* the trailing semicolon is kept so that existing uses of the
                macro expand exactly as before */

@<Scanner variables and types for \Cee\ preamble@>=
  int max_eof_state = 0; /* largest |YY_STATE_EOF| value recorded by |Define_State|;
                            stays 0 until a state is defined */

  struct lexer_state_d { /* one record of the singly linked list of named states */
      
      char *name; /* the state name as handed to |Define_State| */
      int value; /* the number of the state */
      struct lexer_state_d *next; /* the record defined next, or |NULL| for the last one */

  };

  struct lexer_state_d *state_list = NULL; /* the first record, |NULL| while the list is empty */
  struct lexer_state_d *last_state = NULL; /* the last record, new records are appended after it */

@ @<Output states@>=
  {

      /* Write the list of named states as a single macro definition and
         release the records on the way. Nothing at all is written when
         no state has been recorded. */
      struct lexer_state_d *current_state = state_list;

      if ( current_state ) {

          fprintf( tables_out, "\\def\\setflexstates{%%\n" 
                               "  \\stateset{INITIAL}{%d}%%\n", INITIAL );

          while ( current_state ) {

              struct lexer_state_d *spent_state = current_state;

              fprintf( tables_out, "  \\stateset{%s}{%d}%%\n",
                       current_state->name, current_state->value);

              current_state = current_state->next; /* step forward before the record is released */

              free( spent_state ); /* the |name| field is not
                                      deallocated because it is not 
                                      allocated on the heap */

          }

          state_list = last_state = NULL; /* every record is gone, do not leave
                                             the list pointers dangling */

          fprintf( tables_out, "}%%\n%%\n" );

      }

  }

@*2Constants.
The few hard coded constants needed for the lexer to work are listed here.
@<Scanner constants@>=
  /* the named constants written out by the driver; the effect of each
     line depends on the definition of |_register_const_d| in force
     at the place where this section is used */
  _register_const_d(YY_END_OF_BUFFER_CHAR)@;
  _register_const_d(YY_NUM_RULES)@;
  _register_const_d(YY_END_OF_BUFFER)@;

@*2Output modes.
The output modes are the same as those in the parser driver with some minor
changes.

@*3Generic output. Generic output is not programmed yet.
@<Scanner specific output modes@>=
  GENERIC_OUT,@[@] /* generic output mode, not implemented */

@ @<Handle scanner output modes@>=
  case GENERIC_OUT:@;
      /* NOTE(review): the message goes to the standard output and the
         exit status is 0 even though nothing has been produced; confirm
         that this is intended */
      printf( "This mode is not supported yet\n" );
      exit(0);
      break;

@*3\TeX~mode. The \TeX\ mode is the main focus of this software.
@<Scanner specific output modes@>=
  TEX_OUT,@[@] /* \TeX\ output mode */

@ @<Handle scanner output modes@>=
  case TEX_OUT:@;
      /* the tables, the actions and the constants are set up in this order */
      @<Set up \TeX\ format for scanner tables@>@;
      @<Set up \TeX\ format for scanner actions@>@;
      @<Prepare \TeX\ format for scanner constants@>@;
      break;

@ @<Set up \TeX\ format for scanner tables@>=
  /* |tex_table_generic| is applied to each table, after which the |name|
     field of the table descriptor is replaced by a name without
     underscores; presumably the macro fills in a default name that
     has to be overridden, confirm against its definition */
  tex_table_generic(yy_accept);
  yy_accept_desc.name = "yyaccept";
  tex_table_generic(yy_ec);
  yy_ec_desc.name = "yyec";
  tex_table_generic(yy_meta);
  yy_meta_desc.name = "yymeta";
  tex_table_generic(yy_base);
  yy_base_desc.name = "yybase";
  tex_table_generic(yy_def);
  yy_def_desc.name = "yydef";
  tex_table_generic(yy_nxt);
  yy_nxt_desc.name = "yynxt";
  tex_table_generic(yy_chk);
  yy_chk_desc.name = "yychk";

@ @<Set up \TeX\ format for scanner actions@>=

  if ( optimize_actions ) {

      action_desc.preamble  = "%\n% the big switch\n%\n"@/
                              "\\catcode`\\/=0\\relax\n%\n"@/
                              "\\def\\yydoactionswitch#1{%%\n"@/
                              "    \\let\\yylextail\\yylexcontinue\n"@/
                              "    \\csname doflexaction\\number #1\\parsernamespace\\endcsname\n"@/
                              "    \\yylextail\n"@;
                              "}\\stashswitch{yydoactionswitch}%\n";
      action_desc.act_setup = "\n\\expandafter\\def\\csname doflexaction%d\\parsernamespace\\endcsname{%%";
      action_desc.act_suffix = "}%% end of rule %d\n";
      action_desc.action1   = NULL;
      action_desc.actionn   = NULL;
      action_desc.postamble = "\\catcode`\\/=12\\relax\n%\n";
      action_desc.print_rule = NULL;
      action_desc.cleanup = NULL;
      output_desc.output_actions = 1;

  } else {

      action_desc.preamble  = "%\n% the big switch\n%\n"@/
                              "\\catcode`\\/=0\\relax\n%\n"@/
                              "\\def\\yydoactionswitch#1{%%\n  \\let\\yylextail\\yylexcontinue\n"@;
                              "  \\ifcase#1\\relax\n";
      action_desc.act_setup = "      \\or\n"
                              "      \\YYRULESETUP %% (rule %d) ";
      action_desc.act_suffix = "      %% end of rule %d\n";
      action_desc.action1   = NULL;
      action_desc.actionn   = NULL;
      action_desc.postamble = "  \\else\n  \\fi\n  \\yylextail\n}\\stashswitch{yydoactionswitch}%\n\\catcode`\\/=12\\relax\n%\n";
      action_desc.print_rule = NULL;
      action_desc.cleanup = NULL;
      output_desc.output_actions = 1;

  }

@ \TeX\ constant output is another place where the techniques described above are applied. A few names 
are handled separately, because they contain underscores.
\def\YYxENDxOFxBUFFERxCHARxdesc{\.{YY\_END\_OF\_BUFFER\_CHAR\_}\\{desc}}
\def\YYxNUMxRULESxdesc{\.{YY\_NUM\_RULES\_}\\{desc}}
\def\YYxENDxOFxBUFFERxdesc{\.{YY\_END\_OF\_BUFFER\_}\\{desc}}

@s YY_END_OF_BUFFER_CHAR_desc TeX
@s YY_NUM_RULES_desc          TeX
@s YY_END_OF_BUFFER_desc      TeX

@<Prepare \TeX\ format for scanner constants@>=
  /* for the duration of this section |_register_const_d| fills in the
     descriptor of a constant (the format, the name, the value) and
     switches its output on; the macro is removed again right after
     the list of constants has been expanded */
#define _register_const_d(c_name) @[c_name##_desc.format = "\\constset{%s}{%d}%%\n"; \
                                    c_name##_desc.name =  #c_name; \
                                    c_name##_desc.value = c_name; \
                                    output_desc.output_##c_name = 1;@]
  @<Scanner constants@>@;
#undef _register_const_d

  /* the names set by the macro contain underscores, so they are replaced */
  YY_END_OF_BUFFER_CHAR_desc.name = "YYENDOFBUFFERCHAR";
  YY_NUM_RULES_desc.name = "YYNUMRULES";
  YY_END_OF_BUFFER_desc.name = "YYENDOFBUFFER";

@ @<Output exotic scanner constants@>=
  /* the first constant is the number of entries of |yy_accept| less one;
     the element count has an unsigned type that need not match the
     long integer conversion used in the format, hence the cast;
     the other constants are the maxima computed earlier */
  fprintf( tables_out, "\\constset{YYMAXREALCHAR}{%ld}%%\n",
           (long)( sizeof( yy_accept )/sizeof( yy_accept[0] ) ) - 1 );
  fprintf( tables_out, "\\constset{YYBASEMAXENTRY}{%d}%%\n", max_yybase_entry );
  fprintf( tables_out, "\\constset{YYNXTMAXENTRY}{%d}%%\n", max_yynxt_entry );
  fprintf( tables_out, "\\constset{YYMAXRULENO}{%d}%%\n", max_yyaccept_entry );
  fprintf( tables_out, "\\constset{YYECMAXENTRY}{%d}%%\n", max_yy_ec_entry );

@*2 Command line options.
We start with the most obvious option, the one begging for help.

@ @<Scanner specific options without shortcuts@>=
  register_option_("help", no_argument, 0, LONG_HELP, "")@; /* the long form is reported as |LONG_HELP| */

@ @<Shortcuts for command line options affecting scanner output@>=
  @[@[@], 'h'@]

@ @<Handle scanner output options@>=
  case 'h': /* short help */@;
    fprintf(stderr, "Usage: %s [options] output_file\n", argv[0]);
    exit(0);
    break; /* should not be needed */

  /* like the short form, the long help is written to |stderr| and ends
     the program with status 0 */
  case LONG_HELP:@;
    fprintf(stderr, "%s [--mode=TeX:options] output_file outputs tables\n"
                    "    and constants for a TeX scanner\n", argv[0]);
    exit(0);
    break; /* should not be needed */

@ @<Scanner specific options with shortcuts@>=
  /* long options that share a one character code with a short option;
     |debug| takes an optional argument, all the others require one */
  register_option_("debug", optional_argument, 0, 'b', "")@;
  register_option_("mode", required_argument, 0, 'm', "")@;
  register_option_("table-separator", required_argument, 0, 'z', "")@;

  register_option_("format", required_argument, 0, 'f', "")@; /* name? */
  register_option_("table", required_argument, 0, 't', "")@; /* specific table */
  register_option_("constant", required_argument, 0, 'c', "")@; /* specific constant */
  register_option_("name-length", required_argument, 0, 'l', "")@; /* change |MAX_NAME_LENGTH| */
  register_option_("token", required_argument, 0, 'n', "")@; /* specific token */
  register_option_("run-scan", required_argument, 0, 'p', "")@; /* run the scanner */
  register_option_("scan-file", required_argument, 0, 'i', "")@; /* input for the scanner */

@ A few options can be immediately discussed.
@<Variables and types local to the scanner driver@>=
  int debug_level = 0; /* set by the debug option, 1 if the option has no argument */
  char *table_separator = "%s "; /* the default value, replaced by the table-separator option */

@ @<Handle scanner output options@>=
  case 'b': /* debug (level) */@;
       debug_level = optarg ? atoi(optarg) : 1; /* the argument is optional, the level is 1 without it */
       break;      
      
  case 'm': /* output mode */@;
      /* only the first character of the argument is examined; an
         unrecognized character leaves |mode| unchanged */
      switch( optarg[0] ) {

          case 'T':
          case 't':@;
              mode = TEX_OUT;
              break;

          case 'b':
          case 'B':
          case 'g':
          case 'G':@;
              mode = GENERIC_OUT;
              break;

          default:@;
              break;

      }
      break;

  case 'z':
      {
          /* keep a private copy of the separator given on the command line;
             the previous value is not released since the default one
             is a string literal */
          char *separator_copy = malloc( strlen(optarg) + 1 );

          if ( separator_copy == NULL ) { /* copying into a null pointer would crash */

              fprintf( stderr, "Cannot allocate the table separator\n" );
              exit( EXIT_FAILURE );

          }

          strcpy( separator_copy, optarg );
          table_separator = separator_copy;
      }
      break;