Reports

It seems to work with the hack:

n.l

%option noyywrap nounput noinput batch debug
%x l8 retnl

%{

/*
 * n.l - scanner with two exclusive start conditions that differ only in how
 * a newline is handled:
 *
 *   l8     a newline is discarded like any other whitespace
 *   retnl  a newline is returned to the caller as the token '\n'
 *
 * The scanner itself only ever enters l8 (from INITIAL).  No rule in this
 * file selects retnl; that state has to be chosen from outside the scanner.
 *
 * Token codes (FUNC, ID, NUM) come from parse.h.
 */

#include "parse.h"

%}


id    [a-zA-Z][a-zA-Z_0-9]*
int   [0-9]+
blank [ \t\r]

%%

<INITIAL>.|\n       {
                      /* Bootstrap: on the very first character switch to l8
                         and push the character back so that it is rescanned
                         by the l8 rules. */
                      BEGIN l8; yyless(0);
                    }

<retnl>[\n]           { /* newline is significant in retnl */ return '\n';}
<l8>[\n]              { /* newline is insignificant in l8 */ }
<l8,retnl>{blank}     { /* skip space, tab and carriage return (CRLF input) */ }
<l8,retnl>[#][^\n]*   { /* comment runs to the end of the line */ }

<l8,retnl>fun         { return FUNC;   }

<l8,retnl>"{"       {return '{';}
<l8,retnl>"}"       {return '}';}
<l8,retnl>"("       {return '(';}
<l8,retnl>")"       {return ')';}
<l8,retnl>"+"       {return '+';}
<l8,retnl>";"       {return ';';}

<l8,retnl>{id}        {return ID; }
<l8,retnl>{int}       {return NUM; }

%%

I define two start states, l8 and retnl: l8 swallows '\n', while retnl returns '\n' as a token.

Now, in the grammar file n.y, I do:

%{
#define YYDEBUG 1

#include <stdio.h>   /* fprintf, used by yyerror in the epilogue */

%}

%code requires {

/*
 * Declarations exported through the generated header.
 *
 * yy_start is the scanner's start-state variable.  The grammar actions below
 * assign to it directly in order to switch the scanner between its l8 and
 * retnl states.
 */
extern int yy_start;
/* Number of the retnl start state, as used in the "1 + 2 * 2" assignments
   below.  NOTE(review): presumably has to match the number flex assigns to
   retnl in the generated scanner - confirm after regenerating. */
#define retnl 2

/* Declared as returning int: this block is emitted into the header before
   the token enum, so the enum type cannot be named here without a forward
   reference to it. */
extern int yylex(void);
  extern void yyerror(const char* errmsg);
  extern void yyerrorf(const char* format, ...);

}

%expect 0
//          %define api.pure
//          %locations
%define parse.trace
%verbose
%header
%define parse.error verbose


%token  FUNC

%token  ID NUM

%left  '+'

%start unit

%%

unit: stmts
    ;

stmts:
        stmt                       {}
    |   stmts  stmt                {}
    ;

/* A statement is an expression followed by a terminator D.  Once the
   statement has been reduced the scanner is put back into the l8 state.
   NOTE(review): a lookahead token that was already fetched when an action
   runs has been scanned under the previous state - confirm the timing. */
stmt:
        expr D                     { yy_start = 1 + 2 * 1 /*state:l8*/; }
    ;

/* Statement terminator: ';' or a newline token. */
D:      ';'
    |   '\n'
    ;


expr:       expr '+' expr              {}
    |   primary                    {}
    ;

/* For the two function forms, the mid-rule action placed before the closing
   '}' switches the scanner to the retnl state. */
primary:
        NUM                        {}
    |   ID                         {}
    |   FUNC     '(' ')' '{' stmts { yy_start = 1 + 2 * 2 /*state:retnl*/; } '}' { }
    |   FUNC ID '('  ')' '{' stmts { yy_start = 1 + 2 * 2 /*state:retnl*/; } '}' {}
    ;

%%

/* Report a parser error message on stderr, one message per line. */
void yyerror(const char* errmsg)
{
  fprintf(stderr, "%s\n", errmsg);
}

To be able to access yy_start from the parser, I need to add

# Remove the `static` keyword from the yy_start definition in the generated scan.c (edited in place).
sed -i -e 's/static int yy_start/int yy_start/' scan.c

to the Makefile:

# Build n.exe from the bison grammar (n.y), the flex scanner (n.l) and n.c,
# then run it on test.txt.  Recipe lines must start with a TAB character.
.PHONY: all

# Default goal: build the executable and run it on the sample input.
all: n.exe
	./n.exe test.txt

n.exe: scan.o parse.o n.o
	gcc  -g scan.o parse.o n.o -lc -o n.exe

# Generates parse.c; the grammar's %header directive also requests a header.
parse.c: n.y
	bison -rall -o parse.c n.y

# The sed step belongs to this rule so that scan.c is patched again every
# time flex regenerates it.
scan.c: n.l
	flex -o scan.c n.l
	sed -i -e 's/static int yy_start/int yy_start/' scan.c

parse.o: parse.c
	gcc  -g -c parse.c -o parse.o

# scan.c includes parse.h, which is produced by the parse.c rule.
scan.o: scan.c parse.c
	gcc  -g -c scan.c -o scan.o

# NOTE(review): n.c presumably includes parse.h as well - confirm; the extra
# prerequisite is harmless if it does not.
n.o: n.c parse.c
	gcc  -g -c n.c -o n.o

The two assignments yy_start = 1 + 2 * 1 /*state:l8*/ and yy_start = 1 + 2 * 2 /*state:retnl*/ are what BEGIN(l8) and BEGIN(retnl) would expand to if they were used inside the flex file, since flex defines BEGIN(s) as yy_start = 1 + 2 * s.

Does anybody know a more standard way of achieving this?

79318599