79318599

Date: 2024-12-30 20:59:21
Score: 3.5
Natty:
Report link

It seems to work with the hack:

n.l

/* n.l - scanner with two exclusive start conditions that recognise the same
 * tokens and differ only in how a newline is treated: l8 discards it,
 * retnl returns it to the parser as the token '\n'. */
%option noyywrap nounput noinput batch debug
/* The grammar (n.y) selects these conditions by number, using 1 for l8 and
 * 2 for retnl, so the order of the names on the %x line must stay as is. */
%x l8 retnl

%{

#include "parse.h"

%}


id    [a-zA-Z][a-zA-Z_0-9]*
int   [0-9]+
/* NOTE(review): blank is defined but not referenced by any rule below;
 * the whitespace rule uses [ \t], which does not include \r. */
blank [ \t\r]

%%

    /* INITIAL is only a trampoline: on the first character (or newline) seen,
       switch to l8 and give the character back with yyless(0) so that it is
       rescanned by the l8 rules.

       Newline: returned as '\n' in retnl, discarded in l8.
       Spaces, tabs and '#' comments running to end of line are discarded in
       both conditions.
       The keyword rule for "fun" is listed before the {id} rule, so "fun" is
       reported as FUNC rather than ID.
       No semantic value is attached to ID or NUM here; only the token kind
       is returned. */
<INITIAL>.|\n       {BEGIN l8; yyless(0); }

<retnl>[\n]           {return '\n';}
<l8>[\n]              { }
<l8,retnl>[ \t]       { }
<l8,retnl>[#][^\n]*   { }

<l8,retnl>fun         { return FUNC;   }

<l8,retnl>"{"       {return '{';}
<l8,retnl>"}"       {return '}';}
<l8,retnl>"("       {return '(';}
<l8,retnl>")"       {return ')';}
<l8,retnl>"+"       {return '+';}
<l8,retnl>";"       {return ';';}

<l8,retnl>{id}        {return ID; }
<l8,retnl>{int}       {return NUM; }

%%

I define two start conditions, l8 and retnl. l8 swallows '\n', while retnl returns '\n' to the parser as a token.

Now, in the grammar file n.y, I do:

/* n.y - grammar that steers the scanner's start condition from parser
 * actions by assigning directly to the scanner variable yy_start. */
%{
#define YYDEBUG 1

%}

%code requires {

/* yy_start is the start-condition variable inside the flex-generated scan.c.
   It is declared static there; the build strips "static" with sed so that
   this extern declaration can link against it. */
extern int yy_start;
/* NOTE(review): this macro is not referenced in this file; the actions below
   spell the state numbers as literals. */
#define retnl 2

extern enum yytokentype yylex();
  extern void yyerror(const char* errmsg);
  /* NOTE(review): no definition of yyerrorf is visible in this file. */
  extern void yyerrorf(const char* format, ...);

}

/* Declare that the grammar is expected to have no conflicts. */
%expect 0
//          %define api.pure
//          %locations
%define parse.trace
%verbose
%header
%define parse.error verbose


%token  FUNC

%token  ID NUM

%left  '+'

%%
%start unit;

unit: stmts

stmts:
        stmt                       {}
    |   stmts  stmt                {}

/* Once a statement and its terminator D have been reduced, put the scanner
   back into l8, where newlines are discarded. */
stmt:
        expr D                     { yy_start = 1 + 2 * 1 /*state:l8*/; }
    ;

/* Statement terminator: ';' or a newline. A newline only reaches the parser
   while the scanner is in retnl. */
D:      ';'
    |   '\n'
    ;


expr:       expr '+' expr              {}
    |   primary                    {}
    ;

/* In the two FUNC alternatives, the mid-rule action placed before '}' moves
   the scanner into retnl, so that newlines scanned from then on are returned
   as '\n' tokens until the stmt action switches back to l8.
   NOTE(review): the parser may already have read one lookahead token when a
   mid-rule action runs - confirm the switch takes effect where intended. */
primary:
        NUM                        {}
    |   ID                         {}
    |   FUNC     '(' ')' '{' stmts { yy_start = 1 + 2 * 2 /*state:retnl*/; } '}' { }
    |   FUNC ID '('  ')' '{' stmts { yy_start = 1 + 2 * 2 /*state:retnl*/; } '}' {}
    ;

%%

/*
 * Error callback invoked by the bison-generated parser.
 *
 * errmsg: NUL-terminated diagnostic text supplied by the parser.
 *
 * The message is written to stderr, not stdout, so diagnostics do not mix
 * with the program's regular output, and it is terminated with a newline so
 * that consecutive messages do not run together on one line.
 */
void yyerror(const char* errmsg)
{
  fprintf(stderr, "%s\n", errmsg);
}

To be able to access yy_start from the parser, I need to add

sed -i -e 's/static int yy_start/int yy_start/' scan.c

to the Makefile:

# Build the example and run it on test.txt. The steps are order-dependent:
#   1. bison writes parse.c (n.l includes "parse.h", presumably the header
#      produced by %header alongside it); -rall requests the full report.
#   2. flex writes scan.c.
#   3. sed removes "static" from the yy_start definition in scan.c so the
#      parser's "extern int yy_start" can link; it must run after flex and
#      before scan.c is compiled.
#   4. The three objects are compiled and linked; n.c is not shown here.
# NOTE: in a real Makefile each recipe line must begin with a tab character.
all:
    bison -rall -o parse.c n.y
    flex -o scan.c n.l
    sed -i -e 's/static int yy_start/int yy_start/' scan.c
    gcc  -g -c parse.c -o parse.o
    gcc  -g -c scan.c -o scan.o
    gcc  -g -c n.c -o n.o
    gcc  -g scan.o parse.o n.o -lc -o n.exe
    ./n.exe test.txt

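The two assignments yy_start = 1 + 2 * 1 /*state:l8*/ and yy_start = 1 + 2 * 2 /*state:retnl*/ are what BEGIN(l8) and BEGIN(retnl) would expand to if they were used inside the flex file: flex defines BEGIN as an assignment of 1 + 2 * (state number) to yy_start, and l8 and retnl are start conditions 1 and 2.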

Does anybody know a more standard way of achieving this?

Reasons:
  • Blacklisted phrase (0.5): i need
  • Blacklisted phrase (1): anybody know
  • RegEx Blacklisted phrase (2): Does anybody know
  • Long answer (-1):
  • Has code block (-0.5):
  • Ends in question mark (2):
  • Self-answer (0.5):
  • High reputation (-1):
Posted by: Konrad Eisele