/* scanner.c - support for re2c source files * Copyright (c) 2002, 2003, Ed L. Cashin * * This library is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 of the * License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 * USA * */ #include #include #include #include #include #include "scanner.h" #ifdef DMALLOC #include #endif typedef unsigned char uchar; #define YYCHAR uchar #ifndef SCANNER_BUFSIZ /* #define SCANNER_BUFSIZ 8192 */ #define SCANNER_BUFSIZ 1024 #endif #define YYCTYPE YYCHAR void scanner_init(scanner *s) { s->fd = STDIN_FILENO; /* stdin is default. user may change * fd directly. */ s->pos = s->cursor = s->ptr = s->limit = s->token = s->buf_begin = 0; s->eof = s->buf_end = 0; s->line = 1; } void scanner_init_for_buf(scanner *s, char *buf, int len) { s->fd = -1; s->pos = s->cursor = s->ptr = s->limit = s->token = s->buf_begin = buf; s->eof = s->buf_end = s->buf_begin + len; } void scanner_destroy(scanner *s) { char *p = s->buf_begin; if (p) free(p); } /* called by re2c-generate code when the buffer has been read and the lexer is * ready to process more input. */ uchar *scanner_fill(scanner *s, uchar *cursor) { int count; if (s->eof) return cursor; /* move any existing token to the beginning of the buffer */ if ( (count = s->token - s->buf_begin) ) { memmove(s->buf_begin, s->token, s->limit - s->token); s->token = s->buf_begin; s->ptr -= count; cursor -= count; s->pos -= count; s->limit -= count; } /* increase buffer size if necessary */ if ((s->buf_end - s->limit) < SCANNER_BUFSIZ) { uchar *buf = (uchar *) malloc(((s->limit - s->buf_begin) + SCANNER_BUFSIZ) * sizeof(uchar)); if (! buf) { fprintf(stderr, __FUNCTION__ " Error on malloc: %s", strerror(errno)); exit(EXIT_FAILURE); } memcpy(buf, s->token, s->limit - s->token); s->token = buf; s->ptr = &buf[s->ptr - s->buf_begin]; cursor = &buf[cursor - s->buf_begin]; s->pos = &buf[s->pos - s->buf_begin]; s->limit = &buf[s->limit - s->buf_begin]; s->buf_end = &s->limit[SCANNER_BUFSIZ]; if (s->buf_begin) free(s->buf_begin); s->buf_begin = buf; } /* reading less than SCANNER_BUFSIZ doesn't necessarily mean EOF; * e.g., could be a line-buffered terminal. */ if ((count = read(s->fd, (char*) s->limit, SCANNER_BUFSIZ)) == 0) { s->eof = &s->limit[count]; *(s->eof)++ = '\n'; } s->limit += count; #ifdef DEBUG fputs("debug: scanner_fill buffer follows\n", stderr); if ("debug") { unsigned char *p; for (p = s->buf_begin; p != s->limit; ++p) putc(*p, stderr); } #endif /* DEBUG */ return cursor; } #if 0 /* example scanner finds words and numbers. * * usually gotos are "a Bad Thing", but an * re2c source is different from a C source. */ int lex(scanner *s) { uchar *cursor = s->cursor; #define YYCURSOR cursor #define YYLIMIT s->limit #define YYMARKER s->ptr #define YYFILL(n) do { cursor = fill(s, cursor); } while (0) #define RET(i) do { s->cursor = cursor; return i; } while (0) #ifdef OBSOLETE_DEBUG #define SHOW(str) do { \ uchar *show_pointer; \ fprintf(stderr, "debug: on line %d found a %s: %.*s\n", \ s->line, str, cursor - s->token, s->token); \ } while (0) #else #define SHOW(str) do { } while (0) #endif /* DEBUG */ /*!re2c alpha = [a-zA-Z]; num = [0-9]; any = [\000-\377]; */ std: s->token = cursor; /*!re2c alpha (alpha|num)* { SHOW("word"); RET(WORD); } [ \t\v\f]+ { goto std; } "\n" { if (cursor == s->eof) RET(EOI); s->pos = cursor; ++(s->line); goto std; } any { SHOW("char"); goto std; } */ return 0; } #endif