Fix transition table. Need to fix string literal read

This commit is contained in:
Victor Fernandes 2017-03-15 20:57:28 -04:00
parent 0b5fb5befa
commit b78f4a831d
5 changed files with 366 additions and 330 deletions

Binary file not shown.

View File

@ -91,6 +91,5 @@ char b_rflag(Buffer* const);
short b_retract(Buffer* const);
short b_retract_to_mark(Buffer* const);
short b_getcoffset(Buffer* const);
char* b_cbhead(Buffer* const);
#endif

View File

@ -9,12 +9,18 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "scanner", "scanner\scanner.
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
COMPILERS|x64 = COMPILERS|x64
COMPILERS|x86 = COMPILERS|x86
Debug|x64 = Debug|x64
Debug|x86 = Debug|x86
Release|x64 = Release|x64
Release|x86 = Release|x86
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{034E0E4B-DAFA-45D4-882B-1815AC73DA9E}.COMPILERS|x64.ActiveCfg = COMPILERS|x64
{034E0E4B-DAFA-45D4-882B-1815AC73DA9E}.COMPILERS|x64.Build.0 = COMPILERS|x64
{034E0E4B-DAFA-45D4-882B-1815AC73DA9E}.COMPILERS|x86.ActiveCfg = COMPILERS|Win32
{034E0E4B-DAFA-45D4-882B-1815AC73DA9E}.COMPILERS|x86.Build.0 = COMPILERS|Win32
{034E0E4B-DAFA-45D4-882B-1815AC73DA9E}.Debug|x64.ActiveCfg = Debug|x64
{034E0E4B-DAFA-45D4-882B-1815AC73DA9E}.Debug|x64.Build.0 = Debug|x64
{034E0E4B-DAFA-45D4-882B-1815AC73DA9E}.Debug|x86.ActiveCfg = Debug|Win32
@ -23,6 +29,10 @@ Global
{034E0E4B-DAFA-45D4-882B-1815AC73DA9E}.Release|x64.Build.0 = Release|x64
{034E0E4B-DAFA-45D4-882B-1815AC73DA9E}.Release|x86.ActiveCfg = Release|Win32
{034E0E4B-DAFA-45D4-882B-1815AC73DA9E}.Release|x86.Build.0 = Release|Win32
{C53261F2-8A2D-4F75-8E54-DBFD16A621BB}.COMPILERS|x64.ActiveCfg = COMPILERS|x64
{C53261F2-8A2D-4F75-8E54-DBFD16A621BB}.COMPILERS|x64.Build.0 = COMPILERS|x64
{C53261F2-8A2D-4F75-8E54-DBFD16A621BB}.COMPILERS|x86.ActiveCfg = COMPILERS|Win32
{C53261F2-8A2D-4F75-8E54-DBFD16A621BB}.COMPILERS|x86.Build.0 = COMPILERS|Win32
{C53261F2-8A2D-4F75-8E54-DBFD16A621BB}.Debug|x64.ActiveCfg = Debug|x64
{C53261F2-8A2D-4F75-8E54-DBFD16A621BB}.Debug|x64.Build.0 = Debug|x64
{C53261F2-8A2D-4F75-8E54-DBFD16A621BB}.Debug|x86.ActiveCfg = Debug|Win32

620
scanner.c
View File

@ -10,11 +10,11 @@
* Date: 30 January 2017
*/
/* The #define _CRT_SECURE_NO_WARNINGS should be used in MS Visual Studio projects
* to suppress the warnings about using "unsafe" functions like fopen()
* and standard string library functions defined in string.h.
* The define does not have any effect in Borland compiler projects.
*/
/* The #define _CRT_SECURE_NO_WARNINGS should be used in MS Visual Studio projects
* to suppress the warnings about using "unsafe" functions like fopen()
* and standard string library functions defined in string.h.
* The define does not have any effect in Borland compiler projects.
*/
#define _CRT_SECURE_NO_WARNINGS
#include <stdio.h> /* standard input / output */
@ -24,7 +24,7 @@
#include <limits.h> /* integer types constants */
#include <float.h> /* floating-point types constants */
/*#define NDEBUG to suppress assert() call */
/*#define NDEBUG to suppress assert() call */
#include <assert.h> /* assert() prototype */
/* project header files */
@ -33,7 +33,7 @@
#include "table.h"
#define DEBUG /* for conditional processing */
#undef DEBUG
/*#undef DEBUG*/
@ -56,7 +56,7 @@ static int iskeyword(char * kw_lexeme); /*keywords lookup functuion */
static long atool(char * lexeme); /* converts octal string to decimal value */
int scanner_init(Buffer * sc_buf) {
if(b_isempty(sc_buf)) return EXIT_FAILURE;/*1*/
if (b_isempty(sc_buf)) return EXIT_FAILURE;/*1*/
/* in case the buffer has been read previously */
b_setmark(sc_buf, 0);
b_retract_to_mark(sc_buf);
@ -68,272 +68,301 @@ int scanner_init(Buffer * sc_buf) {
Token malar_next_token(Buffer * sc_buf)
{
Token t; /* token to return after recognition */
unsigned char c; /* input symbol */
int state = 0; /* initial state of the FSM */
short lexstart; /*start offset of a lexeme in the input buffer */
short lexend; /*end offset of a lexeme in the input buffer */
int accept = NOAS; /* type of state - initially not accepting */
/*
lexstart is the offset from the beginning of the char buffer of the
input buffer (sc_buf) to the first character of the current lexeme,
which is being processed by the scanner.
lexend is the offset from the beginning of the char buffer of the
input buffer (sc_buf) to the last character of the current lexeme,
which is being processed by the scanner.
Token t; /* token to return after recognition */
unsigned char c; /* input symbol */
int state = 0; /* initial state of the FSM */
short lexstart; /*start offset of a lexeme in the input buffer */
short lexend; /*end offset of a lexeme in the input buffer */
int accept = NOAS; /* type of state - initially not accepting */
/*
lexstart is the offset from the beginning of the char buffer of the
input buffer (sc_buf) to the first character of the current lexeme,
which is being processed by the scanner.
lexend is the offset from the beginning of the char buffer of the
input buffer (sc_buf) to the last character of the current lexeme,
which is being processed by the scanner.
*/
*/
//DECLARE YOUR VARIABLES HERE IF NEEDED
int i; /* Counter for loop in string error case */
static int str_offset = 0;
//DECLARE YOUR VARIABLES HERE IF NEEDED
int i; /* Counter for loop in string error case */
static int str_offset = 0;
if (sc_buf == NULL) {
return aa_func12("RUN TIME ERROR"); /* WHOOPS */
}
if (sc_buf == NULL) {
scerrnum = 1;
return aa_table[ES]("RUN TIME ERROR"); /* WHOOPS */
}
while (1){ /* endless loop broken by token returns it will generate a warning */
while (1) { /* endless loop broken by token returns it will generate a warning */
//GET THE NEXT SYMBOL FROM THE INPUT BUFFER
/* GET THE NEXT SYMBOL FROM THE INPUT BUFFER */
c = b_getc(sc_buf);
switch (c) {
case 255: t.code = SEOF_T; return t; /* EOF */
case '\0': t.code = SEOF_T; return t; /* Source EOF */
case '\n': line++; continue; /* Ignore new line, increment line count */
case '\r': line++; continue; /* CR, increment line count*/
case ' ': continue; /* Ignore white space */
case '\t': continue; /* Ignore tabs */
case ';': t.code = EOS_T; return t; /* End of statement */
case ',': t.code = COM_T; return t; /* Comma */
case '{': t.code = LBR_T; return t; /* Left brace */
case '}': t.code = RBR_T; return t; /* Right brace */
case '(': t.code = LPR_T; return t; /* Left parenthesis */
case ')': t.code = RPR_T; return t; /* Right parenthesis */
case '+': t.code = ART_OP_T; t.attribute.arr_op = PLUS; return t; /* Addition operator */
case '-': t.code = ART_OP_T; t.attribute.arr_op = MINUS; return t; /* Substraction operator */
case '*': t.code = ART_OP_T; t.attribute.arr_op = MULT; return t; /* Multiplication operator */
case '/': t.code = ART_OP_T; t.attribute.arr_op = DIV; return t; /* Devision operator */
case '>': t.code = REL_OP_T; t.attribute.rel_op = GT; return t; /* Greater-than relational operator */
case '<':
c = b_getc(sc_buf);
switch (c) {
case 255: t.code = SEOF_T; return t; /* EOF */
case '\0': t.code = SEOF_T; return t; /* Source EOF */
case '\n': line++; continue; /* Ignore new line, increment line count */
case '\r': line++; continue; /* CR, increment line count*/
case ' ': continue; /* Ignore white space */
case ';': t.code = EOS_T; return t; /* End of statement */
case ',': t.code = COM_T; return t; /* Comma */
case '{': t.code = RBR_T; return t; /* Right brace */
case '}': t.code = LBR_T; return t; /* Left brace */
case '(': t.code = RPR_T; return t; /* Right parenthesis */
case ')': t.code = LPR_T; return t; /* Left parenthesis */
case '+': t.code = ART_OP_T; t.attribute.arr_op = PLUS; return t; /* Addition operator */
case '-': t.code = ART_OP_T; t.attribute.arr_op = MINUS; return t; /* Substraction operator */
case '*': t.code = ART_OP_T; t.attribute.arr_op = MULT; return t; /* Multiplication operator */
case '/': t.code = ART_OP_T; t.attribute.arr_op = DIV; return t; /* Devision operator */
case '>': t.code = REL_OP_T; t.attribute.rel_op = GT; return t; /* Greater-than relational operator */
case '<':
/* MSVC will complain about this assignment inside a conditional expression*/
if (c = b_getc(sc_buf) == '>') {
t.code = REL_OP_T;
t.attribute.rel_op = NE; /* Negation operator */
return t;
}
else if (c == '<') {
t.code = SCC_OP_T; /* String concatenation operator */
}
else {
t.code = REL_OP_T;
t.attribute.rel_op = LT; /* Less-than operator */
}
if (c == '>') {
t.code = REL_OP_T;
t.attribute.rel_op = NE; /* Negation operator */
return t;
}
else if (c == '<') {
t.code = SCC_OP_T; /* String concatenation operator */
}
else {
t.code = REL_OP_T;
t.attribute.rel_op = LT; /* Less-than operator */
}
b_retract(sc_buf);
/*c = b_getc(sc_buf);*/
return t;
case '.':
b_setmark(sc_buf, b_getcoffset(sc_buf)); /* Set mark before continuing (AND|OR case) */
c = b_getc(sc_buf);
if (c == 'A' && b_getc(sc_buf) == 'N' && b_getc(sc_buf) == 'D' && b_getc(sc_buf) == '.') {
t.code = LOG_OP_T;
t.attribute.log_op = AND;
return t;
}
else if (c == 'O' && b_getc(sc_buf) == 'R' && b_getc(sc_buf) == '.') {
t.code = LOG_OP_T;
t.attribute.log_op = OR;
return t;
}
t.code = ERR_T; /* "That character's not supposed to be here" case */
t.attribute.err_lex[0] = '.';
t.attribute.err_lex[1] = '\0';
b_retract_to_mark(sc_buf);
return t;
case '!':
c = b_getc(sc_buf);
if (c == '<') { /* It's a comment line */
for (; c != '\0' && c != '\r' && c != '\n' && c != 255; c = b_getc(sc_buf)); /* Consume chars until line ends */
line++;
continue;
}
else { /* Bad character, pump out an error token */
b_retract(sc_buf);
b_retract(sc_buf);
t = aa_table[ES](" ");
t.attribute.err_lex[0] = c = b_getc(sc_buf);
t.attribute.err_lex[1] = c = b_getc(sc_buf);
b_retract(sc_buf);
c = b_getc(sc_buf);
return t;
case '.':
b_setmark(sc_buf, b_getcoffset(sc_buf)); /* Set mark before continuing (AND|OR case) */
if (c == 'A' && b_getc(sc_buf) == 'N' && b_getc(sc_buf) == 'D' && b_getc(sc_buf) == '.') {
t.code = LOG_OP_T;
t.attribute.log_op = AND;
return t;
}
else if (c == 'O' && b_getc(sc_buf) == 'R' && b_getc(sc_buf) == '.') {
t.code = LOG_OP_T;
t.attribute.log_op = OR;
}
t.code = ERR_T; /* "That character's not supposed to be here" case */
t.attribute.err_lex[0] = '.';
t.attribute.err_lex[1] = '\0';
b_retract_to_mark(sc_buf);
}
case '=':
c = b_getc(sc_buf);
if (c == '=') { /* Relational equals-to operator */
t.code = REL_OP_T;
t.attribute.rel_op = EQ;
return t;
case '!':
c = b_getc(sc_buf);
if (c == '<') { /* It's a comment line */
for (; c != '\0' && c != '\r' && c != '\n' && c != 255; c = b_getc(sc_buf)); /* Consume chars until line ends */
}
b_retract(sc_buf);
t.code = ASS_OP_T; /* Assignment operator */
return t;
case '\"': /* Don't quote me on this */
c = b_getc(sc_buf);
t.code = STR_T; /* String literal */
b_setmark(sc_buf, b_getcoffset(sc_buf));
lexstart = (short)str_offset;
lexend = lexstart;
for (; c != '\"'; c = b_getc(sc_buf)) {
b_addc(str_LTBL, c);
if (b_isfull(str_LTBL)) {
t = aa_table[ES]("\"There is always ..."); /* String too big :( */
}
if (c == '\n' || c == '\r') {
line++;
continue;
}
else { /* Bad character, pump out an error token */
t = aa_table[ES](" ");
t.attribute.err_lex[0] = c;
if (c == 255 || c == '\0') {
b_retract_to_mark(sc_buf);
t.code = ERR_T; /* Illegal string, make it an error token */
for (i = 0; i < ERR_LEN; i++) {
t.attribute.err_lex[i] = b_getc(sc_buf);
}
t.attribute.err_lex[i] = '\0';
return t;
}
case '=':
c = b_getc(sc_buf);
if (c == '=') { /* Relational equals-to operator */
t.code = REL_OP_T;
t.attribute.rel_op = EQ;
}
b_retract(sc_buf);
t.code = ASS_OP_T; /* Assignment operator */
return t;
case '\"': /* Don't quote me on this */
c = b_getc(sc_buf);
b_setmark(sc_buf, b_getcoffset(sc_buf));
lexstart = (short)str_offset;
lexend = lexstart;
for (; c != '\"'; c = b_getc(sc_buf)) {
b_addc(str_LTBL, c);
if (b_isfull(str_LTBL)) {
return aa_table[ES]("\"Imagine all the .."); /* String too big :( */
}
if (c == '\n' || c == '\r') {
line++;
}
if (c == 255 || c == '\0') {
b_retract_to_mark(sc_buf);
for (i = 0; i < ERR_LEN; i++) {
t.attribute.err_lex[i] = b_getc(sc_buf);
}
}
lexend++;
str_offset++;
} /*end for loop, string finished*/
lexend++;
str_offset++;
b_addc(str_LTBL, '\0');
} /*end for loop, string finished*/
t.code = STR_T;
t.attribute.str_offset = lexstart;
return t; /* String literal */
default:
if (isalnum(c) || isalpha(c)) {
lexend = 0;
state = 0;
lex_buf = b_create(1, 1, 'a');
b_addc(str_LTBL, '\0');
t.attribute.str_offset = lexstart;
while (accept == NOAS) {
b_addc(lex_buf, c);
state = get_next_state(state, c, &accept);
return t;
default:
if (isalpha(c) || isalnum(c)) {
if (accept != NOAS)
break;
c = b_getc(sc_buf);
lexend++;
}
/* Entering Accepting State */
b_addc(lex_buf, '\0');
/*Set mark to beginning of lexeme*/
b_setmark(sc_buf, b_getcoffset(sc_buf) - 1);
lexstart = 0;
lexend = 0;
state = 0;
if (as_table[state] == ASWR)
b_retract(sc_buf);
if ((t.attribute.kwt_idx = iskeyword(b_setmark(lex_buf, 0))) != -1) {
t.code = KW_T;
b_free(lex_buf);
return t;
}
while (accept == NOAS) {
state = get_next_state(state, c, &accept);
if (aa_table[state] != NULL) {
t = aa_table[state](b_setmark(lex_buf, 0));
}
else {
t = aa_table[ES]("RUN TIME ERROR");
}
if (accept != NOAS) { break; }
c = b_getc(sc_buf);
}
/*
* Entering Accepting State
*/
if (as_table[state] == ASWR)
b_retract(sc_buf);
/* Get start/end of lexeme */
lexstart = b_mark(sc_buf);
lexend = b_getcoffset(sc_buf);
lex_buf = b_create(1, 1, 'a');
b_retract_to_mark(sc_buf);
for (; lexstart < lexend; lexstart++) {
b_addc(lex_buf, b_getc(sc_buf));
}
b_addc(lex_buf, '\0');
/*if ((t.attribute.kwt_idx = iskeyword(b_setmark(lex_buf, 0))) != -1) {
t.code = KW_T;
b_free(lex_buf);
}
return t;
}*/
else {
t = aa_table[ES](" ");
t.attribute.err_lex[0] = c;
if (aa_table[state] != NULL) {
t = aa_table[state](b_setmark(lex_buf, 0));
}
else {
scerrnum = 1;
t = aa_table[ES]("RUN TIME ERROR");
}
b_free(lex_buf);
}
else {
t = aa_table[ES](" ");
t.attribute.err_lex[0] = c;
}
return t;
}
///* special cases or token driven processing */
//
//WRITE YOUR CODE FOR PROCESSING THE SPECIAL CASES HERE.
//COMMENTS AND STRING LITERALS ARE ALSO PROCESSED HERE.
//
//WHAT FOLLOWS IS A PSEUDO CODE. YOU CAN USE switch STATEMENT
//INSTEAD OF if-else TO PROCESS THE SPECIAL CASES
//DO NOT FORGET TO COUNT THE PROGRAM LINES
//
//
// IF (c == SOME CHARACTER)
// ...
// SKIP CHARACTER (FOR EXAMPLE SPACE)
// continue;
// OR SET TOKEN (SET TOKEN CODE AND TOKEN ATTRIBUTE(IF AVAILABLE))
// return t;
// EXAMPLE:
// if (c == ' ') continue;
// if (c == '{'){ t.code = RBR_T; /*no attribute */ return t;
// if (c == '+'){ t.code = ART_OP_T; t.attribute.arr_op = PLUS */ return t;
// ...
//
// IF (c == '.') TRY TO PROCESS .AND. or .OR.
// IF SOMETHING ELSE FOLLOWS . OR THE LAST . IS MISSING
// RETURN AN ERROR TOKEN
// IF (c == '!') TRY TO PROCESS COMMENT
// IF THE FOLLOWING CHAR IS NOT < REPORT AN ERROR
// ELSE IN A LOOP SKIP CHARACTERS UNTIL line terminator is found THEN continue;
// ...
// IF STRING (FOR EXAMPLE, "text") IS FOUND
// SET MARK TO MARK THE BEGINNING OF THE STRING
// IF THE STRING IS LEGAL
// USING b_addc(..)COPY THE text FROM INPUT BUFFER INTO str_LTBL
// ADD '\0' at the end make the string C-type string
// SET STRING TOKEN
// (the attribute of the string token is the offset from
// the beginning of the str_LTBL char buffer to the beginning
// of the string (TEXT in the example))
//
// return t;
// ELSE
// THE STRING LITERAL IS ILLEGAL
// SET ERROR TOKEN FOR ILLEGAL STRING (see assignment)
// DO NOT STORE THE ILLEGAL STRINg IN THE str_LTBL
//
// return t;
//
// IF(c == ANOTHER CHARACTER)
// SET TOKEN
// return t;
/* Process state transition table */
//IF (c is a digit OR c is a letter){
//
//SET THE MARK AT THE BEGINNING OF THE LEXEME
//b_setmark(sc_buf,forward);
// ....
//CODE YOUR FINITE STATE MACHINE HERE (FSM or DFA)
//IT IMPLEMENTS THE FOLLOWING ALGORITHM:
//
//FSM0. Begin with state = 0 and the input character c
//FSM1. Get the next state from the transition table calling
// state = get_next_state(state, c, &accept);
//FSM2. Get the next character
//FSM3. If the state is not accepting (accept == NOAS), go to step FSM1
// If the state is accepting, token is found, leave the machine and
// call an accepting function as described below.
//
//
//RETRACT getc_offset IF THE FINAL STATE IS A RETRACTING FINAL STATE
//GET THE BEGINNING AND THE END OF THE LEXEME
//lexstart = b_getmark(sc_buf);
//SET lexend TO getc_offset USING AN APPROPRIATE BUFFER FUNCTION
//CREATE A TEMPORARY LEXEME BUFFER HERE;
//lex_buf = b_create(...);
// . RETRACT getc_offset to the MARK SET PREVIOUSLY AT THE BEGINNING OF THE LEXEME AND
// . USING b_getc() COPY THE LEXEME BETWEEN lexstart AND lexend FROM THE INPUT BUFFER INTO lex_buf USING b_addc(...),
// . WHEN VID (KEYWORDS INCLUDED), FPL OR IL IS RECOGNIZED
// . YOU MUST CALL THE ACCEPTING FUNCTION USING THE ARRAY aa_table ,WHICH
// . CONTAINS POINTERS TO FUNCTIONS. THE ARRAY INDEX OF THE FUNCTION TO BE
// . CALLED IS STORED IN THE VARIABLE state.
// . YOU ARE NOT ALLOWED TO CALL ANY OF THE ACCEPTING FUNCTIONS BY NAME.
// . THE ARGUMENT TO THE FUNCTION IS THE STRING STORED IN lex_buf.
// ....
// b_free(lex_buf);
// return t;
//
// CHECK OTHER CHARS HERE if NEEDED, SET A TOKEN AND RETURN IT.
// FOR ILLEGAL CHARACTERS SET ERROR TOKEN.
// THE ILLEGAL CHAR IS THE ATTRIBUTE OF THE ERROR TOKEN
// IN A CASE OF RUNTIME ERROR, THE FUNCTION MUST STORE
// A NON-NEGATIVE NUMBER INTO THE GLOBAL VARIABLE scerrnum
// AND RETURN AN ERROR TOKEN. THE ERROR TOKEN ATTRIBUTE MUST
// BE THE STRING "RUN TIME ERROR: "
}//end while(1)
/* special cases or token driven processing */
//
//WRITE YOUR CODE FOR PROCESSING THE SPECIAL CASES HERE.
//COMMENTS AND STRING LITERALS ARE ALSO PROCESSED HERE.
//
//WHAT FOLLOWS IS A PSEUDO CODE. YOU CAN USE switch STATEMENT
//INSTEAD OF if-else TO PROCESS THE SPECIAL CASES
//DO NOT FORGET TO COUNT THE PROGRAM LINES
//
//
// IF (c == SOME CHARACTER)
// ...
// SKIP CHARACTER (FOR EXAMPLE SPACE)
// continue;
// OR SET TOKEN (SET TOKEN CODE AND TOKEN ATTRIBUTE(IF AVAILABLE))
// return t;
// EXAMPLE:
// if (c == ' ') continue;
// if (c == '{'){ t.code = RBR_T; /*no attribute */ return t;
// if (c == '+'){ t.code = ART_OP_T; t.attribute.arr_op = PLUS */ return t;
// ...
//
// IF (c == '.') TRY TO PROCESS .AND. or .OR.
// IF SOMETHING ELSE FOLLOWS . OR THE LAST . IS MISSING
// RETURN AN ERROR TOKEN
// IF (c == '!') TRY TO PROCESS COMMENT
// IF THE FOLLOWING CHAR IS NOT < REPORT AN ERROR
// ELSE IN A LOOP SKIP CHARACTERS UNTIL line terminator is found THEN continue;
// ...
// IF STRING (FOR EXAMPLE, "text") IS FOUND
// SET MARK TO MARK THE BEGINNING OF THE STRING
// IF THE STRING IS LEGAL
// USING b_addc(..)COPY THE text FROM INPUT BUFFER INTO str_LTBL
// ADD '\0' at the end make the string C-type string
// SET STRING TOKEN
// (the attribute of the string token is the offset from
// the beginning of the str_LTBL char buffer to the beginning
// of the string (TEXT in the example))
//
// return t;
// ELSE
// THE STRING LITERAL IS ILLEGAL
// SET ERROR TOKEN FOR ILLEGAL STRING (see assignment)
// DO NOT STORE THE ILLEGAL STRINg IN THE str_LTBL
//
// return t;
//
// IF(c == ANOTHER CHARACTER)
// SET TOKEN
// return t;
/* Process state transition table */
//IF (c is a digit OR c is a letter){
//
//SET THE MARK AT THE BEGINNING OF THE LEXEME
//b_setmark(sc_buf,forward);
// ....
//CODE YOUR FINITE STATE MACHINE HERE (FSM or DFA)
//IT IMPLEMENTS THE FOLLOWING ALGORITHM:
//
//FSM0. Begin with state = 0 and the input character c
//FSM1. Get the next state from the transition table calling
// state = get_next_state(state, c, &accept);
//FSM2. Get the next character
//FSM3. If the state is not accepting (accept == NOAS), go to step FSM1
// If the state is accepting, token is found, leave the machine and
// call an accepting function as described below.
//
//
//RETRACT getc_offset IF THE FINAL STATE IS A RETRACTING FINAL STATE
//GET THE BEGINNING AND THE END OF THE LEXEME
//lexstart = b_getmark(sc_buf);
//SET lexend TO getc_offset USING AN APPROPRIATE BUFFER FUNCTION
//CREATE A TEMPORARY LEXEME BUFFER HERE;
//lex_buf = b_create(...);
// . RETRACT getc_offset to the MARK SET PREVIOUSLY AT THE BEGINNING OF THE LEXEME AND
// . USING b_getc() COPY THE LEXEME BETWEEN lexstart AND lexend FROM THE INPUT BUFFER INTO lex_buf USING b_addc(...),
// . WHEN VID (KEYWORDS INCLUDED), FPL OR IL IS RECOGNIZED
// . YOU MUST CALL THE ACCEPTING FUNCTION USING THE ARRAY aa_table ,WHICH
// . CONTAINS POINTERS TO FUNCTIONS. THE ARRAY INDEX OF THE FUNCTION TO BE
// . CALLED IS STORED IN THE VARIABLE state.
// . YOU ARE NOT ALLOWED TO CALL ANY OF THE ACCEPTING FUNCTIONS BY NAME.
// . THE ARGUMENT TO THE FUNCTION IS THE STRING STORED IN lex_buf.
// ....
// b_free(lex_buf);
// return t;
//
// CHECK OTHER CHARS HERE if NEEDED, SET A TOKEN AND RETURN IT.
// FOR ILLEGAL CHARACTERS SET ERROR TOKEN.
// THE ILLEGAL CHAR IS THE ATTRIBUTE OF THE ERROR TOKEN
// IN A CASE OF RUNTIME ERROR, THE FUNCTION MUST STORE
// A NON-NEGATIVE NUMBER INTO THE GLOBAL VARIABLE scerrnum
// AND RETURN AN ERROR TOKEN. THE ERROR TOKEN ATTRIBUTE MUST
// BE THE STRING "RUN TIME ERROR: "
}//end while(1)
}
@ -347,37 +376,37 @@ int get_next_state(int state, char c, int *accept)
col = char_class(c);
next = st_table[state][col];
#ifdef DEBUG
printf("Input symbol: %c Row: %d Column: %d Next: %d \n",c,state,col,next);
printf("Input symbol: %c Row: %d Column: %d Next: %d \n", c, state, col, next);
#endif
/*
The assert(int test) macro can be used to add run-time diagnostic to programs
and to "defend" from producing unexpected results.
assert() is a macro that expands to an if statement;
if test evaluates to false (zero) , assert aborts the program
(by calling abort()) and sends the following message on stderr:
/*
The assert(int test) macro can be used to add run-time diagnostic to programs
and to "defend" from producing unexpected results.
assert() is a macro that expands to an if statement;
if test evaluates to false (zero) , assert aborts the program
(by calling abort()) and sends the following message on stderr:
Assertion failed: test, file filename, line linenum
Assertion failed: test, file filename, line linenum
The filename and linenum listed in the message are the source file name
and line number where the assert macro appears.
If you place the #define NDEBUG directive ("no debugging")
in the source code before the #include <assert.h> directive,
the effect is to comment out the assert statement.
*/
assert(next != IS);
The filename and linenum listed in the message are the source file name
and line number where the assert macro appears.
If you place the #define NDEBUG directive ("no debugging")
in the source code before the #include <assert.h> directive,
the effect is to comment out the assert statement.
*/
assert(next != IS);
/*
The other way to include diagnostics in a program is to use
conditional preprocessing as shown below. It allows the programmer
to send more details describing the run-time problem.
Once the program is tested thoroughly #define DEBUG is commented out
or #undef DEBUG is used - see the top of the file.
*/
/*
The other way to include diagnostics in a program is to use
conditional preprocessing as shown below. It allows the programmer
to send more details describing the run-time problem.
Once the program is tested thoroughly #define DEBUG is commented out
or #undef DEBUG is used - see the top of the file.
*/
#ifdef DEBUG
if(next == IS){
printf("Scanner Error: Illegal state:\n");
printf("Input symbol: %c Row: %d Column: %d\n",c,state,col);
exit(1);
if (next == IS) {
printf("Scanner Error: Illegal state:\n");
printf("Input symbol: %c Row: %d Column: %d\n", c, state, col);
exit(1);
}
#endif
*accept = as_table[next];
@ -387,6 +416,7 @@ or #undef DEBUF is used - see the top of the file.
int char_class(char c)
{
int val;
if (isalpha(c))
val = 0;
else if (c == '0')
@ -415,22 +445,32 @@ REPLACE XX WITH THE CORRESPONDING ACCEPTING STATE NUMBER
*/
Token aa_func02(char lexeme[]) {
unsigned int kw_i; /* Variable to contain keyword table index */
unsigned int i, kw_idx; /* Variable to contain keyword table index */
Token t;
char* temp_str;
if ((kw_i = iskeyword(lexeme)) > -1) { /* Keyword check */
#ifdef DEBUG
printf("Lexeme: '%s'\n size of: %lu\n", lexeme, sizeof(lexeme));
#endif
kw_idx = iskeyword(lexeme);
if (kw_idx != -1) { /* Keyword check */
t.code = KW_T;
t.attribute.kwt_idx = kw_i;
t.attribute.kwt_idx = kw_idx;
return t;
}
/* Not a keyword? Must be AVID*/
if ((temp_str = (char*)calloc(VID_LEN + 1, sizeof(char))) == NULL) {
return aa_table[ES]("RUN TIME ERROR");
}
strncpy(temp_str, lexeme, VID_LEN);
for (i = 0; i < (VID_LEN) && i < strlen(lexeme); i++) {
temp_str[i] = lexeme[i];
}/*
temp_str[strlen(temp_str)] = '\0';*/
strncpy(t.attribute.vid_lex, temp_str, VID_LEN);
t.attribute.vid_lex[strlen(temp_str)] = '\0';
free(temp_str);
switch (lexeme[0]) { /* Read first character of lexeme for implicit type (not used yet?)*/
@ -444,7 +484,7 @@ Token aa_func02(char lexeme[]) {
/* Floating point*/
break;
}
t.code = AVID_T;
return t;
/*
@ -468,15 +508,21 @@ REPLACE XX WITH THE CORRESPONDING ACCEPTING STATE NUMBER
*/
Token aa_func03(char lexeme[]) {
Token t;
unsigned int i;
char* temp_str;
if ((temp_str = (char*)calloc(VID_LEN + 2, sizeof(char))) == NULL) {
return aa_table[ES]("RUN TIME ERROR");
}
strncpy(temp_str, lexeme, VID_LEN);
temp_str[strlen(temp_str)] = '#'; /* Add# to end of the SVID */
for (i = 0; i < (VID_LEN) && i < strlen(lexeme); i++) {
temp_str[i] = lexeme[i];
}
temp_str[strlen(temp_str) - 1] = '#'; /* Add# to end of the SVID */
temp_str[strlen(temp_str)] = '\0';
strncpy(t.attribute.vid_lex, temp_str, VID_LEN);
t.attribute.vid_lex[strlen(temp_str)] = '\0';
free(temp_str);
t.code = SVID_T;
@ -506,7 +552,6 @@ Token aa_func05(char lexeme[]) {
}
t.code = INL_T;
t.attribute.int_value = temp_num;
return t;
/*
THE FUNCTION MUST CONVERT THE LEXEME REPRESENTING A DECIMAL CONSTANT
@ -532,12 +577,13 @@ Token aa_func08(char lexeme[]) {
}
temp_dbl = atof(lexeme);
#ifdef DEBUG
printf("Lexeme: '%s' | FLT value: %f \n", lexeme, temp_dbl);
#endif
if ((temp_dbl > FLT_MAX) || (temp_dbl < 0)) {
t = aa_table[ES](lexeme);
}
t.attribute.flt_value = (float)temp_dbl;
return t;
/*
THE FUNCTION MUST CONVERT THE LEXEME TO A FLOATING POINT VALUE,
@ -592,16 +638,12 @@ err_lex C-type string.
/*ACCEPTING FUNCTION FOR THE ERROR TOKEN */
Token aa_func12(char lexeme[]) {
Token aa_func13(char lexeme[]) {
Token t;
unsigned int i;
t.code = ERR_T;
for (i = 0; i < (ERR_LEN - 1) && i < strlen(lexeme); i++) {
for (i = 0; i < (ERR_LEN - 1) && i < strlen(lexeme); i++)
t.attribute.err_lex[i] = lexeme[i];
}
t.attribute.err_lex[i] = '\0';
return t;
@ -641,7 +683,7 @@ int iskeyword(char * kw_lexeme) {
if (kw_lexeme == NULL) return -1;
for (i = 0; i < KWT_SIZE; i++) {
if (strcmp(kw_table[i], kw_lexeme) == 0) return i;
if (strcmp(kw_table[i], kw_lexeme) == 0) { return i; }
}
return -1;
}

45
table.h
View File

@ -34,21 +34,17 @@
* .AND., .OR. , SEOF, 'wrong symbol',
*/
//REPLACE *ESN* WITH YOUR ERROR STATE NUMBER
#define ES 12 /* Error state */
#define ES 13 /* Error state */
#define IS -1 /* Invalid state */
/* State transition table definition */
//REPLACE *CN* WITH YOUR COLUMN NUMBER
#define TABLE_COLUMNS 7
/*transition table - type of states defined in separate table */
int st_table[][TABLE_COLUMNS] = {
/* INPUT COLUMNS:
COLUMN # | 0 | 1 | 2 | 3 | 4 | 5 | 6 |
[a-zA-Z]| 0 |[1-7]|[8-9]| . | # | other
COLUMN # | 0 | 1 | 2 | 3 | 4 | 5 | 6 |
|[a-zA-Z]| 0 |[1-7]|[8-9]| . | # | other|
*/
/* State 0 */ {1, 6 , 4 , 4 , IS , IS , IS},
/* State 1 */ {1, 1 , 1 , 1 , ES , 3 , 2},
@ -57,22 +53,18 @@ int st_table[][TABLE_COLUMNS] = {
/* State 4 */ {ES, 4 , 4 , 4 , 7 , 5 , 5},
/* State 5 */ {IS, IS , IS, IS, IS , IS , IS},
/* State 6 */ {ES, 9 , 9, ES, 7 , ES , 5},
/* State 7 */ {ES, 7 , 7, 7, ES , 8 , 8},
/* State 7 */ {ES, 7 , 7, 7, 8 , 8 , 8},
/* State 8 */ {IS, IS , IS, IS, IS , IS , IS},
/* State 9 */ {ES, 9 , 9, ES, ES , ES , 10},
/* State 10 */ {IS, IS , IS, IS, IS , IS , IS},
/* State 11 */ {IS, IS , IS, IS, IS , IS , IS},
/* State 12 */ {IS, IS , IS, IS, IS , IS , IS},
/* State 13 */ {IS, IS , IS, IS, IS , IS , IS}
//
//. YOUR TABLE INITIALIZATION HERE
//.
///* State N */ {YOUR INITIALIZATION},
};
/* Accepting state table definition */
//REPLACE *N1*, *N2*, and *N3* WITH YOUR NUMBERS
#define ASWR 2 /* accepting state with retract */
#define ASNR 3 /* accepting state with no retract */
#define ASWR 1 /* accepting state with retract */
#define ASNR 2 /* accepting state with no retract */
#define NOAS 0 /* not accepting state */
int as_table[] = {
@ -96,19 +88,12 @@ int as_table[] = {
/* Accepting action function declarations */
//FOR EACH OF YOUR ACCEPTING STATES YOU MUST PROVIDE
//ONE FUNCTION PROTOTYPE. THEY ALL RETURN Token AND TAKE
//ONE ARGUMENT: A string REPRESENTING A TOKEN LEXEME.
// Example: Token aa_funcXX(char *lexeme);
Token aa_func02(char* lexeme); // VID AVID/KW
Token aa_func03(char *lexeme); // VID SVID
Token aa_func05(char *lexeme); // DIL
Token aa_func08(char *lexeme); // FPL
Token aa_func10(char *lexeme); // OIL
Token aa_func12(char *lexeme); // ES
//Replace XX with the number of the accepting state: 02, 03 and so on.
Token aa_func02(char* lexeme); /* AVID/KW */
Token aa_func03(char* lexeme); /* SVID */
Token aa_func05(char* lexeme); /* DIL */
Token aa_func08(char* lexeme); /* FPL */
Token aa_func10(char* lexeme); /* OIL */
Token aa_func13(char* lexeme); /* ES */
/* defining a new type: pointer to function (of one char * argument)
returning Token
@ -136,8 +121,8 @@ PTR_AAF aa_table[] = {
/* State 9 */ NULL,
/* State 10 */ aa_func10,
/* State 11 */ NULL,
/* State 12 */ aa_func12,
/* State 13 */ NULL
/* State 12 */ NULL,
/* State 13 */ aa_func13
//HERE YOU MUST PROVIDE AN INITIALIZATION FOR AN ARRAY OF POINTERS
//TO ACCEPTING FUNCTIONS. THE ARRAY HAS THE SAME SIZE AS as_table[ ].