Fix transition table. Need to fix string literal read

This commit is contained in:
Victor Fernandes 2017-03-15 20:57:28 -04:00
parent 0b5fb5befa
commit b78f4a831d
5 changed files with 366 additions and 330 deletions

Binary file not shown.

View File

@ -91,6 +91,5 @@ char b_rflag(Buffer* const);
short b_retract(Buffer* const); short b_retract(Buffer* const);
short b_retract_to_mark(Buffer* const); short b_retract_to_mark(Buffer* const);
short b_getcoffset(Buffer* const); short b_getcoffset(Buffer* const);
char* b_cbhead(Buffer* const);
#endif #endif

View File

@ -9,12 +9,18 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "scanner", "scanner\scanner.
EndProject EndProject
Global Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution GlobalSection(SolutionConfigurationPlatforms) = preSolution
COMPILERS|x64 = COMPILERS|x64
COMPILERS|x86 = COMPILERS|x86
Debug|x64 = Debug|x64 Debug|x64 = Debug|x64
Debug|x86 = Debug|x86 Debug|x86 = Debug|x86
Release|x64 = Release|x64 Release|x64 = Release|x64
Release|x86 = Release|x86 Release|x86 = Release|x86
EndGlobalSection EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution GlobalSection(ProjectConfigurationPlatforms) = postSolution
{034E0E4B-DAFA-45D4-882B-1815AC73DA9E}.COMPILERS|x64.ActiveCfg = COMPILERS|x64
{034E0E4B-DAFA-45D4-882B-1815AC73DA9E}.COMPILERS|x64.Build.0 = COMPILERS|x64
{034E0E4B-DAFA-45D4-882B-1815AC73DA9E}.COMPILERS|x86.ActiveCfg = COMPILERS|Win32
{034E0E4B-DAFA-45D4-882B-1815AC73DA9E}.COMPILERS|x86.Build.0 = COMPILERS|Win32
{034E0E4B-DAFA-45D4-882B-1815AC73DA9E}.Debug|x64.ActiveCfg = Debug|x64 {034E0E4B-DAFA-45D4-882B-1815AC73DA9E}.Debug|x64.ActiveCfg = Debug|x64
{034E0E4B-DAFA-45D4-882B-1815AC73DA9E}.Debug|x64.Build.0 = Debug|x64 {034E0E4B-DAFA-45D4-882B-1815AC73DA9E}.Debug|x64.Build.0 = Debug|x64
{034E0E4B-DAFA-45D4-882B-1815AC73DA9E}.Debug|x86.ActiveCfg = Debug|Win32 {034E0E4B-DAFA-45D4-882B-1815AC73DA9E}.Debug|x86.ActiveCfg = Debug|Win32
@ -23,6 +29,10 @@ Global
{034E0E4B-DAFA-45D4-882B-1815AC73DA9E}.Release|x64.Build.0 = Release|x64 {034E0E4B-DAFA-45D4-882B-1815AC73DA9E}.Release|x64.Build.0 = Release|x64
{034E0E4B-DAFA-45D4-882B-1815AC73DA9E}.Release|x86.ActiveCfg = Release|Win32 {034E0E4B-DAFA-45D4-882B-1815AC73DA9E}.Release|x86.ActiveCfg = Release|Win32
{034E0E4B-DAFA-45D4-882B-1815AC73DA9E}.Release|x86.Build.0 = Release|Win32 {034E0E4B-DAFA-45D4-882B-1815AC73DA9E}.Release|x86.Build.0 = Release|Win32
{C53261F2-8A2D-4F75-8E54-DBFD16A621BB}.COMPILERS|x64.ActiveCfg = COMPILERS|x64
{C53261F2-8A2D-4F75-8E54-DBFD16A621BB}.COMPILERS|x64.Build.0 = COMPILERS|x64
{C53261F2-8A2D-4F75-8E54-DBFD16A621BB}.COMPILERS|x86.ActiveCfg = COMPILERS|Win32
{C53261F2-8A2D-4F75-8E54-DBFD16A621BB}.COMPILERS|x86.Build.0 = COMPILERS|Win32
{C53261F2-8A2D-4F75-8E54-DBFD16A621BB}.Debug|x64.ActiveCfg = Debug|x64 {C53261F2-8A2D-4F75-8E54-DBFD16A621BB}.Debug|x64.ActiveCfg = Debug|x64
{C53261F2-8A2D-4F75-8E54-DBFD16A621BB}.Debug|x64.Build.0 = Debug|x64 {C53261F2-8A2D-4F75-8E54-DBFD16A621BB}.Debug|x64.Build.0 = Debug|x64
{C53261F2-8A2D-4F75-8E54-DBFD16A621BB}.Debug|x86.ActiveCfg = Debug|Win32 {C53261F2-8A2D-4F75-8E54-DBFD16A621BB}.Debug|x86.ActiveCfg = Debug|Win32

640
scanner.c
View File

@ -10,11 +10,11 @@
* Date: 30 January 2017 * Date: 30 January 2017
*/ */
/* The #define _CRT_SECURE_NO_WARNINGS should be used in MS Visual Studio projects /* The #define _CRT_SECURE_NO_WARNINGS should be used in MS Visual Studio projects
* to suppress the warnings about using "unsafe" functions like fopen() * to suppress the warnings about using "unsafe" functions like fopen()
* and standard string library functions defined in string.h. * and standard string library functions defined in string.h.
* The define does not have any effect in Borland compiler projects. * The define does not have any effect in Borland compiler projects.
*/ */
#define _CRT_SECURE_NO_WARNINGS #define _CRT_SECURE_NO_WARNINGS
#include <stdio.h> /* standard input / output */ #include <stdio.h> /* standard input / output */
@ -24,7 +24,7 @@
#include <limits.h> /* integer types constants */ #include <limits.h> /* integer types constants */
#include <float.h> /* floating-point types constants */ #include <float.h> /* floating-point types constants */
/*#define NDEBUG to suppress assert() call */ /*#define NDEBUG to suppress assert() call */
#include <assert.h> /* assert() prototype */ #include <assert.h> /* assert() prototype */
/* project header files */ /* project header files */
@ -33,7 +33,7 @@
#include "table.h" #include "table.h"
#define DEBUG /* for conditional processing */ #define DEBUG /* for conditional processing */
#undef DEBUG /*#undef DEBUG*/
@ -49,14 +49,14 @@ static Buffer *lex_buf;/*pointer to temporary lexeme buffer*/
/* No other global variable declarations/definitions are allowed */ /* No other global variable declarations/definitions are allowed */
/* scanner.c static(local) function prototypes */ /* scanner.c static(local) function prototypes */
static int char_class(char c); /* character class function */ static int char_class(char c); /* character class function */
static int get_next_state(int, char, int *); /* state machine function */ static int get_next_state(int, char, int *); /* state machine function */
static int iskeyword(char * kw_lexeme); /*keywords lookup functuion */ static int iskeyword(char * kw_lexeme); /*keywords lookup functuion */
static long atool(char * lexeme); /* converts octal string to decimal value */ static long atool(char * lexeme); /* converts octal string to decimal value */
int scanner_init(Buffer * sc_buf) { int scanner_init(Buffer * sc_buf) {
if(b_isempty(sc_buf)) return EXIT_FAILURE;/*1*/ if (b_isempty(sc_buf)) return EXIT_FAILURE;/*1*/
/* in case the buffer has been read previously */ /* in case the buffer has been read previously */
b_setmark(sc_buf, 0); b_setmark(sc_buf, 0);
b_retract_to_mark(sc_buf); b_retract_to_mark(sc_buf);
@ -68,272 +68,301 @@ int scanner_init(Buffer * sc_buf) {
Token malar_next_token(Buffer * sc_buf) Token malar_next_token(Buffer * sc_buf)
{ {
Token t; /* token to return after recognition */ Token t; /* token to return after recognition */
unsigned char c; /* input symbol */ unsigned char c; /* input symbol */
int state = 0; /* initial state of the FSM */ int state = 0; /* initial state of the FSM */
short lexstart; /*start offset of a lexeme in the input buffer */ short lexstart; /*start offset of a lexeme in the input buffer */
short lexend; /*end offset of a lexeme in the input buffer */ short lexend; /*end offset of a lexeme in the input buffer */
int accept = NOAS; /* type of state - initially not accepting */ int accept = NOAS; /* type of state - initially not accepting */
/* /*
lexstart is the offset from the beginning of the char buffer of the lexstart is the offset from the beginning of the char buffer of the
input buffer (sc_buf) to the first character of the current lexeme, input buffer (sc_buf) to the first character of the current lexeme,
which is being processed by the scanner. which is being processed by the scanner.
lexend is the offset from the beginning of the char buffer of the lexend is the offset from the beginning of the char buffer of the
input buffer (sc_buf) to the last character of the current lexeme, input buffer (sc_buf) to the last character of the current lexeme,
which is being processed by the scanner. which is being processed by the scanner.
*/ */
//DECLARE YOUR VARIABLES HERE IF NEEDED //DECLARE YOUR VARIABLES HERE IF NEEDED
int i; /* Counter for loop in string error case */ int i; /* Counter for loop in string error case */
static int str_offset = 0; static int str_offset = 0;
if (sc_buf == NULL) { if (sc_buf == NULL) {
return aa_func12("RUN TIME ERROR"); /* WHOOPS */ scerrnum = 1;
} return aa_table[ES]("RUN TIME ERROR"); /* WHOOPS */
}
while (1){ /* endless loop broken by token returns it will generate a warning */
while (1) { /* endless loop broken by token returns it will generate a warning */
//GET THE NEXT SYMBOL FROM THE INPUT BUFFER
/* GET THE NEXT SYMBOL FROM THE INPUT BUFFER */
c = b_getc(sc_buf);
switch (c) {
case 255: t.code = SEOF_T; return t; /* EOF */
case '\0': t.code = SEOF_T; return t; /* Source EOF */
case '\n': line++; continue; /* Ignore new line, increment line count */
case '\r': line++; continue; /* CR, increment line count*/
case ' ': continue; /* Ignore white space */
case '\t': continue; /* Ignore tabs */
case ';': t.code = EOS_T; return t; /* End of statement */
case ',': t.code = COM_T; return t; /* Comma */
case '{': t.code = LBR_T; return t; /* Left brace */
case '}': t.code = RBR_T; return t; /* Right brace */
case '(': t.code = LPR_T; return t; /* Left parenthesis */
case ')': t.code = RPR_T; return t; /* Right parenthesis */
case '+': t.code = ART_OP_T; t.attribute.arr_op = PLUS; return t; /* Addition operator */
case '-': t.code = ART_OP_T; t.attribute.arr_op = MINUS; return t; /* Substraction operator */
case '*': t.code = ART_OP_T; t.attribute.arr_op = MULT; return t; /* Multiplication operator */
case '/': t.code = ART_OP_T; t.attribute.arr_op = DIV; return t; /* Devision operator */
case '>': t.code = REL_OP_T; t.attribute.rel_op = GT; return t; /* Greater-than relational operator */
case '<':
c = b_getc(sc_buf); c = b_getc(sc_buf);
switch (c) { if (c == '>') {
case 255: t.code = SEOF_T; return t; /* EOF */ t.code = REL_OP_T;
case '\0': t.code = SEOF_T; return t; /* Source EOF */ t.attribute.rel_op = NE; /* Negation operator */
case '\n': line++; continue; /* Ignore new line, increment line count */ return t;
case '\r': line++; continue; /* CR, increment line count*/ }
case ' ': continue; /* Ignore white space */ else if (c == '<') {
case ';': t.code = EOS_T; return t; /* End of statement */ t.code = SCC_OP_T; /* String concatenation operator */
case ',': t.code = COM_T; return t; /* Comma */ }
case '{': t.code = RBR_T; return t; /* Right brace */ else {
case '}': t.code = LBR_T; return t; /* Left brace */ t.code = REL_OP_T;
case '(': t.code = RPR_T; return t; /* Right parenthesis */ t.attribute.rel_op = LT; /* Less-than operator */
case ')': t.code = LPR_T; return t; /* Left parenthesis */ }
case '+': t.code = ART_OP_T; t.attribute.arr_op = PLUS; return t; /* Addition operator */ b_retract(sc_buf);
case '-': t.code = ART_OP_T; t.attribute.arr_op = MINUS; return t; /* Substraction operator */ /*c = b_getc(sc_buf);*/
case '*': t.code = ART_OP_T; t.attribute.arr_op = MULT; return t; /* Multiplication operator */ return t;
case '/': t.code = ART_OP_T; t.attribute.arr_op = DIV; return t; /* Devision operator */ case '.':
case '>': t.code = REL_OP_T; t.attribute.rel_op = GT; return t; /* Greater-than relational operator */ b_setmark(sc_buf, b_getcoffset(sc_buf)); /* Set mark before continuing (AND|OR case) */
case '<': c = b_getc(sc_buf);
/* MSVC will complain about this assignment inside a conditional expression*/ if (c == 'A' && b_getc(sc_buf) == 'N' && b_getc(sc_buf) == 'D' && b_getc(sc_buf) == '.') {
if (c = b_getc(sc_buf) == '>') { t.code = LOG_OP_T;
t.code = REL_OP_T; t.attribute.log_op = AND;
t.attribute.rel_op = NE; /* Negation operator */ return t;
return t; }
} else if (c == 'O' && b_getc(sc_buf) == 'R' && b_getc(sc_buf) == '.') {
else if (c == '<') { t.code = LOG_OP_T;
t.code = SCC_OP_T; /* String concatenation operator */ t.attribute.log_op = OR;
} return t;
else { }
t.code = REL_OP_T; t.code = ERR_T; /* "That character's not supposed to be here" case */
t.attribute.rel_op = LT; /* Less-than operator */ t.attribute.err_lex[0] = '.';
} t.attribute.err_lex[1] = '\0';
b_retract_to_mark(sc_buf);
return t;
case '!':
c = b_getc(sc_buf);
if (c == '<') { /* It's a comment line */
for (; c != '\0' && c != '\r' && c != '\n' && c != 255; c = b_getc(sc_buf)); /* Consume chars until line ends */
line++;
continue;
}
else { /* Bad character, pump out an error token */
b_retract(sc_buf);
b_retract(sc_buf);
t = aa_table[ES](" ");
t.attribute.err_lex[0] = c = b_getc(sc_buf);
t.attribute.err_lex[1] = c = b_getc(sc_buf);
b_retract(sc_buf); b_retract(sc_buf);
c = b_getc(sc_buf);
return t; return t;
case '.': }
b_setmark(sc_buf, b_getcoffset(sc_buf)); /* Set mark before continuing (AND|OR case) */ case '=':
if (c == 'A' && b_getc(sc_buf) == 'N' && b_getc(sc_buf) == 'D' && b_getc(sc_buf) == '.') { c = b_getc(sc_buf);
t.code = LOG_OP_T; if (c == '=') { /* Relational equals-to operator */
t.attribute.log_op = AND; t.code = REL_OP_T;
return t; t.attribute.rel_op = EQ;
}
else if (c == 'O' && b_getc(sc_buf) == 'R' && b_getc(sc_buf) == '.') {
t.code = LOG_OP_T;
t.attribute.log_op = OR;
}
t.code = ERR_T; /* "That character's not supposed to be here" case */
t.attribute.err_lex[0] = '.';
t.attribute.err_lex[1] = '\0';
b_retract_to_mark(sc_buf);
return t; return t;
case '!': }
c = b_getc(sc_buf); b_retract(sc_buf);
if (c == '<') { /* It's a comment line */ t.code = ASS_OP_T; /* Assignment operator */
for (; c != '\0' && c != '\r' && c != '\n' && c != 255; c = b_getc(sc_buf)); /* Consume chars until line ends */ return t;
case '\"': /* Don't quote me on this */
c = b_getc(sc_buf);
t.code = STR_T; /* String literal */
b_setmark(sc_buf, b_getcoffset(sc_buf));
lexstart = (short)str_offset;
lexend = lexstart;
for (; c != '\"'; c = b_getc(sc_buf)) {
b_addc(str_LTBL, c);
if (b_isfull(str_LTBL)) {
t = aa_table[ES]("\"There is always ..."); /* String too big :( */
}
if (c == '\n' || c == '\r') {
line++; line++;
continue;
} }
else { /* Bad character, pump out an error token */ if (c == 255 || c == '\0') {
t = aa_table[ES](" "); b_retract_to_mark(sc_buf);
t.attribute.err_lex[0] = c; t.code = ERR_T; /* Illegal string, make it an error token */
for (i = 0; i < ERR_LEN; i++) {
t.attribute.err_lex[i] = b_getc(sc_buf);
}
t.attribute.err_lex[i] = '\0';
return t; return t;
} }
case '=': lexend++;
c = b_getc(sc_buf);
if (c == '=') { /* Relational equals-to operator */
t.code = REL_OP_T;
t.attribute.rel_op = EQ;
}
b_retract(sc_buf);
t.code = ASS_OP_T; /* Assignment operator */
return t;
case '\"': /* Don't quote me on this */
c = b_getc(sc_buf);
b_setmark(sc_buf, b_getcoffset(sc_buf));
lexstart = (short)str_offset;
lexend = lexstart;
for (; c != '\"'; c = b_getc(sc_buf)) {
b_addc(str_LTBL, c);
if (b_isfull(str_LTBL)) {
return aa_table[ES]("\"Imagine all the .."); /* String too big :( */
}
if (c == '\n' || c == '\r') {
line++;
}
if (c == 255 || c == '\0') {
b_retract_to_mark(sc_buf);
for (i = 0; i < ERR_LEN; i++) {
t.attribute.err_lex[i] = b_getc(sc_buf);
}
}
lexend++;
str_offset++;
} /*end for loop, string finished*/
str_offset++; str_offset++;
b_addc(str_LTBL, '\0'); } /*end for loop, string finished*/
t.code = STR_T;
t.attribute.str_offset = lexstart;
return t; /* String literal */
default:
if (isalnum(c) || isalpha(c)) {
lexend = 0;
state = 0;
lex_buf = b_create(1, 1, 'a');
while (accept == NOAS) { b_addc(str_LTBL, '\0');
b_addc(lex_buf, c); t.attribute.str_offset = lexstart;
state = get_next_state(state, c, &accept);
return t;
default:
if (isalpha(c) || isalnum(c)) {
if (accept != NOAS) /*Set mark to beginning of lexeme*/
break; b_setmark(sc_buf, b_getcoffset(sc_buf) - 1);
c = b_getc(sc_buf); lexstart = 0;
lexend++; lexend = 0;
} state = 0;
/* Entering Accepting State */
b_addc(lex_buf, '\0');
if (as_table[state] == ASWR) while (accept == NOAS) {
b_retract(sc_buf); state = get_next_state(state, c, &accept);
if ((t.attribute.kwt_idx = iskeyword(b_setmark(lex_buf, 0))) != -1) {
t.code = KW_T;
b_free(lex_buf);
return t;
}
if (aa_table[state] != NULL) { if (accept != NOAS) { break; }
t = aa_table[state](b_setmark(lex_buf, 0));
} c = b_getc(sc_buf);
else { }
t = aa_table[ES]("RUN TIME ERROR");
} /*
* Entering Accepting State
*/
if (as_table[state] == ASWR)
b_retract(sc_buf);
/* Get start/end of lexeme */
lexstart = b_mark(sc_buf);
lexend = b_getcoffset(sc_buf);
lex_buf = b_create(1, 1, 'a');
b_retract_to_mark(sc_buf);
for (; lexstart < lexend; lexstart++) {
b_addc(lex_buf, b_getc(sc_buf));
}
b_addc(lex_buf, '\0');
/*if ((t.attribute.kwt_idx = iskeyword(b_setmark(lex_buf, 0))) != -1) {
t.code = KW_T;
b_free(lex_buf); b_free(lex_buf);
} return t;
}*/
else { if (aa_table[state] != NULL) {
t = aa_table[ES](" "); t = aa_table[state](b_setmark(lex_buf, 0));
t.attribute.err_lex[0] = c;
} }
else {
scerrnum = 1;
t = aa_table[ES]("RUN TIME ERROR");
}
b_free(lex_buf);
} }
else {
///* special cases or token driven processing */ t = aa_table[ES](" ");
// t.attribute.err_lex[0] = c;
//WRITE YOUR CODE FOR PROCESSING THE SPECIAL CASES HERE. }
//COMMENTS AND STRING LITERALS ARE ALSO PROCESSED HERE. return t;
// }
//WHAT FOLLOWS IS A PSEUDO CODE. YOU CAN USE switch STATEMENT
//INSTEAD OF if-else TO PROCESS THE SPECIAL CASES
//DO NOT FORGET TO COUNT THE PROGRAM LINES /* special cases or token driven processing */
// //
// //WRITE YOUR CODE FOR PROCESSING THE SPECIAL CASES HERE.
// IF (c == SOME CHARACTER) //COMMENTS AND STRING LITERALS ARE ALSO PROCESSED HERE.
// ... //
// SKIP CHARACTER (FOR EXAMPLE SPACE) //WHAT FOLLOWS IS A PSEUDO CODE. YOU CAN USE switch STATEMENT
// continue; //INSTEAD OF if-else TO PROCESS THE SPECIAL CASES
// OR SET TOKEN (SET TOKEN CODE AND TOKEN ATTRIBUTE(IF AVAILABLE)) //DO NOT FORGET TO COUNT THE PROGRAM LINES
// return t; //
// EXAMPLE: //
// if (c == ' ') continue; // IF (c == SOME CHARACTER)
// if (c == '{'){ t.code = RBR_T; /*no attribute */ return t; // ...
// if (c == '+'){ t.code = ART_OP_T; t.attribute.arr_op = PLUS */ return t; // SKIP CHARACTER (FOR EXAMPLE SPACE)
// ... // continue;
// // OR SET TOKEN (SET TOKEN CODE AND TOKEN ATTRIBUTE(IF AVAILABLE))
// IF (c == '.') TRY TO PROCESS .AND. or .OR. // return t;
// IF SOMETHING ELSE FOLLOWS . OR THE LAST . IS MISSING // EXAMPLE:
// RETURN AN ERROR TOKEN // if (c == ' ') continue;
// IF (c == '!') TRY TO PROCESS COMMENT // if (c == '{'){ t.code = RBR_T; /*no attribute */ return t;
// IF THE FOLLOWING IS NOT CHAR IS NOT < REPORT AN ERROR // if (c == '+'){ t.code = ART_OP_T; t.attribute.arr_op = PLUS */ return t;
// ELSE IN A LOOP SKIP CHARACTERS UNTIL line terminator is found THEN continue; // ...
// ... //
// IF STRING (FOR EXAMPLE, "text") IS FOUND // IF (c == '.') TRY TO PROCESS .AND. or .OR.
// SET MARK TO MARK THE BEGINNING OF THE STRING // IF SOMETHING ELSE FOLLOWS . OR THE LAST . IS MISSING
// IF THE STRING IS LEGAL // RETURN AN ERROR TOKEN
// USING b_addc(..)COPY THE text FROM INPUT BUFFER INTO str_LTBL // IF (c == '!') TRY TO PROCESS COMMENT
// ADD '\0' at the end make the string C-type string // IF THE FOLLOWING IS NOT CHAR IS NOT < REPORT AN ERROR
// SET STRING TOKEN // ELSE IN A LOOP SKIP CHARACTERS UNTIL line terminator is found THEN continue;
// (the attribute of the string token is the offset from // ...
// the beginning of the str_LTBL char buffer to the beginning // IF STRING (FOR EXAMPLE, "text") IS FOUND
// of the string (TEXT in the example)) // SET MARK TO MARK THE BEGINNING OF THE STRING
// // IF THE STRING IS LEGAL
// return t; // USING b_addc(..)COPY THE text FROM INPUT BUFFER INTO str_LTBL
// ELSE // ADD '\0' at the end make the string C-type string
// THE STRING LITERAL IS ILLEGAL // SET STRING TOKEN
// SET ERROR TOKEN FOR ILLEGAL STRING (see assignment) // (the attribute of the string token is the offset from
// DO NOT STORE THE ILLEGAL STRINg IN THE str_LTBL // the beginning of the str_LTBL char buffer to the beginning
// // of the string (TEXT in the example))
// return t; //
// // return t;
// IF(c == ANOTHER CHARACTER) // ELSE
// SET TOKEN // THE STRING LITERAL IS ILLEGAL
// return t; // SET ERROR TOKEN FOR ILLEGAL STRING (see assignment)
/* Process state transition table */ // DO NOT STORE THE ILLEGAL STRINg IN THE str_LTBL
//
//IF (c is a digit OR c is a letter){ // return t;
// //
//SET THE MARK AT THE BEGINING OF THE LEXEME // IF(c == ANOTHER CHARACTER)
//b_setmark(sc_buf,forward); // SET TOKEN
// .... // return t;
//CODE YOUR FINATE STATE MACHINE HERE (FSM or DFA) /* Process state transition table */
//IT IMPLEMENTS THE FOLLOWING ALGORITHM:
// //IF (c is a digit OR c is a letter){
//FSM0. Begin with state = 0 and the input character c //
//FSM1. Get the next state from the transition table calling //SET THE MARK AT THE BEGINING OF THE LEXEME
// state = get_next_state(state, c, &accept); //b_setmark(sc_buf,forward);
//FSM2. Get the next character // ....
//FSM3. If the state is not accepting (accept == NOAS), go to step FSM1 //CODE YOUR FINATE STATE MACHINE HERE (FSM or DFA)
// If the step is accepting, token is found, leave the machine and //IT IMPLEMENTS THE FOLLOWING ALGORITHM:
// call an accepting function as described below. //
// //FSM0. Begin with state = 0 and the input character c
// //FSM1. Get the next state from the transition table calling
//RETRACT getc_offset IF THE FINAL STATE IS A RETRACTING FINAL STATE // state = get_next_state(state, c, &accept);
//GET THE BEGINNING AND THE END OF THE LEXEME //FSM2. Get the next character
//lexstart = b_getmark(sc_buf); //FSM3. If the state is not accepting (accept == NOAS), go to step FSM1
//SET lexend TO getc_offset USING AN APPROPRIATE BUFFER FUNCTION // If the step is accepting, token is found, leave the machine and
//CREATE A TEMPORRARY LEXEME BUFFER HERE; // call an accepting function as described below.
//lex_buf = b_create(...); //
// . RETRACT getc_offset to the MARK SET PREVIOUSLY AT THE BEGINNING OF THE LEXEME AND //
// . USING b_getc() COPY THE LEXEME BETWEEN lexstart AND lexend FROM THE INPUT BUFFER INTO lex_buf USING b_addc(...), //RETRACT getc_offset IF THE FINAL STATE IS A RETRACTING FINAL STATE
// . WHEN VID (KEYWORDS INCLUDED), FPL OR IL IS RECOGNIZED //GET THE BEGINNING AND THE END OF THE LEXEME
// . YOU MUST CALL THE ACCEPTING FUNCTION USING THE ARRAY aa_table ,WHICH //lexstart = b_getmark(sc_buf);
// . CONTAINS POINTERS TO FUNCTIONS. THE ARRAY INDEX OF THE FUNCTION TO BE //SET lexend TO getc_offset USING AN APPROPRIATE BUFFER FUNCTION
// . CALLED IS STORED IN THE VARIABLE state. //CREATE A TEMPORRARY LEXEME BUFFER HERE;
// . YOU ARE NOT ALLOWED TO CALL ANY OF THE ACCEPTING FUNCTIONS BY NAME. //lex_buf = b_create(...);
// . THE ARGUMENT TO THE FUNCTION IS THE STRING STORED IN lex_buf. // . RETRACT getc_offset to the MARK SET PREVIOUSLY AT THE BEGINNING OF THE LEXEME AND
// .... // . USING b_getc() COPY THE LEXEME BETWEEN lexstart AND lexend FROM THE INPUT BUFFER INTO lex_buf USING b_addc(...),
// b_free(lex_buf); // . WHEN VID (KEYWORDS INCLUDED), FPL OR IL IS RECOGNIZED
// return t; // . YOU MUST CALL THE ACCEPTING FUNCTION USING THE ARRAY aa_table ,WHICH
// // . CONTAINS POINTERS TO FUNCTIONS. THE ARRAY INDEX OF THE FUNCTION TO BE
// CHECK OTHER CHARS HERE if NEEDED, SET A TOKEN AND RETURN IT. // . CALLED IS STORED IN THE VARIABLE state.
// FOR ILLEGAL CHARACTERS SET ERROR TOKEN. // . YOU ARE NOT ALLOWED TO CALL ANY OF THE ACCEPTING FUNCTIONS BY NAME.
// THE ILLEGAL CHAR IS THE ATTRIBUTE OF THE ERROR TOKEN // . THE ARGUMENT TO THE FUNCTION IS THE STRING STORED IN lex_buf.
// IN A CASE OF RUNTIME ERROR, THE FUNCTION MUST STORE // ....
// A NON-NEGATIVE NUMBER INTO THE GLOBAL VARIABLE scerrnum // b_free(lex_buf);
// AND RETURN AN ERROR TOKEN. THE ERROR TOKEN ATTRIBUTE MUST // return t;
// BE THE STRING "RUN TIME ERROR: " //
}//end while(1) // CHECK OTHER CHARS HERE if NEEDED, SET A TOKEN AND RETURN IT.
// FOR ILLEGAL CHARACTERS SET ERROR TOKEN.
// THE ILLEGAL CHAR IS THE ATTRIBUTE OF THE ERROR TOKEN
// IN A CASE OF RUNTIME ERROR, THE FUNCTION MUST STORE
// A NON-NEGATIVE NUMBER INTO THE GLOBAL VARIABLE scerrnum
// AND RETURN AN ERROR TOKEN. THE ERROR TOKEN ATTRIBUTE MUST
// BE THE STRING "RUN TIME ERROR: "
}//end while(1)
} }
@ -347,37 +376,37 @@ int get_next_state(int state, char c, int *accept)
col = char_class(c); col = char_class(c);
next = st_table[state][col]; next = st_table[state][col];
#ifdef DEBUG #ifdef DEBUG
printf("Input symbol: %c Row: %d Column: %d Next: %d \n",c,state,col,next); printf("Input symbol: %c Row: %d Column: %d Next: %d \n", c, state, col, next);
#endif #endif
/* /*
The assert(int test) macro can be used to add run-time diagnostic to programs The assert(int test) macro can be used to add run-time diagnostic to programs
and to "defend" from producing unexpected results. and to "defend" from producing unexpected results.
assert() is a macro that expands to an if statement; assert() is a macro that expands to an if statement;
if test evaluates to false (zero) , assert aborts the program if test evaluates to false (zero) , assert aborts the program
(by calling abort()) and sends the following message on stderr: (by calling abort()) and sends the following message on stderr:
Assertion failed: test, file filename, line linenum Assertion failed: test, file filename, line linenum
The filename and linenum listed in the message are the source file name The filename and linenum listed in the message are the source file name
and line number where the assert macro appears. and line number where the assert macro appears.
If you place the #define NDEBUG directive ("no debugging") If you place the #define NDEBUG directive ("no debugging")
in the source code before the #include <assert.h> directive, in the source code before the #include <assert.h> directive,
the effect is to comment out the assert statement. the effect is to comment out the assert statement.
*/ */
assert(next != IS); assert(next != IS);
/* /*
The other way to include diagnostics in a program is to use The other way to include diagnostics in a program is to use
conditional preprocessing as shown below. It allows the programmer conditional preprocessing as shown below. It allows the programmer
to send more details describing the run-time problem. to send more details describing the run-time problem.
Once the program is tested thoroughly #define DEBUG is commented out Once the program is tested thoroughly #define DEBUG is commented out
or #undef DEBUG is used - see the top of the file. or #undef DEBUG is used - see the top of the file.
*/ */
#ifdef DEBUG #ifdef DEBUG
if(next == IS){ if (next == IS) {
printf("Scanner Error: Illegal state:\n"); printf("Scanner Error: Illegal state:\n");
printf("Input symbol: %c Row: %d Column: %d\n",c,state,col); printf("Input symbol: %c Row: %d Column: %d\n", c, state, col);
exit(1); exit(1);
} }
#endif #endif
*accept = as_table[next]; *accept = as_table[next];
@ -387,6 +416,7 @@ or #undef DEBUF is used - see the top of the file.
int char_class(char c) int char_class(char c)
{ {
int val; int val;
if (isalpha(c)) if (isalpha(c))
val = 0; val = 0;
else if (c == '0') else if (c == '0')
@ -415,22 +445,32 @@ REPLACE XX WITH THE CORRESPONDING ACCEPTING STATE NUMBER
*/ */
Token aa_func02(char lexeme[]) { Token aa_func02(char lexeme[]) {
unsigned int kw_i; /* Variable to contain keyword table index */ unsigned int i, kw_idx; /* Variable to contain keyword table index */
Token t; Token t;
char* temp_str; char* temp_str;
if ((kw_i = iskeyword(lexeme)) > -1) { /* Keyword check */ #ifdef DEBUG
printf("Lexeme: '%s'\n size of: %lu\n", lexeme, sizeof(lexeme));
#endif
kw_idx = iskeyword(lexeme);
if (kw_idx != -1) { /* Keyword check */
t.code = KW_T; t.code = KW_T;
t.attribute.kwt_idx = kw_i; t.attribute.kwt_idx = kw_idx;
return t; return t;
} }
/* Not a keyword? Must be AVID*/ /* Not a keyword? Must be AVID*/
if ((temp_str = (char*)calloc(VID_LEN + 1, sizeof(char))) == NULL) { if ((temp_str = (char*)calloc(VID_LEN + 1, sizeof(char))) == NULL) {
return aa_table[ES]("RUN TIME ERROR"); return aa_table[ES]("RUN TIME ERROR");
} }
strncpy(temp_str, lexeme, VID_LEN);
for (i = 0; i < (VID_LEN) && i < strlen(lexeme); i++) {
temp_str[i] = lexeme[i];
}/*
temp_str[strlen(temp_str)] = '\0';*/
strncpy(t.attribute.vid_lex, temp_str, VID_LEN); strncpy(t.attribute.vid_lex, temp_str, VID_LEN);
t.attribute.vid_lex[strlen(temp_str)] = '\0';
free(temp_str); free(temp_str);
switch (lexeme[0]) { /* Read first character of lexeme for implicit type (not used yet?)*/ switch (lexeme[0]) { /* Read first character of lexeme for implicit type (not used yet?)*/
@ -444,7 +484,7 @@ Token aa_func02(char lexeme[]) {
/* Floating point*/ /* Floating point*/
break; break;
} }
t.code = AVID_T;
return t; return t;
/* /*
@ -468,15 +508,21 @@ REPLACE XX WITH THE CORRESPONDING ACCEPTING STATE NUMBER
*/ */
Token aa_func03(char lexeme[]) { Token aa_func03(char lexeme[]) {
Token t; Token t;
unsigned int i;
char* temp_str; char* temp_str;
if ((temp_str = (char*)calloc(VID_LEN + 2, sizeof(char))) == NULL) { if ((temp_str = (char*)calloc(VID_LEN + 2, sizeof(char))) == NULL) {
return aa_table[ES]("RUN TIME ERROR"); return aa_table[ES]("RUN TIME ERROR");
} }
strncpy(temp_str, lexeme, VID_LEN); for (i = 0; i < (VID_LEN) && i < strlen(lexeme); i++) {
temp_str[strlen(temp_str)] = '#'; /* Add# to end of the SVID */ temp_str[i] = lexeme[i];
}
temp_str[strlen(temp_str) - 1] = '#'; /* Add# to end of the SVID */
temp_str[strlen(temp_str)] = '\0';
strncpy(t.attribute.vid_lex, temp_str, VID_LEN); strncpy(t.attribute.vid_lex, temp_str, VID_LEN);
t.attribute.vid_lex[strlen(temp_str)] = '\0';
free(temp_str); free(temp_str);
t.code = SVID_T; t.code = SVID_T;
@ -506,7 +552,6 @@ Token aa_func05(char lexeme[]) {
} }
t.code = INL_T; t.code = INL_T;
t.attribute.int_value = temp_num; t.attribute.int_value = temp_num;
return t; return t;
/* /*
THE FUNCTION MUST CONVERT THE LEXEME REPRESENTING A DECIMAL CONSTANT THE FUNCTION MUST CONVERT THE LEXEME REPRESENTING A DECIMAL CONSTANT
@ -532,12 +577,13 @@ Token aa_func08(char lexeme[]) {
} }
temp_dbl = atof(lexeme); temp_dbl = atof(lexeme);
#ifdef DEBUG
printf("Lexeme: '%s' | FLT value: %f \n", lexeme, temp_dbl);
#endif
if ((temp_dbl > FLT_MAX) || (temp_dbl < 0)) { if ((temp_dbl > FLT_MAX) || (temp_dbl < 0)) {
t = aa_table[ES](lexeme); t = aa_table[ES](lexeme);
} }
t.attribute.flt_value = (float)temp_dbl; t.attribute.flt_value = (float)temp_dbl;
return t; return t;
/* /*
THE FUNCTION MUST CONVERT THE LEXEME TO A FLOATING POINT VALUE, THE FUNCTION MUST CONVERT THE LEXEME TO A FLOATING POINT VALUE,
@ -592,16 +638,12 @@ err_lex C-type string.
/*ACCEPTING FUNCTION FOR THE ERROR TOKEN */ /*ACCEPTING FUNCTION FOR THE ERROR TOKEN */
Token aa_func12(char lexeme[]) { Token aa_func13(char lexeme[]) {
Token t; Token t;
unsigned int i; unsigned int i;
t.code = ERR_T; t.code = ERR_T;
for (i = 0; i < (ERR_LEN - 1) && i < strlen(lexeme); i++)
for (i = 0; i < (ERR_LEN - 1) && i < strlen(lexeme); i++) {
t.attribute.err_lex[i] = lexeme[i]; t.attribute.err_lex[i] = lexeme[i];
}
t.attribute.err_lex[i] = '\0'; t.attribute.err_lex[i] = '\0';
return t; return t;
@ -641,7 +683,7 @@ int iskeyword(char * kw_lexeme) {
if (kw_lexeme == NULL) return -1; if (kw_lexeme == NULL) return -1;
for (i = 0; i < KWT_SIZE; i++) { for (i = 0; i < KWT_SIZE; i++) {
if (strcmp(kw_table[i], kw_lexeme) == 0) return i; if (strcmp(kw_table[i], kw_lexeme) == 0) { return i; }
} }
return -1; return -1;
} }

45
table.h
View File

@ -34,21 +34,17 @@
* .AND., .OR. , SEOF, 'wrong symbol', * .AND., .OR. , SEOF, 'wrong symbol',
*/ */
#define ES 13 /* Error state */
//REPLACE *ESN* WITH YOUR ERROR STATE NUMBER
#define ES 12 /* Error state */
#define IS -1 /* Invalid state */ #define IS -1 /* Invalid state */
/* State transition table definition */ /* State transition table definition */
//REPLACE *CN* WITH YOUR COLUMN NUMBER
#define TABLE_COLUMNS 7 #define TABLE_COLUMNS 7
/*transition table - type of states defined in separate table */ /*transition table - type of states defined in separate table */
int st_table[][TABLE_COLUMNS] = { int st_table[][TABLE_COLUMNS] = {
/* INPUT COLUMNS: /* INPUT COLUMNS:
COLUMN # | 0 | 1 | 2 | 3 | 4 | 5 | 6 | COLUMN # | 0 | 1 | 2 | 3 | 4 | 5 | 6 |
[a-zA-Z]| 0 |[1-7]|[8-9]| . | # | other |[a-zA-Z]| 0 |[1-7]|[8-9]| . | # | other|
*/ */
/* State 0 */ {1, 6 , 4 , 4 , IS , IS , IS}, /* State 0 */ {1, 6 , 4 , 4 , IS , IS , IS},
/* State 1 */ {1, 1 , 1 , 1 , ES , 3 , 2}, /* State 1 */ {1, 1 , 1 , 1 , ES , 3 , 2},
@ -57,22 +53,18 @@ int st_table[][TABLE_COLUMNS] = {
/* State 4 */ {ES, 4 , 4 , 4 , 7 , 5 , 5}, /* State 4 */ {ES, 4 , 4 , 4 , 7 , 5 , 5},
/* State 5 */ {IS, IS , IS, IS, IS , IS , IS}, /* State 5 */ {IS, IS , IS, IS, IS , IS , IS},
/* State 6 */ {ES, 9 , 9, ES, 7 , ES , 5}, /* State 6 */ {ES, 9 , 9, ES, 7 , ES , 5},
/* State 7 */ {ES, 7 , 7, 7, ES , 8 , 8}, /* State 7 */ {ES, 7 , 7, 7, 8 , 8 , 8},
/* State 8 */ {IS, IS , IS, IS, IS , IS , IS}, /* State 8 */ {IS, IS , IS, IS, IS , IS , IS},
/* State 9 */ {ES, 9 , 9, ES, ES , ES , 10}, /* State 9 */ {ES, 9 , 9, ES, ES , ES , 10},
/* State 10 */ {IS, IS , IS, IS, IS , IS , IS}, /* State 10 */ {IS, IS , IS, IS, IS , IS , IS},
/* State 11 */ {IS, IS , IS, IS, IS , IS , IS}, /* State 11 */ {IS, IS , IS, IS, IS , IS , IS},
/* State 12 */ {IS, IS , IS, IS, IS , IS , IS}, /* State 12 */ {IS, IS , IS, IS, IS , IS , IS},
/* State 13 */ {IS, IS , IS, IS, IS , IS , IS} /* State 13 */ {IS, IS , IS, IS, IS , IS , IS}
//
//. YOUR TABLE INITIALIZATION HERE
//.
///* State N */ {YOUR INITIALIZATION},
}; };
/* Accepting state table definition */ /* Accepting state table definition */
//REPLACE *N1*, *N2*, and *N3* WITH YOUR NUMBERS #define ASWR 1 /* accepting state with retract */
#define ASWR 2 /* accepting state with retract */ #define ASNR 2 /* accepting state with no retract */
#define ASNR 3 /* accepting state with no retract */
#define NOAS 0 /* not accepting state */ #define NOAS 0 /* not accepting state */
int as_table[] = { int as_table[] = {
@ -96,19 +88,12 @@ int as_table[] = {
/* Accepting action function declarations */ /* Accepting action function declarations */
//FOR EACH OF YOUR ACCEPTING STATES YOU MUST PROVIDE Token aa_func02(char* lexeme); /* AVID/KW */
//ONE FUNCTION PROTOTYPE. THEY ALL RETURN Token AND TAKE Token aa_func03(char* lexeme); /* SVID */
//ONE ARGUMENT: A string REPRESENTING A TOKEN LEXEME. Token aa_func05(char* lexeme); /* DIL */
Token aa_func08(char* lexeme); /* FPL */
// Example: Token aa_funcXX(char *lexeme); Token aa_func10(char* lexeme); /* OIL */
Token aa_func13(char* lexeme); /* ES */
Token aa_func02(char* lexeme); // VID AVID/KW
Token aa_func03(char *lexeme); // VID SVID
Token aa_func05(char *lexeme); // DIL
Token aa_func08(char *lexeme); // FPL
Token aa_func10(char *lexeme); // OIL
Token aa_func12(char *lexeme); // ES
//Replace XX with the number of the accepting state: 02, 03 and so on.
/* defining a new type: pointer to function (of one char * argument) /* defining a new type: pointer to function (of one char * argument)
returning Token returning Token
@ -136,8 +121,8 @@ PTR_AAF aa_table[] = {
/* State 9 */ NULL, /* State 9 */ NULL,
/* State 10 */ aa_func10, /* State 10 */ aa_func10,
/* State 11 */ NULL, /* State 11 */ NULL,
/* State 12 */ aa_func12, /* State 12 */ NULL,
/* State 13 */ NULL /* State 13 */ aa_func13
//HERE YOU MUST PROVIDE AN INITIALIZATION FOR AN ARRAY OF POINTERS //HERE YOU MUST PROVIDE AN INITIALIZATION FOR AN ARRAY OF POINTERS
//TO ACCEPTING FUNCTIONS. THE ARRAY HAS THE SAME SIZE AS as_table[ ]. //TO ACCEPTING FUNCTIONS. THE ARRAY HAS THE SAME SIZE AS as_table[ ].