Fix transition table. String literal reading still needs to be fixed.

This commit is contained in:
Victor Fernandes 2017-03-15 20:57:28 -04:00
parent 0b5fb5befa
commit b78f4a831d
5 changed files with 366 additions and 330 deletions

Binary file not shown.

View File

@ -91,6 +91,5 @@ char b_rflag(Buffer* const);
short b_retract(Buffer* const);
short b_retract_to_mark(Buffer* const);
short b_getcoffset(Buffer* const);
char* b_cbhead(Buffer* const);
#endif

View File

@ -9,12 +9,18 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "scanner", "scanner\scanner.
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
COMPILERS|x64 = COMPILERS|x64
COMPILERS|x86 = COMPILERS|x86
Debug|x64 = Debug|x64
Debug|x86 = Debug|x86
Release|x64 = Release|x64
Release|x86 = Release|x86
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{034E0E4B-DAFA-45D4-882B-1815AC73DA9E}.COMPILERS|x64.ActiveCfg = COMPILERS|x64
{034E0E4B-DAFA-45D4-882B-1815AC73DA9E}.COMPILERS|x64.Build.0 = COMPILERS|x64
{034E0E4B-DAFA-45D4-882B-1815AC73DA9E}.COMPILERS|x86.ActiveCfg = COMPILERS|Win32
{034E0E4B-DAFA-45D4-882B-1815AC73DA9E}.COMPILERS|x86.Build.0 = COMPILERS|Win32
{034E0E4B-DAFA-45D4-882B-1815AC73DA9E}.Debug|x64.ActiveCfg = Debug|x64
{034E0E4B-DAFA-45D4-882B-1815AC73DA9E}.Debug|x64.Build.0 = Debug|x64
{034E0E4B-DAFA-45D4-882B-1815AC73DA9E}.Debug|x86.ActiveCfg = Debug|Win32
@ -23,6 +29,10 @@ Global
{034E0E4B-DAFA-45D4-882B-1815AC73DA9E}.Release|x64.Build.0 = Release|x64
{034E0E4B-DAFA-45D4-882B-1815AC73DA9E}.Release|x86.ActiveCfg = Release|Win32
{034E0E4B-DAFA-45D4-882B-1815AC73DA9E}.Release|x86.Build.0 = Release|Win32
{C53261F2-8A2D-4F75-8E54-DBFD16A621BB}.COMPILERS|x64.ActiveCfg = COMPILERS|x64
{C53261F2-8A2D-4F75-8E54-DBFD16A621BB}.COMPILERS|x64.Build.0 = COMPILERS|x64
{C53261F2-8A2D-4F75-8E54-DBFD16A621BB}.COMPILERS|x86.ActiveCfg = COMPILERS|Win32
{C53261F2-8A2D-4F75-8E54-DBFD16A621BB}.COMPILERS|x86.Build.0 = COMPILERS|Win32
{C53261F2-8A2D-4F75-8E54-DBFD16A621BB}.Debug|x64.ActiveCfg = Debug|x64
{C53261F2-8A2D-4F75-8E54-DBFD16A621BB}.Debug|x64.Build.0 = Debug|x64
{C53261F2-8A2D-4F75-8E54-DBFD16A621BB}.Debug|x86.ActiveCfg = Debug|Win32

130
scanner.c
View File

@ -33,7 +33,7 @@
#include "table.h"
#define DEBUG /* for conditional processing */
#undef DEBUG
/*#undef DEBUG*/
@ -90,34 +90,37 @@ which is being processed by the scanner.
static int str_offset = 0;
if (sc_buf == NULL) {
return aa_func12("RUN TIME ERROR"); /* WHOOPS */
scerrnum = 1;
return aa_table[ES]("RUN TIME ERROR"); /* WHOOPS */
}
while (1) { /* endless loop, broken only by token returns; it will generate a warning */
//GET THE NEXT SYMBOL FROM THE INPUT BUFFER
/* GET THE NEXT SYMBOL FROM THE INPUT BUFFER */
c = b_getc(sc_buf);
switch (c) {
case 255: t.code = SEOF_T; return t; /* EOF */
case '\0': t.code = SEOF_T; return t; /* Source EOF */
case '\n': line++; continue; /* Ignore new line, increment line count */
case '\r': line++; continue; /* CR, increment line count*/
case ' ': continue; /* Ignore white space */
case '\t': continue; /* Ignore tabs */
case ';': t.code = EOS_T; return t; /* End of statement */
case ',': t.code = COM_T; return t; /* Comma */
case '{': t.code = RBR_T; return t; /* Right brace */
case '}': t.code = LBR_T; return t; /* Left brace */
case '(': t.code = RPR_T; return t; /* Right parenthesis */
case ')': t.code = LPR_T; return t; /* Left parenthesis */
case '{': t.code = LBR_T; return t; /* Left brace */
case '}': t.code = RBR_T; return t; /* Right brace */
case '(': t.code = LPR_T; return t; /* Left parenthesis */
case ')': t.code = RPR_T; return t; /* Right parenthesis */
case '+': t.code = ART_OP_T; t.attribute.arr_op = PLUS; return t; /* Addition operator */
case '-': t.code = ART_OP_T; t.attribute.arr_op = MINUS; return t; /* Subtraction operator */
case '*': t.code = ART_OP_T; t.attribute.arr_op = MULT; return t; /* Multiplication operator */
case '/': t.code = ART_OP_T; t.attribute.arr_op = DIV; return t; /* Division operator */
case '>': t.code = REL_OP_T; t.attribute.rel_op = GT; return t; /* Greater-than relational operator */
case '<':
/* MSVC will complain about this assignment inside a conditional expression*/
if (c = b_getc(sc_buf) == '>') {
c = b_getc(sc_buf);
if (c == '>') {
t.code = REL_OP_T;
t.attribute.rel_op = NE; /* Not-equal relational operator (<>) */
return t;
@ -130,10 +133,11 @@ which is being processed by the scanner.
t.attribute.rel_op = LT; /* Less-than operator */
}
b_retract(sc_buf);
c = b_getc(sc_buf);
/*c = b_getc(sc_buf);*/
return t;
case '.':
b_setmark(sc_buf, b_getcoffset(sc_buf)); /* Set mark before continuing (AND|OR case) */
c = b_getc(sc_buf);
if (c == 'A' && b_getc(sc_buf) == 'N' && b_getc(sc_buf) == 'D' && b_getc(sc_buf) == '.') {
t.code = LOG_OP_T;
t.attribute.log_op = AND;
@ -142,6 +146,7 @@ which is being processed by the scanner.
else if (c == 'O' && b_getc(sc_buf) == 'R' && b_getc(sc_buf) == '.') {
t.code = LOG_OP_T;
t.attribute.log_op = OR;
return t;
}
t.code = ERR_T; /* "That character's not supposed to be here" case */
t.attribute.err_lex[0] = '.';
@ -156,8 +161,12 @@ which is being processed by the scanner.
continue;
}
else { /* Bad character, pump out an error token */
b_retract(sc_buf);
b_retract(sc_buf);
t = aa_table[ES](" ");
t.attribute.err_lex[0] = c;
t.attribute.err_lex[0] = c = b_getc(sc_buf);
t.attribute.err_lex[1] = c = b_getc(sc_buf);
b_retract(sc_buf);
return t;
}
case '=':
@ -165,68 +174,87 @@ which is being processed by the scanner.
if (c == '=') { /* Relational equals-to operator */
t.code = REL_OP_T;
t.attribute.rel_op = EQ;
return t;
}
b_retract(sc_buf);
t.code = ASS_OP_T; /* Assignment operator */
return t;
case '\"': /* Don't quote me on this */
c = b_getc(sc_buf);
t.code = STR_T; /* String literal */
b_setmark(sc_buf, b_getcoffset(sc_buf));
lexstart = (short)str_offset;
lexend = lexstart;
for (; c != '\"'; c = b_getc(sc_buf)) {
b_addc(str_LTBL, c);
if (b_isfull(str_LTBL)) {
return aa_table[ES]("\"Imagine all the .."); /* String too big :( */
t = aa_table[ES]("\"There is always ..."); /* String too big :( */
}
if (c == '\n' || c == '\r') {
line++;
}
if (c == 255 || c == '\0') {
b_retract_to_mark(sc_buf);
t.code = ERR_T; /* Illegal string, make it an error token */
for (i = 0; i < ERR_LEN; i++) {
t.attribute.err_lex[i] = b_getc(sc_buf);
}
t.attribute.err_lex[i] = '\0';
return t;
}
lexend++;
str_offset++;
} /*end for loop, string finished*/
str_offset++;
b_addc(str_LTBL, '\0');
t.code = STR_T;
b_addc(str_LTBL, '\0');
t.attribute.str_offset = lexstart;
return t; /* String literal */
return t;
default:
if (isalnum(c) || isalpha(c)) {
if (isalpha(c) || isalnum(c)) {
/*Set mark to beginning of lexeme*/
b_setmark(sc_buf, b_getcoffset(sc_buf) - 1);
lexstart = 0;
lexend = 0;
state = 0;
lex_buf = b_create(1, 1, 'a');
while (accept == NOAS) {
b_addc(lex_buf, c);
state = get_next_state(state, c, &accept);
if (accept != NOAS)
break;
if (accept != NOAS) { break; }
c = b_getc(sc_buf);
lexend++;
}
/* Entering Accepting State */
b_addc(lex_buf, '\0');
/*
* Entering Accepting State
*/
if (as_table[state] == ASWR)
b_retract(sc_buf);
if ((t.attribute.kwt_idx = iskeyword(b_setmark(lex_buf, 0))) != -1) {
/* Get start/end of lexeme */
lexstart = b_mark(sc_buf);
lexend = b_getcoffset(sc_buf);
lex_buf = b_create(1, 1, 'a');
b_retract_to_mark(sc_buf);
for (; lexstart < lexend; lexstart++) {
b_addc(lex_buf, b_getc(sc_buf));
}
b_addc(lex_buf, '\0');
/*if ((t.attribute.kwt_idx = iskeyword(b_setmark(lex_buf, 0))) != -1) {
t.code = KW_T;
b_free(lex_buf);
return t;
}
}*/
if (aa_table[state] != NULL) {
t = aa_table[state](b_setmark(lex_buf, 0));
}
else {
scerrnum = 1;
t = aa_table[ES]("RUN TIME ERROR");
}
b_free(lex_buf);
@ -236,10 +264,11 @@ which is being processed by the scanner.
t = aa_table[ES](" ");
t.attribute.err_lex[0] = c;
}
return t;
}
///* special cases or token driven processing */
/* special cases or token driven processing */
//
//WRITE YOUR CODE FOR PROCESSING THE SPECIAL CASES HERE.
//COMMENTS AND STRING LITERALS ARE ALSO PROCESSED HERE.
@ -387,6 +416,7 @@ or #undef DEBUG is used - see the top of the file.
int char_class(char c)
{
int val;
if (isalpha(c))
val = 0;
else if (c == '0')
@ -415,22 +445,32 @@ REPLACE XX WITH THE CORRESPONDING ACCEPTING STATE NUMBER
*/
Token aa_func02(char lexeme[]) {
unsigned int kw_i; /* Variable to contain keyword table index */
unsigned int i, kw_idx; /* Variable to contain keyword table index */
Token t;
char* temp_str;
if ((kw_i = iskeyword(lexeme)) > -1) { /* Keyword check */
#ifdef DEBUG
printf("Lexeme: '%s'\n size of: %lu\n", lexeme, sizeof(lexeme));
#endif
kw_idx = iskeyword(lexeme);
if (kw_idx != -1) { /* Keyword check */
t.code = KW_T;
t.attribute.kwt_idx = kw_i;
t.attribute.kwt_idx = kw_idx;
return t;
}
/* Not a keyword? Must be AVID*/
if ((temp_str = (char*)calloc(VID_LEN + 1, sizeof(char))) == NULL) {
return aa_table[ES]("RUN TIME ERROR");
}
strncpy(temp_str, lexeme, VID_LEN);
for (i = 0; i < (VID_LEN) && i < strlen(lexeme); i++) {
temp_str[i] = lexeme[i];
}/*
temp_str[strlen(temp_str)] = '\0';*/
strncpy(t.attribute.vid_lex, temp_str, VID_LEN);
t.attribute.vid_lex[strlen(temp_str)] = '\0';
free(temp_str);
switch (lexeme[0]) { /* Read first character of lexeme for implicit type (not used yet?)*/
@ -444,7 +484,7 @@ Token aa_func02(char lexeme[]) {
/* Floating point*/
break;
}
t.code = AVID_T;
return t;
/*
@ -468,15 +508,21 @@ REPLACE XX WITH THE CORRESPONDING ACCEPTING STATE NUMBER
*/
Token aa_func03(char lexeme[]) {
Token t;
unsigned int i;
char* temp_str;
if ((temp_str = (char*)calloc(VID_LEN + 2, sizeof(char))) == NULL) {
return aa_table[ES]("RUN TIME ERROR");
}
strncpy(temp_str, lexeme, VID_LEN);
temp_str[strlen(temp_str)] = '#'; /* Add # to end of the SVID */
for (i = 0; i < (VID_LEN) && i < strlen(lexeme); i++) {
temp_str[i] = lexeme[i];
}
temp_str[strlen(temp_str) - 1] = '#'; /* Add # to end of the SVID */
temp_str[strlen(temp_str)] = '\0';
strncpy(t.attribute.vid_lex, temp_str, VID_LEN);
t.attribute.vid_lex[strlen(temp_str)] = '\0';
free(temp_str);
t.code = SVID_T;
@ -506,7 +552,6 @@ Token aa_func05(char lexeme[]) {
}
t.code = INL_T;
t.attribute.int_value = temp_num;
return t;
/*
THE FUNCTION MUST CONVERT THE LEXEME REPRESENTING A DECIMAL CONSTANT
@ -532,12 +577,13 @@ Token aa_func08(char lexeme[]) {
}
temp_dbl = atof(lexeme);
#ifdef DEBUG
printf("Lexeme: '%s' | FLT value: %f \n", lexeme, temp_dbl);
#endif
if ((temp_dbl > FLT_MAX) || (temp_dbl < 0)) {
t = aa_table[ES](lexeme);
}
t.attribute.flt_value = (float)temp_dbl;
return t;
/*
THE FUNCTION MUST CONVERT THE LEXEME TO A FLOATING POINT VALUE,
@ -592,16 +638,12 @@ err_lex C-type string.
/*ACCEPTING FUNCTION FOR THE ERROR TOKEN */
Token aa_func12(char lexeme[]) {
Token aa_func13(char lexeme[]) {
Token t;
unsigned int i;
t.code = ERR_T;
for (i = 0; i < (ERR_LEN - 1) && i < strlen(lexeme); i++) {
for (i = 0; i < (ERR_LEN - 1) && i < strlen(lexeme); i++)
t.attribute.err_lex[i] = lexeme[i];
}
t.attribute.err_lex[i] = '\0';
return t;
@ -641,7 +683,7 @@ int iskeyword(char * kw_lexeme) {
if (kw_lexeme == NULL) return -1;
for (i = 0; i < KWT_SIZE; i++) {
if (strcmp(kw_table[i], kw_lexeme) == 0) return i;
if (strcmp(kw_table[i], kw_lexeme) == 0) { return i; }
}
return -1;
}

43
table.h
View File

@ -34,21 +34,17 @@
* .AND., .OR. , SEOF, 'wrong symbol',
*/
//REPLACE *ESN* WITH YOUR ERROR STATE NUMBER
#define ES 12 /* Error state */
#define ES 13 /* Error state */
#define IS -1 /* Invalid state */
/* State transition table definition */
//REPLACE *CN* WITH YOUR COLUMN NUMBER
#define TABLE_COLUMNS 7
/*transition table - type of states defined in separate table */
int st_table[][TABLE_COLUMNS] = {
/* INPUT COLUMNS:
COLUMN # | 0 | 1 | 2 | 3 | 4 | 5 | 6 |
[a-zA-Z]| 0 |[1-7]|[8-9]| . | # | other
|[a-zA-Z]| 0 |[1-7]|[8-9]| . | # | other|
*/
/* State 0 */ {1, 6 , 4 , 4 , IS , IS , IS},
/* State 1 */ {1, 1 , 1 , 1 , ES , 3 , 2},
@ -57,22 +53,18 @@ int st_table[][TABLE_COLUMNS] = {
/* State 4 */ {ES, 4 , 4 , 4 , 7 , 5 , 5},
/* State 5 */ {IS, IS , IS, IS, IS , IS , IS},
/* State 6 */ {ES, 9 , 9, ES, 7 , ES , 5},
/* State 7 */ {ES, 7 , 7, 7, ES , 8 , 8},
/* State 7 */ {ES, 7 , 7, 7, 8 , 8 , 8},
/* State 8 */ {IS, IS , IS, IS, IS , IS , IS},
/* State 9 */ {ES, 9 , 9, ES, ES , ES , 10},
/* State 10 */ {IS, IS , IS, IS, IS , IS , IS},
/* State 11 */ {IS, IS , IS, IS, IS , IS , IS},
/* State 12 */ {IS, IS , IS, IS, IS , IS , IS},
/* State 13 */ {IS, IS , IS, IS, IS , IS , IS}
//
//. YOUR TABLE INITIALIZATION HERE
//.
///* State N */ {YOUR INITIALIZATION},
};
/* Accepting state table definition */
//REPLACE *N1*, *N2*, and *N3* WITH YOUR NUMBERS
#define ASWR 2 /* accepting state with retract */
#define ASNR 3 /* accepting state with no retract */
#define ASWR 1 /* accepting state with retract */
#define ASNR 2 /* accepting state with no retract */
#define NOAS 0 /* not accepting state */
int as_table[] = {
@ -96,19 +88,12 @@ int as_table[] = {
/* Accepting action function declarations */
//FOR EACH OF YOUR ACCEPTING STATES YOU MUST PROVIDE
//ONE FUNCTION PROTOTYPE. THEY ALL RETURN Token AND TAKE
//ONE ARGUMENT: A string REPRESENTING A TOKEN LEXEME.
// Example: Token aa_funcXX(char *lexeme);
Token aa_func02(char* lexeme); // VID AVID/KW
Token aa_func03(char *lexeme); // VID SVID
Token aa_func05(char *lexeme); // DIL
Token aa_func08(char *lexeme); // FPL
Token aa_func10(char *lexeme); // OIL
Token aa_func12(char *lexeme); // ES
//Replace XX with the number of the accepting state: 02, 03 and so on.
Token aa_func02(char* lexeme); /* AVID/KW */
Token aa_func03(char* lexeme); /* SVID */
Token aa_func05(char* lexeme); /* DIL */
Token aa_func08(char* lexeme); /* FPL */
Token aa_func10(char* lexeme); /* OIL */
Token aa_func13(char* lexeme); /* ES */
/* defining a new type: pointer to function (of one char * argument)
returning Token
@ -136,8 +121,8 @@ PTR_AAF aa_table[] = {
/* State 9 */ NULL,
/* State 10 */ aa_func10,
/* State 11 */ NULL,
/* State 12 */ aa_func12,
/* State 13 */ NULL
/* State 12 */ NULL,
/* State 13 */ aa_func13
//HERE YOU MUST PROVIDE AN INITIALIZATION FOR AN ARRAY OF POINTERS
//TO ACCEPTING FUNCTIONS. THE ARRAY HAS THE SAME SIZE AS as_table[ ].