diff --git a/PLATYPUS_Transition_Table.xlsx b/PLATYPUS_Transition_Table.xlsx index fb1de27..4448f81 100644 Binary files a/PLATYPUS_Transition_Table.xlsx and b/PLATYPUS_Transition_Table.xlsx differ diff --git a/buffer.h b/buffer.h index dd0039e..8bb2928 100755 --- a/buffer.h +++ b/buffer.h @@ -91,6 +91,5 @@ char b_rflag(Buffer* const); short b_retract(Buffer* const); short b_retract_to_mark(Buffer* const); short b_getcoffset(Buffer* const); -char* b_cbhead(Buffer* const); #endif diff --git a/buffer/buffer.sln b/buffer/buffer.sln index b361415..1337add 100644 --- a/buffer/buffer.sln +++ b/buffer/buffer.sln @@ -9,12 +9,18 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "scanner", "scanner\scanner. EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution + COMPILERS|x64 = COMPILERS|x64 + COMPILERS|x86 = COMPILERS|x86 Debug|x64 = Debug|x64 Debug|x86 = Debug|x86 Release|x64 = Release|x64 Release|x86 = Release|x86 EndGlobalSection GlobalSection(ProjectConfigurationPlatforms) = postSolution + {034E0E4B-DAFA-45D4-882B-1815AC73DA9E}.COMPILERS|x64.ActiveCfg = COMPILERS|x64 + {034E0E4B-DAFA-45D4-882B-1815AC73DA9E}.COMPILERS|x64.Build.0 = COMPILERS|x64 + {034E0E4B-DAFA-45D4-882B-1815AC73DA9E}.COMPILERS|x86.ActiveCfg = COMPILERS|Win32 + {034E0E4B-DAFA-45D4-882B-1815AC73DA9E}.COMPILERS|x86.Build.0 = COMPILERS|Win32 {034E0E4B-DAFA-45D4-882B-1815AC73DA9E}.Debug|x64.ActiveCfg = Debug|x64 {034E0E4B-DAFA-45D4-882B-1815AC73DA9E}.Debug|x64.Build.0 = Debug|x64 {034E0E4B-DAFA-45D4-882B-1815AC73DA9E}.Debug|x86.ActiveCfg = Debug|Win32 @@ -23,6 +29,10 @@ Global {034E0E4B-DAFA-45D4-882B-1815AC73DA9E}.Release|x64.Build.0 = Release|x64 {034E0E4B-DAFA-45D4-882B-1815AC73DA9E}.Release|x86.ActiveCfg = Release|Win32 {034E0E4B-DAFA-45D4-882B-1815AC73DA9E}.Release|x86.Build.0 = Release|Win32 + {C53261F2-8A2D-4F75-8E54-DBFD16A621BB}.COMPILERS|x64.ActiveCfg = COMPILERS|x64 + {C53261F2-8A2D-4F75-8E54-DBFD16A621BB}.COMPILERS|x64.Build.0 = COMPILERS|x64 + {C53261F2-8A2D-4F75-8E54-DBFD16A621BB}.COMPILERS|x86.ActiveCfg = COMPILERS|Win32 + {C53261F2-8A2D-4F75-8E54-DBFD16A621BB}.COMPILERS|x86.Build.0 = COMPILERS|Win32 {C53261F2-8A2D-4F75-8E54-DBFD16A621BB}.Debug|x64.ActiveCfg = Debug|x64 {C53261F2-8A2D-4F75-8E54-DBFD16A621BB}.Debug|x64.Build.0 = Debug|x64 {C53261F2-8A2D-4F75-8E54-DBFD16A621BB}.Debug|x86.ActiveCfg = Debug|Win32 diff --git a/scanner.c b/scanner.c index 3067466..c5b6846 100755 --- a/scanner.c +++ b/scanner.c @@ -10,11 +10,11 @@ * Date: 30 January 2017 */ -/* The #define _CRT_SECURE_NO_WARNINGS should be used in MS Visual Studio projects - * to suppress the warnings about using "unsafe" functions like fopen() - * and standard sting library functions defined in string.h. - * The define does not have any effect in Borland compiler projects. - */ + /* The #define _CRT_SECURE_NO_WARNINGS should be used in MS Visual Studio projects + * to suppress the warnings about using "unsafe" functions like fopen() + * and standard sting library functions defined in string.h. + * The define does not have any effect in Borland compiler projects. + */ #define _CRT_SECURE_NO_WARNINGS #include /* standard input / output */ @@ -24,7 +24,7 @@ #include /* integer types constants */ #include /* floating-point types constants */ -/*#define NDEBUG to suppress assert() call */ + /*#define NDEBUG to suppress assert() call */ #include /* assert() prototype */ /* project header files */ @@ -33,7 +33,7 @@ #include "table.h" #define DEBUG /* for conditional processing */ -#undef DEBUG +/*#undef DEBUG*/ @@ -49,14 +49,14 @@ static Buffer *lex_buf;/*pointer to temporary lexeme buffer*/ /* No other global variable declarations/definitiond are allowed */ -/* scanner.c static(local) function prototypes */ +/* scanner.c static(local) function prototypes */ static int char_class(char c); /* character class function */ static int get_next_state(int, char, int *); /* state machine function */ static int iskeyword(char * kw_lexeme); /*keywords lookup functuion */ static long atool(char * lexeme); /* converts octal string to decimal value */ int scanner_init(Buffer * sc_buf) { - if(b_isempty(sc_buf)) return EXIT_FAILURE;/*1*/ + if (b_isempty(sc_buf)) return EXIT_FAILURE;/*1*/ /* in case the buffer has been read previously */ b_setmark(sc_buf, 0); b_retract_to_mark(sc_buf); @@ -68,272 +68,301 @@ int scanner_init(Buffer * sc_buf) { Token malar_next_token(Buffer * sc_buf) { - Token t; /* token to return after recognition */ - unsigned char c; /* input symbol */ - int state = 0; /* initial state of the FSM */ - short lexstart; /*start offset of a lexeme in the input buffer */ - short lexend; /*end offset of a lexeme in the input buffer */ - int accept = NOAS; /* type of state - initially not accepting */ -/* -lexstart is the offset from the beginning of the char buffer of the -input buffer (sc_buf) to the first character of the current lexeme, -which is being processed by the scanner. -lexend is the offset from the beginning of the char buffer of the -input buffer (sc_buf) to the last character of the current lexeme, -which is being processed by the scanner. + Token t; /* token to return after recognition */ + unsigned char c; /* input symbol */ + int state = 0; /* initial state of the FSM */ + short lexstart; /*start offset of a lexeme in the input buffer */ + short lexend; /*end offset of a lexeme in the input buffer */ + int accept = NOAS; /* type of state - initially not accepting */ + /* + lexstart is the offset from the beginning of the char buffer of the + input buffer (sc_buf) to the first character of the current lexeme, + which is being processed by the scanner. + lexend is the offset from the beginning of the char buffer of the + input buffer (sc_buf) to the last character of the current lexeme, + which is being processed by the scanner. -*/ - - - //DECLARE YOUR VARIABLES HERE IF NEEDED - int i; /* Counter for loop in string error case */ - static int str_offset = 0; - - if (sc_buf == NULL) { - return aa_func12("RUN TIME ERROR"); /* WHOOPS */ - } - - while (1){ /* endless loop broken by token returns it will generate a warning */ - - //GET THE NEXT SYMBOL FROM THE INPUT BUFFER - + */ + + + //DECLARE YOUR VARIABLES HERE IF NEEDED + int i; /* Counter for loop in string error case */ + static int str_offset = 0; + + if (sc_buf == NULL) { + scerrnum = 1; + return aa_table[ES]("RUN TIME ERROR"); /* WHOOPS */ + } + + while (1) { /* endless loop broken by token returns it will generate a warning */ + + /* GET THE NEXT SYMBOL FROM THE INPUT BUFFER */ + + c = b_getc(sc_buf); + + switch (c) { + case 255: t.code = SEOF_T; return t; /* EOF */ + case '\0': t.code = SEOF_T; return t; /* Source EOF */ + case '\n': line++; continue; /* Ignore new line, increment line count */ + case '\r': line++; continue; /* CR, increment line count*/ + case ' ': continue; /* Ignore white space */ + case '\t': continue; /* Ignore tabs */ + case ';': t.code = EOS_T; return t; /* End of statement */ + case ',': t.code = COM_T; return t; /* Comma */ + case '{': t.code = LBR_T; return t; /* Left brace */ + case '}': t.code = RBR_T; return t; /* Right brace */ + case '(': t.code = LPR_T; return t; /* Left parenthesis */ + case ')': t.code = RPR_T; return t; /* Right parenthesis */ + case '+': t.code = ART_OP_T; t.attribute.arr_op = PLUS; return t; /* Addition operator */ + case '-': t.code = ART_OP_T; t.attribute.arr_op = MINUS; return t; /* Substraction operator */ + case '*': t.code = ART_OP_T; t.attribute.arr_op = MULT; return t; /* Multiplication operator */ + case '/': t.code = ART_OP_T; t.attribute.arr_op = DIV; return t; /* Devision operator */ + case '>': t.code = REL_OP_T; t.attribute.rel_op = GT; return t; /* Greater-than relational operator */ + case '<': c = b_getc(sc_buf); - switch (c) { - case 255: t.code = SEOF_T; return t; /* EOF */ - case '\0': t.code = SEOF_T; return t; /* Source EOF */ - case '\n': line++; continue; /* Ignore new line, increment line count */ - case '\r': line++; continue; /* CR, increment line count*/ - case ' ': continue; /* Ignore white space */ - case ';': t.code = EOS_T; return t; /* End of statement */ - case ',': t.code = COM_T; return t; /* Comma */ - case '{': t.code = RBR_T; return t; /* Right brace */ - case '}': t.code = LBR_T; return t; /* Left brace */ - case '(': t.code = RPR_T; return t; /* Right parenthesis */ - case ')': t.code = LPR_T; return t; /* Left parenthesis */ - case '+': t.code = ART_OP_T; t.attribute.arr_op = PLUS; return t; /* Addition operator */ - case '-': t.code = ART_OP_T; t.attribute.arr_op = MINUS; return t; /* Substraction operator */ - case '*': t.code = ART_OP_T; t.attribute.arr_op = MULT; return t; /* Multiplication operator */ - case '/': t.code = ART_OP_T; t.attribute.arr_op = DIV; return t; /* Devision operator */ - case '>': t.code = REL_OP_T; t.attribute.rel_op = GT; return t; /* Greater-than relational operator */ - case '<': - /* MSVC will complain about this assignment inside a conditional expression*/ - if (c = b_getc(sc_buf) == '>') { - t.code = REL_OP_T; - t.attribute.rel_op = NE; /* Negation operator */ - return t; - } - else if (c == '<') { - t.code = SCC_OP_T; /* String concatenation operator */ - } - else { - t.code = REL_OP_T; - t.attribute.rel_op = LT; /* Less-than operator */ - } + if (c == '>') { + t.code = REL_OP_T; + t.attribute.rel_op = NE; /* Negation operator */ + return t; + } + else if (c == '<') { + t.code = SCC_OP_T; /* String concatenation operator */ + } + else { + t.code = REL_OP_T; + t.attribute.rel_op = LT; /* Less-than operator */ + } + b_retract(sc_buf); + /*c = b_getc(sc_buf);*/ + return t; + case '.': + b_setmark(sc_buf, b_getcoffset(sc_buf)); /* Set mark before continuing (AND|OR case) */ + c = b_getc(sc_buf); + if (c == 'A' && b_getc(sc_buf) == 'N' && b_getc(sc_buf) == 'D' && b_getc(sc_buf) == '.') { + t.code = LOG_OP_T; + t.attribute.log_op = AND; + return t; + } + else if (c == 'O' && b_getc(sc_buf) == 'R' && b_getc(sc_buf) == '.') { + t.code = LOG_OP_T; + t.attribute.log_op = OR; + return t; + } + t.code = ERR_T; /* "That character's not supposed to be here" case */ + t.attribute.err_lex[0] = '.'; + t.attribute.err_lex[1] = '\0'; + b_retract_to_mark(sc_buf); + return t; + case '!': + c = b_getc(sc_buf); + if (c == '<') { /* It's a comment line */ + for (; c != '\0' && c != '\r' && c != '\n' && c != 255; c = b_getc(sc_buf)); /* Consume chars until line ends */ + line++; + continue; + } + else { /* Bad character, pump out an error token */ + b_retract(sc_buf); + b_retract(sc_buf); + t = aa_table[ES](" "); + t.attribute.err_lex[0] = c = b_getc(sc_buf); + t.attribute.err_lex[1] = c = b_getc(sc_buf); b_retract(sc_buf); - c = b_getc(sc_buf); return t; - case '.': - b_setmark(sc_buf, b_getcoffset(sc_buf)); /* Set mark before continuing (AND|OR case) */ - if (c == 'A' && b_getc(sc_buf) == 'N' && b_getc(sc_buf) == 'D' && b_getc(sc_buf) == '.') { - t.code = LOG_OP_T; - t.attribute.log_op = AND; - return t; - } - else if (c == 'O' && b_getc(sc_buf) == 'R' && b_getc(sc_buf) == '.') { - t.code = LOG_OP_T; - t.attribute.log_op = OR; - } - t.code = ERR_T; /* "That character's not supposed to be here" case */ - t.attribute.err_lex[0] = '.'; - t.attribute.err_lex[1] = '\0'; - b_retract_to_mark(sc_buf); + } + case '=': + c = b_getc(sc_buf); + if (c == '=') { /* Relational equals-to operator */ + t.code = REL_OP_T; + t.attribute.rel_op = EQ; return t; - case '!': - c = b_getc(sc_buf); - if (c == '<') { /* It's a comment line */ - for (; c != '\0' && c != '\r' && c != '\n' && c != 255; c = b_getc(sc_buf)); /* Consume chars until line ends */ + } + b_retract(sc_buf); + t.code = ASS_OP_T; /* Assignment operator */ + return t; + case '\"': /* Don't quote me on this */ + c = b_getc(sc_buf); + t.code = STR_T; /* String literal */ + b_setmark(sc_buf, b_getcoffset(sc_buf)); + lexstart = (short)str_offset; + lexend = lexstart; + for (; c != '\"'; c = b_getc(sc_buf)) { + b_addc(str_LTBL, c); + if (b_isfull(str_LTBL)) { + t = aa_table[ES]("\"There is always ..."); /* String too big :( */ + } + if (c == '\n' || c == '\r') { line++; - continue; } - else { /* Bad character, pump out an error token */ - t = aa_table[ES](" "); - t.attribute.err_lex[0] = c; + if (c == 255 || c == '\0') { + b_retract_to_mark(sc_buf); + t.code = ERR_T; /* Illegal string, make it an error token */ + for (i = 0; i < ERR_LEN; i++) { + t.attribute.err_lex[i] = b_getc(sc_buf); + } + t.attribute.err_lex[i] = '\0'; return t; } - case '=': - c = b_getc(sc_buf); - if (c == '=') { /* Relational equals-to operator */ - t.code = REL_OP_T; - t.attribute.rel_op = EQ; - } - b_retract(sc_buf); - t.code = ASS_OP_T; /* Assignment operator */ - return t; - case '\"': /* Don't quote me on this */ - c = b_getc(sc_buf); - b_setmark(sc_buf, b_getcoffset(sc_buf)); - lexstart = (short)str_offset; - lexend = lexstart; - for (; c != '\"'; c = b_getc(sc_buf)) { - b_addc(str_LTBL, c); - if (b_isfull(str_LTBL)) { - return aa_table[ES]("\"Imagine all the .."); /* String too big :( */ - } - if (c == '\n' || c == '\r') { - line++; - } - if (c == 255 || c == '\0') { - b_retract_to_mark(sc_buf); - for (i = 0; i < ERR_LEN; i++) { - t.attribute.err_lex[i] = b_getc(sc_buf); - } - } - lexend++; - str_offset++; - } /*end for loop, string finished*/ + lexend++; str_offset++; - b_addc(str_LTBL, '\0'); - - t.code = STR_T; - t.attribute.str_offset = lexstart; - return t; /* String literal */ - default: - if (isalnum(c) || isalpha(c)) { - lexend = 0; - state = 0; - lex_buf = b_create(1, 1, 'a'); + } /*end for loop, string finished*/ - while (accept == NOAS) { - b_addc(lex_buf, c); - state = get_next_state(state, c, &accept); + b_addc(str_LTBL, '\0'); + t.attribute.str_offset = lexstart; + + return t; + default: + if (isalpha(c) || isalnum(c)) { - if (accept != NOAS) - break; - c = b_getc(sc_buf); - lexend++; - } - /* Entering Accepting State */ - b_addc(lex_buf, '\0'); + /*Set mark to beginning of lexeme*/ + b_setmark(sc_buf, b_getcoffset(sc_buf) - 1); + lexstart = 0; + lexend = 0; + state = 0; - if (as_table[state] == ASWR) - b_retract(sc_buf); - if ((t.attribute.kwt_idx = iskeyword(b_setmark(lex_buf, 0))) != -1) { - t.code = KW_T; - b_free(lex_buf); - return t; - } + while (accept == NOAS) { + state = get_next_state(state, c, &accept); - if (aa_table[state] != NULL) { - t = aa_table[state](b_setmark(lex_buf, 0)); - } - else { - t = aa_table[ES]("RUN TIME ERROR"); - } + if (accept != NOAS) { break; } + + c = b_getc(sc_buf); + } + + /* + * Entering Accepting State + */ + + if (as_table[state] == ASWR) + b_retract(sc_buf); + + /* Get start/end of lexeme */ + lexstart = b_mark(sc_buf); + lexend = b_getcoffset(sc_buf); + lex_buf = b_create(1, 1, 'a'); + + b_retract_to_mark(sc_buf); + for (; lexstart < lexend; lexstart++) { + b_addc(lex_buf, b_getc(sc_buf)); + } + b_addc(lex_buf, '\0'); + /*if ((t.attribute.kwt_idx = iskeyword(b_setmark(lex_buf, 0))) != -1) { + t.code = KW_T; b_free(lex_buf); - } + return t; + }*/ - else { - t = aa_table[ES](" "); - t.attribute.err_lex[0] = c; + if (aa_table[state] != NULL) { + t = aa_table[state](b_setmark(lex_buf, 0)); } + else { + scerrnum = 1; + t = aa_table[ES]("RUN TIME ERROR"); + } + b_free(lex_buf); } - -///* special cases or token driven processing */ -// -//WRITE YOUR CODE FOR PROCESSING THE SPECIAL CASES HERE. -//COMMENTS AND STRING LITERALS ARE ALSO PROCESSED HERE. -// -//WHAT FOLLOWS IS A PSEUDO CODE. YOU CAN USE switch STATEMENT -//INSTEAD OF if-else TO PROCESS THE SPECIAL CASES -//DO NOT FORGET TO COUNT THE PROGRAM LINES -// -// -// IF (c == SOME CHARACTER) -// ... -// SKIP CHARACTER (FOR EXAMPLE SPACE) -// continue; -// OR SET TOKEN (SET TOKEN CODE AND TOKEN ATTRIBUTE(IF AVAILABLE)) -// return t; -// EXAMPLE: -// if (c == ' ') continue; -// if (c == '{'){ t.code = RBR_T; /*no attribute */ return t; -// if (c == '+'){ t.code = ART_OP_T; t.attribute.arr_op = PLUS */ return t; -// ... -// -// IF (c == '.') TRY TO PROCESS .AND. or .OR. -// IF SOMETHING ELSE FOLLOWS . OR THE LAST . IS MISSING -// RETURN AN ERROR TOKEN -// IF (c == '!') TRY TO PROCESS COMMENT -// IF THE FOLLOWING IS NOT CHAR IS NOT < REPORT AN ERROR -// ELSE IN A LOOP SKIP CHARACTERS UNTIL line terminator is found THEN continue; -// ... -// IF STRING (FOR EXAMPLE, "text") IS FOUND -// SET MARK TO MARK THE BEGINNING OF THE STRING -// IF THE STRING IS LEGAL -// USING b_addc(..)COPY THE text FROM INPUT BUFFER INTO str_LTBL -// ADD '\0' at the end make the string C-type string -// SET STRING TOKEN -// (the attribute of the string token is the offset from -// the beginning of the str_LTBL char buffer to the beginning -// of the string (TEXT in the example)) -// -// return t; -// ELSE -// THE STRING LITERAL IS ILLEGAL -// SET ERROR TOKEN FOR ILLEGAL STRING (see assignment) -// DO NOT STORE THE ILLEGAL STRINg IN THE str_LTBL -// -// return t; -// -// IF(c == ANOTHER CHARACTER) -// SET TOKEN -// return t; -/* Process state transition table */ - - //IF (c is a digit OR c is a letter){ - // - //SET THE MARK AT THE BEGINING OF THE LEXEME - //b_setmark(sc_buf,forward); - // .... - //CODE YOUR FINATE STATE MACHINE HERE (FSM or DFA) - //IT IMPLEMENTS THE FOLLOWING ALGORITHM: - // - //FSM0. Begin with state = 0 and the input character c - //FSM1. Get the next state from the transition table calling - // state = get_next_state(state, c, &accept); - //FSM2. Get the next character - //FSM3. If the state is not accepting (accept == NOAS), go to step FSM1 - // If the step is accepting, token is found, leave the machine and - // call an accepting function as described below. - // - // - //RETRACT getc_offset IF THE FINAL STATE IS A RETRACTING FINAL STATE - //GET THE BEGINNING AND THE END OF THE LEXEME - //lexstart = b_getmark(sc_buf); - //SET lexend TO getc_offset USING AN APPROPRIATE BUFFER FUNCTION - //CREATE A TEMPORRARY LEXEME BUFFER HERE; - //lex_buf = b_create(...); - // . RETRACT getc_offset to the MARK SET PREVIOUSLY AT THE BEGINNING OF THE LEXEME AND - // . USING b_getc() COPY THE LEXEME BETWEEN lexstart AND lexend FROM THE INPUT BUFFER INTO lex_buf USING b_addc(...), - // . WHEN VID (KEYWORDS INCLUDED), FPL OR IL IS RECOGNIZED - // . YOU MUST CALL THE ACCEPTING FUNCTION USING THE ARRAY aa_table ,WHICH - // . CONTAINS POINTERS TO FUNCTIONS. THE ARRAY INDEX OF THE FUNCTION TO BE - // . CALLED IS STORED IN THE VARIABLE state. - // . YOU ARE NOT ALLOWED TO CALL ANY OF THE ACCEPTING FUNCTIONS BY NAME. - // . THE ARGUMENT TO THE FUNCTION IS THE STRING STORED IN lex_buf. - // .... - // b_free(lex_buf); - // return t; - // - // CHECK OTHER CHARS HERE if NEEDED, SET A TOKEN AND RETURN IT. - // FOR ILLEGAL CHARACTERS SET ERROR TOKEN. - // THE ILLEGAL CHAR IS THE ATTRIBUTE OF THE ERROR TOKEN - // IN A CASE OF RUNTIME ERROR, THE FUNCTION MUST STORE - // A NON-NEGATIVE NUMBER INTO THE GLOBAL VARIABLE scerrnum - // AND RETURN AN ERROR TOKEN. THE ERROR TOKEN ATTRIBUTE MUST - // BE THE STRING "RUN TIME ERROR: " - }//end while(1) + else { + t = aa_table[ES](" "); + t.attribute.err_lex[0] = c; + } + return t; + } + + + /* special cases or token driven processing */ + // + //WRITE YOUR CODE FOR PROCESSING THE SPECIAL CASES HERE. + //COMMENTS AND STRING LITERALS ARE ALSO PROCESSED HERE. + // + //WHAT FOLLOWS IS A PSEUDO CODE. YOU CAN USE switch STATEMENT + //INSTEAD OF if-else TO PROCESS THE SPECIAL CASES + //DO NOT FORGET TO COUNT THE PROGRAM LINES + // + // + // IF (c == SOME CHARACTER) + // ... + // SKIP CHARACTER (FOR EXAMPLE SPACE) + // continue; + // OR SET TOKEN (SET TOKEN CODE AND TOKEN ATTRIBUTE(IF AVAILABLE)) + // return t; + // EXAMPLE: + // if (c == ' ') continue; + // if (c == '{'){ t.code = RBR_T; /*no attribute */ return t; + // if (c == '+'){ t.code = ART_OP_T; t.attribute.arr_op = PLUS */ return t; + // ... + // + // IF (c == '.') TRY TO PROCESS .AND. or .OR. + // IF SOMETHING ELSE FOLLOWS . OR THE LAST . IS MISSING + // RETURN AN ERROR TOKEN + // IF (c == '!') TRY TO PROCESS COMMENT + // IF THE FOLLOWING IS NOT CHAR IS NOT < REPORT AN ERROR + // ELSE IN A LOOP SKIP CHARACTERS UNTIL line terminator is found THEN continue; + // ... + // IF STRING (FOR EXAMPLE, "text") IS FOUND + // SET MARK TO MARK THE BEGINNING OF THE STRING + // IF THE STRING IS LEGAL + // USING b_addc(..)COPY THE text FROM INPUT BUFFER INTO str_LTBL + // ADD '\0' at the end make the string C-type string + // SET STRING TOKEN + // (the attribute of the string token is the offset from + // the beginning of the str_LTBL char buffer to the beginning + // of the string (TEXT in the example)) + // + // return t; + // ELSE + // THE STRING LITERAL IS ILLEGAL + // SET ERROR TOKEN FOR ILLEGAL STRING (see assignment) + // DO NOT STORE THE ILLEGAL STRINg IN THE str_LTBL + // + // return t; + // + // IF(c == ANOTHER CHARACTER) + // SET TOKEN + // return t; + /* Process state transition table */ + + //IF (c is a digit OR c is a letter){ + // + //SET THE MARK AT THE BEGINING OF THE LEXEME + //b_setmark(sc_buf,forward); + // .... + //CODE YOUR FINATE STATE MACHINE HERE (FSM or DFA) + //IT IMPLEMENTS THE FOLLOWING ALGORITHM: + // + //FSM0. Begin with state = 0 and the input character c + //FSM1. Get the next state from the transition table calling + // state = get_next_state(state, c, &accept); + //FSM2. Get the next character + //FSM3. If the state is not accepting (accept == NOAS), go to step FSM1 + // If the step is accepting, token is found, leave the machine and + // call an accepting function as described below. + // + // + //RETRACT getc_offset IF THE FINAL STATE IS A RETRACTING FINAL STATE + //GET THE BEGINNING AND THE END OF THE LEXEME + //lexstart = b_getmark(sc_buf); + //SET lexend TO getc_offset USING AN APPROPRIATE BUFFER FUNCTION + //CREATE A TEMPORRARY LEXEME BUFFER HERE; + //lex_buf = b_create(...); + // . RETRACT getc_offset to the MARK SET PREVIOUSLY AT THE BEGINNING OF THE LEXEME AND + // . USING b_getc() COPY THE LEXEME BETWEEN lexstart AND lexend FROM THE INPUT BUFFER INTO lex_buf USING b_addc(...), + // . WHEN VID (KEYWORDS INCLUDED), FPL OR IL IS RECOGNIZED + // . YOU MUST CALL THE ACCEPTING FUNCTION USING THE ARRAY aa_table ,WHICH + // . CONTAINS POINTERS TO FUNCTIONS. THE ARRAY INDEX OF THE FUNCTION TO BE + // . CALLED IS STORED IN THE VARIABLE state. + // . YOU ARE NOT ALLOWED TO CALL ANY OF THE ACCEPTING FUNCTIONS BY NAME. + // . THE ARGUMENT TO THE FUNCTION IS THE STRING STORED IN lex_buf. + // .... + // b_free(lex_buf); + // return t; + // + // CHECK OTHER CHARS HERE if NEEDED, SET A TOKEN AND RETURN IT. + // FOR ILLEGAL CHARACTERS SET ERROR TOKEN. + // THE ILLEGAL CHAR IS THE ATTRIBUTE OF THE ERROR TOKEN + // IN A CASE OF RUNTIME ERROR, THE FUNCTION MUST STORE + // A NON-NEGATIVE NUMBER INTO THE GLOBAL VARIABLE scerrnum + // AND RETURN AN ERROR TOKEN. THE ERROR TOKEN ATTRIBUTE MUST + // BE THE STRING "RUN TIME ERROR: " + }//end while(1) } @@ -347,37 +376,37 @@ int get_next_state(int state, char c, int *accept) col = char_class(c); next = st_table[state][col]; #ifdef DEBUG -printf("Input symbol: %c Row: %d Column: %d Next: %d \n",c,state,col,next); + printf("Input symbol: %c Row: %d Column: %d Next: %d \n", c, state, col, next); #endif -/* -The assert(int test) macro can be used to add run-time diagnostic to programs -and to "defend" from producing unexpected results. -assert() is a macro that expands to an if statement; -if test evaluates to false (zero) , assert aborts the program -(by calling abort()) and sends the following message on stderr: + /* + The assert(int test) macro can be used to add run-time diagnostic to programs + and to "defend" from producing unexpected results. + assert() is a macro that expands to an if statement; + if test evaluates to false (zero) , assert aborts the program + (by calling abort()) and sends the following message on stderr: -Assertion failed: test, file filename, line linenum + Assertion failed: test, file filename, line linenum -The filename and linenum listed in the message are the source file name -and line number where the assert macro appears. -If you place the #define NDEBUG directive ("no debugging") -in the source code before the #include directive, -the effect is to comment out the assert statement. -*/ - assert(next != IS); + The filename and linenum listed in the message are the source file name + and line number where the assert macro appears. + If you place the #define NDEBUG directive ("no debugging") + in the source code before the #include directive, + the effect is to comment out the assert statement. + */ + assert(next != IS); -/* -The other way to include diagnostics in a program is to use -conditional preprocessing as shown bellow. It allows the programmer -to send more details describing the run-time problem. -Once the program is tested thoroughly #define DEBUG is commented out -or #undef DEBUF is used - see the top of the file. -*/ + /* + The other way to include diagnostics in a program is to use + conditional preprocessing as shown bellow. It allows the programmer + to send more details describing the run-time problem. + Once the program is tested thoroughly #define DEBUG is commented out + or #undef DEBUF is used - see the top of the file. + */ #ifdef DEBUG - if(next == IS){ - printf("Scanner Error: Illegal state:\n"); - printf("Input symbol: %c Row: %d Column: %d\n",c,state,col); - exit(1); + if (next == IS) { + printf("Scanner Error: Illegal state:\n"); + printf("Input symbol: %c Row: %d Column: %d\n", c, state, col); + exit(1); } #endif *accept = as_table[next]; @@ -387,6 +416,7 @@ or #undef DEBUF is used - see the top of the file. int char_class(char c) { int val; + if (isalpha(c)) val = 0; else if (c == '0') @@ -415,22 +445,32 @@ REPLACE XX WITH THE CORRESPONDING ACCEPTING STATE NUMBER */ Token aa_func02(char lexeme[]) { - unsigned int kw_i; /* Variable to contain keyword table index */ + unsigned int i, kw_idx; /* Variable to contain keyword table index */ Token t; char* temp_str; - if ((kw_i = iskeyword(lexeme)) > -1) { /* Keyword check */ +#ifdef DEBUG + printf("Lexeme: '%s'\n size of: %lu\n", lexeme, sizeof(lexeme)); +#endif + + kw_idx = iskeyword(lexeme); + if (kw_idx != -1) { /* Keyword check */ t.code = KW_T; - t.attribute.kwt_idx = kw_i; + t.attribute.kwt_idx = kw_idx; return t; } /* Not a keyword? Must be AVID*/ if ((temp_str = (char*)calloc(VID_LEN + 1, sizeof(char))) == NULL) { return aa_table[ES]("RUN TIME ERROR"); } - strncpy(temp_str, lexeme, VID_LEN); + + for (i = 0; i < (VID_LEN) && i < strlen(lexeme); i++) { + temp_str[i] = lexeme[i]; + }/* + temp_str[strlen(temp_str)] = '\0';*/ strncpy(t.attribute.vid_lex, temp_str, VID_LEN); + t.attribute.vid_lex[strlen(temp_str)] = '\0'; free(temp_str); switch (lexeme[0]) { /* Read first character of lexeme for implicit type (not used yet?)*/ @@ -444,7 +484,7 @@ Token aa_func02(char lexeme[]) { /* Floating point*/ break; } - + t.code = AVID_T; return t; /* @@ -468,15 +508,21 @@ REPLACE XX WITH THE CORRESPONDING ACCEPTING STATE NUMBER */ Token aa_func03(char lexeme[]) { Token t; + unsigned int i; char* temp_str; if ((temp_str = (char*)calloc(VID_LEN + 2, sizeof(char))) == NULL) { return aa_table[ES]("RUN TIME ERROR"); } - strncpy(temp_str, lexeme, VID_LEN); - temp_str[strlen(temp_str)] = '#'; /* Add# to end of the SVID */ + for (i = 0; i < (VID_LEN) && i < strlen(lexeme); i++) { + temp_str[i] = lexeme[i]; + } + + temp_str[strlen(temp_str) - 1] = '#'; /* Add# to end of the SVID */ + temp_str[strlen(temp_str)] = '\0'; strncpy(t.attribute.vid_lex, temp_str, VID_LEN); + t.attribute.vid_lex[strlen(temp_str)] = '\0'; free(temp_str); t.code = SVID_T; @@ -506,7 +552,6 @@ Token aa_func05(char lexeme[]) { } t.code = INL_T; t.attribute.int_value = temp_num; - return t; /* THE FUNCTION MUST CONVERT THE LEXEME REPRESENTING A DECIMAL CONSTANT @@ -532,12 +577,13 @@ Token aa_func08(char lexeme[]) { } temp_dbl = atof(lexeme); - +#ifdef DEBUG + printf("Lexeme: '%s' | FLT value: %f \n", lexeme, temp_dbl); +#endif if ((temp_dbl > FLT_MAX) || (temp_dbl < 0)) { t = aa_table[ES](lexeme); } t.attribute.flt_value = (float)temp_dbl; - return t; /* THE FUNCTION MUST CONVERT THE LEXEME TO A FLOATING POINT VALUE, @@ -592,16 +638,12 @@ err_lex C-type string. /*ACCEPTING FUNCTION FOR THE ERROR TOKEN */ -Token aa_func12(char lexeme[]) { +Token aa_func13(char lexeme[]) { Token t; unsigned int i; - t.code = ERR_T; - - for (i = 0; i < (ERR_LEN - 1) && i < strlen(lexeme); i++) { + for (i = 0; i < (ERR_LEN - 1) && i < strlen(lexeme); i++) t.attribute.err_lex[i] = lexeme[i]; - } - t.attribute.err_lex[i] = '\0'; return t; @@ -641,7 +683,7 @@ int iskeyword(char * kw_lexeme) { if (kw_lexeme == NULL) return -1; for (i = 0; i < KWT_SIZE; i++) { - if (strcmp(kw_table[i], kw_lexeme) == 0) return i; + if (strcmp(kw_table[i], kw_lexeme) == 0) { return i; } } return -1; } \ No newline at end of file diff --git a/table.h b/table.h index f0c6300..11d3208 100755 --- a/table.h +++ b/table.h @@ -34,21 +34,17 @@ * .AND., .OR. , SEOF, 'wrong symbol', */ - - //REPLACE *ESN* WITH YOUR ERROR STATE NUMBER -#define ES 12 /* Error state */ +#define ES 13 /* Error state */ #define IS -1 /* Invalid state */ /* State transition table definition */ -//REPLACE *CN* WITH YOUR COLUMN NUMBER - #define TABLE_COLUMNS 7 /*transition table - type of states defined in separate table */ int st_table[][TABLE_COLUMNS] = { /* INPUT COLUMNS: - COLUMN # | 0 | 1 | 2 | 3 | 4 | 5 | 6 | - [a-zA-Z]| 0 |[1-7]|[8-9]| . | # | other + COLUMN # | 0 | 1 | 2 | 3 | 4 | 5 | 6 | + |[a-zA-Z]| 0 |[1-7]|[8-9]| . | # | other| */ /* State 0 */ {1, 6 , 4 , 4 , IS , IS , IS}, /* State 1 */ {1, 1 , 1 , 1 , ES , 3 , 2}, @@ -57,22 +53,18 @@ int st_table[][TABLE_COLUMNS] = { /* State 4 */ {ES, 4 , 4 , 4 , 7 , 5 , 5}, /* State 5 */ {IS, IS , IS, IS, IS , IS , IS}, /* State 6 */ {ES, 9 , 9, ES, 7 , ES , 5}, - /* State 7 */ {ES, 7 , 7, 7, ES , 8 , 8}, + /* State 7 */ {ES, 7 , 7, 7, 8 , 8 , 8}, /* State 8 */ {IS, IS , IS, IS, IS , IS , IS}, /* State 9 */ {ES, 9 , 9, ES, ES , ES , 10}, /* State 10 */ {IS, IS , IS, IS, IS , IS , IS}, /* State 11 */ {IS, IS , IS, IS, IS , IS , IS}, /* State 12 */ {IS, IS , IS, IS, IS , IS , IS}, /* State 13 */ {IS, IS , IS, IS, IS , IS , IS} - // - //. YOUR TABLE INITIALIZATION HERE - //. - ///* State N */ {YOUR INITIALIZATION}, + }; /* Accepting state table definition */ -//REPLACE *N1*, *N2*, and *N3* WITH YOUR NUMBERS -#define ASWR 2 /* accepting state with retract */ -#define ASNR 3 /* accepting state with no retract */ +#define ASWR 1 /* accepting state with retract */ +#define ASNR 2 /* accepting state with no retract */ #define NOAS 0 /* not accepting state */ int as_table[] = { @@ -96,19 +88,12 @@ int as_table[] = { /* Accepting action function declarations */ -//FOR EACH OF YOUR ACCEPTING STATES YOU MUST PROVIDE -//ONE FUNCTION PROTOTYPE. THEY ALL RETURN Token AND TAKE -//ONE ARGUMENT: A string REPRESENTING A TOKEN LEXEME. - -// Example: Token aa_funcXX(char *lexeme); - -Token aa_func02(char* lexeme); // VID AVID/KW -Token aa_func03(char *lexeme); // VID SVID -Token aa_func05(char *lexeme); // DIL -Token aa_func08(char *lexeme); // FPL -Token aa_func10(char *lexeme); // OIL -Token aa_func12(char *lexeme); // ES -//Replace XX with the number of the accepting state: 02, 03 and so on. +Token aa_func02(char* lexeme); /* AVID/KW */ +Token aa_func03(char* lexeme); /* SVID */ +Token aa_func05(char* lexeme); /* DIL */ +Token aa_func08(char* lexeme); /* FPL */ +Token aa_func10(char* lexeme); /* OIL */ +Token aa_func13(char* lexeme); /* ES */ /* defining a new type: pointer to function (of one char * argument) returning Token @@ -136,8 +121,8 @@ PTR_AAF aa_table[] = { /* State 9 */ NULL, /* State 10 */ aa_func10, /* State 11 */ NULL, - /* State 12 */ aa_func12, - /* State 13 */ NULL + /* State 12 */ NULL, + /* State 13 */ aa_func13 //HERE YOU MUST PROVIDE AN INITIALIZATION FOR AN ARRAY OF POINTERS //TO ACCEPTING FUNCTIONS. THE ARRAY HAS THE SAME SIZE AS as_table[ ].