diff --git a/PLATYPUS_Transition_Table.xlsx b/PLATYPUS_Transition_Table.xlsx index 469e3ff..fb1de27 100644 Binary files a/PLATYPUS_Transition_Table.xlsx and b/PLATYPUS_Transition_Table.xlsx differ diff --git a/scanner.c b/scanner.c index 64f5369..ce5c86d 100755 --- a/scanner.c +++ b/scanner.c @@ -35,6 +35,8 @@ #define DEBUG /* for conditional processing */ #undef DEBUG + + /* Global objects - variables */ /* This buffer is used as a repository for string literals. It is defined in platy_st.c */ @@ -84,14 +86,19 @@ which is being processed by the scanner. //DECLARE YOUR VARIABLES HERE IF NEEDED - + int i; /* Counter for loop in string error case */ + static int str_offset = 0; + + if (sc_buf == NULL) { + return aa_func12("RUN TIME ERROR"); /* WHOOPS */ + } while (1){ /* endless loop broken by token returns it will generate a warning */ //GET THE NEXT SYMBOL FROM THE INPUT BUFFER - c = b_getc(sc_buf); - switch (c) { + c = b_getc(sc_buf); + switch (c) { case 255: t.code = SEOF_T; return t; /* EOF */ case '\0': t.code = SEOF_T; return t; /* Source EOF */ case '\n': line++; continue; /* Ignore new line, increment line count */ @@ -125,111 +132,212 @@ which is being processed by the scanner. c = b_getc(sc_buf); return t; case '.': - b_setmark(sc_buf, b_getcoffset(sc_buf) - default: /* TODO: Do alpha [a-zA-Z] stuff here*/ - } + b_setmark(sc_buf, b_getcoffset(sc_buf)); /* Set mark before continuing (AND|OR case) */ + if (c == 'A' && b_getc(sc_buf) == 'N' && b_getc(sc_buf) == 'D' && b_getc(sc_buf) == '.') { + t.code = LOG_OP_T; + t.attribute.log_op = AND; + return t; + } + else if (c == 'O' && b_getc(sc_buf) == 'R' && b_getc(sc_buf) == '.') { + t.code = LOG_OP_T; + t.attribute.log_op = OR; + } + t.code = ERR_T; /* "That character's not supposed to be here" case */ + t.attribute.err_lex[0] = '.'; + t.attribute.err_lex[1] = '\0'; + b_retract_to_mark(sc_buf); + return t; + case '!': + c = b_getc(sc_buf); + if (c == '<') { /* It's a comment line */ + for (; c != '\0' && c != '\r' && c != '\n' && c != 255; c = b_getc(sc_buf)); /* Consume chars until line ends */ + line++; + continue; + } + else { /* Bad character, pump out an error token */ + t = aa_table[ES](" "); + t.attribute.err_lex[0] = c; + return t; + } + case '=': + c = b_getc(sc_buf); + if (c == '=') { /* Relational equals-to operator */ + t.code = REL_OP_T; + t.attribute.rel_op = EQ; + } + b_retract(sc_buf); + t.code = ASS_OP_T; /* Assignment operator */ + return t; + case '\"': /* Don't quote me on this */ + c = b_getc(sc_buf); + b_setmark(sc_buf, b_getcoffset(sc_buf)); + lexstart = (short)str_offset; + lexend = lexstart; + for (; c != '\"'; c = b_getc(sc_buf)) { + b_addc(str_LTBL, c); + if (b_isfull(str_LTBL)) { + return aa_table[ES]("\"Imagine all the .."); /* String too big :( */ + } + if (c == '\n' || c == '\r') { + line++; + } + if (c == 255 || c == '\0') { + b_retract_to_mark(sc_buf); + for (i = 0; i < ERR_LEN; i++) { + t.attribute.err_lex[i] = b_getc(sc_buf); + } + } + lexend++; + str_offset++; + } /*end for loop, string finished*/ + str_offset++; + b_addc(str_LTBL, '\0'); + + t.code = STR_T; + t.attribute.str_offset = lexstart; + return t; /* String literal */ + default: + if (isalnum(c) || isalpha(c)) { + lexend = 0; + state = 0; + lex_buf = b_create(1, 1, 'a'); + + while (accept == NOAS) { + b_addc(lex_buf, c); + state = get_next_state(state, c, &accept); + + if (accept != NOAS) + break; + c = b_getc(sc_buf); + lexend++; + } + /* Entering Accepting State */ + b_addc(lex_buf, '\0'); + + if (as_table[state] == ASWR) + b_retract(sc_buf); + if ((t.attribute.kwt_idx = iskeyword(b_setmark(lex_buf, 0))) != -1) { + t.code = KW_T; + b_free(lex_buf); + return t; + } + + if (aa_table[state] != NULL) { + t = aa_table[state](b_setmark(lex_buf, 0)); + } + else { + t = aa_table[ES]("RUN TIME ERROR"); + } + b_free(lex_buf); + } + + else { + t = aa_table[ES](" "); + t.attribute.err_lex[0] = c; + } + } -/* special cases or token driven processing */ - -WRITE YOUR CODE FOR PROCESSING THE SPECIAL CASES HERE. -COMMENTS AND STRING LITERALS ARE ALSO PROCESSED HERE. - -WHAT FOLLOWS IS A PSEUDO CODE. YOU CAN USE switch STATEMENT -INSTEAD OF if-else TO PROCESS THE SPECIAL CASES -DO NOT FORGET TO COUNT THE PROGRAM LINES - - - IF (c == SOME CHARACTER) - ... - SKIP CHARACTER (FOR EXAMPLE SPACE) - continue; - OR SET TOKEN (SET TOKEN CODE AND TOKEN ATTRIBUTE(IF AVAILABLE)) - return t; - EXAMPLE: - if (c == ' ') continue; - if (c == '{'){ t.code = RBR_T; /*no attribute */ return t; - if (c == '+'){ t.code = ART_OP_T; t.attribute.arr_op = PLUS */ return t; - ... - - IF (c == '.') TRY TO PROCESS .AND. or .OR. - IF SOMETHING ELSE FOLLOWS . OR THE LAST . IS MISSING - RETURN AN ERROR TOKEN - IF (c == '!') TRY TO PROCESS COMMENT - IF THE FOLLOWING IS NOT CHAR IS NOT < REPORT AN ERROR - ELSE IN A LOOP SKIP CHARACTERS UNTIL line terminator is found THEN continue; - ... - IF STRING (FOR EXAMPLE, "text") IS FOUND - SET MARK TO MARK THE BEGINNING OF THE STRING - IF THE STRING IS LEGAL - USING b_addc(..)COPY THE text FROM INPUT BUFFER INTO str_LTBL - ADD '\0' at the end make the string C-type string - SET STRING TOKEN - (the attribute of the string token is the offset from - the beginning of the str_LTBL char buffer to the beginning - of the string (TEXT in the example)) - - return t; - ELSE - THE STRING LITERAL IS ILLEGAL - SET ERROR TOKEN FOR ILLEGAL STRING (see assignment) - DO NOT STORE THE ILLEGAL STRINg IN THE str_LTBL - - return t; - - IF (c == ANOTHER CHARACTER) - SET TOKEN - return t; +///* special cases or token driven processing */ +// +//WRITE YOUR CODE FOR PROCESSING THE SPECIAL CASES HERE. +//COMMENTS AND STRING LITERALS ARE ALSO PROCESSED HERE. +// +//WHAT FOLLOWS IS A PSEUDO CODE. YOU CAN USE switch STATEMENT +//INSTEAD OF if-else TO PROCESS THE SPECIAL CASES +//DO NOT FORGET TO COUNT THE PROGRAM LINES +// +// +// IF (c == SOME CHARACTER) +// ... +// SKIP CHARACTER (FOR EXAMPLE SPACE) +// continue; +// OR SET TOKEN (SET TOKEN CODE AND TOKEN ATTRIBUTE(IF AVAILABLE)) +// return t; +// EXAMPLE: +// if (c == ' ') continue; +// if (c == '{'){ t.code = RBR_T; /*no attribute */ return t; +// if (c == '+'){ t.code = ART_OP_T; t.attribute.arr_op = PLUS */ return t; +// ... +// +// IF (c == '.') TRY TO PROCESS .AND. or .OR. +// IF SOMETHING ELSE FOLLOWS . OR THE LAST . IS MISSING +// RETURN AN ERROR TOKEN +// IF (c == '!') TRY TO PROCESS COMMENT +// IF THE FOLLOWING IS NOT CHAR IS NOT < REPORT AN ERROR +// ELSE IN A LOOP SKIP CHARACTERS UNTIL line terminator is found THEN continue; +// ... +// IF STRING (FOR EXAMPLE, "text") IS FOUND +// SET MARK TO MARK THE BEGINNING OF THE STRING +// IF THE STRING IS LEGAL +// USING b_addc(..)COPY THE text FROM INPUT BUFFER INTO str_LTBL +// ADD '\0' at the end make the string C-type string +// SET STRING TOKEN +// (the attribute of the string token is the offset from +// the beginning of the str_LTBL char buffer to the beginning +// of the string (TEXT in the example)) +// +// return t; +// ELSE +// THE STRING LITERAL IS ILLEGAL +// SET ERROR TOKEN FOR ILLEGAL STRING (see assignment) +// DO NOT STORE THE ILLEGAL STRINg IN THE str_LTBL +// +// return t; +// +// IF(c == ANOTHER CHARACTER) +// SET TOKEN +// return t; /* Process state transition table */ - IF (c is a digit OR c is a letter){ - - SET THE MARK AT THE BEGINING OF THE LEXEME - b_setmark(sc_buf,forward); - .... - CODE YOUR FINATE STATE MACHINE HERE (FSM or DFA) - IT IMPLEMENTS THE FOLLOWING ALGORITHM: - - FSM0. Begin with state = 0 and the input character c - FSM1. Get the next state from the transition table calling - state = get_next_state(state, c, &accept); - FSM2. Get the next character - FSM3. If the state is not accepting (accept == NOAS), go to step FSM1 - If the step is accepting, token is found, leave the machine and - call an accepting function as described below. - - - RETRACT getc_offset IF THE FINAL STATE IS A RETRACTING FINAL STATE - GET THE BEGINNING AND THE END OF THE LEXEME - lexstart = b_getmark(sc_buf); - SET lexend TO getc_offset USING AN APPROPRIATE BUFFER FUNCTION - CREATE A TEMPORRARY LEXEME BUFFER HERE; - lex_buf = b_create(...); - . RETRACT getc_offset to the MARK SET PREVIOUSLY AT THE BEGINNING OF THE LEXEME AND - . USING b_getc() COPY THE LEXEME BETWEEN lexstart AND lexend FROM THE INPUT BUFFER INTO lex_buf USING b_addc(...), - . WHEN VID (KEYWORDS INCLUDED), FPL OR IL IS RECOGNIZED - . YOU MUST CALL THE ACCEPTING FUNCTION USING THE ARRAY aa_table ,WHICH - . CONTAINS POINTERS TO FUNCTIONS. THE ARRAY INDEX OF THE FUNCTION TO BE - . CALLED IS STORED IN THE VARIABLE state. - . YOU ARE NOT ALLOWED TO CALL ANY OF THE ACCEPTING FUNCTIONS BY NAME. - . THE ARGUMENT TO THE FUNCTION IS THE STRING STORED IN lex_buf. - .... - b_free(lex_buf); - return t; - - CHECK OTHER CHARS HERE if NEEDED, SET A TOKEN AND RETURN IT. - FOR ILLEGAL CHARACTERS SET ERROR TOKEN. - THE ILLEGAL CHAR IS THE ATTRIBUTE OF THE ERROR TOKEN - IN A CASE OF RUNTIME ERROR, THE FUNCTION MUST STORE - A NON-NEGATIVE NUMBER INTO THE GLOBAL VARIABLE scerrnum - AND RETURN AN ERROR TOKEN. THE ERROR TOKEN ATTRIBUTE MUST - BE THE STRING "RUN TIME ERROR: " - }//end while(1) + //IF (c is a digit OR c is a letter){ + // + //SET THE MARK AT THE BEGINING OF THE LEXEME + //b_setmark(sc_buf,forward); + // .... + //CODE YOUR FINATE STATE MACHINE HERE (FSM or DFA) + //IT IMPLEMENTS THE FOLLOWING ALGORITHM: + // + //FSM0. Begin with state = 0 and the input character c + //FSM1. Get the next state from the transition table calling + // state = get_next_state(state, c, &accept); + //FSM2. Get the next character + //FSM3. If the state is not accepting (accept == NOAS), go to step FSM1 + // If the step is accepting, token is found, leave the machine and + // call an accepting function as described below. + // + // + //RETRACT getc_offset IF THE FINAL STATE IS A RETRACTING FINAL STATE + //GET THE BEGINNING AND THE END OF THE LEXEME + //lexstart = b_getmark(sc_buf); + //SET lexend TO getc_offset USING AN APPROPRIATE BUFFER FUNCTION + //CREATE A TEMPORRARY LEXEME BUFFER HERE; + //lex_buf = b_create(...); + // . RETRACT getc_offset to the MARK SET PREVIOUSLY AT THE BEGINNING OF THE LEXEME AND + // . USING b_getc() COPY THE LEXEME BETWEEN lexstart AND lexend FROM THE INPUT BUFFER INTO lex_buf USING b_addc(...), + // . WHEN VID (KEYWORDS INCLUDED), FPL OR IL IS RECOGNIZED + // . YOU MUST CALL THE ACCEPTING FUNCTION USING THE ARRAY aa_table ,WHICH + // . CONTAINS POINTERS TO FUNCTIONS. THE ARRAY INDEX OF THE FUNCTION TO BE + // . CALLED IS STORED IN THE VARIABLE state. + // . YOU ARE NOT ALLOWED TO CALL ANY OF THE ACCEPTING FUNCTIONS BY NAME. + // . THE ARGUMENT TO THE FUNCTION IS THE STRING STORED IN lex_buf. + // .... + // b_free(lex_buf); + // return t; + // + // CHECK OTHER CHARS HERE if NEEDED, SET A TOKEN AND RETURN IT. + // FOR ILLEGAL CHARACTERS SET ERROR TOKEN. + // THE ILLEGAL CHAR IS THE ATTRIBUTE OF THE ERROR TOKEN + // IN A CASE OF RUNTIME ERROR, THE FUNCTION MUST STORE + // A NON-NEGATIVE NUMBER INTO THE GLOBAL VARIABLE scerrnum + // AND RETURN AN ERROR TOKEN. THE ERROR TOKEN ATTRIBUTE MUST + // BE THE STRING "RUN TIME ERROR: " + }//end while(1) } -DO NOT MODIFY THE CODE OF THIS FUNCTION -YOU CAN REMOVE THE COMMENTS +/* DO NOT MODIFY THE CODE OF THIS FUNCTION +YOU CAN REMOVE THE COMMENTS */ int get_next_state(int state, char c, int *accept) { @@ -275,80 +383,133 @@ or #undef DEBUF is used - see the top of the file. return next; } -int char_class (char c) +int char_class(char c) { - int val; + int val; + if (isalpha(c)) + val = 0; + else if (c == '0') + val = 1; + else if (c > '0' && c < '8') + val = 2; + else if (c == '8' || c == '9') + val = 3; + else if (c == '.') + val = 4; + else if (c == '#') + val = 5; + else + val = 6; -THIS FUNCTION RETURNS THE COLUMN NUMBER IN THE TRANSITION -TABLE st_table FOR THE INPUT CHARACTER c. -SOME COLUMNS MAY REPRESENT A CHARACTER CLASS . -FOR EXAMPLE IF COLUMN 1 REPRESENTS [A-Z] -THE FUNCTION RETURNS 1 EVERY TIME c IS ONE -OF THE LETTERS A,B,...,Z. - - return val; + return val; } - -HERE YOU WRITE THE DEFINITIONS FOR YOUR ACCEPTING FUNCTIONS. +/* +HERE YOU WRITE THE DEFINITIONS FOR YOUR ACCEPTING FUNCTIONS. ************************************************************ ACCEPTING FUNCTION FOR THE arithmentic variable identifier AND keywords (VID - AVID/KW) REPLACE XX WITH THE CORRESPONDING ACCEPTING STATE NUMBER +*/ -Token aa_funcXX(char lexeme[]){ +Token aa_func02(char lexeme[]) { + unsigned int kw_i; /* Variable to contain keyword table index */ + Token t; + char* temp_str; -WHEN CALLED THE FUNCTION MUST -1. CHECK IF THE LEXEME IS A KEYWORD. - IF YES, IT MUST RETURN A TOKEN WITH THE CORRESPONDING ATTRIBUTE - FOR THE KEYWORD. THE ATTRIBUTE CODE FOR THE KEYWORD - IS ITS INDEX IN THE KEYWORD LOOKUP TABLE (kw_table in table.h). - IF THE LEXEME IS NOT A KEYWORD, GO TO STEP 2. + if ((kw_i = iskeyword(lexeme)) > -1) { /* Keyword check */ + t.code = KW_T; + t.attribute.kwt_idx = kw_i; + return t; + } + /* Not a keyword? Must be AVID*/ + if ((temp_str = (char*)calloc(VID_LEN + 1, sizeof(char))) == NULL) { + return aa_table[ES]("RUN TIME ERROR"); + } + strncpy(temp_str, lexeme, VID_LEN); -2. SET a AVID TOKEN. - IF THE lexeme IS LONGER than VID_LEN (see token.h) CHARACTERS, - ONLY FIRST VID_LEN CHARACTERS ARE STORED - INTO THE VARIABLE ATTRIBUTE ARRAY vid_lex[](see token.h) . - ADD \0 AT THE END TO MAKE A C-type STRING. - return t; + strncpy(t.attribute.vid_lex, temp_str, VID_LEN); + free(temp_str); + + switch (lexeme[0]) { /* Read first character of lexeme for implicit type (not used yet?)*/ + case 'i': + case 'o': + case 'd': + case 'n': + /* Integer */ + break; + default: + /* Floating point*/ + } + + return t; + + /* + WHEN CALLED THE FUNCTION MUST + 1. CHECK IF THE LEXEME IS A KEYWORD. + IF YES, IT MUST RETURN A TOKEN WITH THE CORRESPONDING ATTRIBUTE + FOR THE KEYWORD. THE ATTRIBUTE CODE FOR THE KEYWORD + IS ITS INDEX IN THE KEYWORD LOOKUP TABLE (kw_table in table.h). + IF THE LEXEME IS NOT A KEYWORD, GO TO STEP 2. + + 2. SET a AVID TOKEN. + IF THE lexeme IS LONGER than VID_LEN (see token.h) CHARACTERS, + ONLY FIRST VID_LEN CHARACTERS ARE STORED + INTO THE VARIABLE ATTRIBUTE ARRAY vid_lex[](see token.h) . + ADD \0 AT THE END TO MAKE A C-type STRING. + */ } - +/* ACCEPTING FUNCTION FOR THE string variable identifier (VID - SVID) REPLACE XX WITH THE CORRESPONDING ACCEPTING STATE NUMBER +*/ +Token aa_func03(char lexeme[]) { + Token t; + int offset; + int i; + char* temp_str; + if ((temp_str = (char*)calloc(VID_LEN + 2, sizeof(char))) == NULL) { + return aa_table[ES]("RUN TIME ERROR"); + } -Token aa_funcXX(char lexeme[]){ + strcpy(temp_str, lexeme, VID_LEN); + temp_str[strlen(temp_str)] = "#"; /* Append # to end of the SVID */ -WHEN CALLED THE FUNCTION MUST -1. SET a SVID TOKEN. - IF THE lexeme IS LONGER than VID_LEN characters, - ONLY FIRST VID_LEN-1 CHARACTERS ARE STORED - INTO THE VARIABLE ATTRIBUTE ARRAY vid_lex[], - AND THEN THE # CHARACTER IS APPENDED TO THE NAME. - ADD \0 AT THE END TO MAKE A C-type STRING. - - return t; + strncpy(t.attribute.vid_lex, temp_str, VID_LEN); + free(temp_str); + + t.code = SVID_T; + return t; + + /* + WHEN CALLED THE FUNCTION MUST + 1. SET a SVID TOKEN. + IF THE lexeme IS LONGER than VID_LEN characters, + ONLY FIRST VID_LEN-1 CHARACTERS ARE STORED + INTO THE VARIABLE ATTRIBUTE ARRAY vid_lex[], + AND THEN THE # CHARACTER IS APPENDED TO THE NAME. + ADD \0 AT THE END TO MAKE A C-type STRING. + */ + return t; } -ACCEPTING FUNCTION FOR THE floating-point literal (FPL) +/*ACCEPTING FUNCTION FOR THE integer literal(IL)-decimal constant(DIL)*/ -Token aa_funcXX(char lexeme[]){ +Token aa_func05(char lexeme[]) { + Token t; + long temp_num; -THE FUNCTION MUST CONVERT THE LEXEME TO A FLOATING POINT VALUE, -WHICH IS THE ATTRIBUTE FOR THE TOKEN. -THE VALUE MUST BE IN THE SAME RANGE AS the value of 4-byte float in C. -IN CASE OF ERROR (OUT OF RANGE) THE FUNCTION MUST RETURN ERROR TOKEN -THE ERROR TOKEN ATTRIBUTE IS lexeme. IF THE ERROR lexeme IS LONGER -than ERR_LEN characters, ONLY THE FIRST ERR_LEN-3 characters ARE -STORED IN err_lex. THEN THREE DOTS ... ARE ADDED TO THE END OF THE -err_lex C-type string. - return t; -} + temp_num = strtol(lexeme, NULL, 10); -ACCEPTING FUNCTION FOR THE integer literal(IL) - decimal constant (DIL) - -Token aa_funcXX(char lexeme[]){ + if (temp_num > SHRT_MAX || temp_num < 0) { + t = aa_table[ES](lexeme); + } + t.code = INL_T; + t.attribute.int_value = temp_num; + return t; + /* THE FUNCTION MUST CONVERT THE LEXEME REPRESENTING A DECIMAL CONSTANT TO A DECIMAL INTEGER VALUE, WHICH IS THE ATTRIBUTE FOR THE TOKEN. THE VALUE MUST BE IN THE SAME RANGE AS the value of 2-byte integer in C. @@ -356,14 +517,64 @@ IN CASE OF ERROR (OUT OF RANGE) THE FUNCTION MUST RETURN ERROR TOKEN THE ERROR TOKEN ATTRIBUTE IS lexeme. IF THE ERROR lexeme IS LONGER than ERR_LEN characters, ONLY THE FIRST ERR_LEN-3 characters ARE STORED IN err_lex. THEN THREE DOTS ... ARE ADDED TO THE END OF THE -err_lex C-type string. - return t; +err_lex C-type string. */ } -ACCEPTING FUNCTION FOR THE integer literal(IL) - octal constant (OIL) +/*ACCEPTING FUNCTION FOR THE floating - point literal (FPL)*/ -Token aa_funcXX(char lexeme[]){ +Token aa_func08(char lexeme[]) { + Token t; + double temp_dbl; + t.code = FPL_T; + if (strstr(lexeme, "0.0")) { + t.attribute.flt_value = 0.0f; + return t; + } + + temp_dbl = atof(lexeme); + + if ((temp_dbl > FLT_MAX) || (temp_dbl < 0)) { + t = aa_table[ES](lexeme); + } + t.attribute.flt_value = (float)temp_dbl; + + return t; + /* +THE FUNCTION MUST CONVERT THE LEXEME TO A FLOATING POINT VALUE, +WHICH IS THE ATTRIBUTE FOR THE TOKEN. +THE VALUE MUST BE IN THE SAME RANGE AS the value of 4-byte float in C. +IN CASE OF ERROR (OUT OF RANGE) THE FUNCTION MUST RETURN ERROR TOKEN +THE ERROR TOKEN ATTRIBUTE IS lexeme. IF THE ERROR lexeme IS LONGER +than ERR_LEN characters, ONLY THE FIRST ERR_LEN-3 characters ARE +STORED IN err_lex. THEN THREE DOTS ... ARE ADDED TO THE END OF THE +err_lex C-type string. */ +} + + + +/*ACCEPTING FUNCTION FOR THE integer literal(IL) - octal constant (OIL)*/ + +Token aa_func10(char lexeme[]) { + Token t; + int new_olval; + + if (strlen(lexeme) > INL_LEN + 1) { + t = aa_table[ES](lexeme); + } + + t.code = INL_T; + new_olval = atool(lexeme); + + if (new_olval < SHRT_MIN || new_olval > SHRT_MAX) { + t = aa_table[ES](lexeme); + } + + t.code = INL_T; + t.attribute.int_value = new_olval; + + return t; + /* THE FUNCTION MUST CONVERT THE LEXEME REPRESENTING AN OCTAL CONSTANT TO A DECIMAL INTEGER VALUE WHICH IS THE ATTRIBUTE FOR THE TOKEN. THE VALUE MUST BE IN THE SAME RANGE AS the value of 2-byte integer in C. @@ -376,35 +587,62 @@ IN CASE OF ERROR (OUT OF RANGE) THE FUNCTION MUST RETURN ERROR TOKEN THE ERROR TOKEN ATTRIBUTE IS lexeme. IF THE ERROR lexeme IS LONGER than ERR_LEN characters, ONLY THE FIRST ERR_LEN-3 characters ARE STORED IN err_lex. THEN THREE DOTS ... ARE ADDED TO THE END OF THE -err_lex C-type string. - - return t; +err_lex C-type string. +*/ } -ACCEPTING FUNCTION FOR THE ERROR TOKEN +/*ACCEPTING FUNCTION FOR THE ERROR TOKEN */ -Token aa_funcXX(char lexeme[]){ +Token aa_func12(char lexeme[]) { + Token t; + unsigned int i; + t.code = ERR_T; + + for (i = 0; i < (ERR_LEN - 1) && i < strlen(lexeme); i++) { + t.attribute.err_lex[i] = lexeme[i]; + } + + t.attribute.err_lex[i] = '\0'; + + return t; + /* THE FUNCTION SETS THE ERROR TOKEN. lexeme[] CONTAINS THE ERROR THE ATTRIBUTE OF THE ERROR TOKEN IS THE lexeme ITSELF AND IT MUST BE STORED in err_lex. IF THE ERROR lexeme IS LONGER than ERR_LEN characters, ONLY THE FIRST ERR_LEN-3 characters ARE STORED IN err_lex. THEN THREE DOTS ... ARE ADDED TO THE END OF THE -err_lex C-type string. - - return t; +err_lex C-type string. +*/ } -CONVERSION FUNCTION +/*CONVERSION FUNCTION*/ -long atool(char * lexeme){ +long atool(char * lexeme) { + int i, x = 1; + long result = 0; + for (i = strlen(lexeme); i > 0; i--, x *= 8) { + result += x*(lexeme[i - 1] - '0'); + } + return result; + /* THE FUNCTION CONVERTS AN ASCII STRING REPRESENTING AN OCTAL INTEGER CONSTANT TO INTEGER VALUE +*/ } -HERE YOU WRITE YOUR ADDITIONAL FUNCTIONS (IF ANY). -FOR EXAMPLE +/*HERE YOU WRITE YOUR ADDITIONAL FUNCTIONS (IF ANY). +FOR EXAMPLE*/ -int iskeyword(char * kw_lexeme){} \ No newline at end of file +int iskeyword(char * kw_lexeme) { + int i; + + if (kw_lexeme == NULL) return -1; + + for (i = 0; i < KWT_SIZE; i++) { + if (strcmp(kw_table[i], kw_lexeme) == 0) return i; + } + return -1; +} \ No newline at end of file diff --git a/table.h b/table.h index a82d70f..f0c6300 100755 --- a/table.h +++ b/table.h @@ -36,33 +36,34 @@ //REPLACE *ESN* WITH YOUR ERROR STATE NUMBER -#define ES -2 /* Error state */ -#define IS -1 /* Inavalid state */ +#define ES 12 /* Error state */ +#define IS -1 /* Invalid state */ /* State transition table definition */ //REPLACE *CN* WITH YOUR COLUMN NUMBER -#define TABLE_COLUMNS 14 +#define TABLE_COLUMNS 7 /*transition table - type of states defined in separate table */ int st_table[][TABLE_COLUMNS] = { /* INPUT COLUMNS: - [a-zA-Z]| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | . | # | other - */ - /* State 0 */ {1, 6 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , IS , IS ,IS}, - /* State 1 */ {1, 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , ES , 3 , 2}, - /* State 2 */ {IS, IS , IS, IS, IS, IS, IS, IS, IS, IS, IS, IS , IS , IS}, - /* State 3 */ {IS, IS , IS, IS, IS, IS, IS, IS, IS, IS, IS, IS , IS , IS}, - /* State 4 */ {ES, 4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , 7 , 5 , 5}, - /* State 5 */ {IS, IS , IS, IS, IS, IS, IS, IS, IS, IS, IS, IS , IS , IS}, - /* State 6 */ {ES, 9 , 9, 9, 9, 9, 9, 9, 9, ES, ES, 7 , ES , 5}, - /* State 7 */ {ES, 7 , 7, 7, 7, 7, 7, 7, 7, 7, 7, ES , 8 , 8}, - /* State 8 */ {IS, IS , IS, IS, IS, IS, IS, IS, IS, IS, IS, IS , IS , IS}, - /* State 9 */ {ES, 9 , 9, 9, 9, 9, 9, 9, 9, ES, ES, ES , ES , 10}, - /* State 10 */ {IS, IS , IS, IS, IS, IS, IS, IS, IS, IS, IS, IS , IS , IS}, - /* State 11 */ {ES, ES , ES, ES, ES, ES, ES, ES, ES, ES, ES, ES , ES , ES}, - /* State 12 */ {IS, IS , IS, IS, IS, IS, IS, IS, IS, IS, IS, IS , IS , IS}, - /* State 13 */ {IS, IS , IS, IS, IS, IS, IS, IS, IS, IS, IS, IS , IS , IS} + COLUMN # | 0 | 1 | 2 | 3 | 4 | 5 | 6 | + [a-zA-Z]| 0 |[1-7]|[8-9]| . | # | other + */ + /* State 0 */ {1, 6 , 4 , 4 , IS , IS , IS}, + /* State 1 */ {1, 1 , 1 , 1 , ES , 3 , 2}, + /* State 2 */ {IS, IS , IS, IS, IS , IS , IS}, + /* State 3 */ {IS, IS , IS, IS, IS , IS , IS}, + /* State 4 */ {ES, 4 , 4 , 4 , 7 , 5 , 5}, + /* State 5 */ {IS, IS , IS, IS, IS , IS , IS}, + /* State 6 */ {ES, 9 , 9, ES, 7 , ES , 5}, + /* State 7 */ {ES, 7 , 7, 7, ES , 8 , 8}, + /* State 8 */ {IS, IS , IS, IS, IS , IS , IS}, + /* State 9 */ {ES, 9 , 9, ES, ES , ES , 10}, + /* State 10 */ {IS, IS , IS, IS, IS , IS , IS}, + /* State 11 */ {IS, IS , IS, IS, IS , IS , IS}, + /* State 12 */ {IS, IS , IS, IS, IS , IS , IS}, + /* State 13 */ {IS, IS , IS, IS, IS , IS , IS} // //. YOUR TABLE INITIALIZATION HERE //. @@ -70,8 +71,8 @@ int st_table[][TABLE_COLUMNS] = { }; /* Accepting state table definition */ //REPLACE *N1*, *N2*, and *N3* WITH YOUR NUMBERS -#define ASWR 1 /* accepting state with retract */ -#define ASNR 2 /* accepting state with no retract */ +#define ASWR 2 /* accepting state with retract */ +#define ASNR 3 /* accepting state with no retract */ #define NOAS 0 /* not accepting state */ int as_table[] = { @@ -87,7 +88,7 @@ int as_table[] = { /* State 8 */ ASWR, /* State 9 */ NOAS, /* State 10 */ ASWR, - /* State 11 */ ASNR, + /* State 11 */ ASWR, /* State 12 */ ASNR, /* State 13 */ ASWR @@ -106,7 +107,7 @@ Token aa_func03(char *lexeme); // VID SVID Token aa_func05(char *lexeme); // DIL Token aa_func08(char *lexeme); // FPL Token aa_func10(char *lexeme); // OIL -Token aa_func11(char *lexeme); // ES +Token aa_func12(char *lexeme); // ES //Replace XX with the number of the accepting state: 02, 03 and so on. /* defining a new type: pointer to function (of one char * argument) @@ -134,9 +135,9 @@ PTR_AAF aa_table[] = { /* State 8 */ aa_func08, /* State 9 */ NULL, /* State 10 */ aa_func10, - /* State 11 */ aa_func11, - /* State 12 */ aa_func11, - /* State 13 */ aa_func11 + /* State 11 */ NULL, + /* State 12 */ aa_func12, + /* State 13 */ NULL //HERE YOU MUST PROVIDE AN INITIALIZATION FOR AN ARRAY OF POINTERS //TO ACCEPTING FUNCTIONS. THE ARRAY HAS THE SAME SIZE AS as_table[ ].