State functions, minimize columns in transition table
This commit is contained in:
parent
ab2164b431
commit
e52a190a50
Binary file not shown.
576
scanner.c
576
scanner.c
|
@ -35,6 +35,8 @@
|
||||||
#define DEBUG /* for conditional processing */
|
#define DEBUG /* for conditional processing */
|
||||||
#undef DEBUG
|
#undef DEBUG
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/* Global objects - variables */
|
/* Global objects - variables */
|
||||||
/* This buffer is used as a repository for string literals.
|
/* This buffer is used as a repository for string literals.
|
||||||
It is defined in platy_st.c */
|
It is defined in platy_st.c */
|
||||||
|
@ -84,14 +86,19 @@ which is being processed by the scanner.
|
||||||
|
|
||||||
|
|
||||||
//DECLARE YOUR VARIABLES HERE IF NEEDED
|
//DECLARE YOUR VARIABLES HERE IF NEEDED
|
||||||
|
int i; /* Counter for loop in string error case */
|
||||||
|
static int str_offset = 0;
|
||||||
|
|
||||||
|
if (sc_buf == NULL) {
|
||||||
|
return aa_func12("RUN TIME ERROR"); /* WHOOPS */
|
||||||
|
}
|
||||||
|
|
||||||
while (1){ /* endless loop broken by token returns it will generate a warning */
|
while (1){ /* endless loop broken by token returns it will generate a warning */
|
||||||
|
|
||||||
//GET THE NEXT SYMBOL FROM THE INPUT BUFFER
|
//GET THE NEXT SYMBOL FROM THE INPUT BUFFER
|
||||||
|
|
||||||
c = b_getc(sc_buf);
|
c = b_getc(sc_buf);
|
||||||
switch (c) {
|
switch (c) {
|
||||||
case 255: t.code = SEOF_T; return t; /* EOF */
|
case 255: t.code = SEOF_T; return t; /* EOF */
|
||||||
case '\0': t.code = SEOF_T; return t; /* Source EOF */
|
case '\0': t.code = SEOF_T; return t; /* Source EOF */
|
||||||
case '\n': line++; continue; /* Ignore new line, increment line count */
|
case '\n': line++; continue; /* Ignore new line, increment line count */
|
||||||
|
@ -125,111 +132,212 @@ which is being processed by the scanner.
|
||||||
c = b_getc(sc_buf);
|
c = b_getc(sc_buf);
|
||||||
return t;
|
return t;
|
||||||
case '.':
|
case '.':
|
||||||
b_setmark(sc_buf, b_getcoffset(sc_buf)
|
b_setmark(sc_buf, b_getcoffset(sc_buf)); /* Set mark before continuing (AND|OR case) */
|
||||||
default: /* TODO: Do alpha [a-zA-Z] stuff here*/
|
if (c == 'A' && b_getc(sc_buf) == 'N' && b_getc(sc_buf) == 'D' && b_getc(sc_buf) == '.') {
|
||||||
}
|
t.code = LOG_OP_T;
|
||||||
|
t.attribute.log_op = AND;
|
||||||
|
return t;
|
||||||
|
}
|
||||||
|
else if (c == 'O' && b_getc(sc_buf) == 'R' && b_getc(sc_buf) == '.') {
|
||||||
|
t.code = LOG_OP_T;
|
||||||
|
t.attribute.log_op = OR;
|
||||||
|
}
|
||||||
|
t.code = ERR_T; /* "That character's not supposed to be here" case */
|
||||||
|
t.attribute.err_lex[0] = '.';
|
||||||
|
t.attribute.err_lex[1] = '\0';
|
||||||
|
b_retract_to_mark(sc_buf);
|
||||||
|
return t;
|
||||||
|
case '!':
|
||||||
|
c = b_getc(sc_buf);
|
||||||
|
if (c == '<') { /* It's a comment line */
|
||||||
|
for (; c != '\0' && c != '\r' && c != '\n' && c != 255; c = b_getc(sc_buf)); /* Consume chars until line ends */
|
||||||
|
line++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
else { /* Bad character, pump out an error token */
|
||||||
|
t = aa_table[ES](" ");
|
||||||
|
t.attribute.err_lex[0] = c;
|
||||||
|
return t;
|
||||||
|
}
|
||||||
|
case '=':
|
||||||
|
c = b_getc(sc_buf);
|
||||||
|
if (c == '=') { /* Relational equals-to operator */
|
||||||
|
t.code = REL_OP_T;
|
||||||
|
t.attribute.rel_op = EQ;
|
||||||
|
}
|
||||||
|
b_retract(sc_buf);
|
||||||
|
t.code = ASS_OP_T; /* Assignment operator */
|
||||||
|
return t;
|
||||||
|
case '\"': /* Don't quote me on this */
|
||||||
|
c = b_getc(sc_buf);
|
||||||
|
b_setmark(sc_buf, b_getcoffset(sc_buf));
|
||||||
|
lexstart = (short)str_offset;
|
||||||
|
lexend = lexstart;
|
||||||
|
for (; c != '\"'; c = b_getc(sc_buf)) {
|
||||||
|
b_addc(str_LTBL, c);
|
||||||
|
if (b_isfull(str_LTBL)) {
|
||||||
|
return aa_table[ES]("\"Imagine all the .."); /* String too big :( */
|
||||||
|
}
|
||||||
|
if (c == '\n' || c == '\r') {
|
||||||
|
line++;
|
||||||
|
}
|
||||||
|
if (c == 255 || c == '\0') {
|
||||||
|
b_retract_to_mark(sc_buf);
|
||||||
|
for (i = 0; i < ERR_LEN; i++) {
|
||||||
|
t.attribute.err_lex[i] = b_getc(sc_buf);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
lexend++;
|
||||||
|
str_offset++;
|
||||||
|
} /*end for loop, string finished*/
|
||||||
|
str_offset++;
|
||||||
|
b_addc(str_LTBL, '\0');
|
||||||
|
|
||||||
|
t.code = STR_T;
|
||||||
|
t.attribute.str_offset = lexstart;
|
||||||
|
return t; /* String literal */
|
||||||
|
default:
|
||||||
|
if (isalnum(c) || isalpha(c)) {
|
||||||
|
lexend = 0;
|
||||||
|
state = 0;
|
||||||
|
lex_buf = b_create(1, 1, 'a');
|
||||||
|
|
||||||
|
while (accept == NOAS) {
|
||||||
|
b_addc(lex_buf, c);
|
||||||
|
state = get_next_state(state, c, &accept);
|
||||||
|
|
||||||
|
if (accept != NOAS)
|
||||||
|
break;
|
||||||
|
c = b_getc(sc_buf);
|
||||||
|
lexend++;
|
||||||
|
}
|
||||||
|
/* Entering Accepting State */
|
||||||
|
b_addc(lex_buf, '\0');
|
||||||
|
|
||||||
|
if (as_table[state] == ASWR)
|
||||||
|
b_retract(sc_buf);
|
||||||
|
if ((t.attribute.kwt_idx = iskeyword(b_setmark(lex_buf, 0))) != -1) {
|
||||||
|
t.code = KW_T;
|
||||||
|
b_free(lex_buf);
|
||||||
|
return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (aa_table[state] != NULL) {
|
||||||
|
t = aa_table[state](b_setmark(lex_buf, 0));
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
t = aa_table[ES]("RUN TIME ERROR");
|
||||||
|
}
|
||||||
|
b_free(lex_buf);
|
||||||
|
}
|
||||||
|
|
||||||
|
else {
|
||||||
|
t = aa_table[ES](" ");
|
||||||
|
t.attribute.err_lex[0] = c;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/* special cases or token driven processing */
|
///* special cases or token driven processing */
|
||||||
|
//
|
||||||
WRITE YOUR CODE FOR PROCESSING THE SPECIAL CASES HERE.
|
//WRITE YOUR CODE FOR PROCESSING THE SPECIAL CASES HERE.
|
||||||
COMMENTS AND STRING LITERALS ARE ALSO PROCESSED HERE.
|
//COMMENTS AND STRING LITERALS ARE ALSO PROCESSED HERE.
|
||||||
|
//
|
||||||
WHAT FOLLOWS IS A PSEUDO CODE. YOU CAN USE switch STATEMENT
|
//WHAT FOLLOWS IS A PSEUDO CODE. YOU CAN USE switch STATEMENT
|
||||||
INSTEAD OF if-else TO PROCESS THE SPECIAL CASES
|
//INSTEAD OF if-else TO PROCESS THE SPECIAL CASES
|
||||||
DO NOT FORGET TO COUNT THE PROGRAM LINES
|
//DO NOT FORGET TO COUNT THE PROGRAM LINES
|
||||||
|
//
|
||||||
|
//
|
||||||
IF (c == SOME CHARACTER)
|
// IF (c == SOME CHARACTER)
|
||||||
...
|
// ...
|
||||||
SKIP CHARACTER (FOR EXAMPLE SPACE)
|
// SKIP CHARACTER (FOR EXAMPLE SPACE)
|
||||||
continue;
|
// continue;
|
||||||
OR SET TOKEN (SET TOKEN CODE AND TOKEN ATTRIBUTE(IF AVAILABLE))
|
// OR SET TOKEN (SET TOKEN CODE AND TOKEN ATTRIBUTE(IF AVAILABLE))
|
||||||
return t;
|
// return t;
|
||||||
EXAMPLE:
|
// EXAMPLE:
|
||||||
if (c == ' ') continue;
|
// if (c == ' ') continue;
|
||||||
if (c == '{'){ t.code = RBR_T; /*no attribute */ return t;
|
// if (c == '{'){ t.code = RBR_T; /*no attribute */ return t;
|
||||||
if (c == '+'){ t.code = ART_OP_T; t.attribute.arr_op = PLUS */ return t;
|
// if (c == '+'){ t.code = ART_OP_T; t.attribute.arr_op = PLUS */ return t;
|
||||||
...
|
// ...
|
||||||
|
//
|
||||||
IF (c == '.') TRY TO PROCESS .AND. or .OR.
|
// IF (c == '.') TRY TO PROCESS .AND. or .OR.
|
||||||
IF SOMETHING ELSE FOLLOWS . OR THE LAST . IS MISSING
|
// IF SOMETHING ELSE FOLLOWS . OR THE LAST . IS MISSING
|
||||||
RETURN AN ERROR TOKEN
|
// RETURN AN ERROR TOKEN
|
||||||
IF (c == '!') TRY TO PROCESS COMMENT
|
// IF (c == '!') TRY TO PROCESS COMMENT
|
||||||
IF THE FOLLOWING IS NOT CHAR IS NOT < REPORT AN ERROR
|
// IF THE FOLLOWING IS NOT CHAR IS NOT < REPORT AN ERROR
|
||||||
ELSE IN A LOOP SKIP CHARACTERS UNTIL line terminator is found THEN continue;
|
// ELSE IN A LOOP SKIP CHARACTERS UNTIL line terminator is found THEN continue;
|
||||||
...
|
// ...
|
||||||
IF STRING (FOR EXAMPLE, "text") IS FOUND
|
// IF STRING (FOR EXAMPLE, "text") IS FOUND
|
||||||
SET MARK TO MARK THE BEGINNING OF THE STRING
|
// SET MARK TO MARK THE BEGINNING OF THE STRING
|
||||||
IF THE STRING IS LEGAL
|
// IF THE STRING IS LEGAL
|
||||||
USING b_addc(..)COPY THE text FROM INPUT BUFFER INTO str_LTBL
|
// USING b_addc(..)COPY THE text FROM INPUT BUFFER INTO str_LTBL
|
||||||
ADD '\0' at the end make the string C-type string
|
// ADD '\0' at the end make the string C-type string
|
||||||
SET STRING TOKEN
|
// SET STRING TOKEN
|
||||||
(the attribute of the string token is the offset from
|
// (the attribute of the string token is the offset from
|
||||||
the beginning of the str_LTBL char buffer to the beginning
|
// the beginning of the str_LTBL char buffer to the beginning
|
||||||
of the string (TEXT in the example))
|
// of the string (TEXT in the example))
|
||||||
|
//
|
||||||
return t;
|
// return t;
|
||||||
ELSE
|
// ELSE
|
||||||
THE STRING LITERAL IS ILLEGAL
|
// THE STRING LITERAL IS ILLEGAL
|
||||||
SET ERROR TOKEN FOR ILLEGAL STRING (see assignment)
|
// SET ERROR TOKEN FOR ILLEGAL STRING (see assignment)
|
||||||
DO NOT STORE THE ILLEGAL STRINg IN THE str_LTBL
|
// DO NOT STORE THE ILLEGAL STRINg IN THE str_LTBL
|
||||||
|
//
|
||||||
return t;
|
// return t;
|
||||||
|
//
|
||||||
IF (c == ANOTHER CHARACTER)
|
// IF(c == ANOTHER CHARACTER)
|
||||||
SET TOKEN
|
// SET TOKEN
|
||||||
return t;
|
// return t;
|
||||||
/* Process state transition table */
|
/* Process state transition table */
|
||||||
|
|
||||||
IF (c is a digit OR c is a letter){
|
//IF (c is a digit OR c is a letter){
|
||||||
|
//
|
||||||
SET THE MARK AT THE BEGINING OF THE LEXEME
|
//SET THE MARK AT THE BEGINING OF THE LEXEME
|
||||||
b_setmark(sc_buf,forward);
|
//b_setmark(sc_buf,forward);
|
||||||
....
|
// ....
|
||||||
CODE YOUR FINATE STATE MACHINE HERE (FSM or DFA)
|
//CODE YOUR FINATE STATE MACHINE HERE (FSM or DFA)
|
||||||
IT IMPLEMENTS THE FOLLOWING ALGORITHM:
|
//IT IMPLEMENTS THE FOLLOWING ALGORITHM:
|
||||||
|
//
|
||||||
FSM0. Begin with state = 0 and the input character c
|
//FSM0. Begin with state = 0 and the input character c
|
||||||
FSM1. Get the next state from the transition table calling
|
//FSM1. Get the next state from the transition table calling
|
||||||
state = get_next_state(state, c, &accept);
|
// state = get_next_state(state, c, &accept);
|
||||||
FSM2. Get the next character
|
//FSM2. Get the next character
|
||||||
FSM3. If the state is not accepting (accept == NOAS), go to step FSM1
|
//FSM3. If the state is not accepting (accept == NOAS), go to step FSM1
|
||||||
If the step is accepting, token is found, leave the machine and
|
// If the step is accepting, token is found, leave the machine and
|
||||||
call an accepting function as described below.
|
// call an accepting function as described below.
|
||||||
|
//
|
||||||
|
//
|
||||||
RETRACT getc_offset IF THE FINAL STATE IS A RETRACTING FINAL STATE
|
//RETRACT getc_offset IF THE FINAL STATE IS A RETRACTING FINAL STATE
|
||||||
GET THE BEGINNING AND THE END OF THE LEXEME
|
//GET THE BEGINNING AND THE END OF THE LEXEME
|
||||||
lexstart = b_getmark(sc_buf);
|
//lexstart = b_getmark(sc_buf);
|
||||||
SET lexend TO getc_offset USING AN APPROPRIATE BUFFER FUNCTION
|
//SET lexend TO getc_offset USING AN APPROPRIATE BUFFER FUNCTION
|
||||||
CREATE A TEMPORRARY LEXEME BUFFER HERE;
|
//CREATE A TEMPORRARY LEXEME BUFFER HERE;
|
||||||
lex_buf = b_create(...);
|
//lex_buf = b_create(...);
|
||||||
. RETRACT getc_offset to the MARK SET PREVIOUSLY AT THE BEGINNING OF THE LEXEME AND
|
// . RETRACT getc_offset to the MARK SET PREVIOUSLY AT THE BEGINNING OF THE LEXEME AND
|
||||||
. USING b_getc() COPY THE LEXEME BETWEEN lexstart AND lexend FROM THE INPUT BUFFER INTO lex_buf USING b_addc(...),
|
// . USING b_getc() COPY THE LEXEME BETWEEN lexstart AND lexend FROM THE INPUT BUFFER INTO lex_buf USING b_addc(...),
|
||||||
. WHEN VID (KEYWORDS INCLUDED), FPL OR IL IS RECOGNIZED
|
// . WHEN VID (KEYWORDS INCLUDED), FPL OR IL IS RECOGNIZED
|
||||||
. YOU MUST CALL THE ACCEPTING FUNCTION USING THE ARRAY aa_table ,WHICH
|
// . YOU MUST CALL THE ACCEPTING FUNCTION USING THE ARRAY aa_table ,WHICH
|
||||||
. CONTAINS POINTERS TO FUNCTIONS. THE ARRAY INDEX OF THE FUNCTION TO BE
|
// . CONTAINS POINTERS TO FUNCTIONS. THE ARRAY INDEX OF THE FUNCTION TO BE
|
||||||
. CALLED IS STORED IN THE VARIABLE state.
|
// . CALLED IS STORED IN THE VARIABLE state.
|
||||||
. YOU ARE NOT ALLOWED TO CALL ANY OF THE ACCEPTING FUNCTIONS BY NAME.
|
// . YOU ARE NOT ALLOWED TO CALL ANY OF THE ACCEPTING FUNCTIONS BY NAME.
|
||||||
. THE ARGUMENT TO THE FUNCTION IS THE STRING STORED IN lex_buf.
|
// . THE ARGUMENT TO THE FUNCTION IS THE STRING STORED IN lex_buf.
|
||||||
....
|
// ....
|
||||||
b_free(lex_buf);
|
// b_free(lex_buf);
|
||||||
return t;
|
// return t;
|
||||||
|
//
|
||||||
CHECK OTHER CHARS HERE if NEEDED, SET A TOKEN AND RETURN IT.
|
// CHECK OTHER CHARS HERE if NEEDED, SET A TOKEN AND RETURN IT.
|
||||||
FOR ILLEGAL CHARACTERS SET ERROR TOKEN.
|
// FOR ILLEGAL CHARACTERS SET ERROR TOKEN.
|
||||||
THE ILLEGAL CHAR IS THE ATTRIBUTE OF THE ERROR TOKEN
|
// THE ILLEGAL CHAR IS THE ATTRIBUTE OF THE ERROR TOKEN
|
||||||
IN A CASE OF RUNTIME ERROR, THE FUNCTION MUST STORE
|
// IN A CASE OF RUNTIME ERROR, THE FUNCTION MUST STORE
|
||||||
A NON-NEGATIVE NUMBER INTO THE GLOBAL VARIABLE scerrnum
|
// A NON-NEGATIVE NUMBER INTO THE GLOBAL VARIABLE scerrnum
|
||||||
AND RETURN AN ERROR TOKEN. THE ERROR TOKEN ATTRIBUTE MUST
|
// AND RETURN AN ERROR TOKEN. THE ERROR TOKEN ATTRIBUTE MUST
|
||||||
BE THE STRING "RUN TIME ERROR: "
|
// BE THE STRING "RUN TIME ERROR: "
|
||||||
}//end while(1)
|
}//end while(1)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
DO NOT MODIFY THE CODE OF THIS FUNCTION
|
/* DO NOT MODIFY THE CODE OF THIS FUNCTION
|
||||||
YOU CAN REMOVE THE COMMENTS
|
YOU CAN REMOVE THE COMMENTS */
|
||||||
|
|
||||||
int get_next_state(int state, char c, int *accept)
|
int get_next_state(int state, char c, int *accept)
|
||||||
{
|
{
|
||||||
|
@ -275,80 +383,133 @@ or #undef DEBUF is used - see the top of the file.
|
||||||
return next;
|
return next;
|
||||||
}
|
}
|
||||||
|
|
||||||
int char_class (char c)
|
int char_class(char c)
|
||||||
{
|
{
|
||||||
int val;
|
int val;
|
||||||
|
if (isalpha(c))
|
||||||
|
val = 0;
|
||||||
|
else if (c == '0')
|
||||||
|
val = 1;
|
||||||
|
else if (c > '0' && c < '8')
|
||||||
|
val = 2;
|
||||||
|
else if (c == '8' || c == '9')
|
||||||
|
val = 3;
|
||||||
|
else if (c == '.')
|
||||||
|
val = 4;
|
||||||
|
else if (c == '#')
|
||||||
|
val = 5;
|
||||||
|
else
|
||||||
|
val = 6;
|
||||||
|
|
||||||
THIS FUNCTION RETURNS THE COLUMN NUMBER IN THE TRANSITION
|
return val;
|
||||||
TABLE st_table FOR THE INPUT CHARACTER c.
|
|
||||||
SOME COLUMNS MAY REPRESENT A CHARACTER CLASS .
|
|
||||||
FOR EXAMPLE IF COLUMN 1 REPRESENTS [A-Z]
|
|
||||||
THE FUNCTION RETURNS 1 EVERY TIME c IS ONE
|
|
||||||
OF THE LETTERS A,B,...,Z.
|
|
||||||
|
|
||||||
return val;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
HERE YOU WRITE THE DEFINITIONS FOR YOUR ACCEPTING FUNCTIONS.
|
HERE YOU WRITE THE DEFINITIONS FOR YOUR ACCEPTING FUNCTIONS.
|
||||||
************************************************************
|
************************************************************
|
||||||
|
|
||||||
ACCEPTING FUNCTION FOR THE arithmentic variable identifier AND keywords (VID - AVID/KW)
|
ACCEPTING FUNCTION FOR THE arithmentic variable identifier AND keywords (VID - AVID/KW)
|
||||||
REPLACE XX WITH THE CORRESPONDING ACCEPTING STATE NUMBER
|
REPLACE XX WITH THE CORRESPONDING ACCEPTING STATE NUMBER
|
||||||
|
*/
|
||||||
|
|
||||||
Token aa_funcXX(char lexeme[]){
|
Token aa_func02(char lexeme[]) {
|
||||||
|
unsigned int kw_i; /* Variable to contain keyword table index */
|
||||||
|
Token t;
|
||||||
|
char* temp_str;
|
||||||
|
|
||||||
WHEN CALLED THE FUNCTION MUST
|
if ((kw_i = iskeyword(lexeme)) > -1) { /* Keyword check */
|
||||||
1. CHECK IF THE LEXEME IS A KEYWORD.
|
t.code = KW_T;
|
||||||
IF YES, IT MUST RETURN A TOKEN WITH THE CORRESPONDING ATTRIBUTE
|
t.attribute.kwt_idx = kw_i;
|
||||||
FOR THE KEYWORD. THE ATTRIBUTE CODE FOR THE KEYWORD
|
return t;
|
||||||
IS ITS INDEX IN THE KEYWORD LOOKUP TABLE (kw_table in table.h).
|
}
|
||||||
IF THE LEXEME IS NOT A KEYWORD, GO TO STEP 2.
|
/* Not a keyword? Must be AVID*/
|
||||||
|
if ((temp_str = (char*)calloc(VID_LEN + 1, sizeof(char))) == NULL) {
|
||||||
|
return aa_table[ES]("RUN TIME ERROR");
|
||||||
|
}
|
||||||
|
strncpy(temp_str, lexeme, VID_LEN);
|
||||||
|
|
||||||
2. SET a AVID TOKEN.
|
strncpy(t.attribute.vid_lex, temp_str, VID_LEN);
|
||||||
IF THE lexeme IS LONGER than VID_LEN (see token.h) CHARACTERS,
|
free(temp_str);
|
||||||
ONLY FIRST VID_LEN CHARACTERS ARE STORED
|
|
||||||
INTO THE VARIABLE ATTRIBUTE ARRAY vid_lex[](see token.h) .
|
switch (lexeme[0]) { /* Read first character of lexeme for implicit type (not used yet?)*/
|
||||||
ADD \0 AT THE END TO MAKE A C-type STRING.
|
case 'i':
|
||||||
return t;
|
case 'o':
|
||||||
|
case 'd':
|
||||||
|
case 'n':
|
||||||
|
/* Integer */
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
/* Floating point*/
|
||||||
|
}
|
||||||
|
|
||||||
|
return t;
|
||||||
|
|
||||||
|
/*
|
||||||
|
WHEN CALLED THE FUNCTION MUST
|
||||||
|
1. CHECK IF THE LEXEME IS A KEYWORD.
|
||||||
|
IF YES, IT MUST RETURN A TOKEN WITH THE CORRESPONDING ATTRIBUTE
|
||||||
|
FOR THE KEYWORD. THE ATTRIBUTE CODE FOR THE KEYWORD
|
||||||
|
IS ITS INDEX IN THE KEYWORD LOOKUP TABLE (kw_table in table.h).
|
||||||
|
IF THE LEXEME IS NOT A KEYWORD, GO TO STEP 2.
|
||||||
|
|
||||||
|
2. SET a AVID TOKEN.
|
||||||
|
IF THE lexeme IS LONGER than VID_LEN (see token.h) CHARACTERS,
|
||||||
|
ONLY FIRST VID_LEN CHARACTERS ARE STORED
|
||||||
|
INTO THE VARIABLE ATTRIBUTE ARRAY vid_lex[](see token.h) .
|
||||||
|
ADD \0 AT THE END TO MAKE A C-type STRING.
|
||||||
|
*/
|
||||||
}
|
}
|
||||||
|
/*
|
||||||
ACCEPTING FUNCTION FOR THE string variable identifier (VID - SVID)
|
ACCEPTING FUNCTION FOR THE string variable identifier (VID - SVID)
|
||||||
REPLACE XX WITH THE CORRESPONDING ACCEPTING STATE NUMBER
|
REPLACE XX WITH THE CORRESPONDING ACCEPTING STATE NUMBER
|
||||||
|
*/
|
||||||
|
Token aa_func03(char lexeme[]) {
|
||||||
|
Token t;
|
||||||
|
int offset;
|
||||||
|
int i;
|
||||||
|
char* temp_str;
|
||||||
|
if ((temp_str = (char*)calloc(VID_LEN + 2, sizeof(char))) == NULL) {
|
||||||
|
return aa_table[ES]("RUN TIME ERROR");
|
||||||
|
}
|
||||||
|
|
||||||
Token aa_funcXX(char lexeme[]){
|
strcpy(temp_str, lexeme, VID_LEN);
|
||||||
|
temp_str[strlen(temp_str)] = "#"; /* Append # to end of the SVID */
|
||||||
|
|
||||||
WHEN CALLED THE FUNCTION MUST
|
strncpy(t.attribute.vid_lex, temp_str, VID_LEN);
|
||||||
1. SET a SVID TOKEN.
|
free(temp_str);
|
||||||
IF THE lexeme IS LONGER than VID_LEN characters,
|
|
||||||
ONLY FIRST VID_LEN-1 CHARACTERS ARE STORED
|
t.code = SVID_T;
|
||||||
INTO THE VARIABLE ATTRIBUTE ARRAY vid_lex[],
|
return t;
|
||||||
AND THEN THE # CHARACTER IS APPENDED TO THE NAME.
|
|
||||||
ADD \0 AT THE END TO MAKE A C-type STRING.
|
/*
|
||||||
|
WHEN CALLED THE FUNCTION MUST
|
||||||
return t;
|
1. SET a SVID TOKEN.
|
||||||
|
IF THE lexeme IS LONGER than VID_LEN characters,
|
||||||
|
ONLY FIRST VID_LEN-1 CHARACTERS ARE STORED
|
||||||
|
INTO THE VARIABLE ATTRIBUTE ARRAY vid_lex[],
|
||||||
|
AND THEN THE # CHARACTER IS APPENDED TO THE NAME.
|
||||||
|
ADD \0 AT THE END TO MAKE A C-type STRING.
|
||||||
|
*/
|
||||||
|
return t;
|
||||||
}
|
}
|
||||||
|
|
||||||
ACCEPTING FUNCTION FOR THE floating-point literal (FPL)
|
/*ACCEPTING FUNCTION FOR THE integer literal(IL)-decimal constant(DIL)*/
|
||||||
|
|
||||||
Token aa_funcXX(char lexeme[]){
|
Token aa_func05(char lexeme[]) {
|
||||||
|
Token t;
|
||||||
|
long temp_num;
|
||||||
|
|
||||||
THE FUNCTION MUST CONVERT THE LEXEME TO A FLOATING POINT VALUE,
|
temp_num = strtol(lexeme, NULL, 10);
|
||||||
WHICH IS THE ATTRIBUTE FOR THE TOKEN.
|
|
||||||
THE VALUE MUST BE IN THE SAME RANGE AS the value of 4-byte float in C.
|
|
||||||
IN CASE OF ERROR (OUT OF RANGE) THE FUNCTION MUST RETURN ERROR TOKEN
|
|
||||||
THE ERROR TOKEN ATTRIBUTE IS lexeme. IF THE ERROR lexeme IS LONGER
|
|
||||||
than ERR_LEN characters, ONLY THE FIRST ERR_LEN-3 characters ARE
|
|
||||||
STORED IN err_lex. THEN THREE DOTS ... ARE ADDED TO THE END OF THE
|
|
||||||
err_lex C-type string.
|
|
||||||
return t;
|
|
||||||
}
|
|
||||||
|
|
||||||
ACCEPTING FUNCTION FOR THE integer literal(IL) - decimal constant (DIL)
|
if (temp_num > SHRT_MAX || temp_num < 0) {
|
||||||
|
t = aa_table[ES](lexeme);
|
||||||
Token aa_funcXX(char lexeme[]){
|
}
|
||||||
|
t.code = INL_T;
|
||||||
|
t.attribute.int_value = temp_num;
|
||||||
|
|
||||||
|
return t;
|
||||||
|
/*
|
||||||
THE FUNCTION MUST CONVERT THE LEXEME REPRESENTING A DECIMAL CONSTANT
|
THE FUNCTION MUST CONVERT THE LEXEME REPRESENTING A DECIMAL CONSTANT
|
||||||
TO A DECIMAL INTEGER VALUE, WHICH IS THE ATTRIBUTE FOR THE TOKEN.
|
TO A DECIMAL INTEGER VALUE, WHICH IS THE ATTRIBUTE FOR THE TOKEN.
|
||||||
THE VALUE MUST BE IN THE SAME RANGE AS the value of 2-byte integer in C.
|
THE VALUE MUST BE IN THE SAME RANGE AS the value of 2-byte integer in C.
|
||||||
|
@ -356,14 +517,64 @@ IN CASE OF ERROR (OUT OF RANGE) THE FUNCTION MUST RETURN ERROR TOKEN
|
||||||
THE ERROR TOKEN ATTRIBUTE IS lexeme. IF THE ERROR lexeme IS LONGER
|
THE ERROR TOKEN ATTRIBUTE IS lexeme. IF THE ERROR lexeme IS LONGER
|
||||||
than ERR_LEN characters, ONLY THE FIRST ERR_LEN-3 characters ARE
|
than ERR_LEN characters, ONLY THE FIRST ERR_LEN-3 characters ARE
|
||||||
STORED IN err_lex. THEN THREE DOTS ... ARE ADDED TO THE END OF THE
|
STORED IN err_lex. THEN THREE DOTS ... ARE ADDED TO THE END OF THE
|
||||||
err_lex C-type string.
|
err_lex C-type string. */
|
||||||
return t;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
ACCEPTING FUNCTION FOR THE integer literal(IL) - octal constant (OIL)
|
/*ACCEPTING FUNCTION FOR THE floating - point literal (FPL)*/
|
||||||
|
|
||||||
Token aa_funcXX(char lexeme[]){
|
Token aa_func08(char lexeme[]) {
|
||||||
|
Token t;
|
||||||
|
double temp_dbl;
|
||||||
|
|
||||||
|
t.code = FPL_T;
|
||||||
|
if (strstr(lexeme, "0.0")) {
|
||||||
|
t.attribute.flt_value = 0.0f;
|
||||||
|
return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
temp_dbl = atof(lexeme);
|
||||||
|
|
||||||
|
if ((temp_dbl > FLT_MAX) || (temp_dbl < 0)) {
|
||||||
|
t = aa_table[ES](lexeme);
|
||||||
|
}
|
||||||
|
t.attribute.flt_value = (float)temp_dbl;
|
||||||
|
|
||||||
|
return t;
|
||||||
|
/*
|
||||||
|
THE FUNCTION MUST CONVERT THE LEXEME TO A FLOATING POINT VALUE,
|
||||||
|
WHICH IS THE ATTRIBUTE FOR THE TOKEN.
|
||||||
|
THE VALUE MUST BE IN THE SAME RANGE AS the value of 4-byte float in C.
|
||||||
|
IN CASE OF ERROR (OUT OF RANGE) THE FUNCTION MUST RETURN ERROR TOKEN
|
||||||
|
THE ERROR TOKEN ATTRIBUTE IS lexeme. IF THE ERROR lexeme IS LONGER
|
||||||
|
than ERR_LEN characters, ONLY THE FIRST ERR_LEN-3 characters ARE
|
||||||
|
STORED IN err_lex. THEN THREE DOTS ... ARE ADDED TO THE END OF THE
|
||||||
|
err_lex C-type string. */
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*ACCEPTING FUNCTION FOR THE integer literal(IL) - octal constant (OIL)*/
|
||||||
|
|
||||||
|
Token aa_func10(char lexeme[]) {
|
||||||
|
Token t;
|
||||||
|
int new_olval;
|
||||||
|
|
||||||
|
if (strlen(lexeme) > INL_LEN + 1) {
|
||||||
|
t = aa_table[ES](lexeme);
|
||||||
|
}
|
||||||
|
|
||||||
|
t.code = INL_T;
|
||||||
|
new_olval = atool(lexeme);
|
||||||
|
|
||||||
|
if (new_olval < SHRT_MIN || new_olval > SHRT_MAX) {
|
||||||
|
t = aa_table[ES](lexeme);
|
||||||
|
}
|
||||||
|
|
||||||
|
t.code = INL_T;
|
||||||
|
t.attribute.int_value = new_olval;
|
||||||
|
|
||||||
|
return t;
|
||||||
|
/*
|
||||||
THE FUNCTION MUST CONVERT THE LEXEME REPRESENTING AN OCTAL CONSTANT
|
THE FUNCTION MUST CONVERT THE LEXEME REPRESENTING AN OCTAL CONSTANT
|
||||||
TO A DECIMAL INTEGER VALUE WHICH IS THE ATTRIBUTE FOR THE TOKEN.
|
TO A DECIMAL INTEGER VALUE WHICH IS THE ATTRIBUTE FOR THE TOKEN.
|
||||||
THE VALUE MUST BE IN THE SAME RANGE AS the value of 2-byte integer in C.
|
THE VALUE MUST BE IN THE SAME RANGE AS the value of 2-byte integer in C.
|
||||||
|
@ -376,35 +587,62 @@ IN CASE OF ERROR (OUT OF RANGE) THE FUNCTION MUST RETURN ERROR TOKEN
|
||||||
THE ERROR TOKEN ATTRIBUTE IS lexeme. IF THE ERROR lexeme IS LONGER
|
THE ERROR TOKEN ATTRIBUTE IS lexeme. IF THE ERROR lexeme IS LONGER
|
||||||
than ERR_LEN characters, ONLY THE FIRST ERR_LEN-3 characters ARE
|
than ERR_LEN characters, ONLY THE FIRST ERR_LEN-3 characters ARE
|
||||||
STORED IN err_lex. THEN THREE DOTS ... ARE ADDED TO THE END OF THE
|
STORED IN err_lex. THEN THREE DOTS ... ARE ADDED TO THE END OF THE
|
||||||
err_lex C-type string.
|
err_lex C-type string.
|
||||||
|
*/
|
||||||
return t;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
ACCEPTING FUNCTION FOR THE ERROR TOKEN
|
/*ACCEPTING FUNCTION FOR THE ERROR TOKEN */
|
||||||
|
|
||||||
Token aa_funcXX(char lexeme[]){
|
Token aa_func12(char lexeme[]) {
|
||||||
|
Token t;
|
||||||
|
unsigned int i;
|
||||||
|
|
||||||
|
t.code = ERR_T;
|
||||||
|
|
||||||
|
for (i = 0; i < (ERR_LEN - 1) && i < strlen(lexeme); i++) {
|
||||||
|
t.attribute.err_lex[i] = lexeme[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
t.attribute.err_lex[i] = '\0';
|
||||||
|
|
||||||
|
return t;
|
||||||
|
/*
|
||||||
THE FUNCTION SETS THE ERROR TOKEN. lexeme[] CONTAINS THE ERROR
|
THE FUNCTION SETS THE ERROR TOKEN. lexeme[] CONTAINS THE ERROR
|
||||||
THE ATTRIBUTE OF THE ERROR TOKEN IS THE lexeme ITSELF
|
THE ATTRIBUTE OF THE ERROR TOKEN IS THE lexeme ITSELF
|
||||||
AND IT MUST BE STORED in err_lex. IF THE ERROR lexeme IS LONGER
|
AND IT MUST BE STORED in err_lex. IF THE ERROR lexeme IS LONGER
|
||||||
than ERR_LEN characters, ONLY THE FIRST ERR_LEN-3 characters ARE
|
than ERR_LEN characters, ONLY THE FIRST ERR_LEN-3 characters ARE
|
||||||
STORED IN err_lex. THEN THREE DOTS ... ARE ADDED TO THE END OF THE
|
STORED IN err_lex. THEN THREE DOTS ... ARE ADDED TO THE END OF THE
|
||||||
err_lex C-type string.
|
err_lex C-type string.
|
||||||
|
*/
|
||||||
return t;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
CONVERSION FUNCTION
|
/*CONVERSION FUNCTION*/
|
||||||
|
|
||||||
long atool(char * lexeme){
|
long atool(char * lexeme) {
|
||||||
|
int i, x = 1;
|
||||||
|
long result = 0;
|
||||||
|
|
||||||
|
for (i = strlen(lexeme); i > 0; i--, x *= 8) {
|
||||||
|
result += x*(lexeme[i - 1] - '0');
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
/*
|
||||||
THE FUNCTION CONVERTS AN ASCII STRING
|
THE FUNCTION CONVERTS AN ASCII STRING
|
||||||
REPRESENTING AN OCTAL INTEGER CONSTANT TO INTEGER VALUE
|
REPRESENTING AN OCTAL INTEGER CONSTANT TO INTEGER VALUE
|
||||||
|
*/
|
||||||
}
|
}
|
||||||
|
|
||||||
HERE YOU WRITE YOUR ADDITIONAL FUNCTIONS (IF ANY).
|
/*HERE YOU WRITE YOUR ADDITIONAL FUNCTIONS (IF ANY).
|
||||||
FOR EXAMPLE
|
FOR EXAMPLE*/
|
||||||
|
|
||||||
int iskeyword(char * kw_lexeme){}
|
int iskeyword(char * kw_lexeme) {
|
||||||
|
int i;
|
||||||
|
|
||||||
|
if (kw_lexeme == NULL) return -1;
|
||||||
|
|
||||||
|
for (i = 0; i < KWT_SIZE; i++) {
|
||||||
|
if (strcmp(kw_table[i], kw_lexeme) == 0) return i;
|
||||||
|
}
|
||||||
|
return -1;
|
||||||
|
}
|
53
table.h
53
table.h
|
@ -36,33 +36,34 @@
|
||||||
|
|
||||||
|
|
||||||
//REPLACE *ESN* WITH YOUR ERROR STATE NUMBER
|
//REPLACE *ESN* WITH YOUR ERROR STATE NUMBER
|
||||||
#define ES -2 /* Error state */
|
#define ES 12 /* Error state */
|
||||||
#define IS -1 /* Inavalid state */
|
#define IS -1 /* Invalid state */
|
||||||
|
|
||||||
/* State transition table definition */
|
/* State transition table definition */
|
||||||
|
|
||||||
//REPLACE *CN* WITH YOUR COLUMN NUMBER
|
//REPLACE *CN* WITH YOUR COLUMN NUMBER
|
||||||
|
|
||||||
#define TABLE_COLUMNS 14
|
#define TABLE_COLUMNS 7
|
||||||
/*transition table - type of states defined in separate table */
|
/*transition table - type of states defined in separate table */
|
||||||
int st_table[][TABLE_COLUMNS] = {
|
int st_table[][TABLE_COLUMNS] = {
|
||||||
/* INPUT COLUMNS:
|
/* INPUT COLUMNS:
|
||||||
[a-zA-Z]| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | . | # | other
|
COLUMN # | 0 | 1 | 2 | 3 | 4 | 5 | 6 |
|
||||||
*/
|
[a-zA-Z]| 0 |[1-7]|[8-9]| . | # | other
|
||||||
/* State 0 */ {1, 6 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , IS , IS ,IS},
|
*/
|
||||||
/* State 1 */ {1, 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , ES , 3 , 2},
|
/* State 0 */ {1, 6 , 4 , 4 , IS , IS , IS},
|
||||||
/* State 2 */ {IS, IS , IS, IS, IS, IS, IS, IS, IS, IS, IS, IS , IS , IS},
|
/* State 1 */ {1, 1 , 1 , 1 , ES , 3 , 2},
|
||||||
/* State 3 */ {IS, IS , IS, IS, IS, IS, IS, IS, IS, IS, IS, IS , IS , IS},
|
/* State 2 */ {IS, IS , IS, IS, IS , IS , IS},
|
||||||
/* State 4 */ {ES, 4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , 7 , 5 , 5},
|
/* State 3 */ {IS, IS , IS, IS, IS , IS , IS},
|
||||||
/* State 5 */ {IS, IS , IS, IS, IS, IS, IS, IS, IS, IS, IS, IS , IS , IS},
|
/* State 4 */ {ES, 4 , 4 , 4 , 7 , 5 , 5},
|
||||||
/* State 6 */ {ES, 9 , 9, 9, 9, 9, 9, 9, 9, ES, ES, 7 , ES , 5},
|
/* State 5 */ {IS, IS , IS, IS, IS , IS , IS},
|
||||||
/* State 7 */ {ES, 7 , 7, 7, 7, 7, 7, 7, 7, 7, 7, ES , 8 , 8},
|
/* State 6 */ {ES, 9 , 9, ES, 7 , ES , 5},
|
||||||
/* State 8 */ {IS, IS , IS, IS, IS, IS, IS, IS, IS, IS, IS, IS , IS , IS},
|
/* State 7 */ {ES, 7 , 7, 7, ES , 8 , 8},
|
||||||
/* State 9 */ {ES, 9 , 9, 9, 9, 9, 9, 9, 9, ES, ES, ES , ES , 10},
|
/* State 8 */ {IS, IS , IS, IS, IS , IS , IS},
|
||||||
/* State 10 */ {IS, IS , IS, IS, IS, IS, IS, IS, IS, IS, IS, IS , IS , IS},
|
/* State 9 */ {ES, 9 , 9, ES, ES , ES , 10},
|
||||||
/* State 11 */ {ES, ES , ES, ES, ES, ES, ES, ES, ES, ES, ES, ES , ES , ES},
|
/* State 10 */ {IS, IS , IS, IS, IS , IS , IS},
|
||||||
/* State 12 */ {IS, IS , IS, IS, IS, IS, IS, IS, IS, IS, IS, IS , IS , IS},
|
/* State 11 */ {IS, IS , IS, IS, IS , IS , IS},
|
||||||
/* State 13 */ {IS, IS , IS, IS, IS, IS, IS, IS, IS, IS, IS, IS , IS , IS}
|
/* State 12 */ {IS, IS , IS, IS, IS , IS , IS},
|
||||||
|
/* State 13 */ {IS, IS , IS, IS, IS , IS , IS}
|
||||||
//
|
//
|
||||||
//. YOUR TABLE INITIALIZATION HERE
|
//. YOUR TABLE INITIALIZATION HERE
|
||||||
//.
|
//.
|
||||||
|
@ -70,8 +71,8 @@ int st_table[][TABLE_COLUMNS] = {
|
||||||
};
|
};
|
||||||
/* Accepting state table definition */
|
/* Accepting state table definition */
|
||||||
//REPLACE *N1*, *N2*, and *N3* WITH YOUR NUMBERS
|
//REPLACE *N1*, *N2*, and *N3* WITH YOUR NUMBERS
|
||||||
#define ASWR 1 /* accepting state with retract */
|
#define ASWR 2 /* accepting state with retract */
|
||||||
#define ASNR 2 /* accepting state with no retract */
|
#define ASNR 3 /* accepting state with no retract */
|
||||||
#define NOAS 0 /* not accepting state */
|
#define NOAS 0 /* not accepting state */
|
||||||
|
|
||||||
int as_table[] = {
|
int as_table[] = {
|
||||||
|
@ -87,7 +88,7 @@ int as_table[] = {
|
||||||
/* State 8 */ ASWR,
|
/* State 8 */ ASWR,
|
||||||
/* State 9 */ NOAS,
|
/* State 9 */ NOAS,
|
||||||
/* State 10 */ ASWR,
|
/* State 10 */ ASWR,
|
||||||
/* State 11 */ ASNR,
|
/* State 11 */ ASWR,
|
||||||
/* State 12 */ ASNR,
|
/* State 12 */ ASNR,
|
||||||
/* State 13 */ ASWR
|
/* State 13 */ ASWR
|
||||||
|
|
||||||
|
@ -106,7 +107,7 @@ Token aa_func03(char *lexeme); // VID SVID
|
||||||
Token aa_func05(char *lexeme); // DIL
|
Token aa_func05(char *lexeme); // DIL
|
||||||
Token aa_func08(char *lexeme); // FPL
|
Token aa_func08(char *lexeme); // FPL
|
||||||
Token aa_func10(char *lexeme); // OIL
|
Token aa_func10(char *lexeme); // OIL
|
||||||
Token aa_func11(char *lexeme); // ES
|
Token aa_func12(char *lexeme); // ES
|
||||||
//Replace XX with the number of the accepting state: 02, 03 and so on.
|
//Replace XX with the number of the accepting state: 02, 03 and so on.
|
||||||
|
|
||||||
/* defining a new type: pointer to function (of one char * argument)
|
/* defining a new type: pointer to function (of one char * argument)
|
||||||
|
@ -134,9 +135,9 @@ PTR_AAF aa_table[] = {
|
||||||
/* State 8 */ aa_func08,
|
/* State 8 */ aa_func08,
|
||||||
/* State 9 */ NULL,
|
/* State 9 */ NULL,
|
||||||
/* State 10 */ aa_func10,
|
/* State 10 */ aa_func10,
|
||||||
/* State 11 */ aa_func11,
|
/* State 11 */ NULL,
|
||||||
/* State 12 */ aa_func11,
|
/* State 12 */ aa_func12,
|
||||||
/* State 13 */ aa_func11
|
/* State 13 */ NULL
|
||||||
|
|
||||||
//HERE YOU MUST PROVIDE AN INITIALIZATION FOR AN ARRAY OF POINTERS
|
//HERE YOU MUST PROVIDE AN INITIALIZATION FOR AN ARRAY OF POINTERS
|
||||||
//TO ACCEPTING FUNCTIONS. THE ARRAY HAS THE SAME SIZE AS as_table[ ].
|
//TO ACCEPTING FUNCTIONS. THE ARRAY HAS THE SAME SIZE AS as_table[ ].
|
||||||
|
|
Loading…
Reference in New Issue