Scanner source code
This commit is contained in:
parent
f3c285ec50
commit
db10e77bc7
|
@ -0,0 +1,372 @@
|
||||||
|
/* Filename: scanner.c
|
||||||
|
/* PURPOSE:
|
||||||
|
* SCANNER.C: Functions implementing a Lexical Analyzer (Scanner)
|
||||||
|
* as required for CST8152, Assignment #2
|
||||||
|
* scanner_init() must be called before using the scanner.
|
||||||
|
* The file is incomplete;
|
||||||
|
* Provided by: Svillen Ranev
|
||||||
|
* Version: 1.16.02
|
||||||
|
* Date: 29 September 2016
|
||||||
|
*******************************************************************
|
||||||
|
* REPLACE THIS HEADER WITH YOUR HEADER
|
||||||
|
*******************************************************************
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* The #define _CRT_SECURE_NO_WARNINGS should be used in MS Visual Studio projects
|
||||||
|
* to suppress the warnings about using "unsafe" functions like fopen()
|
||||||
|
* and standard sting library functions defined in string.h.
|
||||||
|
* The define does not have any effect in Borland compiler projects.
|
||||||
|
*/
|
||||||
|
#define _CRT_SECURE_NO_WARNINGS
|
||||||
|
|
||||||
|
#include <stdio.h> /* standard input / output */
|
||||||
|
#include <ctype.h> /* conversion functions */
|
||||||
|
#include <stdlib.h> /* standard library functions and constants */
|
||||||
|
#include <string.h> /* string functions */
|
||||||
|
#include <limits.h> /* integer types constants */
|
||||||
|
#include <float.h> /* floating-point types constants */
|
||||||
|
|
||||||
|
/*#define NDEBUG to suppress assert() call */
|
||||||
|
#include <assert.h> /* assert() prototype */
|
||||||
|
|
||||||
|
/* project header files */
|
||||||
|
#include "buffer.h"
|
||||||
|
#include "token.h"
|
||||||
|
#include "table.h"
|
||||||
|
|
||||||
|
#define DEBUG /* for conditional processing */
|
||||||
|
#undef DEBUG
|
||||||
|
|
||||||
|
/* Global objects - variables */
|
||||||
|
/* This buffer is used as a repository for string literals.
|
||||||
|
It is defined in platy_st.c */
|
||||||
|
extern Buffer * str_LTBL; /*String literal table */
|
||||||
|
int line; /* current line number of the source code */
|
||||||
|
extern int scerrnum; /* defined in platy_st.c - run-time error number */
|
||||||
|
|
||||||
|
/* Local(file) global objects - variables */
|
||||||
|
static Buffer *lex_buf;/*pointer to temporary lexeme buffer*/
|
||||||
|
|
||||||
|
/* No other global variable declarations/definitiond are allowed */
|
||||||
|
|
||||||
|
/* scanner.c static(local) function prototypes */
|
||||||
|
static int char_class(char c); /* character class function */
|
||||||
|
static int get_next_state(int, char, int *); /* state machine function */
|
||||||
|
static int iskeyword(char * kw_lexeme); /*keywords lookup functuion */
|
||||||
|
static long atool(char * lexeme); /* converts octal string to decimal value */
|
||||||
|
|
||||||
|
int scanner_init(Buffer * sc_buf) {
|
||||||
|
if(b_isempty(sc_buf)) return EXIT_FAILURE;/*1*/
|
||||||
|
/* in case the buffer has been read previously */
|
||||||
|
b_setmark(sc_buf, 0);
|
||||||
|
b_retract_to_mark(sc_buf);
|
||||||
|
b_reset(str_LTBL);
|
||||||
|
line = 1;
|
||||||
|
return EXIT_SUCCESS;/*0*/
|
||||||
|
/* scerrnum = 0; *//*no need - global ANSI C */
|
||||||
|
}
|
||||||
|
|
||||||
|
Token mlwpar_next_token(Buffer * sc_buf)
|
||||||
|
{
|
||||||
|
Token t; /* token to return after recognition */
|
||||||
|
unsigned char c; /* input symbol */
|
||||||
|
int state = 0; /* initial state of the FSM */
|
||||||
|
short lexstart; /*start offset of a lexeme in the input buffer */
|
||||||
|
short lexend; /*end offset of a lexeme in the input buffer */
|
||||||
|
int accept = NOAS; /* type of state - initially not accepting */
|
||||||
|
/*
|
||||||
|
lexstart is the offset from the beginning of the char buffer of the
|
||||||
|
input buffer (sc_buf) to the first character of the current lexeme,
|
||||||
|
which is being processed by the scanner.
|
||||||
|
lexend is the offset from the beginning of the char buffer of the
|
||||||
|
input buffer (sc_buf) to the last character of the current lexeme,
|
||||||
|
which is being processed by the scanner.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
DECLARE YOUR VARIABLES HERE IF NEEDED
|
||||||
|
|
||||||
|
|
||||||
|
while (1){ /* endless loop broken by token returns it will generate a warning */
|
||||||
|
|
||||||
|
GET THE NEXT SYMBOL FROM THE INPUT BUFFER
|
||||||
|
|
||||||
|
c = b_getc(sc_buf);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* special cases or token driven processing */
|
||||||
|
|
||||||
|
WRITE YOUR CODE FOR PROCESSING THE SPECIAL CASES HERE.
|
||||||
|
COMMENTS AND STRING LITERALS ARE ALSO PROCESSED HERE.
|
||||||
|
|
||||||
|
WHAT FOLLOWS IS A PSEUDO CODE. YOU CAN USE switch STATEMENT
|
||||||
|
INSTEAD OF if-else TO PROCESS THE SPECIAL CASES
|
||||||
|
DO NOT FORGET TO COUNT THE PROGRAM LINES
|
||||||
|
|
||||||
|
|
||||||
|
IF (c == SOME CHARACTER)
|
||||||
|
...
|
||||||
|
SKIP CHARACTER (FOR EXAMPLE SPACE)
|
||||||
|
continue;
|
||||||
|
OR SET TOKEN (SET TOKEN CODE AND TOKEN ATTRIBUTE(IF AVAILABLE))
|
||||||
|
return t;
|
||||||
|
EXAMPLE:
|
||||||
|
if (c == ' ') continue;
|
||||||
|
if (c == '{'){ t.code = RBR_T; /*no attribute */ return t;
|
||||||
|
if (c == '+'){ t.code = ART_OP_T; t.attribute.arr_op = PLUS */ return t;
|
||||||
|
...
|
||||||
|
|
||||||
|
IF (c == '.') TRY TO PROCESS .AND. or .OR.
|
||||||
|
IF SOMETHING ELSE FOLLOWS . OR THE LAST . IS MISSING
|
||||||
|
RETURN AN ERROR TOKEN
|
||||||
|
IF (c == '!') TRY TO PROCESS COMMENT
|
||||||
|
IF THE FOLLOWING IS NOT CHAR IS NOT < REPORT AN ERROR
|
||||||
|
ELSE IN A LOOP SKIP CHARACTERS UNTIL line terminator is found THEN continue;
|
||||||
|
...
|
||||||
|
IF STRING (FOR EXAMPLE, "text") IS FOUND
|
||||||
|
SET MARK TO MARK THE BEGINNING OF THE STRING
|
||||||
|
IF THE STRING IS LEGAL
|
||||||
|
USING b_addc(..)COPY THE text FROM INPUT BUFFER INTO str_LTBL
|
||||||
|
ADD '\0' at the end make the string C-type string
|
||||||
|
SET STRING TOKEN
|
||||||
|
(the attribute of the string token is the offset from
|
||||||
|
the beginning of the str_LTBL char buffer to the beginning
|
||||||
|
of the string (TEXT in the example))
|
||||||
|
|
||||||
|
return t;
|
||||||
|
ELSE
|
||||||
|
THE STRING LITERAL IS ILLEGAL
|
||||||
|
SET ERROR TOKEN FOR ILLEGAL STRING (see assignment)
|
||||||
|
DO NOT STORE THE ILLEGAL STRINg IN THE str_LTBL
|
||||||
|
|
||||||
|
return t;
|
||||||
|
|
||||||
|
IF (c == ANOTHER CHARACTER)
|
||||||
|
SET TOKEN
|
||||||
|
return t;
|
||||||
|
/* Process state transition table */
|
||||||
|
|
||||||
|
IF (c is a digit OR c is a letter){
|
||||||
|
|
||||||
|
SET THE MARK AT THE BEGINING OF THE LEXEME
|
||||||
|
b_setmark(sc_buf,forward);
|
||||||
|
....
|
||||||
|
CODE YOUR FINATE STATE MACHINE HERE (FSM or DFA)
|
||||||
|
IT IMPLEMENTS THE FOLLOWING ALGORITHM:
|
||||||
|
|
||||||
|
FSM0. Begin with state = 0 and the input character c
|
||||||
|
FSM1. Get the next state from the transition table calling
|
||||||
|
state = get_next_state(state, c, &accept);
|
||||||
|
FSM2. Get the next character
|
||||||
|
FSM3. If the state is not accepting (accept == NOAS), go to step FSM1
|
||||||
|
If the step is accepting, token is found, leave the machine and
|
||||||
|
call an accepting function as described below.
|
||||||
|
|
||||||
|
|
||||||
|
RETRACT getc_offset IF THE FINAL STATE IS A RETRACTING FINAL STATE
|
||||||
|
GET THE BEGINNING AND THE END OF THE LEXEME
|
||||||
|
lexstart = b_getmark(sc_buf);
|
||||||
|
SET lexend TO getc_offset USING AN APPROPRIATE BUFFER FUNCTION
|
||||||
|
CREATE A TEMPORRARY LEXEME BUFFER HERE;
|
||||||
|
lex_buf = b_create(...);
|
||||||
|
. RETRACT getc_offset to the MARK SET PREVIOUSLY AT THE BEGINNING OF THE LEXEME AND
|
||||||
|
. USING b_getc() COPY THE LEXEME BETWEEN lexstart AND lexend FROM THE INPUT BUFFER INTO lex_buf USING b_addc(...),
|
||||||
|
. WHEN VID (KEYWORDS INCLUDED), FPL OR IL IS RECOGNIZED
|
||||||
|
. YOU MUST CALL THE ACCEPTING FUNCTION USING THE ARRAY aa_table ,WHICH
|
||||||
|
. CONTAINS POINTERS TO FUNCTIONS. THE ARRAY INDEX OF THE FUNCTION TO BE
|
||||||
|
. CALLED IS STORED IN THE VARIABLE state.
|
||||||
|
. YOU ARE NOT ALLOWED TO CALL ANY OF THE ACCEPTING FUNCTIONS BY NAME.
|
||||||
|
. THE ARGUMENT TO THE FUNCTION IS THE STRING STORED IN lex_buf.
|
||||||
|
....
|
||||||
|
b_free(lex_buf);
|
||||||
|
return t;
|
||||||
|
|
||||||
|
CHECK OTHER CHARS HERE if NEEDED, SET A TOKEN AND RETURN IT.
|
||||||
|
FOR ILLEGAL CHARACTERS SET ERROR TOKEN.
|
||||||
|
THE ILLEGAL CHAR IS THE ATTRIBUTE OF THE ERROR TOKEN
|
||||||
|
IN A CASE OF RUNTIME ERROR, THE FUNCTION MUST STORE
|
||||||
|
A NON-NEGATIVE NUMBER INTO THE GLOBAL VARIABLE scerrnum
|
||||||
|
AND RETURN AN ERROR TOKEN. THE ERROR TOKEN ATTRIBUTE MUST
|
||||||
|
BE THE STRING "RUN TIME ERROR: "
|
||||||
|
}//end while(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
DO NOT MODIFY THE CODE OF THIS FUNCTION
|
||||||
|
YOU CAN REMOVE THE COMMENTS
|
||||||
|
|
||||||
|
int get_next_state(int state, char c, int *accept)
|
||||||
|
{
|
||||||
|
int col;
|
||||||
|
int next;
|
||||||
|
col = char_class(c);
|
||||||
|
next = st_table[state][col];
|
||||||
|
#ifdef DEBUG
|
||||||
|
printf("Input symbol: %c Row: %d Column: %d Next: %d \n",c,state,col,next);
|
||||||
|
#endif
|
||||||
|
/*
|
||||||
|
The assert(int test) macro can be used to add run-time diagnostic to programs
|
||||||
|
and to "defend" from producing unexpected results.
|
||||||
|
assert() is a macro that expands to an if statement;
|
||||||
|
if test evaluates to false (zero) , assert aborts the program
|
||||||
|
(by calling abort()) and sends the following message on stderr:
|
||||||
|
|
||||||
|
Assertion failed: test, file filename, line linenum
|
||||||
|
|
||||||
|
The filename and linenum listed in the message are the source file name
|
||||||
|
and line number where the assert macro appears.
|
||||||
|
If you place the #define NDEBUG directive ("no debugging")
|
||||||
|
in the source code before the #include <assert.h> directive,
|
||||||
|
the effect is to comment out the assert statement.
|
||||||
|
*/
|
||||||
|
assert(next != IS);
|
||||||
|
|
||||||
|
/*
|
||||||
|
The other way to include diagnostics in a program is to use
|
||||||
|
conditional preprocessing as shown bellow. It allows the programmer
|
||||||
|
to send more details describing the run-time problem.
|
||||||
|
Once the program is tested thoroughly #define DEBUG is commented out
|
||||||
|
or #undef DEBUF is used - see the top of the file.
|
||||||
|
*/
|
||||||
|
#ifdef DEBUG
|
||||||
|
if(next == IS){
|
||||||
|
printf("Scanner Error: Illegal state:\n");
|
||||||
|
printf("Input symbol: %c Row: %d Column: %d\n",c,state,col);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
*accept = as_table[next];
|
||||||
|
return next;
|
||||||
|
}
|
||||||
|
|
||||||
|
int char_class (char c)
|
||||||
|
{
|
||||||
|
int val;
|
||||||
|
|
||||||
|
THIS FUNCTION RETURNS THE COLUMN NUMBER IN THE TRANSITION
|
||||||
|
TABLE st_table FOR THE INPUT CHARACTER c.
|
||||||
|
SOME COLUMNS MAY REPRESENT A CHARACTER CLASS .
|
||||||
|
FOR EXAMPLE IF COLUMN 1 REPRESENTS [A-Z]
|
||||||
|
THE FUNCTION RETURNS 1 EVERY TIME c IS ONE
|
||||||
|
OF THE LETTERS A,B,...,Z.
|
||||||
|
|
||||||
|
return val;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
HERE YOU WRITE THE DEFINITIONS FOR YOUR ACCEPTING FUNCTIONS.
|
||||||
|
************************************************************
|
||||||
|
|
||||||
|
ACCEPTING FUNCTION FOR THE arithmentic variable identifier AND keywords (VID - AVID/KW)
|
||||||
|
REPLACE XX WITH THE CORRESPONDING ACCEPTING STATE NUMBER
|
||||||
|
|
||||||
|
Token aa_funcXX(char lexeme[]){
|
||||||
|
|
||||||
|
WHEN CALLED THE FUNCTION MUST
|
||||||
|
1. CHECK IF THE LEXEME IS A KEYWORD.
|
||||||
|
IF YES, IT MUST RETURN A TOKEN WITH THE CORRESPONDING ATTRIBUTE
|
||||||
|
FOR THE KEYWORD. THE ATTRIBUTE CODE FOR THE KEYWORD
|
||||||
|
IS ITS INDEX IN THE KEYWORD LOOKUP TABLE (kw_table in table.h).
|
||||||
|
IF THE LEXEME IS NOT A KEYWORD, GO TO STEP 2.
|
||||||
|
|
||||||
|
2. SET a AVID TOKEN.
|
||||||
|
IF THE lexeme IS LONGER than VID_LEN (see token.h) CHARACTERS,
|
||||||
|
ONLY FIRST VID_LEN CHARACTERS ARE STORED
|
||||||
|
INTO THE VARIABLE ATTRIBUTE ARRAY vid_lex[](see token.h) .
|
||||||
|
ADD \0 AT THE END TO MAKE A C-type STRING.
|
||||||
|
return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
ACCEPTING FUNCTION FOR THE string variable identifier (VID - SVID)
|
||||||
|
REPLACE XX WITH THE CORRESPONDING ACCEPTING STATE NUMBER
|
||||||
|
|
||||||
|
Token aa_funcXX(char lexeme[]){
|
||||||
|
|
||||||
|
WHEN CALLED THE FUNCTION MUST
|
||||||
|
1. SET a SVID TOKEN.
|
||||||
|
IF THE lexeme IS LONGER than VID_LEN characters,
|
||||||
|
ONLY FIRST VID_LEN-1 CHARACTERS ARE STORED
|
||||||
|
INTO THE VARIABLE ATTRIBUTE ARRAY vid_lex[],
|
||||||
|
AND THEN THE % CHARACTER IS APPENDED TO THE NAME.
|
||||||
|
ADD \0 AT THE END TO MAKE A C-type STRING.
|
||||||
|
|
||||||
|
return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
ACCEPTING FUNCTION FOR THE floating-point literal (FPL)
|
||||||
|
|
||||||
|
Token aa_funcXX(char lexeme[]){
|
||||||
|
|
||||||
|
THE FUNCTION MUST CONVERT THE LEXEME TO A FLOATING POINT VALUE,
|
||||||
|
WHICH IS THE ATTRIBUTE FOR THE TOKEN.
|
||||||
|
THE VALUE MUST BE IN THE SAME RANGE AS the value of 4-byte float in C.
|
||||||
|
IN CASE OF ERROR (OUT OF RANGE) THE FUNCTION MUST RETURN ERROR TOKEN
|
||||||
|
THE ERROR TOKEN ATTRIBUTE IS lexeme. IF THE ERROR lexeme IS LONGER
|
||||||
|
than ERR_LEN caharacters, only the first ERR_LEN character are
|
||||||
|
stored in err_lex.
|
||||||
|
return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
ACCEPTING FUNCTION FOR THE integer literal(IL) - decimal constant (DIL) AND ZERO (0)
|
||||||
|
|
||||||
|
Token aa_funcXX(char lexeme[]){
|
||||||
|
|
||||||
|
THE FUNCTION MUST CONVERT THE LEXEME REPRESENTING A DECIMAL CONSTANT AND 0
|
||||||
|
TO A DECIMAL INTEGER VALUE, WHICH IS THE ATTRIBUTE FOR THE TOKEN.
|
||||||
|
THE VALUE MUST BE IN THE SAME RANGE AS the value of 2-byte integer in C.
|
||||||
|
IN CASE OF ERROR (OUT OF RANGE) THE FUNCTION MUST RETURN ERROR TOKEN
|
||||||
|
THE ERROR TOKEN ATTRIBUTE IS lexeme. IF THE ERROR lexeme IS LONGER
|
||||||
|
than ERR_LEN caharacters, only the first ERR_LEN character are
|
||||||
|
stored in err_lex.
|
||||||
|
return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
ACCEPTING FUNCTION FOR THE integer literal(IL) - octal constant (OIL)
|
||||||
|
|
||||||
|
Token aa_funcXX(char lexeme[]){
|
||||||
|
|
||||||
|
THE FUNCTION MUST CONVERT THE LEXEME REPRESENTING AN OCTAL CONSTANT
|
||||||
|
TO A DECIMAL INTEGER VALUE WHICH IS THE ATTRIBUTE FOR THE TOKEN.
|
||||||
|
THE VALUE MUST BE IN THE SAME RANGE AS the value of 2-byte integer in C.
|
||||||
|
THIS FUNCTION IS SIMILAR TO THE FUNCTION ABOVE AND THEY CAN BE
|
||||||
|
COMBINED INTO ONE FUNCTION
|
||||||
|
THE MAIN DIFFERENCE IE THAT THIS FUNCTION CALLS
|
||||||
|
THE FUNCTION atool(char * lexeme) WHICH CONVERTS AN ASCII STRING
|
||||||
|
REPRESENTING AN OCTAL NUMBER TO INTEGER VALUE
|
||||||
|
IN CASE OF ERROR (OUT OF RANGE) THE FUNCTION MUST RETURN ERROR TOKEN
|
||||||
|
THE ERROR TOKEN ATTRIBUTE IS lexeme. IF THE ERROR lexeme IS LONGER
|
||||||
|
than ERR_LEN caharacters, only the first ERR_LEN character are
|
||||||
|
stored in err_lex.
|
||||||
|
|
||||||
|
return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
ACCEPTING FUNCTION FOR THE ERROR TOKEN
|
||||||
|
|
||||||
|
Token aa_funcXX(char lexeme[]){
|
||||||
|
|
||||||
|
THE FUNCTION SETS THE ERROR TOKEN. lexeme[] CONTAINS THE ERROR
|
||||||
|
THE ATTRIBUTE OF THE ERROR TOKEN IS THE lexeme ITSELF
|
||||||
|
AND IT MUST BE STORED in err_lex. IF THE ERROR lexeme IS LONGER
|
||||||
|
than ERR_LEN caharacters, only the first ERR_LEN character are
|
||||||
|
stored in err_lex.
|
||||||
|
|
||||||
|
return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
CONVERSION FUNCTION
|
||||||
|
|
||||||
|
long atool(char * lexeme){
|
||||||
|
|
||||||
|
THE FUNCTION CONVERTS AN ASCII STRING
|
||||||
|
REPRESENTING AN OCTAL INTEGER CONSTANT TO INTEGER VALUE
|
||||||
|
}
|
||||||
|
|
||||||
|
HERE YOU WRITE YOUR ADDITIONAL FUNCTIONS (IF ANY).
|
||||||
|
FOR EXAMPLE
|
||||||
|
|
||||||
|
int iskeyword(char * kw_lexeme){}
|
|
@ -0,0 +1,112 @@
|
||||||
|
/* Filename: table.h
|
||||||
|
* Transition Table and function declarations necessary for the scanner implementation
|
||||||
|
* as required for CST8152 - Assignment #2.
|
||||||
|
* Version: 1.16.02
|
||||||
|
* Date: 29 September 2016
|
||||||
|
* Provided by: Svillen Ranev
|
||||||
|
* The file is incomplete. You are to complete it.
|
||||||
|
***************************************************
|
||||||
|
* REPLACE THIS HEADER WITH YOUR HEADER
|
||||||
|
***************************************************
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef TABLE_H_
|
||||||
|
#define TABLE_H_
|
||||||
|
|
||||||
|
#ifndef BUFFER_H_
|
||||||
|
#include "buffer.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef NULL
|
||||||
|
#include <_null.h> /* NULL pointer constant is defined there */
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Source end-of-file (SEOF) sentinel symbol
|
||||||
|
* '\0' or only one of the folowing constants: 255, 0xFF , EOF
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* Single-lexeme tokens processed separately one by one
|
||||||
|
* in the token-driven part of the scanner
|
||||||
|
* '=' , ' ' , '(' , ')' , '{' , '}' , == , <> , '>' , '<' ,
|
||||||
|
* space
|
||||||
|
* !<comment , ',' , '"' , ';' , '-' , '+' , '*' , '/', # ,
|
||||||
|
* .AND., .OR. , SEOF, 'wrong symbol',
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
REPLACE *ESN* WITH YOUR ERROR STATE NUMBER
|
||||||
|
#define ES *ESN* /* Error state */
|
||||||
|
#define IS -1 /* Inavalid state */
|
||||||
|
|
||||||
|
/* State transition table definition */
|
||||||
|
|
||||||
|
REPLACE *CN* WITH YOUR COLUMN NUMBER
|
||||||
|
|
||||||
|
#define TABLE_COLUMNS *CN*
|
||||||
|
/*transition table - type of states defined in separate table */
|
||||||
|
int st_table[ ][TABLE_COLUMNS] = {
|
||||||
|
/* State 0 */ {YOUR INITIALIZATION},
|
||||||
|
/* State 1 */ {YOUR INITIALIZATION},
|
||||||
|
.
|
||||||
|
. YOUR TABLE INITIALIZATION HERE
|
||||||
|
.
|
||||||
|
/* State N */ {YOUR INITIALIZATION},
|
||||||
|
|
||||||
|
/* Accepting state table definition */
|
||||||
|
REPLACE *N1*, *N2*, and *N3* WITH YOUR NUMBERS
|
||||||
|
#define ASWR *N1* /* accepting state with retract */
|
||||||
|
#define ASNR *N2* /* accepting state with no retract */
|
||||||
|
#define NOAS *N3* /* not accepting state */
|
||||||
|
|
||||||
|
int as_table[ ] = {YOUR INITIALIZATION HERE - USE ASWR, ASNR, NOAS };
|
||||||
|
|
||||||
|
/* Accepting action function declarations */
|
||||||
|
|
||||||
|
FOR EACH OF YOUR ACCEPTING STATES YOU MUST PROVIDE
|
||||||
|
ONE FUNCTION PROTOTYPE. THEY ALL RETURN Token AND TAKE
|
||||||
|
ONE ARGUMENT: A string REPRESENTING A TOKEN LEXEME.
|
||||||
|
|
||||||
|
Token aa_funcXX(char *lexeme);
|
||||||
|
|
||||||
|
Replace XX with the number of the accepting state: 02, 03 and so on.
|
||||||
|
|
||||||
|
/* defining a new type: pointer to function (of one char * argument)
|
||||||
|
returning Token
|
||||||
|
*/
|
||||||
|
|
||||||
|
typedef Token (*PTR_AAF)(char *lexeme);
|
||||||
|
|
||||||
|
|
||||||
|
/* Accepting function (action) callback table (array) definition */
|
||||||
|
/* If you do not want to use the typedef, the equvalent declaration is:
|
||||||
|
* Token (*aa_table[])(char lexeme[]) = {
|
||||||
|
*/
|
||||||
|
|
||||||
|
PTR_AAF aa_table[ ] ={
|
||||||
|
|
||||||
|
|
||||||
|
HERE YOU MUST PROVIDE AN INITIALIZATION FOR AN ARRAY OF POINTERS
|
||||||
|
TO ACCEPTING FUNCTIONS. THE ARRAY HAS THE SAME SIZE AS as_table[ ].
|
||||||
|
YOU MUST INITIALIZE THE ARRAY ELEMENTS WITH THE CORRESPONDING
|
||||||
|
ACCEPTING FUNCTIONS (FOR THE STATES MARKED AS ACCEPTING IN as_table[]).
|
||||||
|
THE REST OF THE ELEMENTS MUST BE SET TO NULL.
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Keyword lookup table (.AND. and .OR. are not keywords) */
|
||||||
|
|
||||||
|
#define KWT_SIZE 8
|
||||||
|
|
||||||
|
char * kw_table []= {
|
||||||
|
"ELSE",
|
||||||
|
"IF",
|
||||||
|
"INPUT",
|
||||||
|
"OUTPUT",
|
||||||
|
"PLATYPUS",
|
||||||
|
"REPEAT",
|
||||||
|
"THEN",
|
||||||
|
"USING"
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
|
@ -0,0 +1,74 @@
|
||||||
|
/* Filename: token.h
|
||||||
|
* Token declarations necessary for the scanner implementation
|
||||||
|
* CST8152, Assignment #2
|
||||||
|
* Version: 1.16.02
|
||||||
|
* Date: 29 September 2016
|
||||||
|
* Provided by: Svillen Ranev
|
||||||
|
* The file is complete and MUST NOT be modified.
|
||||||
|
*/
|
||||||
|
#ifndef TOKEN_H_
|
||||||
|
#define TOKEN_H_
|
||||||
|
|
||||||
|
/*#pragma warning(1:4001) *//*to enforce C89 type comments - to make //comments an warning */
|
||||||
|
|
||||||
|
/*#pragma warning(error:4001)*//* to enforce C89 comments - to make // comments an error */
|
||||||
|
|
||||||
|
/* Constants */
|
||||||
|
|
||||||
|
#define VID_LEN 8 /* variable identifier length */
|
||||||
|
#define ERR_LEN 20 /* error message length */
|
||||||
|
#define INL_LEN 5 /* maximum number of digits for IL */
|
||||||
|
|
||||||
|
/* Token codes */
|
||||||
|
|
||||||
|
#define ERR_T 0 /* Error token */
|
||||||
|
#define SEOF_T 1 /* Source end-of-file token */
|
||||||
|
#define AVID_T 2 /* Arithmetic Variable identifier token */
|
||||||
|
#define SVID_T 3 /* String Variable identifier token */
|
||||||
|
#define FPL_T 4 /* Floating point literal token */
|
||||||
|
#define INL_T 5 /* Integer literal token */
|
||||||
|
#define STR_T 6 /* String literal token */
|
||||||
|
#define SCC_OP_T 7 /* String concatenation operator token */
|
||||||
|
#define ASS_OP_T 8 /* Assignment operator token */
|
||||||
|
#define ART_OP_T 9 /* Arithmetic operator token */
|
||||||
|
#define REL_OP_T 10 /* Relational operator token */
|
||||||
|
#define LOG_OP_T 11 /* Logical operator token */
|
||||||
|
#define LPR_T 12 /* Left parenthesis token */
|
||||||
|
#define RPR_T 13 /* Right parenthesis token */
|
||||||
|
#define LBR_T 14 /* Left brace token */
|
||||||
|
#define RBR_T 15 /* Right brace token */
|
||||||
|
#define KW_T 16 /* Keyword token */
|
||||||
|
#define COM_T 17 /* Comma token */
|
||||||
|
#define EOS_T 18 /* End of statement *(semi - colon) */
|
||||||
|
|
||||||
|
|
||||||
|
/* Operators token attributes */
|
||||||
|
|
||||||
|
typedef enum ArithmeticOperators {PLUS, MINUS, MULT, DIV} Arr_Op;
|
||||||
|
typedef enum RelationalOperators {EQ, NE, GT, LT} Rel_Op;
|
||||||
|
typedef enum LogicalOperators {AND,OR} Log_Op;
|
||||||
|
|
||||||
|
|
||||||
|
/* Structure declaring the token and its attributes */
|
||||||
|
|
||||||
|
typedef union TokenAttribute{
|
||||||
|
int get_int; /* integer attributes accessor */
|
||||||
|
Arr_Op arr_op; /* arithmetic operator attribute code */
|
||||||
|
Rel_Op rel_op; /* relational operator attribute code */
|
||||||
|
Log_Op log_op; /* logical operator attribute code */
|
||||||
|
int int_value; /* integer literal attribute (value) */
|
||||||
|
int kwt_idx; /* keyword index in the keyword table */
|
||||||
|
short str_offset; /* sring literal offset from the beginning of */
|
||||||
|
/* the string literal buffer (str_LTBL->cb_head) */
|
||||||
|
float flt_value; /* floating-point literal attribute (value) */
|
||||||
|
char vid_lex[VID_LEN+1]; /* variable identifier token attribute */
|
||||||
|
char err_lex[ERR_LEN+1]; /* error token attribite */
|
||||||
|
} TA;
|
||||||
|
|
||||||
|
typedef struct Token
|
||||||
|
{
|
||||||
|
int code; /* token code */
|
||||||
|
TA attribute; /* token attribute */
|
||||||
|
} Token;
|
||||||
|
|
||||||
|
#endif
|
Loading…
Reference in New Issue