From b78f4a831de7c8c4a8510aaaea3de6727790ff6d Mon Sep 17 00:00:00 2001 From: Victor Fernandes Date: Wed, 15 Mar 2017 20:57:28 -0400 Subject: [PATCH] Fix transition table. Need to fix string literal read --- PLATYPUS_Transition_Table.xlsx | Bin 11644 -> 11631 bytes buffer.h | 1 - buffer/buffer.sln | 10 + scanner.c | 640 ++++++++++++++++++--------------- table.h | 45 +-- 5 files changed, 366 insertions(+), 330 deletions(-) diff --git a/PLATYPUS_Transition_Table.xlsx b/PLATYPUS_Transition_Table.xlsx index fb1de27028813947481e569236c1382e59e45e39..4448f811e1d248dc4deda3bcc0a930c9d53ad267 100644 GIT binary patch delta 3488 zcmZ`+c{mgfx1Je;8C#NFLS-2vOIe0&5!tsH3Rxm)L6m(PBTPud$P!~6W6K%}F$!fH zW2Z3;(+{CWNS1K#@80`-_xry4-1Fx-=XuY0-}j$${x~t6>Vi30S=M6w{4ydqrO+zi0#_in=k>&O&OhRd3zq- zbW=0Vv(D|M77-n9<(`g~O-ng)n4z055>Nt~tIgEZa!HXVk_!UJGwX9}ssaNYaIJmA za7u{RQd_@-y@5^H&C*Dr35rN>lHC2+F=eM-KKBu1#7YHv)_fFD@KZ5aP z7cXcle>(z~0770@I0i7sR3O_=T(6UWK-UWvR^pg7VlejQ0dA%1d)&bpc58`Own*Kb?ef8+ zj%cTsM(gU|{FbCKt^*gsOwH5lw>N$>Y3f^JI{RV|-I~|eGcyBzKVSwmqLrM-Oc}M8 z;S6^2(Zo!6M2pX1K-3!Z6Z&{t*w3E^M>X|MSi5FpPr*q?|h$Z|Fuk}v1|2>(6oT# z^T*StBQtzkUY1fdKS>l|J16IU0AQ}_RLc!ni~C-|YX>_AbC1yPvj#8BL8r>}HQ4U#?SfEAw5onb`iR z1bA)XUA@7Qq~OV}C1Ta%L0^-_V~y&ReJ-`W{7SxKpGx!ugc=+;*xve(ysRF&6WugB zbZ`w|ByHKk``c*BbUDu9&fz!qIlmgyTddZmo3?zrXfqF?8b3vo0C7Uhv2BvUPR$L4*;$%_ z&eI0Yk$CimUh9TTFiUrQn#26w_1E!6U&-;;Mvaz36IeNOmEhq~ zRpLR+3E5v;Ax3S&tJZSv^TJTNVc@qQDd+b0tsUjREOY-{&owxLb58`g$8d^Cosh8L z{?SmzEUg{<_CrjoWC=`Wd>lE*#B`azTc(#6Q997W^G+h)pR8YB*34mCei#Eo6({wh znhTNoEx)^J^(409v9s12bgg0(x6zEL%T^qV+UatJJ_~y;g7N{Q&J*Zgn9QPcdWAs( zuqPmtlUN@W?wCGGlamfRQjBP*x)G3B?t{s)N^+CxQe%nSoRBf?&A621I{2}zy6MXtgX#ckI%<722aPmS-VJ|@V=~1eRex^;c zyRhaxuaYx4cld2nRk_q+3y*Fld&)g_@eDzSe>Gk5=~fwM%@u{mPZa^H1U}j5Py^Ow z99?Cf(`4%XPPdtRyhk{gc^tlZR6c$s7t?>HOr)HxM3v><=3w*BY+nX+q<3t4!Xz=r z7Pe$^z9&!ML03}K?H;Fk4}1Btn)@!Izt2AFgDT{Y52bm&f;;^U*|g6)dtx&uj51Ur>`bwTb$ftVnE>J4^lW6{eG#X(gk zZ~Xg&dux&|;b8QnhNh>E=ur8S(0YhiugcfB7riEpystH7egqr1BSEiDnhl0Y3&!!8 z8zc(UUL|7 z2M!h4s?#MKaeB?IFZ_!yx|5q%pBjas8)rt5M8(Z#REGUkUNYko9->y z^|nqyOKT^c87$y3(xDd(JODe$*0EvKpw&k1zMwVU8`nLTCB`Au62pnAV%;jL&fHL+ z()zd;8<#{zay>iLWaIe$TOp{lU^h(>>eB=P3T3g zY7@GUx!Qyt8-8ngz$oSaeTly4!oKdz4@mXxW0C*|s_Fx(tIcJ9d_D*tp5$qy_O ziS=udrEi~#r+>4Q+*N)7NU17;oR`(A?E4feY9W_j#*a7!QAHEMPLRr$Qm_*g980^< zZ|DV|Wl7&<)te1H>n52H=&3k9IRSb03Z5FB7OfS4duJ0X$~pkhL&z0i4s(V~8e}2rH{O!x=LpZeZ;D*h6C}9u>^2S{Z&%mvAS27FR zTx~+$my$B|(R|8jlV6`<#*X&f)|Ha!vsNOJCY-` zo8>^%n;@9AIP0gCM@K++~jjV#2pT1{;Ptc$>vdQ z{6r;6pd_8^RA$pih9oYx#PcGAD~Bo?nQ#bEhOWwnyFX?7_>_k}Gy|^cT@$x;dnp`3x8Ff1+soLt7Mi@zX-q+I>tDvaz zEPCP$O?|G9Plmzuv2&{yx;f68mk=|?@9zY%awdI)AZSkw#f%@A1nkE)uz|=nBoU+2 zUzAb9BAjM95CjtFEn^d^__>kS|0RfQR(M8uw@pW}IlLYn-IcD^5!Tg9z4_sG9%hdl zsCEVae|^8pvel;XzsPxUn4co}r>D3NA>#ipKDV-ejYgD><yn(YEfC?+`&hB0%<-i?6qvW8Hm^%x zdCSTEp{8NO{dg%1vj>VVZ}-&L)xO=%V0$i#C3X!=3O;{*@IYmm{=v}#aP$)}Gsz3wd)MJ*6A{KsDPMd3QTpD-J$pX<2 zo+t~)GuaPLq(@j5f7gn2nl`R=kQr|wJBzLA0wL(5DIfojJ?!&<=g@au0tm%-jJGTaLy1p$Okh@@ zm&R~9`W#%+sKWdn)#BYxYb4ap;+5KGd6|z6dPB6j#TyC2-5C+y6MfUozT+}cTm=P- zEq^hPZfdAjsu`dk3CmU{x%h4hN&k_(5i=5PQU^PeNK2D$@%yIX22Tq);B+|Z#4~34 z<-Y4LsT4J)otMhzKY#1?hn01m9&B4lA9htkI32?k30L>=N4aJkU_CJ8P?cAF#%1{5 zYaI_X0ab?k$Gk+NJmuA*z^mGzP2KIif&Nb5-zqmI$n=5#=Tkz#uLB_&)o>diN5&ys zADEb-s~{=%k5gU`01yWJb3|o?DZs`4%O)TIPzC}3PX2QlFqP4%pbLDEaio9(24)~H MoMXEr|98WG0EBaqy#N3J delta 3501 zcmZu!S5y;2Y z%g`Z8^!Z%B?-Y|-Hh{}zT7}mp#Zr-c%yCWkOR6*RCP=h84sx*HpJfL4;4&JGH|yh& z5P&K|oBLSawJbe)M1*-Kp)=x(an4XK1Ixk^pBE>|ei=xnJNC#$%EmlhOhyGwO5{ue zy*!Bw^j=Iglz^Eln>D1!ECcWAaw}=7Gg*Tv`EA*Kza4-NQ*~1U#qKk7s*vJGBhJY+ zB>U%q%smb32yO*xMx8FeU}=h_macKb-8~PCnx46rjuk}gT}&^Q2TO9#NIDn4p~{(4 z`s%g9$_6)tJ=oK{wy*0K*X#_RC~hsW`8_j{uh=(^ zxA{^xeCr){B3+3XHX?23#KHX9xn>s}^)TG^nOL(sJp5}LqjjzJvD}mc8NimrdLiQ+ zSL@~TbaVmgbaec51aG(?j+k%j|4oG}VY6-Z7klO=A0nE*D*e(!)kN)hh2RShI6dgd zUvjDIjc1rF<8|eg4XQB^E7rDd?P1tI`a}KY{tN1jr3LS4?GFfYC}dt(d1Gh)Cq}__ z>=VreGJmu`H!;%zs&xGwgqkmUadr~;YpyUd#5Z-`P5kw%S^?i~9Io!W-^}+96JPB= zp|65W?We|O_UG1Cl$Jzpq@D;1x6l^PAN>yW@%3d-NHs-Yztb)+|25@qNB%*V|Ifg5 z0+}44g4^^C+bwnfl_EJcqjnOae%kfmK%Z%=W!n3MrfCWx(=1*zf)1S3f6P9|&;s`B<$5lJ6{R_?%1p zZoAa;C}Qw%f2?;xRfravkbp#{Q4??Xsceo`}p)foB>nb+DXAap^D<7aC;0eo%0>92r3Tv6jz1 zuH8<_S>5q{zok5kGL2~E(3x)kn+bqZ_*K8^=Gb^5dwA9qu}|~RQ?-R>`n;2#&4s?% zv9;wH9q^h#)28JWool#Ipo?*uvuCOgZKw`7NUtS7@H9~(X~M7CqEn7W3FQdn>p3@v z3z-dE2R_7|lc?+79wPHv-P42Cs>MB-arUK#FB%lo4J#^K_1}|7HcPrqR-_(Xo$Z-8 z(dux{0-iC0sA~hjLk>}_B{j=u+nVNa0yCOZFlzcKkg%$9{|1X-37uG}0?e(fo1liQ zeBqlekoDtha2xaha_&HOxp-#8Dp;M2X}^rT%8qL;Dp(>4J1jEpy75#s{58(H%su=A zJup!e9RJOG1G+FPts@97_0tgXPh)}G4fir!vK#Ja5V0HXV?f&tzhPi+cZ_05Xm^Zb z>2Hb=ZjWdch8x(}!`y~eWPiR!naQ^}4UTi}~ z_SpbNWh9O{<>*P`7{^8WgP`$7x9W`08t2C{HY5Wpr?=y(+j(8(STw4NKRJFY6@-== z*|HeziK^Y?#oDp@Fs+KxT`5&>2UQ7rFizU@dC#0bV47U(ZOE&8%XZ_PiiMBv z-!NJDwixQB*eZjWUXfqW(e|v+&{lmQhWnhZYh|bHJtf(~Tk?##Ufcb))*2Y&;Q75` zDnh(PlUpIENUJSxj)A`8?{8(vO2C~LLqc}|Y??Y>O&+!uaC-j@LFIFG!w+RK+ue1C zFwdE6Ec$$`xjuTgud*&+6?WA?1jT>vhuGREbFI@a0|)Xq^4=?JPe3}=DIskhej;*M|1H%bQ1XEp%G=3h_!`eM*P9nyYM+++ zW(9CsAu4ZxkSZI~5#gy|_Q+27RNOt3&o7AB2<`p$SURmGr2GR!M092QTAKc`AYL7n zGw>WFD)5tsE!4ReayNxS8_Lof!_DZYELStF!nnE zIsaI7b0Q|KdEqfnT3zVS>K=^M#ClKqI!kJjv2t2KNIC931TDJa87KUQI>lOX;W0QZSt(;c zb9a6LGICQooXuP`cWZ!n;Pp!1W_ut>|5K;i9+sQYt@e23OsLEzO5Y~QRUpOGsQlvb z`2H!i3c!Zvl~TUgq>J}9SU0_s%^8;Yg}qN_rsRDD2O?HSDx{c;U(JAC<6!&&EJIs# z9Jj#rO2Fu1k!z+3AAd_RE9Sh3udYA~FF$?lZHo)ztX=Dpr#szPhr@z}(aoeib0v>8 zJq|-l&M;bZWPP`~f=q`g-6PKY%rB+A%2GkFCPO^hU_G(4P^ok+ytp@-(&tp_p?H`K zdmx+?a21?_0uyI4{ap3%{_k?*i&v!raa*zmb}Ynb)aWO>0mvRwsPV?gBdKN83V|+| zHFI=cGdO9X&Wv&kNV@2V0g^5`UImh59kD=?7;<6PjKWGE$i_guMoHF|F05eK-I#y8 zs=}oyWe!l_Lik6AvS<=3qHLIRo>5TOoC|{ZIBmL^vd@A_t(Xdnm`B#D!AZDXFLTiF z=Dr!(Sj(%h%ps?;71m^o7cp2jyOYNGtK`~{^4_8uRbzI=l#=`u>7dNTiw~Mn>gcJMS<&)s>qoa^E?uDUU)igPdMfY}n=O88`#R^#)NVdRR@A-o zIe!vMPHNQ3`JBS2l|9M*ui8q=6x`&CmLC7;qi$ODq394CP!h~sl03F>$( zp4zjR_Gge%1Sc^kJbOd%jLdMQz?oc>JGEWPmN`3YGD66vCP-^>CRO22bUu_YX4<>gIGkgfyESl{-v10hT1)r zuUWAl7L@LSUd(i*LP)3tNT+v-#qXCBD%tMSJ4 z(k#$_>0~?jjNrt=!@#lPmSsm?g+Ku~!yW{G{3gSh_S=a3fN5e}>$xYlRrznJ|CE#5 zY<0}flAQcO%a5E~k?WLMBp%EWwZDxw4Rpy5>t)GkI%%YXlCrgGB4P^3&v1`WcXK)6 z@;s8u^o;eRYqdGnnc0G^&+9a7nfv`EU0{uz0Z&^?`Gq2*Vsa^}nOwmYayfyxm)JDf ze)TV+Ka-bM$Pj3!8B}Qd&GOy?OC29_}DQn-{9qF z%La3?;CFwPjQ+vMz{su7fu{Q2tl8?lzWHw|TWjPk$vmLU{uuLy=n1n*`HUB7ZH)C( z>5g|$;Cu;x*Y$qyZi4uR3EyuE9J*)1%Fso#T!B+Ag7at&1p!O>3(C>%PSMz;`D&(% z4H;ki*Soj9D(=)fV4n~y*m4cfgxRRfH2xyFfZ)fwXA*KqK6$_4`&z /* standard input / output */ @@ -24,7 +24,7 @@ #include /* integer types constants */ #include /* floating-point types constants */ -/*#define NDEBUG to suppress assert() call */ + /*#define NDEBUG to suppress assert() call */ #include /* assert() prototype */ /* project header files */ @@ -33,7 +33,7 @@ #include "table.h" #define DEBUG /* for conditional processing */ -#undef DEBUG +/*#undef DEBUG*/ @@ -49,14 +49,14 @@ static Buffer *lex_buf;/*pointer to temporary lexeme buffer*/ /* No other global variable declarations/definitiond are allowed */ -/* scanner.c static(local) function prototypes */ +/* scanner.c static(local) function prototypes */ static int char_class(char c); /* character class function */ static int get_next_state(int, char, int *); /* state machine function */ static int iskeyword(char * kw_lexeme); /*keywords lookup functuion */ static long atool(char * lexeme); /* converts octal string to decimal value */ int scanner_init(Buffer * sc_buf) { - if(b_isempty(sc_buf)) return EXIT_FAILURE;/*1*/ + if (b_isempty(sc_buf)) return EXIT_FAILURE;/*1*/ /* in case the buffer has been read previously */ b_setmark(sc_buf, 0); b_retract_to_mark(sc_buf); @@ -68,272 +68,301 @@ int scanner_init(Buffer * sc_buf) { Token malar_next_token(Buffer * sc_buf) { - Token t; /* token to return after recognition */ - unsigned char c; /* input symbol */ - int state = 0; /* initial state of the FSM */ - short lexstart; /*start offset of a lexeme in the input buffer */ - short lexend; /*end offset of a lexeme in the input buffer */ - int accept = NOAS; /* type of state - initially not accepting */ -/* -lexstart is the offset from the beginning of the char buffer of the -input buffer (sc_buf) to the first character of the current lexeme, -which is being processed by the scanner. -lexend is the offset from the beginning of the char buffer of the -input buffer (sc_buf) to the last character of the current lexeme, -which is being processed by the scanner. + Token t; /* token to return after recognition */ + unsigned char c; /* input symbol */ + int state = 0; /* initial state of the FSM */ + short lexstart; /*start offset of a lexeme in the input buffer */ + short lexend; /*end offset of a lexeme in the input buffer */ + int accept = NOAS; /* type of state - initially not accepting */ + /* + lexstart is the offset from the beginning of the char buffer of the + input buffer (sc_buf) to the first character of the current lexeme, + which is being processed by the scanner. + lexend is the offset from the beginning of the char buffer of the + input buffer (sc_buf) to the last character of the current lexeme, + which is being processed by the scanner. -*/ - - - //DECLARE YOUR VARIABLES HERE IF NEEDED - int i; /* Counter for loop in string error case */ - static int str_offset = 0; - - if (sc_buf == NULL) { - return aa_func12("RUN TIME ERROR"); /* WHOOPS */ - } - - while (1){ /* endless loop broken by token returns it will generate a warning */ - - //GET THE NEXT SYMBOL FROM THE INPUT BUFFER - + */ + + + //DECLARE YOUR VARIABLES HERE IF NEEDED + int i; /* Counter for loop in string error case */ + static int str_offset = 0; + + if (sc_buf == NULL) { + scerrnum = 1; + return aa_table[ES]("RUN TIME ERROR"); /* WHOOPS */ + } + + while (1) { /* endless loop broken by token returns it will generate a warning */ + + /* GET THE NEXT SYMBOL FROM THE INPUT BUFFER */ + + c = b_getc(sc_buf); + + switch (c) { + case 255: t.code = SEOF_T; return t; /* EOF */ + case '\0': t.code = SEOF_T; return t; /* Source EOF */ + case '\n': line++; continue; /* Ignore new line, increment line count */ + case '\r': line++; continue; /* CR, increment line count*/ + case ' ': continue; /* Ignore white space */ + case '\t': continue; /* Ignore tabs */ + case ';': t.code = EOS_T; return t; /* End of statement */ + case ',': t.code = COM_T; return t; /* Comma */ + case '{': t.code = LBR_T; return t; /* Left brace */ + case '}': t.code = RBR_T; return t; /* Right brace */ + case '(': t.code = LPR_T; return t; /* Left parenthesis */ + case ')': t.code = RPR_T; return t; /* Right parenthesis */ + case '+': t.code = ART_OP_T; t.attribute.arr_op = PLUS; return t; /* Addition operator */ + case '-': t.code = ART_OP_T; t.attribute.arr_op = MINUS; return t; /* Substraction operator */ + case '*': t.code = ART_OP_T; t.attribute.arr_op = MULT; return t; /* Multiplication operator */ + case '/': t.code = ART_OP_T; t.attribute.arr_op = DIV; return t; /* Devision operator */ + case '>': t.code = REL_OP_T; t.attribute.rel_op = GT; return t; /* Greater-than relational operator */ + case '<': c = b_getc(sc_buf); - switch (c) { - case 255: t.code = SEOF_T; return t; /* EOF */ - case '\0': t.code = SEOF_T; return t; /* Source EOF */ - case '\n': line++; continue; /* Ignore new line, increment line count */ - case '\r': line++; continue; /* CR, increment line count*/ - case ' ': continue; /* Ignore white space */ - case ';': t.code = EOS_T; return t; /* End of statement */ - case ',': t.code = COM_T; return t; /* Comma */ - case '{': t.code = RBR_T; return t; /* Right brace */ - case '}': t.code = LBR_T; return t; /* Left brace */ - case '(': t.code = RPR_T; return t; /* Right parenthesis */ - case ')': t.code = LPR_T; return t; /* Left parenthesis */ - case '+': t.code = ART_OP_T; t.attribute.arr_op = PLUS; return t; /* Addition operator */ - case '-': t.code = ART_OP_T; t.attribute.arr_op = MINUS; return t; /* Substraction operator */ - case '*': t.code = ART_OP_T; t.attribute.arr_op = MULT; return t; /* Multiplication operator */ - case '/': t.code = ART_OP_T; t.attribute.arr_op = DIV; return t; /* Devision operator */ - case '>': t.code = REL_OP_T; t.attribute.rel_op = GT; return t; /* Greater-than relational operator */ - case '<': - /* MSVC will complain about this assignment inside a conditional expression*/ - if (c = b_getc(sc_buf) == '>') { - t.code = REL_OP_T; - t.attribute.rel_op = NE; /* Negation operator */ - return t; - } - else if (c == '<') { - t.code = SCC_OP_T; /* String concatenation operator */ - } - else { - t.code = REL_OP_T; - t.attribute.rel_op = LT; /* Less-than operator */ - } + if (c == '>') { + t.code = REL_OP_T; + t.attribute.rel_op = NE; /* Negation operator */ + return t; + } + else if (c == '<') { + t.code = SCC_OP_T; /* String concatenation operator */ + } + else { + t.code = REL_OP_T; + t.attribute.rel_op = LT; /* Less-than operator */ + } + b_retract(sc_buf); + /*c = b_getc(sc_buf);*/ + return t; + case '.': + b_setmark(sc_buf, b_getcoffset(sc_buf)); /* Set mark before continuing (AND|OR case) */ + c = b_getc(sc_buf); + if (c == 'A' && b_getc(sc_buf) == 'N' && b_getc(sc_buf) == 'D' && b_getc(sc_buf) == '.') { + t.code = LOG_OP_T; + t.attribute.log_op = AND; + return t; + } + else if (c == 'O' && b_getc(sc_buf) == 'R' && b_getc(sc_buf) == '.') { + t.code = LOG_OP_T; + t.attribute.log_op = OR; + return t; + } + t.code = ERR_T; /* "That character's not supposed to be here" case */ + t.attribute.err_lex[0] = '.'; + t.attribute.err_lex[1] = '\0'; + b_retract_to_mark(sc_buf); + return t; + case '!': + c = b_getc(sc_buf); + if (c == '<') { /* It's a comment line */ + for (; c != '\0' && c != '\r' && c != '\n' && c != 255; c = b_getc(sc_buf)); /* Consume chars until line ends */ + line++; + continue; + } + else { /* Bad character, pump out an error token */ + b_retract(sc_buf); + b_retract(sc_buf); + t = aa_table[ES](" "); + t.attribute.err_lex[0] = c = b_getc(sc_buf); + t.attribute.err_lex[1] = c = b_getc(sc_buf); b_retract(sc_buf); - c = b_getc(sc_buf); return t; - case '.': - b_setmark(sc_buf, b_getcoffset(sc_buf)); /* Set mark before continuing (AND|OR case) */ - if (c == 'A' && b_getc(sc_buf) == 'N' && b_getc(sc_buf) == 'D' && b_getc(sc_buf) == '.') { - t.code = LOG_OP_T; - t.attribute.log_op = AND; - return t; - } - else if (c == 'O' && b_getc(sc_buf) == 'R' && b_getc(sc_buf) == '.') { - t.code = LOG_OP_T; - t.attribute.log_op = OR; - } - t.code = ERR_T; /* "That character's not supposed to be here" case */ - t.attribute.err_lex[0] = '.'; - t.attribute.err_lex[1] = '\0'; - b_retract_to_mark(sc_buf); + } + case '=': + c = b_getc(sc_buf); + if (c == '=') { /* Relational equals-to operator */ + t.code = REL_OP_T; + t.attribute.rel_op = EQ; return t; - case '!': - c = b_getc(sc_buf); - if (c == '<') { /* It's a comment line */ - for (; c != '\0' && c != '\r' && c != '\n' && c != 255; c = b_getc(sc_buf)); /* Consume chars until line ends */ + } + b_retract(sc_buf); + t.code = ASS_OP_T; /* Assignment operator */ + return t; + case '\"': /* Don't quote me on this */ + c = b_getc(sc_buf); + t.code = STR_T; /* String literal */ + b_setmark(sc_buf, b_getcoffset(sc_buf)); + lexstart = (short)str_offset; + lexend = lexstart; + for (; c != '\"'; c = b_getc(sc_buf)) { + b_addc(str_LTBL, c); + if (b_isfull(str_LTBL)) { + t = aa_table[ES]("\"There is always ..."); /* String too big :( */ + } + if (c == '\n' || c == '\r') { line++; - continue; } - else { /* Bad character, pump out an error token */ - t = aa_table[ES](" "); - t.attribute.err_lex[0] = c; + if (c == 255 || c == '\0') { + b_retract_to_mark(sc_buf); + t.code = ERR_T; /* Illegal string, make it an error token */ + for (i = 0; i < ERR_LEN; i++) { + t.attribute.err_lex[i] = b_getc(sc_buf); + } + t.attribute.err_lex[i] = '\0'; return t; } - case '=': - c = b_getc(sc_buf); - if (c == '=') { /* Relational equals-to operator */ - t.code = REL_OP_T; - t.attribute.rel_op = EQ; - } - b_retract(sc_buf); - t.code = ASS_OP_T; /* Assignment operator */ - return t; - case '\"': /* Don't quote me on this */ - c = b_getc(sc_buf); - b_setmark(sc_buf, b_getcoffset(sc_buf)); - lexstart = (short)str_offset; - lexend = lexstart; - for (; c != '\"'; c = b_getc(sc_buf)) { - b_addc(str_LTBL, c); - if (b_isfull(str_LTBL)) { - return aa_table[ES]("\"Imagine all the .."); /* String too big :( */ - } - if (c == '\n' || c == '\r') { - line++; - } - if (c == 255 || c == '\0') { - b_retract_to_mark(sc_buf); - for (i = 0; i < ERR_LEN; i++) { - t.attribute.err_lex[i] = b_getc(sc_buf); - } - } - lexend++; - str_offset++; - } /*end for loop, string finished*/ + lexend++; str_offset++; - b_addc(str_LTBL, '\0'); - - t.code = STR_T; - t.attribute.str_offset = lexstart; - return t; /* String literal */ - default: - if (isalnum(c) || isalpha(c)) { - lexend = 0; - state = 0; - lex_buf = b_create(1, 1, 'a'); + } /*end for loop, string finished*/ - while (accept == NOAS) { - b_addc(lex_buf, c); - state = get_next_state(state, c, &accept); + b_addc(str_LTBL, '\0'); + t.attribute.str_offset = lexstart; + + return t; + default: + if (isalpha(c) || isalnum(c)) { - if (accept != NOAS) - break; - c = b_getc(sc_buf); - lexend++; - } - /* Entering Accepting State */ - b_addc(lex_buf, '\0'); + /*Set mark to beginning of lexeme*/ + b_setmark(sc_buf, b_getcoffset(sc_buf) - 1); + lexstart = 0; + lexend = 0; + state = 0; - if (as_table[state] == ASWR) - b_retract(sc_buf); - if ((t.attribute.kwt_idx = iskeyword(b_setmark(lex_buf, 0))) != -1) { - t.code = KW_T; - b_free(lex_buf); - return t; - } + while (accept == NOAS) { + state = get_next_state(state, c, &accept); - if (aa_table[state] != NULL) { - t = aa_table[state](b_setmark(lex_buf, 0)); - } - else { - t = aa_table[ES]("RUN TIME ERROR"); - } + if (accept != NOAS) { break; } + + c = b_getc(sc_buf); + } + + /* + * Entering Accepting State + */ + + if (as_table[state] == ASWR) + b_retract(sc_buf); + + /* Get start/end of lexeme */ + lexstart = b_mark(sc_buf); + lexend = b_getcoffset(sc_buf); + lex_buf = b_create(1, 1, 'a'); + + b_retract_to_mark(sc_buf); + for (; lexstart < lexend; lexstart++) { + b_addc(lex_buf, b_getc(sc_buf)); + } + b_addc(lex_buf, '\0'); + /*if ((t.attribute.kwt_idx = iskeyword(b_setmark(lex_buf, 0))) != -1) { + t.code = KW_T; b_free(lex_buf); - } + return t; + }*/ - else { - t = aa_table[ES](" "); - t.attribute.err_lex[0] = c; + if (aa_table[state] != NULL) { + t = aa_table[state](b_setmark(lex_buf, 0)); } + else { + scerrnum = 1; + t = aa_table[ES]("RUN TIME ERROR"); + } + b_free(lex_buf); } - -///* special cases or token driven processing */ -// -//WRITE YOUR CODE FOR PROCESSING THE SPECIAL CASES HERE. -//COMMENTS AND STRING LITERALS ARE ALSO PROCESSED HERE. -// -//WHAT FOLLOWS IS A PSEUDO CODE. YOU CAN USE switch STATEMENT -//INSTEAD OF if-else TO PROCESS THE SPECIAL CASES -//DO NOT FORGET TO COUNT THE PROGRAM LINES -// -// -// IF (c == SOME CHARACTER) -// ... -// SKIP CHARACTER (FOR EXAMPLE SPACE) -// continue; -// OR SET TOKEN (SET TOKEN CODE AND TOKEN ATTRIBUTE(IF AVAILABLE)) -// return t; -// EXAMPLE: -// if (c == ' ') continue; -// if (c == '{'){ t.code = RBR_T; /*no attribute */ return t; -// if (c == '+'){ t.code = ART_OP_T; t.attribute.arr_op = PLUS */ return t; -// ... -// -// IF (c == '.') TRY TO PROCESS .AND. or .OR. -// IF SOMETHING ELSE FOLLOWS . OR THE LAST . IS MISSING -// RETURN AN ERROR TOKEN -// IF (c == '!') TRY TO PROCESS COMMENT -// IF THE FOLLOWING IS NOT CHAR IS NOT < REPORT AN ERROR -// ELSE IN A LOOP SKIP CHARACTERS UNTIL line terminator is found THEN continue; -// ... -// IF STRING (FOR EXAMPLE, "text") IS FOUND -// SET MARK TO MARK THE BEGINNING OF THE STRING -// IF THE STRING IS LEGAL -// USING b_addc(..)COPY THE text FROM INPUT BUFFER INTO str_LTBL -// ADD '\0' at the end make the string C-type string -// SET STRING TOKEN -// (the attribute of the string token is the offset from -// the beginning of the str_LTBL char buffer to the beginning -// of the string (TEXT in the example)) -// -// return t; -// ELSE -// THE STRING LITERAL IS ILLEGAL -// SET ERROR TOKEN FOR ILLEGAL STRING (see assignment) -// DO NOT STORE THE ILLEGAL STRINg IN THE str_LTBL -// -// return t; -// -// IF(c == ANOTHER CHARACTER) -// SET TOKEN -// return t; -/* Process state transition table */ - - //IF (c is a digit OR c is a letter){ - // - //SET THE MARK AT THE BEGINING OF THE LEXEME - //b_setmark(sc_buf,forward); - // .... - //CODE YOUR FINATE STATE MACHINE HERE (FSM or DFA) - //IT IMPLEMENTS THE FOLLOWING ALGORITHM: - // - //FSM0. Begin with state = 0 and the input character c - //FSM1. Get the next state from the transition table calling - // state = get_next_state(state, c, &accept); - //FSM2. Get the next character - //FSM3. If the state is not accepting (accept == NOAS), go to step FSM1 - // If the step is accepting, token is found, leave the machine and - // call an accepting function as described below. - // - // - //RETRACT getc_offset IF THE FINAL STATE IS A RETRACTING FINAL STATE - //GET THE BEGINNING AND THE END OF THE LEXEME - //lexstart = b_getmark(sc_buf); - //SET lexend TO getc_offset USING AN APPROPRIATE BUFFER FUNCTION - //CREATE A TEMPORRARY LEXEME BUFFER HERE; - //lex_buf = b_create(...); - // . RETRACT getc_offset to the MARK SET PREVIOUSLY AT THE BEGINNING OF THE LEXEME AND - // . USING b_getc() COPY THE LEXEME BETWEEN lexstart AND lexend FROM THE INPUT BUFFER INTO lex_buf USING b_addc(...), - // . WHEN VID (KEYWORDS INCLUDED), FPL OR IL IS RECOGNIZED - // . YOU MUST CALL THE ACCEPTING FUNCTION USING THE ARRAY aa_table ,WHICH - // . CONTAINS POINTERS TO FUNCTIONS. THE ARRAY INDEX OF THE FUNCTION TO BE - // . CALLED IS STORED IN THE VARIABLE state. - // . YOU ARE NOT ALLOWED TO CALL ANY OF THE ACCEPTING FUNCTIONS BY NAME. - // . THE ARGUMENT TO THE FUNCTION IS THE STRING STORED IN lex_buf. - // .... - // b_free(lex_buf); - // return t; - // - // CHECK OTHER CHARS HERE if NEEDED, SET A TOKEN AND RETURN IT. - // FOR ILLEGAL CHARACTERS SET ERROR TOKEN. - // THE ILLEGAL CHAR IS THE ATTRIBUTE OF THE ERROR TOKEN - // IN A CASE OF RUNTIME ERROR, THE FUNCTION MUST STORE - // A NON-NEGATIVE NUMBER INTO THE GLOBAL VARIABLE scerrnum - // AND RETURN AN ERROR TOKEN. THE ERROR TOKEN ATTRIBUTE MUST - // BE THE STRING "RUN TIME ERROR: " - }//end while(1) + else { + t = aa_table[ES](" "); + t.attribute.err_lex[0] = c; + } + return t; + } + + + /* special cases or token driven processing */ + // + //WRITE YOUR CODE FOR PROCESSING THE SPECIAL CASES HERE. + //COMMENTS AND STRING LITERALS ARE ALSO PROCESSED HERE. + // + //WHAT FOLLOWS IS A PSEUDO CODE. YOU CAN USE switch STATEMENT + //INSTEAD OF if-else TO PROCESS THE SPECIAL CASES + //DO NOT FORGET TO COUNT THE PROGRAM LINES + // + // + // IF (c == SOME CHARACTER) + // ... + // SKIP CHARACTER (FOR EXAMPLE SPACE) + // continue; + // OR SET TOKEN (SET TOKEN CODE AND TOKEN ATTRIBUTE(IF AVAILABLE)) + // return t; + // EXAMPLE: + // if (c == ' ') continue; + // if (c == '{'){ t.code = RBR_T; /*no attribute */ return t; + // if (c == '+'){ t.code = ART_OP_T; t.attribute.arr_op = PLUS */ return t; + // ... + // + // IF (c == '.') TRY TO PROCESS .AND. or .OR. + // IF SOMETHING ELSE FOLLOWS . OR THE LAST . IS MISSING + // RETURN AN ERROR TOKEN + // IF (c == '!') TRY TO PROCESS COMMENT + // IF THE FOLLOWING IS NOT CHAR IS NOT < REPORT AN ERROR + // ELSE IN A LOOP SKIP CHARACTERS UNTIL line terminator is found THEN continue; + // ... + // IF STRING (FOR EXAMPLE, "text") IS FOUND + // SET MARK TO MARK THE BEGINNING OF THE STRING + // IF THE STRING IS LEGAL + // USING b_addc(..)COPY THE text FROM INPUT BUFFER INTO str_LTBL + // ADD '\0' at the end make the string C-type string + // SET STRING TOKEN + // (the attribute of the string token is the offset from + // the beginning of the str_LTBL char buffer to the beginning + // of the string (TEXT in the example)) + // + // return t; + // ELSE + // THE STRING LITERAL IS ILLEGAL + // SET ERROR TOKEN FOR ILLEGAL STRING (see assignment) + // DO NOT STORE THE ILLEGAL STRINg IN THE str_LTBL + // + // return t; + // + // IF(c == ANOTHER CHARACTER) + // SET TOKEN + // return t; + /* Process state transition table */ + + //IF (c is a digit OR c is a letter){ + // + //SET THE MARK AT THE BEGINING OF THE LEXEME + //b_setmark(sc_buf,forward); + // .... + //CODE YOUR FINATE STATE MACHINE HERE (FSM or DFA) + //IT IMPLEMENTS THE FOLLOWING ALGORITHM: + // + //FSM0. Begin with state = 0 and the input character c + //FSM1. Get the next state from the transition table calling + // state = get_next_state(state, c, &accept); + //FSM2. Get the next character + //FSM3. If the state is not accepting (accept == NOAS), go to step FSM1 + // If the step is accepting, token is found, leave the machine and + // call an accepting function as described below. + // + // + //RETRACT getc_offset IF THE FINAL STATE IS A RETRACTING FINAL STATE + //GET THE BEGINNING AND THE END OF THE LEXEME + //lexstart = b_getmark(sc_buf); + //SET lexend TO getc_offset USING AN APPROPRIATE BUFFER FUNCTION + //CREATE A TEMPORRARY LEXEME BUFFER HERE; + //lex_buf = b_create(...); + // . RETRACT getc_offset to the MARK SET PREVIOUSLY AT THE BEGINNING OF THE LEXEME AND + // . USING b_getc() COPY THE LEXEME BETWEEN lexstart AND lexend FROM THE INPUT BUFFER INTO lex_buf USING b_addc(...), + // . WHEN VID (KEYWORDS INCLUDED), FPL OR IL IS RECOGNIZED + // . YOU MUST CALL THE ACCEPTING FUNCTION USING THE ARRAY aa_table ,WHICH + // . CONTAINS POINTERS TO FUNCTIONS. THE ARRAY INDEX OF THE FUNCTION TO BE + // . CALLED IS STORED IN THE VARIABLE state. + // . YOU ARE NOT ALLOWED TO CALL ANY OF THE ACCEPTING FUNCTIONS BY NAME. + // . THE ARGUMENT TO THE FUNCTION IS THE STRING STORED IN lex_buf. + // .... + // b_free(lex_buf); + // return t; + // + // CHECK OTHER CHARS HERE if NEEDED, SET A TOKEN AND RETURN IT. + // FOR ILLEGAL CHARACTERS SET ERROR TOKEN. + // THE ILLEGAL CHAR IS THE ATTRIBUTE OF THE ERROR TOKEN + // IN A CASE OF RUNTIME ERROR, THE FUNCTION MUST STORE + // A NON-NEGATIVE NUMBER INTO THE GLOBAL VARIABLE scerrnum + // AND RETURN AN ERROR TOKEN. THE ERROR TOKEN ATTRIBUTE MUST + // BE THE STRING "RUN TIME ERROR: " + }//end while(1) } @@ -347,37 +376,37 @@ int get_next_state(int state, char c, int *accept) col = char_class(c); next = st_table[state][col]; #ifdef DEBUG -printf("Input symbol: %c Row: %d Column: %d Next: %d \n",c,state,col,next); + printf("Input symbol: %c Row: %d Column: %d Next: %d \n", c, state, col, next); #endif -/* -The assert(int test) macro can be used to add run-time diagnostic to programs -and to "defend" from producing unexpected results. -assert() is a macro that expands to an if statement; -if test evaluates to false (zero) , assert aborts the program -(by calling abort()) and sends the following message on stderr: + /* + The assert(int test) macro can be used to add run-time diagnostic to programs + and to "defend" from producing unexpected results. + assert() is a macro that expands to an if statement; + if test evaluates to false (zero) , assert aborts the program + (by calling abort()) and sends the following message on stderr: -Assertion failed: test, file filename, line linenum + Assertion failed: test, file filename, line linenum -The filename and linenum listed in the message are the source file name -and line number where the assert macro appears. -If you place the #define NDEBUG directive ("no debugging") -in the source code before the #include directive, -the effect is to comment out the assert statement. -*/ - assert(next != IS); + The filename and linenum listed in the message are the source file name + and line number where the assert macro appears. + If you place the #define NDEBUG directive ("no debugging") + in the source code before the #include directive, + the effect is to comment out the assert statement. + */ + assert(next != IS); -/* -The other way to include diagnostics in a program is to use -conditional preprocessing as shown bellow. It allows the programmer -to send more details describing the run-time problem. -Once the program is tested thoroughly #define DEBUG is commented out -or #undef DEBUF is used - see the top of the file. -*/ + /* + The other way to include diagnostics in a program is to use + conditional preprocessing as shown bellow. It allows the programmer + to send more details describing the run-time problem. + Once the program is tested thoroughly #define DEBUG is commented out + or #undef DEBUF is used - see the top of the file. + */ #ifdef DEBUG - if(next == IS){ - printf("Scanner Error: Illegal state:\n"); - printf("Input symbol: %c Row: %d Column: %d\n",c,state,col); - exit(1); + if (next == IS) { + printf("Scanner Error: Illegal state:\n"); + printf("Input symbol: %c Row: %d Column: %d\n", c, state, col); + exit(1); } #endif *accept = as_table[next]; @@ -387,6 +416,7 @@ or #undef DEBUF is used - see the top of the file. int char_class(char c) { int val; + if (isalpha(c)) val = 0; else if (c == '0') @@ -415,22 +445,32 @@ REPLACE XX WITH THE CORRESPONDING ACCEPTING STATE NUMBER */ Token aa_func02(char lexeme[]) { - unsigned int kw_i; /* Variable to contain keyword table index */ + unsigned int i, kw_idx; /* Variable to contain keyword table index */ Token t; char* temp_str; - if ((kw_i = iskeyword(lexeme)) > -1) { /* Keyword check */ +#ifdef DEBUG + printf("Lexeme: '%s'\n size of: %lu\n", lexeme, sizeof(lexeme)); +#endif + + kw_idx = iskeyword(lexeme); + if (kw_idx != -1) { /* Keyword check */ t.code = KW_T; - t.attribute.kwt_idx = kw_i; + t.attribute.kwt_idx = kw_idx; return t; } /* Not a keyword? Must be AVID*/ if ((temp_str = (char*)calloc(VID_LEN + 1, sizeof(char))) == NULL) { return aa_table[ES]("RUN TIME ERROR"); } - strncpy(temp_str, lexeme, VID_LEN); + + for (i = 0; i < (VID_LEN) && i < strlen(lexeme); i++) { + temp_str[i] = lexeme[i]; + }/* + temp_str[strlen(temp_str)] = '\0';*/ strncpy(t.attribute.vid_lex, temp_str, VID_LEN); + t.attribute.vid_lex[strlen(temp_str)] = '\0'; free(temp_str); switch (lexeme[0]) { /* Read first character of lexeme for implicit type (not used yet?)*/ @@ -444,7 +484,7 @@ Token aa_func02(char lexeme[]) { /* Floating point*/ break; } - + t.code = AVID_T; return t; /* @@ -468,15 +508,21 @@ REPLACE XX WITH THE CORRESPONDING ACCEPTING STATE NUMBER */ Token aa_func03(char lexeme[]) { Token t; + unsigned int i; char* temp_str; if ((temp_str = (char*)calloc(VID_LEN + 2, sizeof(char))) == NULL) { return aa_table[ES]("RUN TIME ERROR"); } - strncpy(temp_str, lexeme, VID_LEN); - temp_str[strlen(temp_str)] = '#'; /* Add# to end of the SVID */ + for (i = 0; i < (VID_LEN) && i < strlen(lexeme); i++) { + temp_str[i] = lexeme[i]; + } + + temp_str[strlen(temp_str) - 1] = '#'; /* Add# to end of the SVID */ + temp_str[strlen(temp_str)] = '\0'; strncpy(t.attribute.vid_lex, temp_str, VID_LEN); + t.attribute.vid_lex[strlen(temp_str)] = '\0'; free(temp_str); t.code = SVID_T; @@ -506,7 +552,6 @@ Token aa_func05(char lexeme[]) { } t.code = INL_T; t.attribute.int_value = temp_num; - return t; /* THE FUNCTION MUST CONVERT THE LEXEME REPRESENTING A DECIMAL CONSTANT @@ -532,12 +577,13 @@ Token aa_func08(char lexeme[]) { } temp_dbl = atof(lexeme); - +#ifdef DEBUG + printf("Lexeme: '%s' | FLT value: %f \n", lexeme, temp_dbl); +#endif if ((temp_dbl > FLT_MAX) || (temp_dbl < 0)) { t = aa_table[ES](lexeme); } t.attribute.flt_value = (float)temp_dbl; - return t; /* THE FUNCTION MUST CONVERT THE LEXEME TO A FLOATING POINT VALUE, @@ -592,16 +638,12 @@ err_lex C-type string. /*ACCEPTING FUNCTION FOR THE ERROR TOKEN */ -Token aa_func12(char lexeme[]) { +Token aa_func13(char lexeme[]) { Token t; unsigned int i; - t.code = ERR_T; - - for (i = 0; i < (ERR_LEN - 1) && i < strlen(lexeme); i++) { + for (i = 0; i < (ERR_LEN - 1) && i < strlen(lexeme); i++) t.attribute.err_lex[i] = lexeme[i]; - } - t.attribute.err_lex[i] = '\0'; return t; @@ -641,7 +683,7 @@ int iskeyword(char * kw_lexeme) { if (kw_lexeme == NULL) return -1; for (i = 0; i < KWT_SIZE; i++) { - if (strcmp(kw_table[i], kw_lexeme) == 0) return i; + if (strcmp(kw_table[i], kw_lexeme) == 0) { return i; } } return -1; } \ No newline at end of file diff --git a/table.h b/table.h index f0c6300..11d3208 100755 --- a/table.h +++ b/table.h @@ -34,21 +34,17 @@ * .AND., .OR. , SEOF, 'wrong symbol', */ - - //REPLACE *ESN* WITH YOUR ERROR STATE NUMBER -#define ES 12 /* Error state */ +#define ES 13 /* Error state */ #define IS -1 /* Invalid state */ /* State transition table definition */ -//REPLACE *CN* WITH YOUR COLUMN NUMBER - #define TABLE_COLUMNS 7 /*transition table - type of states defined in separate table */ int st_table[][TABLE_COLUMNS] = { /* INPUT COLUMNS: - COLUMN # | 0 | 1 | 2 | 3 | 4 | 5 | 6 | - [a-zA-Z]| 0 |[1-7]|[8-9]| . | # | other + COLUMN # | 0 | 1 | 2 | 3 | 4 | 5 | 6 | + |[a-zA-Z]| 0 |[1-7]|[8-9]| . | # | other| */ /* State 0 */ {1, 6 , 4 , 4 , IS , IS , IS}, /* State 1 */ {1, 1 , 1 , 1 , ES , 3 , 2}, @@ -57,22 +53,18 @@ int st_table[][TABLE_COLUMNS] = { /* State 4 */ {ES, 4 , 4 , 4 , 7 , 5 , 5}, /* State 5 */ {IS, IS , IS, IS, IS , IS , IS}, /* State 6 */ {ES, 9 , 9, ES, 7 , ES , 5}, - /* State 7 */ {ES, 7 , 7, 7, ES , 8 , 8}, + /* State 7 */ {ES, 7 , 7, 7, 8 , 8 , 8}, /* State 8 */ {IS, IS , IS, IS, IS , IS , IS}, /* State 9 */ {ES, 9 , 9, ES, ES , ES , 10}, /* State 10 */ {IS, IS , IS, IS, IS , IS , IS}, /* State 11 */ {IS, IS , IS, IS, IS , IS , IS}, /* State 12 */ {IS, IS , IS, IS, IS , IS , IS}, /* State 13 */ {IS, IS , IS, IS, IS , IS , IS} - // - //. YOUR TABLE INITIALIZATION HERE - //. - ///* State N */ {YOUR INITIALIZATION}, + }; /* Accepting state table definition */ -//REPLACE *N1*, *N2*, and *N3* WITH YOUR NUMBERS -#define ASWR 2 /* accepting state with retract */ -#define ASNR 3 /* accepting state with no retract */ +#define ASWR 1 /* accepting state with retract */ +#define ASNR 2 /* accepting state with no retract */ #define NOAS 0 /* not accepting state */ int as_table[] = { @@ -96,19 +88,12 @@ int as_table[] = { /* Accepting action function declarations */ -//FOR EACH OF YOUR ACCEPTING STATES YOU MUST PROVIDE -//ONE FUNCTION PROTOTYPE. THEY ALL RETURN Token AND TAKE -//ONE ARGUMENT: A string REPRESENTING A TOKEN LEXEME. - -// Example: Token aa_funcXX(char *lexeme); - -Token aa_func02(char* lexeme); // VID AVID/KW -Token aa_func03(char *lexeme); // VID SVID -Token aa_func05(char *lexeme); // DIL -Token aa_func08(char *lexeme); // FPL -Token aa_func10(char *lexeme); // OIL -Token aa_func12(char *lexeme); // ES -//Replace XX with the number of the accepting state: 02, 03 and so on. +Token aa_func02(char* lexeme); /* AVID/KW */ +Token aa_func03(char* lexeme); /* SVID */ +Token aa_func05(char* lexeme); /* DIL */ +Token aa_func08(char* lexeme); /* FPL */ +Token aa_func10(char* lexeme); /* OIL */ +Token aa_func13(char* lexeme); /* ES */ /* defining a new type: pointer to function (of one char * argument) returning Token @@ -136,8 +121,8 @@ PTR_AAF aa_table[] = { /* State 9 */ NULL, /* State 10 */ aa_func10, /* State 11 */ NULL, - /* State 12 */ aa_func12, - /* State 13 */ NULL + /* State 12 */ NULL, + /* State 13 */ aa_func13 //HERE YOU MUST PROVIDE AN INITIALIZATION FOR AN ARRAY OF POINTERS //TO ACCEPTING FUNCTIONS. THE ARRAY HAS THE SAME SIZE AS as_table[ ].