From d5b6ff2ee38191a3054797d7f23c120cded1d818 Mon Sep 17 00:00:00 2001 From: Victor Fernandes Date: Sat, 18 Mar 2017 17:23:57 -0400 Subject: [PATCH] Fix transition table(finally) --- PLATYPUS_Regex.md | 4 +- PLATYPUS_Transition_Table.xlsx | Bin 11631 -> 11793 bytes buffer.h | 2 +- scanner.c | 245 +++++++++++++-------------------- table.h | 46 ++++--- 5 files changed, 124 insertions(+), 173 deletions(-) diff --git a/PLATYPUS_Regex.md b/PLATYPUS_Regex.md index 3a82507..671203f 100644 --- a/PLATYPUS_Regex.md +++ b/PLATYPUS_Regex.md @@ -23,7 +23,7 @@ L(SVID) = AVID# ``` ## Integer Literals ``` -L(DIL) = 0 | [1-9]* +L(DIL) = 0|([1-9][0-9]*) L(NzD) = [1-9] @@ -31,7 +31,7 @@ L(D) = [0-9] L(OD) = [0-7] -L(OIL) = 0([0-7])* +L(OIL) = 0(0|[1-7][0-7]*) L(IL) = (DIL | OIL) diff --git a/PLATYPUS_Transition_Table.xlsx b/PLATYPUS_Transition_Table.xlsx index 4448f811e1d248dc4deda3bcc0a930c9d53ad267..61224f6b3c6ddfb92e19f011f6d8490f6b181b43 100644 GIT binary patch delta 4738 zcma)gXHb({)OAGZHT2#I(nWgc-B3i34vKUTq(~DH9;Jq+!AJ=agir#4^d?nQl$Jn1 ziV_eIq*p-*_4D3$zIpGxzuxof%A|n;H zaG$lJPBOh0Kqgo#)rN@`D>C7%HtTi1))f9X7sR;VPVSh|1TBLV`P#BMn#Qlu+4&?v z;5~48K5<0IbOt#Ul7x_4^3iMbZoTmih=5*rr{R7YaPV|dgYCt~I@aZ3*JsbIieHJD z;dRclTAQxl;+ck(vr;v1wJKK>DDVm!m&Ju>+Z&1!HMFHU&Y7=;N^@8Gxd`vfbJ8gK zv6P=((!Lk4>gW`Zr?IwY`&m6~_BF#bGaaRchvU)u11=9;-Mc7up|6;SG}e91rLE@^ z*0Q14*Hjf)ljHQg&`uvAJU5O%?)Nu#5(-sjsBQm%0I^RHwQ^o&O}_~H_`UwqxwMlp zk)hJ{G$>QbxOKKRxS}?XA`G;O*V~Qa^cq}?xri*h$Xd@%|AdML^|n}!=@JMOz=fGQt$- z^W?i>*Eu`u!w)MPDOTw^@zQ$jBT$~Ha4=9^CfJkNUluP{-cL@rg3AuiSbQOx;2_$v zp@x6Q9uIRi6fe!FAm^0mw_4AyJ7p zNZ2_Rx=K%YQ#WDNfwL<0i;{ON`?5lZF(9G4@Zr?P_Rv=ah{1^>a^=7uB_75`8pq)uwI~CKP%Rt z+`HSi@mW&8Dr_hK4ii7sNlV0)I_u{_KeodSBg71#m%`xtG_iG>u4a&iVi=tAlqS`Z z;G+ufdT*tx2ImuOhBMArs~nun{I*{#hTi@f<7C$;pH-Al6kM3IXmmq)RfC0TW)ijjKtgo*5@}h*ZbCef7 z74KDBs3UDBQDk;7wXp%l9KQrSLL6<6Rq#h_&MhV{aRrc3Eybmt*y#1stoszdavh^~ zN2BOskWBe_GRFCPfd(5GECGOt@2)Tb1XMh0NKV4_?2M<4f~vpFV7sE0r*y>Bd;RM)bpW~!?#Q^#|mLE*v~OphC4scEde8qIhg}dYGMO` z50TKOb{@4kX6)XVW7+IH9+q-!Hn$n8V4W$<`@CskF}+d5hQhnSwb8b+TRFkCUd{@n zj|v#KYE@$bh(aaa zy+?jO+h#(CUl9)#y5gp_>jX9?tEPSznNA4!E!W+LIJq?PgIBG(VOLIjAnW5Szc&TQ z35>`K^lS`F#9mYu4G2U71%Wt0$a~VY-w{;{n4-Qw^VvFlJZ;zfG*1J zvnKmJyRFUf9om_hY0JKfh)L*$)5r%>N5^q%?eUpSXt%f4tpmf}c@`WFs`Q|RBU`(o97b8FzJ^d9G-g#h zHSZfW=E}`{PoTj~Y+{GBmdKVO=L3i~vz<4L7ej!knee*%%9}dZ+J$T}c$Vqi^~moA z`}|y4Wn$Wu5Xbwr-A8T5vjM&B-nVW{oXPn+KPpX3{hUSe<}*}ZH#tT`0l_YL&&;p< z)@s{4+R!X0?X$VuzcDaWx>jB0SGJ<3w%59=ybybayEoyMmeGKJ=9o`0zH}t^faA$h zNCl(zb`veR!IjT6DzC{-t0k=m$SWLMMGs*6@l!Qo6^~v!v~;$V-M;NoV*O_M^`5TM zLz|X56BRbgUtjm*emiPJ+y?x)yA$bNcX@kztuh9{wqL)mJd=10kc?J#p1CP?(nr84NE z2#NNZKrg75366smrvg5ODWTwUGo%6D?Ct%eo6E0;xCuHk7E)h*M>zSC*g1^4d}-wu z(*gqB?zvxYDkjGrjJDKZ=o<)qo?kF=>~@x^vX0KiG7o($m5kyW3IYXp-t>DjuSArh zL^Z~C?dEnpuM0WYW|^W1lE#T?b@OC&OIe#71;D*xWP2P-W7xzwIA4v-QRV~uKWs&8|ipK`~{ z%O;SX$&wX_8=j7#_~N?lVjJcBN}&NCd5!do`Z-CVNUv6yY3}gByd2M6Y}86II&{?h z;fUynOrq>FNx*rX{ph{s&1Nc#R;M)GF@$IG^z(obGZkML@?!w(U<9VJQi|Ryh7~&v za$I7Q4NvW@8mfm-|CPP?R%k$Y+AT)Gk7WnvZygAE(dglD(o9zE{irYe0SauYQdlWI zz?={4%A?2kmT9VOiR5W zg5f22v6exalG()4^cp_*N%WQpmwzsozhgfn*t~xhi|j5%cb8)Qg*82-X`exV=I}E+ z5K*P^W>lVx-M5j_X18-j?&W4RQAaeSX$6#zERaifWa4WxUBsiNeMF5C$8VPO{W3CQ zRtdys>p5v8H4Taaj+u}cq<4+4SB>IXKi8HtOx-Bb-PHuXt<~9FP zZj6CZ;_2Halz+v@0*a-tlAaM)lys<8q_E}kY(G4a6Uyb^O1O3h@6p?rnbe{d(pl;3 z-|Xc~;p z{tk|h03EuNLw(yYgMBeY?7bL%Y=5Ff*)!XV2Kol!mkfU_l&^#0o#+N`L2ltXk;~lTL&H};mS1J$ z7cYq68U_)VGWc`}s<>{1s?V`!eH>-VaL{N{wZ4He28@x%P-7%8To^SBIi?z(&m;pL zTa&Ns9!t(YI2|wBu=~RFh8Ixc`>QfIU6BT;WafYR3~a6lrE%=%eSWyUwYmM{wneQ??8cFakTjja$ z>f+o&&v1_rva!kLURwF`0@g;4fQ$d(U8V3d?k^*SKJBAaa)%C7`0c(Q9@lJ7$R?f; z`;VFI2$Fk%Uy(@W!G=#ZNtt-RMANIN%(Am2O0ah&g@1Yw-1S4PF$idmm?3tCJaF1N zA%2Rcm+K@%E$PmrNwF=5%x?6AwQR5Ea13jzoKC1uCx!Z=-&hwA_EsaGoW*g2W5fft zYlk9}lx~&LR;!Nh^0cO|RKM&n6_2;xZZ;M}|BMkew^Qg>8AA_L?9!7kJbB%@vXpw= zbK&OAWQ-jgB*D*4FAvUSTMTgN%dxd9a!F&4&F>H%|3ha@;LuUI=_` zpSU%$i6KPLk5vN$%3@Y=kA2@%c0uw)GDXg|O-syH z1}udiVbttd5-(-XTHJzrDHmw9uQ-y@&-Aq!I1EYNl@tHj+X zX^TsX$Y(BO-KLvO=fUb&8xU8t3vB`HaRii#bHin;CP5^SJTj7G?3aAsDp^;j8US5kVNmIk4{Dz zB#1Uz)C@Q8d+%NE_kDNW^XIH}_Otfc``PQ1XQ$l5+tvh7Q4(*3dwUZB09M4va4HTs z%-*C^iZN7vUsd*(7PmH05zm~qprc0j2MjN7cZ{NlU2T2c&a1txop$o^#y5f|^Up^J zoBlDXTC~R&Utiy%3$XQ~5H>iKbG}U{vC4v`Ls&$5>5Z`c!XfmbN7dE$z)YBXg36g9 zmbCP#diqp+cFo0x&s#qWzZx%@<+Y)N>o>@tESMBF8?Z^GJZup>%S^Bv+lyPWOoOeW ziszakm_V1crU7nC4bzf)#V=T);?Bl{Drpx$l%DdKw0( z3$lCnSkbb9CDYN+zIKJM%%DLMZ^Q74G)jZ2)g!S=u=aJ&JJUD0zoznsFol%3r^YEq zU4j`h3zwCoeqNFk0Rk(_Y<%$((iBG#4tLv$h#bTXjkyq8=zy4vi?odn{seR#nL@?H zv$9awvcO}jM`AAZkG`@GzYO9MmwrY1Kn8YBp=PM^Y%6AngKFt02OU4f8_IGi2|yMQ zGa?1W=nGP=oFOIRdeGMZBlnp0NU};cf z2nri?kh3A=kf69^s=i5OX$E?tk0O<>ne=F@k`>VMk_YYTs7oD_K4AF7;L4|}^@`_w zr{UnKfQZv33C{w9kjS zV#kM-{L9nJgv4dzSAGSoyWVi zm2>3)SeE80>#W?@*|8`O@QE#t6qRkNxcnetkdZqV zjwZ!YZs;ghsm|ue7~hGb8?D}9c>rxt%y$X0?-JttT~z4`zFc>`#2%fyUJFJ#s_bjO zkv%Ti|NSjq=kDRwR*&qfmlY=wYy)n04Nd2p$rW3CWGfVgZ2SUeX@Bn^GFCbYqwwrn z;mX}A3B4VIQ0nF$7y6JQ>9;}1eV1+V6{*G>No(@^d^a4E@}PB~-%{Idaz?D9)P{Ue zWJ`4j9ZeaI-#ToCJ`4}Kk}@2;B>n5q(_)i#v-l|i0Ej98zzRUpiL=AA?XXWb6&XVF z%FoCL@Z?TFJ>976mp*MKpAXmQg$m>x$!=dfZ4I-{(At*!>Afa^a2OO1)-y=`aI|-V zUywVTP(J5)a=&4FJ3Za^!~^eN2bZ*)&|9s(ExJlRayc~@^8Ax0!8ddZ-v__iT7DDw zdl~-OI~hU!yv;L|q+=Pb^5$@cdv|2NZn0y5_+X7I&=_z(39oSF8DV1{(i*?RuqcOYc(Rr@H7js$} zWtC=c9Btb6+oB7r29e`Xl+&B{^Q)Sr7|+$q1LwS3h(noxPrHgf_X`T%8{)V^cJ21Q zdw+7-Ql&rbI}BK@uk$C@e@5}`E+(ry$3Y1nGcXe^TAlw$b*LyVV7g-sUGZ$lOLyf; z{(ZtZHKaeMoZ-q7UOq*E4e%RkZhSJbE*EqhRzE*{p##ujQ5O}MBW&emZB-Xk=5Khm zmey9ah!=;qyVF+izRXEZz&BWxfYFgh-az6hri`gItn)x^4KBJ8-t3c_jn z{q*OxYi?<5E&XMb_4{s?#-%9Dl&|BTH=UP`+mL3rwgg{XJ@j*zL$PR$WMs8#>>?e} zVen-gS6YC9w~j8^vh74{-(6RDyL$UcIM|{vZor};AEx%{q@!Au`!FhI-eeD_SZG0` zHK%8P7)cD_EL~M2CeMOmIuUZTem?o}#LAo-tL*^tXsA>i>C^mUy{E~-0>PIO&uc5J zebY-l5pRs+?(=p)h+iB`3F`GCA7l!aEvs>nvqLfPZUi&!N8kJiVtdYwWi@VcGN@EH z!pt^*>-&ys9YZ2Sb695S&#ftZ=96Vg2+4{YGEWH>snylWF#CR<7TmLFj$*L4pjht= z>Wj!t-3S^ulA7Bb(-@6k4$toeu)exXl2%JMZE$2&aCa%X@z#~mEJ>Cc@-qMOUc9sL zGka%vV90m9HP3G8NzyEK(WsdMq6(%yQ)R60wxErJP~w8%2XD?M1E&_KmA(_<YKD@VmFNtzZPb~Gj(9}V-3!QUP5=U#-L%>NQq~ciOkIs z1}cMbGbnRu_Zol40zZU5Bi5SSM4SEg6E0xIHS7P}!F`LzJ_JrOEwA9L%syNi5%htA zqgVQSWJ<4Y9et&O;BJ71BMkWNn*LC*0COaRfkrG-^&ON`Ro^jyAW;+J#PK};xb!jg zz!`fp=FWaM?6DGR1jep}>WA4Xp@w0KN~j9(ehe9h-pndEv^Af|gzX?LrKEkSIzxMK z!OdvQfPT`f7ybftJy+%j_?APRLY_+pEJMkq$HS^DQ+Bp!FVYX8!-NfDOh8*Eq}=!c zhUA*BgK2Wdzk|hd$JfAIx#Lre<3&oSQ5IPoSTk2e`-&Eh>>FnHH`wM$k}tnmfh=v5d0??cOAUFJinu3E_^eN8x@N@tjJf01_&SDENgG z%lrFYvKtELlrTcCQ@~}>B(@agpNdIrK_o91#0NB8MCXZ9Pf1nhgZS?AMEf~QOioWx zynZK|6qX#O=$rP%^d&p#G`L0`VMOU^=J*bHn{hTlt;Ooj$y`x;t6GnG;Un2IYOoI@ z;$zOyZ8$*`RDNM?+5$oir2*a2WI~W>o89I3{3Zv}z?S0nbhsLWRPyxq*NDmZO)Bb# z4xxEpV#+JkpI!swCP}gz!zbu{vVg`0?e0`iE+RCC@7Jl~@ClYQ2)$1k14qo;Z9v_`7wm2*<~N&D$E3$oPdfcY^LxxK zy(p-K=4R&MC;G8P=Y*Oet$ zn?`|9Gk+2SO0U0U7bD3Q08i@kFlG+>t)|UQ4(?G6F)vw!W8as{jW~T;W_b=YHfk@U zg3=J7!MKkk*$v?n*L)hU#pH?&!>!`+_w$cQU1&(M3&L;3L1X(F+ScNsA(_3(ZeqeN zd^khW?6+#mUwB3}+HEMIqKFVj9A2)nw2t%*t99E#QWg>34umZGp|tPn&|U_D`V*mf zak8Y@8Q~MmK8iqN`4tSA99C7BIB$;ZCD{ZbUqzG1jnS8ZthZW%Z{!S~drKwV>G-*c zd5XJ;%x!mw?L==z?&tR(}En6|TNaekAs7-gftm8Uu3;~z>fVpW}4uAd%3*m+NHG^a-+BR|ys9X*( z@q_~^8S2@#S%g6o749g$w4K$i zwh|r+t!YYb_Ue+=PaQs9l6;Jvt7K_nP*@Lzt|$AoRt(^%UR&0W&?it(s>R!iwDe=1 zeV}+^=EyaZ`lddbdZ(i>I4m?hE;{yG`@kd(E4rO5w^;nTQFOxygxXoO!PJ){IOcez zQH}{7csb+g)73-11V{vZ>0p9Nd|CZW96cQSidGkF{B><2O<9eSTEHSdr$@H%v~R23 z;&?^9`gL~t(~I6f#cr-T6l*u~x!Y9#Y=hUNU?_E7-pZ#RtFZeJi$<(IU;qZrlxny4 zI$#m_Eo2=&7N%PRz7e~SEbz(ur~G}<3+0f{D@SOi&;-joUF|=eni3!Y yaP42sfSH>}OYxhWf5-X9utx>}2>lztLl7x01V*-qivazEkUz!w$Zm=J1^6$}*QEOZ diff --git a/buffer.h b/buffer.h index c402364..7ac4029 100755 --- a/buffer.h +++ b/buffer.h @@ -14,7 +14,7 @@ #define BUFFER_H_ #define MACOS_DEP - +#undef MACOS_DEP /*#pragma warning(1:4001) *//*to enforce C89 type comments - to make //comments an warning */ /*#pragma warning(error:4001)*//* to enforce C89 comments - to make // comments an error */ diff --git a/scanner.c b/scanner.c index ec1575c..bff8482 100755 --- a/scanner.c +++ b/scanner.c @@ -33,7 +33,7 @@ #include "table.h" #define DEBUG /* for conditional processing */ -/*#undef DEBUG*/ +#undef DEBUG @@ -86,12 +86,15 @@ Token malar_next_token(Buffer * sc_buf) /*DECLARE YOUR VARIABLES HERE IF NEEDED */ - int i; /* Counter for loop in string error case */ - static int str_offset = 0; + /* Counter for loops in string error case */ + int i; + + /*String offset for the str_LTBL*/ + static short str_offset = 0; if (sc_buf == NULL) { scerrnum = 1; - return aa_table[ES]("RUN TIME ERROR"); /* WHOOPS */ + return aa_table[ES]("RUN TIME ERROR: "); /* WHOOPS */ } while (1) { /* endless loop broken by token returns it will generate a warning */ @@ -132,8 +135,6 @@ Token malar_next_token(Buffer * sc_buf) t.code = REL_OP_T; t.attribute.rel_op = LT; /* Less-than operator */ } - b_retract(sc_buf); - /*c = b_getc(sc_buf);*/ return t; case '.': b_setmark(sc_buf, b_getcoffset(sc_buf)); /* Set mark before continuing (AND|OR case) */ @@ -162,12 +163,14 @@ Token malar_next_token(Buffer * sc_buf) continue; } else { /* Bad character, pump out an error token */ + t.code = ERR_T; b_retract(sc_buf); b_retract(sc_buf); t.attribute.err_lex[0] = c = b_getc(sc_buf); t.attribute.err_lex[1] = c = b_getc(sc_buf); t.attribute.err_lex[2] = '\0'; - b_retract(sc_buf); + /* Consume the rest of the caracters to ignore the line*/ + for (; c != '\0' && c != '\r' && c != '\n' && c != 255; c = b_getc(sc_buf)); return t; } case '=': @@ -181,197 +184,108 @@ Token malar_next_token(Buffer * sc_buf) t.code = ASS_OP_T; /* Assignment operator */ return t; case '\"': /* Don't quote me on this */ - t.code = STR_T; /* String literal */ + + /* Track the beginning of string */ b_setmark(sc_buf, b_getcoffset(sc_buf)); - lexstart = b_getcoffset(sc_buf); + lexstart = b_mark(sc_buf); lexend = lexstart; c = b_getc(sc_buf); - for (; c != '\"'; c = b_getc(sc_buf)) { - /* Step through the string literal and track progress */ - /* b_addc(str_LTBL, c); */ - if (c == '\n' || c == '\r') { + /* Step through the string literal and track progress *//* + c = b_getc(sc_buf);*/ + for (; c != '\"' || c!= 255; c = b_getc(sc_buf), ++lexend) { + if (c == '\n' || c == '\r') ++line; - } - if (c == '\0') { + else if (c == '\0') { /* Illegal string, make it an error token */ b_retract_to_mark(sc_buf); - t.code = ERR_T; /* Illegal string, make it an error token */ - for (i = 0; i < ERR_LEN; i++) { + b_retract(sc_buf); + t.code = ERR_T; + + for (i = 0; i < ERR_LEN; ++i) t.attribute.err_lex[i] = b_getc(sc_buf); - } + /* If the erroneous string is too long, * replace last three characterss with '...'' */ if ((lexend - lexstart) > ERR_LEN) { - t.attribute.err_lex[i-1] = '.'; - t.attribute.err_lex[i-2] = '.'; - t.attribute.err_lex[i-3] = '.'; + t.attribute.err_lex[i - 1] = '.'; + t.attribute.err_lex[i - 2] = '.'; + t.attribute.err_lex[i - 3] = '.'; } t.attribute.err_lex[i] = '\0'; + scerrnum = 1; return t; } - ++lexend; - ++str_offset; } /* end for loop, string finished and considered valid */ + lexend = b_getcoffset(sc_buf); b_retract_to_mark(sc_buf); + /* Copy the matched string literal to str_LTBL */ - for (; lexstart < lexend; ++lexstart){ + for (; lexstart < lexend; ++lexstart, ++str_offset) { b_addc(str_LTBL, b_getc(sc_buf)); } - t.attribute.str_offset = lexstart; + b_addc(str_LTBL, '\0'); + t.code = STR_T; + t.attribute.str_offset = str_offset; return t; default: if (isalpha(c) || isalnum(c)) { /*Set mark to beginning of lexeme*/ - b_setmark(sc_buf, b_getcoffset(sc_buf) - 1); - lexstart = 0; - lexend = 0; + b_retract(sc_buf); + b_setmark(sc_buf, b_getcoffset(sc_buf)); + lexstart = b_mark(sc_buf); + lexend = lexstart; state = 0; while (accept == NOAS) { - state = get_next_state(state, c, &accept); + state = get_next_state(state, b_getc(sc_buf), &accept); if (accept != NOAS) { break; } - c = b_getc(sc_buf); + /*c = b_getc(sc_buf);*/ } /* * Entering Accepting State */ - if (as_table[state] == ASWR) - b_retract(sc_buf); + if (as_table[state] == ASWR) { b_retract(sc_buf); } - /* Get start/end of lexeme */ - lexstart = b_mark(sc_buf); + /* Get end of lexeme */ lexend = b_getcoffset(sc_buf); - lex_buf = b_create(1, 1, 'a'); b_retract_to_mark(sc_buf); - for (; lexstart < lexend; lexstart++) { + + lex_buf = b_create(1, 1, 'a'); + + /* Copy the scanned lexeme into lexical buffer */ + for (; lexstart < lexend; ++lexstart) { b_addc(lex_buf, b_getc(sc_buf)); } b_addc(lex_buf, '\0'); - /*if ((t.attribute.kwt_idx = iskeyword(b_setmark(lex_buf, 0))) != -1) { - t.code = KW_T; - b_free(lex_buf); - return t; - }*/ if (aa_table[state] != NULL) { t = aa_table[state](b_setmark(lex_buf, 0)); } else { scerrnum = 1; - t = aa_table[ES]("RUN TIME ERROR"); + t = aa_table[ES]("RUN TIME ERROR: "); + return t; } b_free(lex_buf); } + /* Invalid character */ else { - t = aa_table[ES](" "); + t.code = ERR_T; t.attribute.err_lex[0] = c; + t.attribute.err_lex[1] = '\0'; } return t; } - /* special cases or token driven processing - WRITE YOUR CODE FOR PROCESSING THE SPECIAL CASES HERE. - COMMENTS AND STRING LITERALS ARE ALSO PROCESSED HERE. - - WHAT FOLLOWS IS A PSEUDO CODE. YOU CAN USE switch STATEMENT - INSTEAD OF if-else TO PROCESS THE SPECIAL CASES - DO NOT FORGET TO COUNT THE PROGRAM LINES - - - IF (c == SOME CHARACTER) - ... - SKIP CHARACTER (FOR EXAMPLE SPACE) - continue; - OR SET TOKEN (SET TOKEN CODE AND TOKEN ATTRIBUTE(IF AVAILABLE)) - return t; - EXAMPLE: - if (c == ' ') continue; - if (c == '{'){ t.code = RBR_T; (no attribute) return t; - if (c == '+'){ t.code = ART_OP_T; t.attribute.arr_op = PLUS return t; - ... - - IF (c == '.') TRY TO PROCESS .AND. or .OR. - IF SOMETHING ELSE FOLLOWS . OR THE LAST . IS MISSING - RETURN AN ERROR TOKEN - IF (c == '!') TRY TO PROCESS COMMENT - IF THE FOLLOWING IS NOT CHAR IS NOT < REPORT AN ERROR - ELSE IN A LOOP SKIP CHARACTERS UNTIL line terminator is found THEN continue; - ... - IF STRING (FOR EXAMPLE, "text") IS FOUND - SET MARK TO MARK THE BEGINNING OF THE STRING - IF THE STRING IS LEGAL - USING b_addc(..)COPY THE text FROM INPUT BUFFER INTO str_LTBL - ADD '\0' at the end make the string C-type string - SET STRING TOKEN - (the attribute of the string token is the offset from - the beginning of the str_LTBL char buffer to the beginning - of the string (TEXT in the example)) - - return t; - ELSE - THE STRING LITERAL IS ILLEGAL - SET ERROR TOKEN FOR ILLEGAL STRING (see assignment) - DO NOT STORE THE ILLEGAL STRINg IN THE str_LTBL - - return t; - - IF(c == ANOTHER CHARACTER) - SET TOKEN - return t; - Process state transition table - - IF (c is a digit OR c is a letter){ - - SET THE MARK AT THE BEGINING OF THE LEXEME - b_setmark(sc_buf,forward); - .... - CODE YOUR FINATE STATE MACHINE HERE (FSM or DFA) - IT IMPLEMENTS THE FOLLOWING ALGORITHM: - - FSM0. Begin with state = 0 and the input character c - FSM1. Get the next state from the transition table calling - state = get_next_state(state, c, &accept); - FSM2. Get the next character - FSM3. If the state is not accepting (accept == NOAS), go to step FSM1 - If the step is accepting, token is found, leave the machine and - call an accepting function as described below. - - - RETRACT getc_offset IF THE FINAL STATE IS A RETRACTING FINAL STATE - GET THE BEGINNING AND THE END OF THE LEXEME - lexstart = b_getmark(sc_buf); - SET lexend TO getc_offset USING AN APPROPRIATE BUFFER FUNCTION - CREATE A TEMPORRARY LEXEME BUFFER HERE; - lex_buf = b_create(...); - . RETRACT getc_offset to the MARK SET PREVIOUSLY AT THE BEGINNING OF THE LEXEME AND - . USING b_getc() COPY THE LEXEME BETWEEN lexstart AND lexend FROM THE INPUT BUFFER INTO lex_buf USING b_addc(...), - . WHEN VID (KEYWORDS INCLUDED), FPL OR IL IS RECOGNIZED - . YOU MUST CALL THE ACCEPTING FUNCTION USING THE ARRAY aa_table ,WHICH - . CONTAINS POINTERS TO FUNCTIONS. THE ARRAY INDEX OF THE FUNCTION TO BE - . CALLED IS STORED IN THE VARIABLE state. - . YOU ARE NOT ALLOWED TO CALL ANY OF THE ACCEPTING FUNCTIONS BY NAME. - . THE ARGUMENT TO THE FUNCTION IS THE STRING STORED IN lex_buf. - .... - b_free(lex_buf); - return t; - - CHECK OTHER CHARS HERE if NEEDED, SET A TOKEN AND RETURN IT. - FOR ILLEGAL CHARACTERS SET ERROR TOKEN. - THE ILLEGAL CHAR IS THE ATTRIBUTE OF THE ERROR TOKEN - IN A CASE OF RUNTIME ERROR, THE FUNCTION MUST STORE - A NON-NEGATIVE NUMBER INTO THE GLOBAL VARIABLE scerrnum - AND RETURN AN ERROR TOKEN. THE ERROR TOKEN ATTRIBUTE MUST - BE THE STRING "RUN TIME ERROR: " - */ }//end while(1) } @@ -460,7 +374,7 @@ Token aa_func02(char lexeme[]) { char* temp_str; #ifdef DEBUG - printf("Lexeme: '%s'\n size of: %ld\n", lexeme, sizeof(&lexeme)*sizeof(char)); + printf("Lexeme: '%s'\n", lexeme); #endif kw_idx = iskeyword(lexeme); @@ -469,15 +383,16 @@ Token aa_func02(char lexeme[]) { t.attribute.kwt_idx = kw_idx; return t; } + /* Not a keyword? Must be AVID*/ + t.code = AVID_T; if ((temp_str = (char*)calloc(VID_LEN + 1, sizeof(char))) == NULL) { - return aa_table[ES]("RUN TIME ERROR"); + return aa_table[ES]("RUN TIME ERROR: "); } for (i = 0; i < (VID_LEN) && i < strlen(lexeme); i++) { temp_str[i] = lexeme[i]; - }/* - temp_str[strlen(temp_str)] = '\0';*/ + } strncpy(t.attribute.vid_lex, temp_str, VID_LEN); t.attribute.vid_lex[strlen(temp_str)] = '\0'; @@ -494,7 +409,7 @@ Token aa_func02(char lexeme[]) { /* Floating point*/ break; } - t.code = AVID_T; + return t; /* @@ -521,7 +436,7 @@ Token aa_func03(char lexeme[]) { unsigned int i; char* temp_str; if ((temp_str = (char*)calloc(VID_LEN + 2, sizeof(char))) == NULL) { - return aa_table[ES]("RUN TIME ERROR"); + return aa_table[ES]("RUN TIME ERROR: "); } for (i = 0; i < (VID_LEN) && i < strlen(lexeme); i++) { @@ -555,13 +470,14 @@ Token aa_func05(char lexeme[]) { Token t; long temp_num; - temp_num = strtol(lexeme, NULL, 10); + temp_num = atol(lexeme); - if (temp_num > SHRT_MAX || temp_num < 0) { + if (temp_num > SHRT_MAX || temp_num < 0) { /* Overflow error */ t = aa_table[ES](lexeme); + return t; } t.code = INL_T; - t.attribute.int_value = temp_num; + t.attribute.int_value = (int)temp_num; return t; /* THE FUNCTION MUST CONVERT THE LEXEME REPRESENTING A DECIMAL CONSTANT @@ -583,15 +499,15 @@ Token aa_func08(char lexeme[]) { t.code = FPL_T; if (strstr(lexeme, "0.0")) { t.attribute.flt_value = 0.0f; - return t; } temp_dbl = atof(lexeme); #ifdef DEBUG printf("Lexeme: '%s' | FLT value: %f \n", lexeme, temp_dbl); #endif - if ((temp_dbl > FLT_MAX) || (temp_dbl < 0)) { + if ((temp_dbl > FLT_MAX) || (temp_dbl < 0)) { /* Overflow error */ t = aa_table[ES](lexeme); + return t; } t.attribute.flt_value = (float)temp_dbl; return t; @@ -612,7 +528,7 @@ err_lex C-type string. */ Token aa_func10(char lexeme[]) { Token t; - int new_olval; + long new_olval; if (strlen(lexeme) > INL_LEN + 1) { t = aa_table[ES](lexeme); @@ -623,10 +539,11 @@ Token aa_func10(char lexeme[]) { if (new_olval < SHRT_MIN || new_olval > SHRT_MAX) { t = aa_table[ES](lexeme); + return t; } t.code = INL_T; - t.attribute.int_value = new_olval; + t.attribute.int_value = (int)new_olval; return t; /* @@ -648,12 +565,40 @@ err_lex C-type string. /*ACCEPTING FUNCTION FOR THE ERROR TOKEN */ +Token aa_func12(char lexeme[]) { +/* + Token t; + unsigned int i; + t.code = ERR_T; + for (i = 0; i < (ERR_LEN - 1) && i < strlen(lexeme); i++) + t.attribute.err_lex[i] = lexeme[i]; + t.attribute.err_lex[i] = '\0'; + + return t;*/ + return aa_table[ESWR](lexeme); + /* + THE FUNCTION SETS THE ERROR TOKEN. lexeme[] CONTAINS THE ERROR + THE ATTRIBUTE OF THE ERROR TOKEN IS THE lexeme ITSELF + AND IT MUST BE STORED in err_lex. IF THE ERROR lexeme IS LONGER + than ERR_LEN characters, ONLY THE FIRST ERR_LEN-3 characters ARE + STORED IN err_lex. THEN THREE DOTS ... ARE ADDED TO THE END OF THE + err_lex C-type string. + */ +} + + Token aa_func13(char lexeme[]) { Token t; unsigned int i; t.code = ERR_T; for (i = 0; i < (ERR_LEN - 1) && i < strlen(lexeme); i++) t.attribute.err_lex[i] = lexeme[i]; + + if (strlen(lexeme) > ERR_LEN) { + t.attribute.err_lex[i - 1] = '.'; + t.attribute.err_lex[i - 2] = '.'; + t.attribute.err_lex[i - 3] = '.'; + } t.attribute.err_lex[i] = '\0'; return t; diff --git a/table.h b/table.h index 9ba67b2..4a9105c 100755 --- a/table.h +++ b/table.h @@ -18,6 +18,10 @@ #include "buffer.h" #endif +#ifndef TOKEN_H_ +#include "token.h" +#endif + #ifndef NULL #include <_null.h> /* NULL pointer constant is defined there */ #endif @@ -34,7 +38,8 @@ * .AND., .OR. , SEOF, 'wrong symbol', */ -#define ES 13 /* Error state */ +#define ES 12 /* Error state */ +#define ESWR 13 /* Error state (no retract) */ #define IS -1 /* Invalid state */ /* State transition table definition */ @@ -43,23 +48,23 @@ /*transition table - type of states defined in separate table */ int st_table[][TABLE_COLUMNS] = { /* INPUT COLUMNS: - COLUMN # | 0 | 1 | 2 | 3 | 4 | 5 | 6 | - |[a-zA-Z]| 0 |[1-7]|[8-9]| . | # | other| + COLUMN # | 0 | 1 | 2 | 3 | 4 | 5 | 6 | + |[a-zA-Z]| 0 |[1-7]|[8-9]| . | # | other | */ - /* State 0 */ {1, 6 , 4 , 4 , IS , IS , IS}, - /* State 1 */ {1, 1 , 1 , 1 , ES , 3 , 2}, - /* State 2 */ {IS, IS , IS, IS, IS , IS , IS}, - /* State 3 */ {IS, IS , IS, IS, IS , IS , IS}, - /* State 4 */ {ES, 4 , 4 , 4 , 7 , 5 , 5}, - /* State 5 */ {IS, IS , IS, IS, IS , IS , IS}, - /* State 6 */ {ES, 9 , 9, ES, 7 , ES , 5}, - /* State 7 */ {ES, 7 , 7, 7, 8 , 8 , 8}, - /* State 8 */ {IS, IS , IS, IS, IS , IS , IS}, - /* State 9 */ {ES, 9 , 9, ES, ES , ES , 10}, - /* State 10 */ {IS, IS , IS, IS, IS , IS , IS}, - /* State 11 */ {IS, IS , IS, IS, IS , IS , IS}, - /* State 12 */ {IS, IS , IS, IS, IS , IS , IS}, - /* State 13 */ {IS, IS , IS, IS, IS , IS , IS} + /* State 0 */ {1, 6 , 4 , 4 , ES , ES , ES}, + /* State 1 */ {1, 1 , 1 , 1 , 2 , 3 , 2 }, + /* State 2 */ {IS, IS , IS, IS, IS , IS , IS}, + /* State 3 */ {IS, IS , IS, IS, IS , IS , IS}, + /* State 4 */ {ES, 4 , 4 , 4 , 7 , 5 , 5 }, + /* State 5 */ {IS, IS , IS, IS, IS , IS , IS}, + /* State 6 */ {ES, 9 , 9 , ES, 7 , ES , 5 }, + /* State 7 */ {8 , 7 , 7 , 7, 8 , 8 , 8 }, + /* State 8 */ {IS, IS , IS, IS, IS , IS , IS}, + /* State 9 */ {ES, ES , 11, ES, ES , ES , 10}, + /* State 10 */ {IS, IS , IS, IS, IS , IS , IS}, + /* State 11 */ {ES, 11 , 11, ES, ES , ES , 10}, + /* State 12 */ {IS, IS , IS, IS, IS , IS , IS}, + /* State 13 */ {IS, IS , IS, IS, IS , IS , IS} }; /* Accepting state table definition */ @@ -80,7 +85,7 @@ int as_table[] = { /* State 8 */ ASWR, /* State 9 */ NOAS, /* State 10 */ ASWR, - /* State 11 */ ASWR, + /* State 11 */ NOAS, /* State 12 */ ASNR, /* State 13 */ ASWR @@ -93,7 +98,8 @@ Token aa_func03(char* lexeme); /* SVID */ Token aa_func05(char* lexeme); /* DIL */ Token aa_func08(char* lexeme); /* FPL */ Token aa_func10(char* lexeme); /* OIL */ -Token aa_func13(char* lexeme); /* ES */ +Token aa_func12(char* lexeme); /* ES ASNR */ +Token aa_func13(char* lexeme); /* ES ASWR */ /* defining a new type: pointer to function (of one char * argument) returning Token @@ -121,7 +127,7 @@ PTR_AAF aa_table[] = { /* State 9 */ NULL, /* State 10 */ aa_func10, /* State 11 */ NULL, - /* State 12 */ NULL, + /* State 12 */ aa_func12, /* State 13 */ aa_func13 };