From 031286afc10831f28adb75ee394b3853f1fb80e7 Mon Sep 17 00:00:00 2001 From: "Michael D. Lowis" Date: Mon, 25 Mar 2019 14:17:02 -0400 Subject: [PATCH] removed erroneously checked in files and fleshed out parser and pretty printer to dump the fully parsed AST --- a.out | Bin 24048 -> 0 bytes example.src | 20 +- example.src2 | 480 - example.src3 | 4800 - example.src4 | 48000 ----- example.src5 | 480000 --------------------------------------------- lexer.c | 136 - lexer2.c | 224 - modded.src | 48 - source/ast.c | 6 +- source/lex.c | 13 +- source/parser.c | 27 +- source/pprint.c | 107 +- source/sclpl.h | 5 +- 14 files changed, 128 insertions(+), 533738 deletions(-) delete mode 100755 a.out delete mode 100644 example.src2 delete mode 100644 example.src3 delete mode 100644 example.src4 delete mode 100644 example.src5 delete mode 100644 lexer.c delete mode 100644 lexer2.c delete mode 100644 modded.src diff --git a/a.out b/a.out deleted file mode 100755 index 5fb20dfeca1b29df6450fc55a526e9c93d8b943c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 24048 zcmb<-^>JfjWMqH=W(GS35N`t)M8p9?F?7uL?Ap!Kr%8gfy+!#z5t1cWM=X* zFfcPLg65S{sZ36g`ZXX)1_pS!2n+8GponH*VE6zn_h8}$Q1KNCsPO~iA5caWZxDu9 zZ~;xc6)OG!O}rZ_{sB$AA1cm(mM$hi#RbsBr$NOP(8T9K#SPHJmqEoH(8Sk5#RJg9 zw?M@c(8PB_#S75H_d&%Q(8P~G#c#+%!VOd=g33%ts4*}+KoW<=0Z8rzk~j}Y0E#~# zi9?MB%ltqRhgLCQaacYB)vciN7c9uYz#svQUv>rta5{pCD;{Q}um_SjCrAK_1CYd_O&73C1d=#3$%4fbki>bx zA`l`2Nt_QN1SSiR#QDJ@5TXJ}9G1sGG7U)Lf*=7X?m!YpF3&xh-*9+zvxbT(FnF{c zC}H}4!K3*I$6<#5rb%K74F6SA#1t6#&eX<)f%u@He`x^bGlBS^r20|;%>N~- zz>ooof|mkd{wEM06!b3{!2BRl1%|W6d1EYoXJGK? z<=xB%qHK-X7#RMGvWY4%eBqaOVPFXL=(SxTsledT%eoLmbw2g5{81w5(am~G1QOJ| zvHu^W>G8|AfQ)B|Jq!wE1mC0AHd9goWQ=JdRKMmwkAwf1Jvu*l9DHH#!FbH$;=d9J zk8ak5Py=~D2I4ZW!PZbxfq|h^-=o>KTtornz|N;7JRaS)QlbhBFL?j||Np|{|NsBT zSUDvX7#Km$X#hFwFuy#Q4)y4K8v8Qs|Ns9WmPg~83k(bl9-VtF{{R1<;?d2bssZM^ z{xCdnycHZF|Ns9#-Wu}%|9_A%$6I}%ERWu)p!fr|ix_r-Qldxa_0JxwJ&w0Vz*Ka% zf=t~7(zg#pcyt~ID}Q1B@BjZ!u<6ZvL3S|kcWeS#3o)`AEaTC6t@HScxvUHftq1r! zCWEBAT_SsWmU&jkk+IjrNoj?Eo!;I+m{gL8f%mWV664nH-ks#HIQeHAt^o5;L%yzffhF&$6Y}Y$KY|?^#(X-kGnpA(l4O& z2Qc03+5z=0h@SCMmze>fsXG)D4ju4NdEn9QIs>YFhDUDzqeo}x4E}8l{QC}gBwz68 zW$j~OVBp`z!o0B>OnnN9O`c&sehdXvK-AluS3DH zfMKJJQt1b<3tpQb>Ehpa!K0fsRTNaDn6|NiG9NToJpA?l|9((%exd&R|9?=-+yEu_ zmMT!#bcfCWr&L%1Uf|K~y1=9P03#^59RCGM`>roMxU&f_nVA+kF>I$dA9Y=_!}Qg*z6+V|mgw#RYT1K<>Q-1P*Q2Dz%!^?*mO>j9r$ z)){~P|KAC!O?QFn1&>}+F<}J;kPGjDEa`Mz;n7^Xf}w=hquX^ww*!X{VSRD?2U4z9=&XI?(OLT7 zxa${CM803Q-nqIOA};27>0=Gl4OG1M`{r&l$M2VB+Yg6Lq!&cB|W z-+VNmg4+fM|1!V0{{8=dkJh&(3Wm2~^@~sEFQ3kDE}ick|BKx5Xg3sQ-ao22GTa5w(E{m1bmMXJTMr0|90>W;O;E zIACJ~4?eNt1jNZh?HMR4HK#b0AuSP1mgX^JrZMECmM|0)<(Flqq%stx7M5lff#{Ob zqC8L!v^2Sdp`@rZm7%1vAeEssFEc-np)9e8!7VW}CpAT(BwwK*v8Xszp(H;$HBUiZ zHCdgD!Rhz^|8Bqj|IhjL|34_zuK)G_|NUS8|MUF*|6lR<|No5A%+?VM3=E(~+nlfe z|1V)+V3_mm|Nk=#3=C_&{r_LWz`(HW`~Uw>7#J9IfByf^!pOi7@$>(GQ0>9|_y2zv zMh1qOzyJTIFfuT_`TPI>8b$_&KY#!KKf=hs0PABgRs}IIRtPXk^RRPFU}P5niGv1s z48HyUuZHdvSpVPs%1`1Ak24M@O+Pa&O)kHZJl@&@ro z@n{H)hQMeDjE2By2#kinhzkMOI6iFL9@GQ^1puu8gK;hH-JVW85qD#c@TdA zR36k81@Zra2nGfQSThFH&H)J_k5Pm8pe7lJW`K^bLXClpj4~kFp`d|GkUDV?0h*J7 zIuIr<02=vZU|@g>GYCKpf(8o%%>Lj1A^w3W{RibgfO_mdln)Ev4^aL(sQgbTAJof* zI%Pm8*gQ76<=dbhJ_e<)LFs2u`WuvHgGQ7Xlvab%W>DGsLrQ4wNG$@Vk z4j3OcuLR>`&^Y^#7%E4!03#fX%wT|fAC@r@0_ZGI05LM4iz4_S`2okUc~w5Be_->e zpkY6d<6!fs98h`K{3$P#51Th-gYseXrL0gsY@QU<=LeYwn;+$Y%ERVGL4%GUdDwg? z%zoHBC?`}sZ2prQ%7@K+vP1dq&dyc}8t$oiCHeUZ=6c3@2D(MY3I&FGCYlf~$Py&1 z1D+EH2|!A0*t#X;F$It;41?+=CI&rNu?-W3)1baH6GJ2?tOkJ9htO(}0ahPEt1Y-b z7z5&-YPkLe=;U7!OaU4VQE$u*vk$hY1>_DirC=^doQYvJFHHR`XgLh6Ru~+h4u$c- znve)BM0kNi92Q=;k;-3CT^|nkDt2Z@4mMT>1{Qq=1_ov(_6tl549qO-wIGU-je`{=$ZEpMz`)AFnweL^ zzyexa!py;Xi;aPS3A7M|sV5oayfdr}3{1TsCL0GkM+e9-MmBzs!5}urP8J4+8_Zut zm>3v1|FbYKFnlVl$Mli>Ms~Thy6PV+`^$Dbf1vT-6J#0}V*)D!Lk3qE$TS{Ckb?`jM3@;Em<7O`60Tby86n0j z1_p)-u67VdjM18bfuVsb3}m!8qbCCcLlf6-kZBT(;v5VN&0KsS17sN0IT#o^xbA`U zs4|u^FfdHung-%%FeosLR2?uz)KZ23rV71E&$l(69zjZsy#~%D})D4w};8;QR<; zM}l+-aHg^`Ft9~~l7UAb14khn1A_ru5y;D&4j?N__`u%y4boN$Vsmi*0~uWj zGD3h;0i>oD#1`Rv3^J|}#FpUP1aeCAL>2}H1L z;M4>;qzB|C22N>^-+Mv&IUqsS2T~%yzy}Jk>nscmqM$U$AmG5R`2(aEG_Av~Jsliw zpde<~0Vxz<;BjDA6K7>$-~}tt04d;I!OFnEt_w1RfytDSfq`8QWQ5o{P+p(Kk-)^j zAi=xTBz_&lZeS1wl`29Yf`Od_Y@7}wg99adH`Ze zKZ*na#w& zAguwawd@)FIT#qEQ$gv<0m427s@fbOY*2JFyEC?ctpu4S?E*@@9$*n^ACT`n!E71O zViJyaPzICnU}0e3=m0YVKslik%=BetVBnMj#V=nE3j@PV4h9BU29|Ri3=A^s85tNj zr9mPxJ|IsZY}pJ93?R(Tz`;=navdmaxV%Ar0JA^@ z0|y7Foy5U031k|iS;Y-%O)zRCfQDIuIT#oiwZKeJslliNW~PHg^*~HE(6SW}23c~A zfq{XC`vyA$1ET?`q~vA;sWSvIdB1@~<3B)~ztB+>2F5vjMj$oLFh7Im!a!}!IiOiX zeoz~Q(HZ1qP@9$C39J_6ZXX5)2L8(+Go3-)8K9}OZjfx4Jp%&+Xl7DyBh)TckX8l; zhIxz(3__Uys4UKzHn;c~4Qc%1xf}6lhOyJfp6QuRa1c@~! zNF$hu3ET)~hBShq5zXSt#lXPS3aSHG(m~8NP~(qf4v5(fX6k~N9bg65K}&YKz@iU8 z%x%pCAQ`z|4st(dA%fK1g&2h{*&=1FUge3=9IyoS=RjTLdQq z12gv~ke}EZI2ah1c`k!HOsrox85kz8=$!^t+j3kC3@irV_+xto>L-|jB8G#l5yZ9y z&6NtU<%87NO$4nYWith_9ST76xNH|dY8*jp-4)oxLE_G!jHSX>!NI`5;tDcTgRKL^ z_FB#WYULaSsrTE(zR#yLJr3=CXpDGZEYc{@f12Ck&iGzLZ^ zki=$21_o|(XaWdgVqmaiVqoB53}rOrVGLpZ%oxhU7|8sYF@%8^)?tn{F@s%@QHUKJ^w6;7V_;y0xfT>WSb`oFEbJib zIAAfr3G+M`Brv#<-2rkgk0^t>Bm)Dl9)rCkB$m}BLAE_hNg9GvA`g;Jco9C~gGB;AikDa!7z7Xk zVE-`)u%V;`(8?Jm2GD*UM#kTKyx;*tkU|CqMm;_K;>zNb)PiFD3JbG%GZO}e;*!Lo z61`vsa0`Kx2{!z~Y{$qfpy#A)=c}SpTE;9;lEDNTdIa_Bm>8@;{Qz{c^-GJ3K%21i z)02~-rt20Y>q2&8rRSCEg9p#_GxL&jN>fr9^z`%#@{2Po^pi46ib1@b%%seGIIldp zI5(*n%+&|2ECMUZOwP|M1F401qBJkF0_3AIeXuoP;PfLq0VTeacyYZQM zC81t3?%=O$L5 znF!C6pj~kJDXGb2C7?`}U&Ig}pP5&dpPX2dnV%P*oL>rZH7Iw2O9`-P`K2&7KsDxP zGsLH5=A`E3gRKSoC^avIAwC|Q!Q)f&QsUE+OTdyK2ZH62ON-)@^K(jbA;FLgF$=6F zC$YE$v`wxg6(t1W<`shjBOa_`8g?|fG`5C)D#8K zxOhC=K}D&>r8$uBar5+XjW5V41zDStSds`1kn*C;l2k|{fd(l!IEz6^0h}h#4Thz) z_)=(6C{8R(g(i&n^5hJL^5l%fB2aAR*;AhW@zB(jnHSFxkDQ1~ z^1%t-+0YO&S)-tvYi6RLn_E(vmuj1ynwMIXnXI6j3))U;3)wNOpqmax{zeMAAY5FM zVw;{cl9`vTo10jYp`Z&Yl9ID^3yShfQj<&ai$I3Jv?d}; zDCnk@!By^09@FBk^(48rh%fG0iF;+>3|^~ zlqQlu3B4q>0Hha|wV?R}lE%Pk7UUgJYKNpvnCls!sUM#G5Q!(hv;>rTU?!&IgA-Ct zYF>It2G|d!d7w}PyEd&fhao<`EHS4P6#mKixk;IMsUSz^=BGd-B{?M?9PD|i6(DW- zX=!FA(7*wgJMh8}oZ=y24ll((T9ZoCAfg3{DW(kZrFo^rsVShOkK{tI4W9n-Nr}nX zr3J8jgQNiFd`L+S+FuOM>!5ZDXqRwNel9E_K}tbTVG<7sHk1+(k~$FXE=dHHd(ecE zlb;Bx2lA7$Qj<$S)i11yhBP$wq3sxO;Q}VWkrWS3Ux`IUiIt#ow5VjrXV(BvXWW@K zHLnb`Upp-`FEbw5(@1SB=x*-h;!@Ct?@UkxAyh-tLs2S5uq76ygUSR@^&bz?3sS(4 zSd?A{Db7>!OCcM(3sCez%mb-~ItL_BToPZDl3K=4QWl?AoC?~i9$yUFb`EMe43UBq zZzPnI#Y4T7T#}g!Zyv^j+Ca&PIXUsgp!69J+R7gfD(bTt$Zck&B;&B29()lvP{7OEXg-IziP81IEDx z@Ph>6L0(A)b&5bmKUfoH8#TG81k}(44b_1#s2d8R85u%hOi=#^A`}Z_f(FmtFrwy^FIOdX5{=>=gJAEs|UXgnNr4iI!m)D=p@)Dc57 zGcYp1&H+MJ4?61tv>E_e48&$;U}gXWP0~b7;ktLWJxEU5e!wprOhhYKIdK{P>GXpOJ zY`_R62BMi6_~7f3KztZxX5eRl&riVwL6d9@0`PN%kc1hS83Y*;K#P5l1;Oft;OBxM zOEEJD!_Ngl6&GQ^wyuFQJm_a-QW(MyDL-1iNW(M$_FN_PPnZZN%a4wv|%pebEBQuzxXHp@H zfkv5G7(RkG=_8AS#tE^BgOUd$13yCpET4fCLf1KCQx9?pHgS+$*u(=sb6KEO;862n z#fmye1A`Doyn{+vMh47ycZ8~kowEhzg3iMNGceO(I%tTB0W&?+g3lG=Wk8=$>;;R1 z4MQP7wFlJcDB@c{YrauLAe>A_(D_i9=?yg1DZn7kFadgA3q&;o189e{5SDy*9fx~f z;}8eUOA0aYG8jS^+-pNE;0H}!LfOca5)N?_9OA(^#M2o;XJujfw*-g!4o1*_R5lKnul?1)ywW&_ptW1j7cj_{{~IgD7ud@gf5@hnFD+8eibOpbQKQprvbq z4B`w2VC5Awe6*QB;VglXe$1ifutTS$!Fxs-7#M88;!wvSQGOtCCSLfVXR!Pj23iRR zvR4T;USdJ&k@Ulu`5^U7nCZD1hx!RjpnL(-ji6V7)$=m!f^N(J@2O>AVA#Th{XDNX z;PAw(2SIB@!Qloy=LwYGK(^cjxfAR8U#S(TMMiqb3~rf4#U;)eiABgm=HM2+9;j=E z*2{`_iS%>y^>l^}{2_M-9DU;b-Q0p*L*hdm zoqSwDqZRNDAE-|WKKRSQ0e0XQ=$J3)ab&21c+dVy1^4`mONw$*^U$<`y6DM>Bh3)9 zCJfoBmF1wr$iSzUAtXRf0F8E_ia`6g;NfiWiDjtLpiWqRGOAcSct8T=fFe-GtSC9R z09ApDr@Lne?8G!wsrY!5ILvQZJUY$GiHu z#K*(VfkRUOI@b;BVQrv+?|9HK4QMb6={z}9{objS<@rS^#h@9p)S@C(Nd~>*%G{E~ zBnCZDAcE-(7z;csSiqo{mtT^q=ji0ATauX0pqHLks+W;ioWY=%Qkhp=nG2yyiXbv& zsYS)00|UUypq%(32EC%xoJ5cYD61f+gh3A+_Id?5C3@-kB@B8cpkXyVaH`iU$_J-e zz0{2Ow4%h^)cA~)A_h1Qq7^d0!2s3)W2aQ+CFW)(Gk{D4narREF(;|Gm_aW&KQ}iu zuY^G_B{iuuJwCB0F)ux}7{&#UdBUWUl8RExU^1Y=VHmp@HJFo5PRL6e~%F3xj7!IGd=cQ6K&2K8{i z{r?Xt#=#=6{tt`>os$5PfYp=e-4ReX5maQu+z;y~!D!fiH}KLD@S-1(X$+iv6@JVdY zcoV}CexNl^pfmwmj0(~Z>wm*&*uF%N90-H#0?{yB25uiC?U#i1AL0E-kl`@J;I<8f z1KVfVh^`;hkzfFA41lPE`4_^2?J_~oU1<7Y{ZSa*1KUUmH2`KWl*<4*$PMH~nEzqp z4KNz&9hh>^!T>lM#Fz%P9~w>Y{w=JZy98u7NFfwMTSZVF1H(Kt{kYE8g=s(!f6zIZ zAS>YZL;K6Helw_@4ATpu(Zh2a0|Nu-a63>Q0?EVrMX>XC(bdEFF!~T^9VEC<4$_S{ zm-7R(YYI~bkwwHCO#B>ZwLg-6SU(+hzA@|^V^ID9nE}%Wi$~BwVIcp*+z;!|-vD(m z85kJA>+3-pp%|tgMuYlC$ok>^erQC2(g##4n1bnp(V$b(koCjH2Npo}e}Fj*tQ)iK z`V5*UVaj0VUQU1(oG^bvwPWbuYAE0v5z#7pPx}P5)X@M~H!e0kkUyrWZt`tCa<< zMFL5nVJGO2pxQy}hrmpjIiP+nj1886631O3 z3=LSugmB^dpTKv)nGJ^Vp-qgeq~Sl|Js7#yGp6_gG@!?{o{ ry#550Z)iaT*0>=6OfkGgGaO_eL -#include -#include -#include -#include -#include -#include - -typedef struct { - int type; - char* text; -} LexTok; - -typedef struct { - int type; - char* patt; - void (*actfn)(LexTok* tok); - regex_t regex; -} LexRule; - -enum { - T_NONE = 0, - T_REQUIRES = 256, T_PROVIDES, T_LET, T_VAR, T_FUN, T_TYPE, T_STRUCT, - T_UNION, T_RETURN, T_IF, T_ELSE, - T_STRING, T_ID, T_INT, - T_ERROR = -1 -}; - -LexRule rules[] = { - /* skip whitespace and comments */ - { .type = T_NONE, .patt = "[ \r\n\t]+" }, - { .type = T_NONE, .patt = "#.*\r?\n" }, - - /* punctuation definitions */ - { .type = '(' }, { .type = ')' }, - { .type = '[' }, { .type = ']' }, - { .type = '{' }, { .type = '}' }, - { .type = '.' }, { .type = ',' }, - { .type = '\''}, { .type = ':' }, - { .type = '&' }, { .type = '=' }, - { .type = ';' }, { .type = '*' }, - - /* keyword definitions */ - { .type = T_REQUIRES, .patt = "require" }, - { .type = T_PROVIDES, .patt = "provide" }, - { .type = T_LET, .patt = "let" }, - { .type = T_VAR, .patt = "var" }, - { .type = T_FUN, .patt = "fun" }, - { .type = T_TYPE, .patt = "type" }, - { .type = T_STRUCT, .patt = "struct" }, - { .type = T_UNION, .patt = "union" }, - { .type = T_RETURN, .patt = "return" }, - { .type = T_IF, .patt = "if" }, - { .type = T_ELSE, .patt = "else" }, - - /* value definitions */ - { .type = T_STRING, .patt = "\"([^\"]|\\\\\")*\"" }, - { .type = T_ID, .patt = "[a-zA-Z_][a-zA-Z0-9_]*" }, - { .type = T_INT, .patt = "[+-]?[0-9]+" }, -}; - -char* file_load(char* path) { - int fd = -1, nread = 0, length = 0; - struct stat sb = {0}; - char* contents = NULL; - if (((fd = open(path, O_RDONLY, 0)) >= 0) && (fstat(fd, &sb) >= 0) && (sb.st_size > 0)) { - contents = calloc(sb.st_size + 1u, 1u); - while (sb.st_size && (nread = read(fd, contents+length, sb.st_size)) > 0) - length += nread, sb.st_size -= nread; - } - if (fd > 0) close(fd); - return contents; -} - -void settok(LexTok* tok, int type, char* data, size_t len) { - tok->type = type; - tok->text = realloc(tok->text, len+1); - tok->text[0] = '\0'; - strncat(tok->text, data, len); -} - -void token(char* data, size_t nrules, LexRule* rules, LexTok* tok) { - size_t sz = 0; - regmatch_t match; - tok->type = T_ERROR; - tok->text = calloc(1,1); - for (int i = 0; i < nrules; i++) { - if (*data == rules[i].type && sz < 1) - settok(tok, rules[i].type, data, 1u); - else if (rules[i].patt && !regexec(&(rules[i].regex), data, 1, &match, 0) && sz < match.rm_eo) - settok(tok, rules[i].type, data, match.rm_eo); - } -} - -void tokenize(char* data, size_t nrules, LexRule* rules) { - LexTok tok = {0}; - while (*data && tok.type != T_ERROR) { - token(data, nrules, rules, &tok); - if (tok.text) { - data += strlen(tok.text); - printf("{%ld-%ld} (%d '%s')\n", 0, strlen(tok.text), tok.type, tok.text); - } - free(tok.text), tok.text = NULL; - } - if (tok.type == T_ERROR) { - fprintf(stderr, "Failed tokenizing the file"); - exit(1); - } -} - -int main(int argc, char** argv) { - /* initialize the lexer rule sets */ - for (int i = 0; i < sizeof(rules)/sizeof(rules[0]); i++) { - if (!rules[i].patt) continue; - char* patt = calloc(strlen(rules[i].patt) + 2, 1u); - strcat(patt, "^"); - strcat(patt, rules[i].patt); - if (regcomp(&(rules[i].regex), patt, REG_EXTENDED) != 0) - { - fprintf(stderr, "Failed to initialize the lexer"); - return 1; - } - free(patt); - } - - /* read a file into memory */ - for (int i = 1; i < argc; i++) { - char* file = file_load(argv[i]); - if (file) { - tokenize(file, sizeof(rules)/sizeof(rules[0]), rules); - } - free(file); - } - - return 0; -} diff --git a/lexer2.c b/lexer2.c deleted file mode 100644 index 9119984..0000000 --- a/lexer2.c +++ /dev/null @@ -1,224 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include - -typedef struct LexTok { - struct LexTok* next; - long type; - long offset; - char* text; - long long value; -} LexTok; - -typedef struct { - char* keyword; - int type; -} KeywordDef; - -enum { - T_NONE = 0, - T_STRING = 256, T_ID, T_INT, T_BOOL, - T_REQUIRES, T_PROVIDES, T_LET, T_VAR, T_FUN, T_TYPE, T_STRUCT, - T_UNION, T_RETURN, T_IF, T_ELSE, -}; - -static const char FirstChar[256] = { - /* Whitespace */ - [' '] = 1, ['\t'] = 1, ['\r'] = 1, ['\n'] = 1, - /* comment start */ - ['#'] = 2, - /* number or op */ - ['+'] = 3, ['-'] = 3, - /* number digits */ - ['0'] = 4, ['1'] = 4, ['2'] = 4, ['3'] = 4, ['4'] = 4, - ['5'] = 4, ['6'] = 4, ['7'] = 4, ['8'] = 4, ['9'] = 4, - /* alpha characters */ - ['A'] = 5, ['B'] = 5, ['C'] = 5, ['D'] = 5, ['E'] = 5, - ['F'] = 5, ['G'] = 5, ['H'] = 5, ['I'] = 5, ['J'] = 5, - ['K'] = 5, ['L'] = 5, ['M'] = 5, ['N'] = 5, ['O'] = 5, - ['P'] = 5, ['Q'] = 5, ['R'] = 5, ['S'] = 5, ['T'] = 5, - ['U'] = 5, ['V'] = 5, ['W'] = 5, ['X'] = 5, ['Y'] = 5, - ['Z'] = 5, ['a'] = 5, ['b'] = 5, ['c'] = 5, ['d'] = 5, - ['e'] = 5, ['f'] = 5, ['g'] = 5, ['h'] = 5, ['i'] = 5, - ['j'] = 5, ['k'] = 5, ['l'] = 5, ['m'] = 5, ['n'] = 5, - ['o'] = 5, ['p'] = 5, ['q'] = 5, ['r'] = 5, ['s'] = 5, - ['t'] = 5, ['u'] = 5, ['v'] = 5, ['w'] = 5, ['x'] = 5, - ['y'] = 5, ['z'] = 5, - /* punctuation */ - ['('] = 6, [')'] = 6, ['['] = 6, [']'] = 6, ['{'] = 6, ['}'] = 6, - ['.'] = 6, [','] = 6, [':'] = 6, ['&'] = 6, ['='] = 6, [';'] = 6, - ['*'] = 6, ['\''] = 6, - /* strings */ - ['"'] = 7 -}; - -char SPACE[256] = { - [' '] = 1, ['\t'] = 1, ['\r'] = 1, ['\n'] = 1, -}; - -char DIGIT[256] = { - ['0'] = 1, ['1'] = 1, ['2'] = 1, ['3'] = 1, ['4'] = 1, - ['5'] = 1, ['6'] = 1, ['7'] = 1, ['8'] = 1, ['9'] = 1, -}; - -char ALNUM_[256] = { - ['0'] = 1, ['1'] = 1, ['2'] = 1, ['3'] = 1, ['4'] = 1, - ['5'] = 1, ['6'] = 1, ['7'] = 1, ['8'] = 1, ['9'] = 1, - ['A'] = 1, ['B'] = 1, ['C'] = 1, ['D'] = 1, ['E'] = 1, - ['F'] = 1, ['G'] = 1, ['H'] = 1, ['I'] = 1, ['J'] = 1, - ['K'] = 1, ['L'] = 1, ['M'] = 1, ['N'] = 1, ['O'] = 1, - ['P'] = 1, ['Q'] = 1, ['R'] = 1, ['S'] = 1, ['T'] = 1, - ['U'] = 1, ['V'] = 1, ['W'] = 1, ['X'] = 1, ['Y'] = 1, - ['Z'] = 1, ['a'] = 1, ['b'] = 1, ['c'] = 1, ['d'] = 1, - ['e'] = 1, ['f'] = 1, ['g'] = 1, ['h'] = 1, ['i'] = 1, - ['j'] = 1, ['k'] = 1, ['l'] = 1, ['m'] = 1, ['n'] = 1, - ['o'] = 1, ['p'] = 1, ['q'] = 1, ['r'] = 1, ['s'] = 1, - ['t'] = 1, ['u'] = 1, ['v'] = 1, ['w'] = 1, ['x'] = 1, - ['y'] = 1, ['z'] = 1, ['_'] = 1, -}; - -#define NUM_KEYWORDS (sizeof(Keywords) / sizeof(Keywords[0])) -KeywordDef Keywords[] = { - { "else", T_ELSE }, - { "false", T_BOOL }, - { "fun", T_FUN }, - { "if", T_IF }, - { "let", T_LET }, - { "provide", T_PROVIDES }, - { "require", T_REQUIRES }, - { "return", T_RETURN }, - { "struct", T_STRUCT }, - { "true", T_BOOL }, - { "type", T_TYPE }, - { "union", T_UNION }, - { "var", T_VAR }, -}; - -char* file_load(char* path) { - int fd = -1, nread = 0, length = 0; - struct stat sb = {0}; - char* contents = NULL; - if (((fd = open(path, O_RDONLY, 0)) >= 0) && (fstat(fd, &sb) >= 0) && (sb.st_size > 0)) { - contents = calloc(sb.st_size + 1u, 1u); - while (sb.st_size && (nread = read(fd, contents+length, sb.st_size)) > 0) - length += nread, sb.st_size -= nread; - } - if (fd > 0) close(fd); - return contents; -} - -int keywcmp(const void* a, const void* b) { - return strcmp(((KeywordDef*)a)->keyword, ((KeywordDef*)b)->keyword); -} - -void tokenize(char* data) { -// LexTok *toklist = NULL; -// LexTok **lasttok = &toklist; - LexTok tok = {0}; - char *beg = data, *curr = data; - while (*curr) { - tok.type = T_NONE; - tok.offset = (beg - data); - beg = curr; - switch (FirstChar[*curr++]) { - case 1: /* skip whitespace */ - for (; SPACE[*curr]; curr++); - break; - - case 2: /* skip comments */ - for (; *curr != '\n'; curr++); - break; - - case 3: /* +/- as ops or number signs */ - tok.type = *(curr-1); - if (!DIGIT[*curr]) break; - /* fallthrough to number parsing */ - - case 4: - tok.type = T_INT; - for (; DIGIT[*curr]; curr++); - break; - - case 5: - tok.type = T_ID; - for (; ALNUM_[*curr]; curr++); - break; - - case 6: /* single char tokens */ - tok.type = *(curr-1); - break; - - case 7: /* string parsing */ - tok.type = T_STRING; - for (; *curr != '"'; curr++); - curr++; - break; - - case 0: /* error handling */ - default: - fprintf(stderr, "Failed to parse token '%c'\n", *(curr-1)); - exit(1); - } - - if (tok.type) { - size_t sz = (curr - beg); - tok.text = malloc(sz+1); - tok.text[sz] = '\0'; - strncpy(tok.text, beg, sz); - - /* perform value conversions */ - switch (tok.type) { - case T_STRING: { - size_t len = strlen(tok.text+1); - char* strtext = malloc(len); - strncpy(strtext, tok.text+1, len); - strtext[len-1] = '\0'; - free(tok.text), tok.text = strtext; - break; - } - - case T_INT: { - tok.value = strtol(tok.text, NULL, 0); - break; - } - - case T_ID: { - KeywordDef key = { .keyword = tok.text }; - KeywordDef* match = bsearch( - &key, Keywords, NUM_KEYWORDS, sizeof(KeywordDef), keywcmp); - if (match) tok.type = match->type; - break; - } - - case T_BOOL: { - tok.value = (tok.text[0] == 't'); - break; - } - } - -// *lasttok = malloc(sizeof(LexTok)); -// *(*lasttok) = tok; -// lasttok = &((*lasttok)->next); - } - } - -// while (toklist) { -// printf("{%ld-%ld} (%d '%s')\n", 0, 0, toklist->type, toklist->text); -// toklist = toklist->next; -// } -} - -int main(int argc, char** argv) { - /* read a file into memory and tokenize it */ - for (int i = 1; i < argc; i++) { - char* file = file_load(argv[i]); - if (file) tokenize(file); - free(file); - } - - return 0; -} diff --git a/modded.src b/modded.src deleted file mode 100644 index d1d8473..0000000 --- a/modded.src +++ /dev/null @@ -1,48 +0,0 @@ -require (fmt) -provide (main) - -let const_true bool = true -let const_false bool = false -let const_uint int = 123 -let const_string string = - -var var_true bool = true -var var_false bool = false -var var_uint int = 123 -var var_string string = - -type type_int = int -type type_intary = int[] -type type_intaryary = int[][] -type type_intptrary = int*[] -type type_intptr = int* -type type_intptrptr = int** -type type_struct = struct { - foo = int - bar = float -} -type type_union = union { - foo = int - bar = float -} - -fun main(args string[]) int { - let foo int = 123u - var bar int = 123 - {123} - 123 - (123) - foo() - bar(1) - baz(1,2) - if (123) {} - if 123 {} - if (123) {} else {} - if (123) {} else if (123) {} - if (123) {} else if (123) {} else {} - if 123 {} else if 123 {} else {} - fun main(args string[]) int { - 123 - } - foo.bar() -} diff --git a/source/ast.c b/source/ast.c index bc3aa53..7b4514d 100644 --- a/source/ast.c +++ b/source/ast.c @@ -92,12 +92,12 @@ AST* var_value(AST* var) { return var->value.var.value; } -bool var_const(AST* var) { +bool var_flagset(AST* var, int mask) { assert(var->nodetype == AST_VAR); - return (var->value.var.flags == SF_CONSTANT); + return ((var->value.var.flags & mask) == mask); } -AST* Func(AST* args, AST* body) +AST* Func(AST* args, AST* body, AST* type) { AST* node = ast(AST_FUNC); node->value.func.args = args; diff --git a/source/lex.c b/source/lex.c index 7a4c297..c150f94 100644 --- a/source/lex.c +++ b/source/lex.c @@ -201,7 +201,7 @@ void lex(Parser* ctx) { /* no more files left to process */ ctx->tok.type = T_END_FILE; return; - } else if (!*(ctx->file->fpos)) { + } else if (!(ctx->file->fpos) || !*(ctx->file->fpos)) { /* grab the next file to process */ LexFile* f = ctx->file; ctx->file = f->next; @@ -215,11 +215,16 @@ void lex(Parser* ctx) { } void lexprintpos(Parser* p, FILE* file, Tok* tok) { - size_t line = 1; + size_t line = 1, col = 1; char* data = p->file->fbeg; char* end = data + tok->offset; for (; *data && data < end; data++) { - if (*data == '\n') line++; + if (*data == '\n') { + line++; + col = 1; + } else { + col++; + } } - fprintf(file, "%s:%zu: ", tok->file, line); + fprintf(file, "%s:%zu:%zu:", p->file->path, line, col); } diff --git a/source/parser.c b/source/parser.c index f5e0df2..dd01fb2 100644 --- a/source/parser.c +++ b/source/parser.c @@ -107,7 +107,8 @@ static void definition_list(Parser* p) { } else { error(p, "only definitions are allowed at the top level"); } - pprint_tree(stdin, def, 0); + pprint_tree(stdout, def, 0); + puts(""); pkg_add_definition(&(p->pkg), def); } } @@ -131,24 +132,24 @@ static AST* type_definition(Parser* p) { return Var(str, NULL, type, SF_TYPEDEF); } -static AST* func_definition(Parser* p) { // TODO: Function AST nodes +static AST* func_definition(Parser* p) { expect(p, T_FUN); - char* str = strdup(expect_val(p, T_ID)->text); + char* name = strdup(expect_val(p, T_ID)->text); expect(p, '('); - if (!matches(p, ')')) { - while (true) { - expect(p, T_ID); - type_expression(p); - if (!matches(p, ')')) - expect(p, ','); - else - break; - } + AST* arglist = ExpList(); + while (!matches(p, ')')) { + char* argname = strdup(expect_val(p, T_ID)->text); + AST* type = type_expression(p); + AST* arg = Var(argname, NULL, type, SF_ARGUMENT); + explist_append(arglist, arg); + if (!matches(p, ')')) + expect(p, ','); } expect(p, ')'); AST* type = type_expression(p); AST* body = expression_block(p); - return Var(str, body, type, SF_CONSTANT); + AST* func = Func(arglist, body, type); + return Var(name, func, type, SF_CONSTANT); } static AST* expression(Parser* p) { diff --git a/source/pprint.c b/source/pprint.c index 969c2af..4630e2d 100644 --- a/source/pprint.c +++ b/source/pprint.c @@ -1,8 +1,8 @@ #include -static void print_indent(FILE* file, int depth) { - for(int i = 0; i < (2 * depth); i++) - fprintf(file, "%c", ' '); +static void indent(FILE* file, int depth) { + fprintf(file, "\n"); + if (depth) fprintf(file, "%*c", depth * 2, ' '); } static const char* token_type_to_string(int type) { @@ -110,31 +110,102 @@ static const char* tree_type_to_string(ASTType type) { static void pprint_literal(FILE* file, AST* tree, int depth) { - printf("%s:", tree_type_to_string(tree->nodetype)); + fprintf(file, "%s:", tree_type_to_string(tree->nodetype)); switch(tree->nodetype) { - case AST_STRING: printf("\"%s\"", string_value(tree)); break; - case AST_IDENT: printf("%s", ident_value(tree)); break; - case AST_CHAR: printf("%c", char_value(tree)); break; - case AST_INT: printf("%ld", integer_value(tree)); break; - case AST_FLOAT: printf("%lf", float_value(tree)); break; + case AST_STRING: fprintf(file, "\"%s\"", string_value(tree)); break; + case AST_IDENT: fprintf(file, "%s", ident_value(tree)); break; + case AST_CHAR: fprintf(file, "%c", char_value(tree)); break; + case AST_INT: fprintf(file, "%ld", integer_value(tree)); break; + case AST_FLOAT: fprintf(file, "%lf", float_value(tree)); break; case AST_BOOL: - printf("%s", bool_value(tree) ? "true" : "false"); + fprintf(file, "%s", bool_value(tree) ? "true" : "false"); break; - default: printf("???"); + default: fprintf(file, "???"); } } -void pprint_tree(FILE* file, AST* tree, int depth) -{ +static char* getvartype(AST* tree) { + if (var_flagset(tree, SF_CONSTANT)) + return "let"; + else if (var_flagset(tree, SF_TYPEDEF)) + return "typedef"; + else + return "var"; +} + +void pprint_fargs(FILE* file, AST* tree) { + size_t nargs = 0; + AST** args = explist_get(tree, &nargs); + fprintf(file, "("); + for (size_t i = 0; i < nargs; i++) { + fprintf(file, "("); + fprintf(file, "%s : type", var_name(args[i])); + fprintf(file, ") "); + } + fprintf(file, ")"); +} + +void pprint_block(FILE* file, AST* tree, int depth) { + if (!tree) return; + size_t nexprs = 0; + AST** exprs = explist_get(tree, &nexprs); + for (size_t i = 0; i < nexprs; i++) { + indent(file, depth); + pprint_tree(file, exprs[i], depth); + } +} + +void pprint_branch(FILE* file, AST* tree, int depth) { + indent(file, depth); + pprint_tree(file, tree, depth); +} + +void pprint_ifexpr(FILE* file, AST* tree, int depth) { + fprintf(file, "(if "); + pprint_tree(file, if_cond(tree), depth); + pprint_branch(file, if_then(tree), depth+1); + pprint_branch(file, if_else(tree), depth+1); + fprintf(file, ")"); +} + +void pprint_apply(FILE* file, AST* tree, int depth) { + fprintf(file, "(apply "); + pprint_tree(file, apply_func(tree), depth); + size_t nexprs = 0; + AST** exprs = explist_get(apply_args(tree), &nexprs); + for (size_t i = 0; i < nexprs; i++) { + indent(file, depth+1); + pprint_tree(file, exprs[i], depth+1); + } + fprintf(file, ")"); +} + +void pprint_tree(FILE* file, AST* tree, int depth) { if (tree == NULL) return; - print_indent(file, depth); switch (tree->nodetype) { case AST_VAR: - printf("(%s %s ", - (var_const(tree) ? "let" : "var"), - var_name(tree)); + fprintf(file, "(%s %s ", getvartype(tree), var_name(tree)); pprint_tree(file, var_value(tree), depth); - printf(")\n"); + fprintf(file, ")"); + break; + + case AST_FUNC: + pprint_fargs(file, func_args(tree)); + pprint_block(file, func_body(tree), depth+1); + break; + + case AST_EXPLIST: + fprintf(file, "(block"); + pprint_block(file, tree, depth+1); + fprintf(file, ")"); + break; + + case AST_IF: + pprint_ifexpr(file, tree, depth); + break; + + case AST_APPLY: + pprint_apply(file, tree, depth); break; default: diff --git a/source/sclpl.h b/source/sclpl.h index 6be6362..82b7d3e 100644 --- a/source/sclpl.h +++ b/source/sclpl.h @@ -74,6 +74,7 @@ bool types_equal(Type* type1, Type* type2); typedef enum { SF_TYPEDEF = (1 << 0), SF_CONSTANT = (1 << 1), + SF_ARGUMENT = (1 << 2), } SymFlags; typedef struct Sym { @@ -172,9 +173,9 @@ char* ident_value(AST* val); AST* Var(char* name, AST* value, AST* type, int flags); char* var_name(AST* var); AST* var_value(AST* var); -bool var_const(AST* var); +bool var_flagset(AST* var, int mask); -AST* Func(AST* args, AST* body); +AST* Func(AST* args, AST* body, AST* type); AST* func_args(AST* func); AST* func_body(AST* func); -- 2.54.0