From eaeaca156b0c1b8d2e514f378be81d8e88e5ef89 Mon Sep 17 00:00:00 2001 From: "Michael D. Lowis" Date: Wed, 20 Mar 2019 20:44:49 -0400 Subject: [PATCH] added api for lambda ast nodes --- a.out | Bin 0 -> 24048 bytes example.src2 | 480 + example.src3 | 4800 + example.src4 | 48000 +++++ example.src5 | 480000 ++++++++++++++++++++++++++++++++++++++++++++++ lexer.c | 136 + lexer2.c | 224 + modded.src | 48 + source/sclpl.h | 22 +- 9 files changed, 533708 insertions(+), 2 deletions(-) create mode 100755 a.out create mode 100644 example.src2 create mode 100644 example.src3 create mode 100644 example.src4 create mode 100644 example.src5 create mode 100644 lexer.c create mode 100644 lexer2.c create mode 100644 modded.src diff --git a/a.out b/a.out new file mode 100755 index 0000000000000000000000000000000000000000..5fb20dfeca1b29df6450fc55a526e9c93d8b943c GIT binary patch literal 24048 zcmb<-^>JfjWMqH=W(GS35N`t)M8p9?F?7uL?Ap!Kr%8gfy+!#z5t1cWM=X* zFfcPLg65S{sZ36g`ZXX)1_pS!2n+8GponH*VE6zn_h8}$Q1KNCsPO~iA5caWZxDu9 zZ~;xc6)OG!O}rZ_{sB$AA1cm(mM$hi#RbsBr$NOP(8T9K#SPHJmqEoH(8Sk5#RJg9 zw?M@c(8PB_#S75H_d&%Q(8P~G#c#+%!VOd=g33%ts4*}+KoW<=0Z8rzk~j}Y0E#~# zi9?MB%ltqRhgLCQaacYB)vciN7c9uYz#svQUv>rta5{pCD;{Q}um_SjCrAK_1CYd_O&73C1d=#3$%4fbki>bx zA`l`2Nt_QN1SSiR#QDJ@5TXJ}9G1sGG7U)Lf*=7X?m!YpF3&xh-*9+zvxbT(FnF{c zC}H}4!K3*I$6<#5rb%K74F6SA#1t6#&eX<)f%u@He`x^bGlBS^r20|;%>N~- zz>ooof|mkd{wEM06!b3{!2BRl1%|W6d1EYoXJGK? z<=xB%qHK-X7#RMGvWY4%eBqaOVPFXL=(SxTsledT%eoLmbw2g5{81w5(am~G1QOJ| zvHu^W>G8|AfQ)B|Jq!wE1mC0AHd9goWQ=JdRKMmwkAwf1Jvu*l9DHH#!FbH$;=d9J zk8ak5Py=~D2I4ZW!PZbxfq|h^-=o>KTtornz|N;7JRaS)QlbhBFL?j||Np|{|NsBT zSUDvX7#Km$X#hFwFuy#Q4)y4K8v8Qs|Ns9WmPg~83k(bl9-VtF{{R1<;?d2bssZM^ z{xCdnycHZF|Ns9#-Wu}%|9_A%$6I}%ERWu)p!fr|ix_r-Qldxa_0JxwJ&w0Vz*Ka% zf=t~7(zg#pcyt~ID}Q1B@BjZ!u<6ZvL3S|kcWeS#3o)`AEaTC6t@HScxvUHftq1r! zCWEBAT_SsWmU&jkk+IjrNoj?Eo!;I+m{gL8f%mWV664nH-ks#HIQeHAt^o5;L%yzffhF&$6Y}Y$KY|?^#(X-kGnpA(l4O& z2Qc03+5z=0h@SCMmze>fsXG)D4ju4NdEn9QIs>YFhDUDzqeo}x4E}8l{QC}gBwz68 zW$j~OVBp`z!o0B>OnnN9O`c&sehdXvK-AluS3DH zfMKJJQt1b<3tpQb>Ehpa!K0fsRTNaDn6|NiG9NToJpA?l|9((%exd&R|9?=-+yEu_ zmMT!#bcfCWr&L%1Uf|K~y1=9P03#^59RCGM`>roMxU&f_nVA+kF>I$dA9Y=_!}Qg*z6+V|mgw#RYT1K<>Q-1P*Q2Dz%!^?*mO>j9r$ z)){~P|KAC!O?QFn1&>}+F<}J;kPGjDEa`Mz;n7^Xf}w=hquX^ww*!X{VSRD?2U4z9=&XI?(OLT7 zxa${CM803Q-nqIOA};27>0=Gl4OG1M`{r&l$M2VB+Yg6Lq!&cB|W z-+VNmg4+fM|1!V0{{8=dkJh&(3Wm2~^@~sEFQ3kDE}ick|BKx5Xg3sQ-ao22GTa5w(E{m1bmMXJTMr0|90>W;O;E zIACJ~4?eNt1jNZh?HMR4HK#b0AuSP1mgX^JrZMECmM|0)<(Flqq%stx7M5lff#{Ob zqC8L!v^2Sdp`@rZm7%1vAeEssFEc-np)9e8!7VW}CpAT(BwwK*v8Xszp(H;$HBUiZ zHCdgD!Rhz^|8Bqj|IhjL|34_zuK)G_|NUS8|MUF*|6lR<|No5A%+?VM3=E(~+nlfe z|1V)+V3_mm|Nk=#3=C_&{r_LWz`(HW`~Uw>7#J9IfByf^!pOi7@$>(GQ0>9|_y2zv zMh1qOzyJTIFfuT_`TPI>8b$_&KY#!KKf=hs0PABgRs}IIRtPXk^RRPFU}P5niGv1s z48HyUuZHdvSpVPs%1`1Ak24M@O+Pa&O)kHZJl@&@ro z@n{H)hQMeDjE2By2#kinhzkMOI6iFL9@GQ^1puu8gK;hH-JVW85qD#c@TdA zR36k81@Zra2nGfQSThFH&H)J_k5Pm8pe7lJW`K^bLXClpj4~kFp`d|GkUDV?0h*J7 zIuIr<02=vZU|@g>GYCKpf(8o%%>Lj1A^w3W{RibgfO_mdln)Ev4^aL(sQgbTAJof* zI%Pm8*gQ76<=dbhJ_e<)LFs2u`WuvHgGQ7Xlvab%W>DGsLrQ4wNG$@Vk z4j3OcuLR>`&^Y^#7%E4!03#fX%wT|fAC@r@0_ZGI05LM4iz4_S`2okUc~w5Be_->e zpkY6d<6!fs98h`K{3$P#51Th-gYseXrL0gsY@QU<=LeYwn;+$Y%ERVGL4%GUdDwg? z%zoHBC?`}sZ2prQ%7@K+vP1dq&dyc}8t$oiCHeUZ=6c3@2D(MY3I&FGCYlf~$Py&1 z1D+EH2|!A0*t#X;F$It;41?+=CI&rNu?-W3)1baH6GJ2?tOkJ9htO(}0ahPEt1Y-b z7z5&-YPkLe=;U7!OaU4VQE$u*vk$hY1>_DirC=^doQYvJFHHR`XgLh6Ru~+h4u$c- znve)BM0kNi92Q=;k;-3CT^|nkDt2Z@4mMT>1{Qq=1_ov(_6tl549qO-wIGU-je`{=$ZEpMz`)AFnweL^ zzyexa!py;Xi;aPS3A7M|sV5oayfdr}3{1TsCL0GkM+e9-MmBzs!5}urP8J4+8_Zut zm>3v1|FbYKFnlVl$Mli>Ms~Thy6PV+`^$Dbf1vT-6J#0}V*)D!Lk3qE$TS{Ckb?`jM3@;Em<7O`60Tby86n0j z1_p)-u67VdjM18bfuVsb3}m!8qbCCcLlf6-kZBT(;v5VN&0KsS17sN0IT#o^xbA`U zs4|u^FfdHung-%%FeosLR2?uz)KZ23rV71E&$l(69zjZsy#~%D})D4w};8;QR<; zM}l+-aHg^`Ft9~~l7UAb14khn1A_ru5y;D&4j?N__`u%y4boN$Vsmi*0~uWj zGD3h;0i>oD#1`Rv3^J|}#FpUP1aeCAL>2}H1L z;M4>;qzB|C22N>^-+Mv&IUqsS2T~%yzy}Jk>nscmqM$U$AmG5R`2(aEG_Av~Jsliw zpde<~0Vxz<;BjDA6K7>$-~}tt04d;I!OFnEt_w1RfytDSfq`8QWQ5o{P+p(Kk-)^j zAi=xTBz_&lZeS1wl`29Yf`Od_Y@7}wg99adH`Ze zKZ*na#w& zAguwawd@)FIT#qEQ$gv<0m427s@fbOY*2JFyEC?ctpu4S?E*@@9$*n^ACT`n!E71O zViJyaPzICnU}0e3=m0YVKslik%=BetVBnMj#V=nE3j@PV4h9BU29|Ri3=A^s85tNj zr9mPxJ|IsZY}pJ93?R(Tz`;=navdmaxV%Ar0JA^@ z0|y7Foy5U031k|iS;Y-%O)zRCfQDIuIT#oiwZKeJslliNW~PHg^*~HE(6SW}23c~A zfq{XC`vyA$1ET?`q~vA;sWSvIdB1@~<3B)~ztB+>2F5vjMj$oLFh7Im!a!}!IiOiX zeoz~Q(HZ1qP@9$C39J_6ZXX5)2L8(+Go3-)8K9}OZjfx4Jp%&+Xl7DyBh)TckX8l; zhIxz(3__Uys4UKzHn;c~4Qc%1xf}6lhOyJfp6QuRa1c@~! zNF$hu3ET)~hBShq5zXSt#lXPS3aSHG(m~8NP~(qf4v5(fX6k~N9bg65K}&YKz@iU8 z%x%pCAQ`z|4st(dA%fK1g&2h{*&=1FUge3=9IyoS=RjTLdQq z12gv~ke}EZI2ah1c`k!HOsrox85kz8=$!^t+j3kC3@irV_+xto>L-|jB8G#l5yZ9y z&6NtU<%87NO$4nYWith_9ST76xNH|dY8*jp-4)oxLE_G!jHSX>!NI`5;tDcTgRKL^ z_FB#WYULaSsrTE(zR#yLJr3=CXpDGZEYc{@f12Ck&iGzLZ^ zki=$21_o|(XaWdgVqmaiVqoB53}rOrVGLpZ%oxhU7|8sYF@%8^)?tn{F@s%@QHUKJ^w6;7V_;y0xfT>WSb`oFEbJib zIAAfr3G+M`Brv#<-2rkgk0^t>Bm)Dl9)rCkB$m}BLAE_hNg9GvA`g;Jco9C~gGB;AikDa!7z7Xk zVE-`)u%V;`(8?Jm2GD*UM#kTKyx;*tkU|CqMm;_K;>zNb)PiFD3JbG%GZO}e;*!Lo z61`vsa0`Kx2{!z~Y{$qfpy#A)=c}SpTE;9;lEDNTdIa_Bm>8@;{Qz{c^-GJ3K%21i z)02~-rt20Y>q2&8rRSCEg9p#_GxL&jN>fr9^z`%#@{2Po^pi46ib1@b%%seGIIldp zI5(*n%+&|2ECMUZOwP|M1F401qBJkF0_3AIeXuoP;PfLq0VTeacyYZQM zC81t3?%=O$L5 znF!C6pj~kJDXGb2C7?`}U&Ig}pP5&dpPX2dnV%P*oL>rZH7Iw2O9`-P`K2&7KsDxP zGsLH5=A`E3gRKSoC^avIAwC|Q!Q)f&QsUE+OTdyK2ZH62ON-)@^K(jbA;FLgF$=6F zC$YE$v`wxg6(t1W<`shjBOa_`8g?|fG`5C)D#8K zxOhC=K}D&>r8$uBar5+XjW5V41zDStSds`1kn*C;l2k|{fd(l!IEz6^0h}h#4Thz) z_)=(6C{8R(g(i&n^5hJL^5l%fB2aAR*;AhW@zB(jnHSFxkDQ1~ z^1%t-+0YO&S)-tvYi6RLn_E(vmuj1ynwMIXnXI6j3))U;3)wNOpqmax{zeMAAY5FM zVw;{cl9`vTo10jYp`Z&Yl9ID^3yShfQj<&ai$I3Jv?d}; zDCnk@!By^09@FBk^(48rh%fG0iF;+>3|^~ zlqQlu3B4q>0Hha|wV?R}lE%Pk7UUgJYKNpvnCls!sUM#G5Q!(hv;>rTU?!&IgA-Ct zYF>It2G|d!d7w}PyEd&fhao<`EHS4P6#mKixk;IMsUSz^=BGd-B{?M?9PD|i6(DW- zX=!FA(7*wgJMh8}oZ=y24ll((T9ZoCAfg3{DW(kZrFo^rsVShOkK{tI4W9n-Nr}nX zr3J8jgQNiFd`L+S+FuOM>!5ZDXqRwNel9E_K}tbTVG<7sHk1+(k~$FXE=dHHd(ecE zlb;Bx2lA7$Qj<$S)i11yhBP$wq3sxO;Q}VWkrWS3Ux`IUiIt#ow5VjrXV(BvXWW@K zHLnb`Upp-`FEbw5(@1SB=x*-h;!@Ct?@UkxAyh-tLs2S5uq76ygUSR@^&bz?3sS(4 zSd?A{Db7>!OCcM(3sCez%mb-~ItL_BToPZDl3K=4QWl?AoC?~i9$yUFb`EMe43UBq zZzPnI#Y4T7T#}g!Zyv^j+Ca&PIXUsgp!69J+R7gfD(bTt$Zck&B;&B29()lvP{7OEXg-IziP81IEDx z@Ph>6L0(A)b&5bmKUfoH8#TG81k}(44b_1#s2d8R85u%hOi=#^A`}Z_f(FmtFrwy^FIOdX5{=>=gJAEs|UXgnNr4iI!m)D=p@)Dc57 zGcYp1&H+MJ4?61tv>E_e48&$;U}gXWP0~b7;ktLWJxEU5e!wprOhhYKIdK{P>GXpOJ zY`_R62BMi6_~7f3KztZxX5eRl&riVwL6d9@0`PN%kc1hS83Y*;K#P5l1;Oft;OBxM zOEEJD!_Ngl6&GQ^wyuFQJm_a-QW(MyDL-1iNW(M$_FN_PPnZZN%a4wv|%pebEBQuzxXHp@H zfkv5G7(RkG=_8AS#tE^BgOUd$13yCpET4fCLf1KCQx9?pHgS+$*u(=sb6KEO;862n z#fmye1A`Doyn{+vMh47ycZ8~kowEhzg3iMNGceO(I%tTB0W&?+g3lG=Wk8=$>;;R1 z4MQP7wFlJcDB@c{YrauLAe>A_(D_i9=?yg1DZn7kFadgA3q&;o189e{5SDy*9fx~f z;}8eUOA0aYG8jS^+-pNE;0H}!LfOca5)N?_9OA(^#M2o;XJujfw*-g!4o1*_R5lKnul?1)ywW&_ptW1j7cj_{{~IgD7ud@gf5@hnFD+8eibOpbQKQprvbq z4B`w2VC5Awe6*QB;VglXe$1ifutTS$!Fxs-7#M88;!wvSQGOtCCSLfVXR!Pj23iRR zvR4T;USdJ&k@Ulu`5^U7nCZD1hx!RjpnL(-ji6V7)$=m!f^N(J@2O>AVA#Th{XDNX z;PAw(2SIB@!Qloy=LwYGK(^cjxfAR8U#S(TMMiqb3~rf4#U;)eiABgm=HM2+9;j=E z*2{`_iS%>y^>l^}{2_M-9DU;b-Q0p*L*hdm zoqSwDqZRNDAE-|WKKRSQ0e0XQ=$J3)ab&21c+dVy1^4`mONw$*^U$<`y6DM>Bh3)9 zCJfoBmF1wr$iSzUAtXRf0F8E_ia`6g;NfiWiDjtLpiWqRGOAcSct8T=fFe-GtSC9R z09ApDr@Lne?8G!wsrY!5ILvQZJUY$GiHu z#K*(VfkRUOI@b;BVQrv+?|9HK4QMb6={z}9{objS<@rS^#h@9p)S@C(Nd~>*%G{E~ zBnCZDAcE-(7z;csSiqo{mtT^q=ji0ATauX0pqHLks+W;ioWY=%Qkhp=nG2yyiXbv& zsYS)00|UUypq%(32EC%xoJ5cYD61f+gh3A+_Id?5C3@-kB@B8cpkXyVaH`iU$_J-e zz0{2Ow4%h^)cA~)A_h1Qq7^d0!2s3)W2aQ+CFW)(Gk{D4narREF(;|Gm_aW&KQ}iu zuY^G_B{iuuJwCB0F)ux}7{&#UdBUWUl8RExU^1Y=VHmp@HJFo5PRL6e~%F3xj7!IGd=cQ6K&2K8{i z{r?Xt#=#=6{tt`>os$5PfYp=e-4ReX5maQu+z;y~!D!fiH}KLD@S-1(X$+iv6@JVdY zcoV}CexNl^pfmwmj0(~Z>wm*&*uF%N90-H#0?{yB25uiC?U#i1AL0E-kl`@J;I<8f z1KVfVh^`;hkzfFA41lPE`4_^2?J_~oU1<7Y{ZSa*1KUUmH2`KWl*<4*$PMH~nEzqp z4KNz&9hh>^!T>lM#Fz%P9~w>Y{w=JZy98u7NFfwMTSZVF1H(Kt{kYE8g=s(!f6zIZ zAS>YZL;K6Helw_@4ATpu(Zh2a0|Nu-a63>Q0?EVrMX>XC(bdEFF!~T^9VEC<4$_S{ zm-7R(YYI~bkwwHCO#B>ZwLg-6SU(+hzA@|^V^ID9nE}%Wi$~BwVIcp*+z;!|-vD(m z85kJA>+3-pp%|tgMuYlC$ok>^erQC2(g##4n1bnp(V$b(koCjH2Npo}e}Fj*tQ)iK z`V5*UVaj0VUQU1(oG^bvwPWbuYAE0v5z#7pPx}P5)X@M~H!e0kkUyrWZt`tCa<< zMFL5nVJGO2pxQy}hrmpjIiP+nj1886631O3 z3=LSugmB^dpTKv)nGJ^Vp-qgeq~Sl|Js7#yGp6_gG@!?{o{ ry#550Z)iaT*0>=6OfkGgGaO_eL +#include +#include +#include +#include +#include +#include + +typedef struct { + int type; + char* text; +} LexTok; + +typedef struct { + int type; + char* patt; + void (*actfn)(LexTok* tok); + regex_t regex; +} LexRule; + +enum { + T_NONE = 0, + T_REQUIRES = 256, T_PROVIDES, T_LET, T_VAR, T_FUN, T_TYPE, T_STRUCT, + T_UNION, T_RETURN, T_IF, T_ELSE, + T_STRING, T_ID, T_INT, + T_ERROR = -1 +}; + +LexRule rules[] = { + /* skip whitespace and comments */ + { .type = T_NONE, .patt = "[ \r\n\t]+" }, + { .type = T_NONE, .patt = "#.*\r?\n" }, + + /* punctuation definitions */ + { .type = '(' }, { .type = ')' }, + { .type = '[' }, { .type = ']' }, + { .type = '{' }, { .type = '}' }, + { .type = '.' }, { .type = ',' }, + { .type = '\''}, { .type = ':' }, + { .type = '&' }, { .type = '=' }, + { .type = ';' }, { .type = '*' }, + + /* keyword definitions */ + { .type = T_REQUIRES, .patt = "require" }, + { .type = T_PROVIDES, .patt = "provide" }, + { .type = T_LET, .patt = "let" }, + { .type = T_VAR, .patt = "var" }, + { .type = T_FUN, .patt = "fun" }, + { .type = T_TYPE, .patt = "type" }, + { .type = T_STRUCT, .patt = "struct" }, + { .type = T_UNION, .patt = "union" }, + { .type = T_RETURN, .patt = "return" }, + { .type = T_IF, .patt = "if" }, + { .type = T_ELSE, .patt = "else" }, + + /* value definitions */ + { .type = T_STRING, .patt = "\"([^\"]|\\\\\")*\"" }, + { .type = T_ID, .patt = "[a-zA-Z_][a-zA-Z0-9_]*" }, + { .type = T_INT, .patt = "[+-]?[0-9]+" }, +}; + +char* file_load(char* path) { + int fd = -1, nread = 0, length = 0; + struct stat sb = {0}; + char* contents = NULL; + if (((fd = open(path, O_RDONLY, 0)) >= 0) && (fstat(fd, &sb) >= 0) && (sb.st_size > 0)) { + contents = calloc(sb.st_size + 1u, 1u); + while (sb.st_size && (nread = read(fd, contents+length, sb.st_size)) > 0) + length += nread, sb.st_size -= nread; + } + if (fd > 0) close(fd); + return contents; +} + +void settok(LexTok* tok, int type, char* data, size_t len) { + tok->type = type; + tok->text = realloc(tok->text, len+1); + tok->text[0] = '\0'; + strncat(tok->text, data, len); +} + +void token(char* data, size_t nrules, LexRule* rules, LexTok* tok) { + size_t sz = 0; + regmatch_t match; + tok->type = T_ERROR; + tok->text = calloc(1,1); + for (int i = 0; i < nrules; i++) { + if (*data == rules[i].type && sz < 1) + settok(tok, rules[i].type, data, 1u); + else if (rules[i].patt && !regexec(&(rules[i].regex), data, 1, &match, 0) && sz < match.rm_eo) + settok(tok, rules[i].type, data, match.rm_eo); + } +} + +void tokenize(char* data, size_t nrules, LexRule* rules) { + LexTok tok = {0}; + while (*data && tok.type != T_ERROR) { + token(data, nrules, rules, &tok); + if (tok.text) { + data += strlen(tok.text); + printf("{%ld-%ld} (%d '%s')\n", 0, strlen(tok.text), tok.type, tok.text); + } + free(tok.text), tok.text = NULL; + } + if (tok.type == T_ERROR) { + fprintf(stderr, "Failed tokenizing the file"); + exit(1); + } +} + +int main(int argc, char** argv) { + /* initialize the lexer rule sets */ + for (int i = 0; i < sizeof(rules)/sizeof(rules[0]); i++) { + if (!rules[i].patt) continue; + char* patt = calloc(strlen(rules[i].patt) + 2, 1u); + strcat(patt, "^"); + strcat(patt, rules[i].patt); + if (regcomp(&(rules[i].regex), patt, REG_EXTENDED) != 0) + { + fprintf(stderr, "Failed to initialize the lexer"); + return 1; + } + free(patt); + } + + /* read a file into memory */ + for (int i = 1; i < argc; i++) { + char* file = file_load(argv[i]); + if (file) { + tokenize(file, sizeof(rules)/sizeof(rules[0]), rules); + } + free(file); + } + + return 0; +} diff --git a/lexer2.c b/lexer2.c new file mode 100644 index 0000000..9119984 --- /dev/null +++ b/lexer2.c @@ -0,0 +1,224 @@ +#include +#include +#include +#include +#include +#include +#include + +typedef struct LexTok { + struct LexTok* next; + long type; + long offset; + char* text; + long long value; +} LexTok; + +typedef struct { + char* keyword; + int type; +} KeywordDef; + +enum { + T_NONE = 0, + T_STRING = 256, T_ID, T_INT, T_BOOL, + T_REQUIRES, T_PROVIDES, T_LET, T_VAR, T_FUN, T_TYPE, T_STRUCT, + T_UNION, T_RETURN, T_IF, T_ELSE, +}; + +static const char FirstChar[256] = { + /* Whitespace */ + [' '] = 1, ['\t'] = 1, ['\r'] = 1, ['\n'] = 1, + /* comment start */ + ['#'] = 2, + /* number or op */ + ['+'] = 3, ['-'] = 3, + /* number digits */ + ['0'] = 4, ['1'] = 4, ['2'] = 4, ['3'] = 4, ['4'] = 4, + ['5'] = 4, ['6'] = 4, ['7'] = 4, ['8'] = 4, ['9'] = 4, + /* alpha characters */ + ['A'] = 5, ['B'] = 5, ['C'] = 5, ['D'] = 5, ['E'] = 5, + ['F'] = 5, ['G'] = 5, ['H'] = 5, ['I'] = 5, ['J'] = 5, + ['K'] = 5, ['L'] = 5, ['M'] = 5, ['N'] = 5, ['O'] = 5, + ['P'] = 5, ['Q'] = 5, ['R'] = 5, ['S'] = 5, ['T'] = 5, + ['U'] = 5, ['V'] = 5, ['W'] = 5, ['X'] = 5, ['Y'] = 5, + ['Z'] = 5, ['a'] = 5, ['b'] = 5, ['c'] = 5, ['d'] = 5, + ['e'] = 5, ['f'] = 5, ['g'] = 5, ['h'] = 5, ['i'] = 5, + ['j'] = 5, ['k'] = 5, ['l'] = 5, ['m'] = 5, ['n'] = 5, + ['o'] = 5, ['p'] = 5, ['q'] = 5, ['r'] = 5, ['s'] = 5, + ['t'] = 5, ['u'] = 5, ['v'] = 5, ['w'] = 5, ['x'] = 5, + ['y'] = 5, ['z'] = 5, + /* punctuation */ + ['('] = 6, [')'] = 6, ['['] = 6, [']'] = 6, ['{'] = 6, ['}'] = 6, + ['.'] = 6, [','] = 6, [':'] = 6, ['&'] = 6, ['='] = 6, [';'] = 6, + ['*'] = 6, ['\''] = 6, + /* strings */ + ['"'] = 7 +}; + +char SPACE[256] = { + [' '] = 1, ['\t'] = 1, ['\r'] = 1, ['\n'] = 1, +}; + +char DIGIT[256] = { + ['0'] = 1, ['1'] = 1, ['2'] = 1, ['3'] = 1, ['4'] = 1, + ['5'] = 1, ['6'] = 1, ['7'] = 1, ['8'] = 1, ['9'] = 1, +}; + +char ALNUM_[256] = { + ['0'] = 1, ['1'] = 1, ['2'] = 1, ['3'] = 1, ['4'] = 1, + ['5'] = 1, ['6'] = 1, ['7'] = 1, ['8'] = 1, ['9'] = 1, + ['A'] = 1, ['B'] = 1, ['C'] = 1, ['D'] = 1, ['E'] = 1, + ['F'] = 1, ['G'] = 1, ['H'] = 1, ['I'] = 1, ['J'] = 1, + ['K'] = 1, ['L'] = 1, ['M'] = 1, ['N'] = 1, ['O'] = 1, + ['P'] = 1, ['Q'] = 1, ['R'] = 1, ['S'] = 1, ['T'] = 1, + ['U'] = 1, ['V'] = 1, ['W'] = 1, ['X'] = 1, ['Y'] = 1, + ['Z'] = 1, ['a'] = 1, ['b'] = 1, ['c'] = 1, ['d'] = 1, + ['e'] = 1, ['f'] = 1, ['g'] = 1, ['h'] = 1, ['i'] = 1, + ['j'] = 1, ['k'] = 1, ['l'] = 1, ['m'] = 1, ['n'] = 1, + ['o'] = 1, ['p'] = 1, ['q'] = 1, ['r'] = 1, ['s'] = 1, + ['t'] = 1, ['u'] = 1, ['v'] = 1, ['w'] = 1, ['x'] = 1, + ['y'] = 1, ['z'] = 1, ['_'] = 1, +}; + +#define NUM_KEYWORDS (sizeof(Keywords) / sizeof(Keywords[0])) +KeywordDef Keywords[] = { + { "else", T_ELSE }, + { "false", T_BOOL }, + { "fun", T_FUN }, + { "if", T_IF }, + { "let", T_LET }, + { "provide", T_PROVIDES }, + { "require", T_REQUIRES }, + { "return", T_RETURN }, + { "struct", T_STRUCT }, + { "true", T_BOOL }, + { "type", T_TYPE }, + { "union", T_UNION }, + { "var", T_VAR }, +}; + +char* file_load(char* path) { + int fd = -1, nread = 0, length = 0; + struct stat sb = {0}; + char* contents = NULL; + if (((fd = open(path, O_RDONLY, 0)) >= 0) && (fstat(fd, &sb) >= 0) && (sb.st_size > 0)) { + contents = calloc(sb.st_size + 1u, 1u); + while (sb.st_size && (nread = read(fd, contents+length, sb.st_size)) > 0) + length += nread, sb.st_size -= nread; + } + if (fd > 0) close(fd); + return contents; +} + +int keywcmp(const void* a, const void* b) { + return strcmp(((KeywordDef*)a)->keyword, ((KeywordDef*)b)->keyword); +} + +void tokenize(char* data) { +// LexTok *toklist = NULL; +// LexTok **lasttok = &toklist; + LexTok tok = {0}; + char *beg = data, *curr = data; + while (*curr) { + tok.type = T_NONE; + tok.offset = (beg - data); + beg = curr; + switch (FirstChar[*curr++]) { + case 1: /* skip whitespace */ + for (; SPACE[*curr]; curr++); + break; + + case 2: /* skip comments */ + for (; *curr != '\n'; curr++); + break; + + case 3: /* +/- as ops or number signs */ + tok.type = *(curr-1); + if (!DIGIT[*curr]) break; + /* fallthrough to number parsing */ + + case 4: + tok.type = T_INT; + for (; DIGIT[*curr]; curr++); + break; + + case 5: + tok.type = T_ID; + for (; ALNUM_[*curr]; curr++); + break; + + case 6: /* single char tokens */ + tok.type = *(curr-1); + break; + + case 7: /* string parsing */ + tok.type = T_STRING; + for (; *curr != '"'; curr++); + curr++; + break; + + case 0: /* error handling */ + default: + fprintf(stderr, "Failed to parse token '%c'\n", *(curr-1)); + exit(1); + } + + if (tok.type) { + size_t sz = (curr - beg); + tok.text = malloc(sz+1); + tok.text[sz] = '\0'; + strncpy(tok.text, beg, sz); + + /* perform value conversions */ + switch (tok.type) { + case T_STRING: { + size_t len = strlen(tok.text+1); + char* strtext = malloc(len); + strncpy(strtext, tok.text+1, len); + strtext[len-1] = '\0'; + free(tok.text), tok.text = strtext; + break; + } + + case T_INT: { + tok.value = strtol(tok.text, NULL, 0); + break; + } + + case T_ID: { + KeywordDef key = { .keyword = tok.text }; + KeywordDef* match = bsearch( + &key, Keywords, NUM_KEYWORDS, sizeof(KeywordDef), keywcmp); + if (match) tok.type = match->type; + break; + } + + case T_BOOL: { + tok.value = (tok.text[0] == 't'); + break; + } + } + +// *lasttok = malloc(sizeof(LexTok)); +// *(*lasttok) = tok; +// lasttok = &((*lasttok)->next); + } + } + +// while (toklist) { +// printf("{%ld-%ld} (%d '%s')\n", 0, 0, toklist->type, toklist->text); +// toklist = toklist->next; +// } +} + +int main(int argc, char** argv) { + /* read a file into memory and tokenize it */ + for (int i = 1; i < argc; i++) { + char* file = file_load(argv[i]); + if (file) tokenize(file); + free(file); + } + + return 0; +} diff --git a/modded.src b/modded.src new file mode 100644 index 0000000..d1d8473 --- /dev/null +++ b/modded.src @@ -0,0 +1,48 @@ +require (fmt) +provide (main) + +let const_true bool = true +let const_false bool = false +let const_uint int = 123 +let const_string string = + +var var_true bool = true +var var_false bool = false +var var_uint int = 123 +var var_string string = + +type type_int = int +type type_intary = int[] +type type_intaryary = int[][] +type type_intptrary = int*[] +type type_intptr = int* +type type_intptrptr = int** +type type_struct = struct { + foo = int + bar = float +} +type type_union = union { + foo = int + bar = float +} + +fun main(args string[]) int { + let foo int = 123u + var bar int = 123 + {123} + 123 + (123) + foo() + bar(1) + baz(1,2) + if (123) {} + if 123 {} + if (123) {} else {} + if (123) {} else if (123) {} + if (123) {} else if (123) {} else {} + if 123 {} else if 123 {} else {} + fun main(args string[]) int { + 123 + } + foo.bar() +} diff --git a/source/sclpl.h b/source/sclpl.h index a380ebc..377af02 100644 --- a/source/sclpl.h +++ b/source/sclpl.h @@ -94,7 +94,8 @@ Sym* sym_get(SymTable* syms, char* name); /* AST Types *****************************************************************************/ typedef enum { - AST_VAR, AST_STRING, AST_SYMBOL, AST_CHAR, AST_INT, AST_FLOAT, AST_BOOL, AST_IDENT + AST_VAR, AST_FUNC, AST_EXPLIST, AST_STRING, AST_SYMBOL, AST_CHAR, AST_INT, + AST_FLOAT, AST_BOOL, AST_IDENT } ASTType; typedef struct AST { @@ -106,8 +107,18 @@ typedef struct AST { char* name; int flags; struct AST* value; - struct AST* type; + struct AST* type; // TODO: This should go away in favor of ->datatype } var; + /* Lambda Node */ + struct { + struct AST* args; + struct AST* body; + } func; + /* Expression Block Node */ + struct { + size_t nexprs; + struct AST** exprs; + } explist; /* String, Symbol, Identifier */ char* text; /* Character */ @@ -151,6 +162,13 @@ char* var_name(AST* var); AST* var_value(AST* var); bool var_const(AST* var); +AST* Func(AST* args, AST* body); +AST* func_args(AST* func); +AST* func_body(AST* func); + +AST* ExpList(size_t nexprs, AST** exprs); +AST** explist_get(size_t* nexprs); + /* Package Definition *****************************************************************************/ typedef struct Require { -- 2.54.0