1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
|
<HTML>
<HEAD>
<TITLE>Computer Science Logo Style vol 2 ch 6: Example: BASIC Compiler</TITLE>
</HEAD>
<BODY>
<CITE>Computer Science Logo Style</CITE> volume 2:
<CITE>Advanced Techniques</CITE> 2/e Copyright (C) 1997 MIT
<H1>Example: BASIC Compiler</H1>
<TABLE width="100%"><TR><TD>
<IMG SRC="../csls2.jpg" ALT="cover photo">
<TD><TABLE>
<TR><TD align="right"><CITE><A HREF="http://www.cs.berkeley.edu/~bh/">Brian
Harvey</A><BR>University of California, Berkeley</CITE>
<TR><TD align="right"><BR>
<TR><TD align="right"><A HREF="../pdf/v2ch06.pdf">Download PDF version</A>
<TR><TD align="right"><A HREF="../v2-toc2.html">Back to Table of Contents</A>
<TR><TD align="right"><A HREF="../v2ch5/v2ch5.html"><STRONG>BACK</STRONG></A>
chapter thread <A HREF="../v2ch7/v2ch7.html"><STRONG>NEXT</STRONG></A>
<TR><TD align="right"><A HREF="https://mitpress.mit.edu/books/computer-science-logo-style-second-edition-volume-2">MIT
Press web page for <CITE>Computer Science Logo Style</CITE></A>
</TABLE></TABLE>
<HR>
<P>Program file for this chapter: <A HREF="basic.lg"><CODE>basic</CODE></A>
<P>
The BASIC programming language was designed by John Kemeny
and Thomas Kurtz in the late 1960s. (The name is an acronym for
Beginner's All-purpose Symbolic Instruction Code.) It was first implemented
on a large, central computer facility at Dartmouth; the designers' goal was
to have a language that all students could use for simple problems, in
contrast to the arcane programming languages used by most experts at that
time.
<P>
A decade later, when the microcomputer was invented, BASIC took on a new
importance. Kemeny and Kurtz designed a simple language for the sake of
the users, but that simplicity also made the language easy for the
<EM>computer!</EM> Every programming language requires a computer program to
translate it into instructions that the computer can carry out. For example,
the Logo programs you write are translated by a Logo interpreter. But Logo
is a relatively complex language, and a Logo interpreter is a pretty big
program. The first microcomputers had only a few thousand bytes of memory.
(Today's home computers, by contrast, have several million bytes.) Those
early personal computers couldn't handle Logo, but it was possible to write
a BASIC interpreter that would fit them. As a result, BASIC became the
near-universal language for amateur computer enthusiasts in the late 1970s
and early 1980s.
<P>
Today's personal computers come with translators for a wide variety of
programming languages, and also with software packages that enable many
people to accomplish their computing tasks without writing programs
of their own at all. BASIC is much less widely used today, although
it has served as the core for Microsoft's "Visual Basic" language.
<P>
In this chapter, I want to show how Logo's <CODE>define</CODE> command can be
used in a program-writing program. My program will translate BASIC
programs into Logo programs. I chose BASIC for the same reason the
early microcomputers used it: It's a small language and the translator
is relatively easy to write. (Kemeny and Kurtz, the designers of BASIC,
have criticized the microcomputer implementations as <EM>too</EM> simple and
as unfaithful to their original goals. My implementation will share that
defect, to make the project easier. Don't use this version as a basis on
which to judge the language! For that you should investigate True Basic,
the version that Kemeny and Kurtz wrote themselves for personal computers.)
<P>
Here's a typical short BASIC program:
<PRE>
10 print "Table of Squares"
20 print
30 print "How many values would you like?"
40 input num
50 for i=1 to num
60 print i, i*i
70 next i
80 end
</PRE>
<P>
And here's what happens when we run it:
<PRE>
Table of Squares
How many values would you like?
<U>5</U>
1 1
2 4
3 9
4 16
5 25
</PRE>
<H2>A Short Course in BASIC</H2>
<P>
Each line in the sample BASIC program begins with a <EM>line number.</EM>
These numbers are used for program editing. Instead of the modern screen
editors with which you're familiar, the early versions of BASIC had a very
primitive editing facility; you could replace a line by typing a new line
with the same number. There was no way to replace less than an entire line.
To delete a line completely, you'd enter a line containing just the number.
The reason the line numbers in this program are multiples of ten is to leave
room for inserting new lines. For example, I could say
<PRE>
75 print "Have a nice day."
</PRE>
<P>
to insert a new line between lines 70 and 80.
(By the way, the earliest versions of Logo used a similar line numbering
system, except that each Logo procedure was separately numbered. The editing
technique isn't really part of the language design; early systems used
"line editors" because they had typewriter-like paper terminals instead
of today's display screens. I'm using a line editor in this project because
it's easy to implement!)
<P>
The BASIC language consists of one or two dozen commands, depending on the
version used. My BASIC dialect understands only these ten commands:
<PRE>
LET variable = value
PRINT values
INPUT variables
FOR variable = value TO value
NEXT variable
IF value THEN command
GOTO linenumber
GOSUB linenumber
RETURN
END
</PRE>
<P>
Unlike Logo procedure calls, which consist of the procedure
name followed by inputs in a uniform format, each BASIC command has its
own format, sometimes including internal separators such as the equal sign
and the word <CODE>to</CODE> in the <CODE>for</CODE> command format, or the word <CODE>then</CODE>
in the <CODE>if</CODE> command format.
<P>
In some versions of BASIC, including this one, a single line can contain
more than one command, if the commands are separated with colons. Thus
the same program shown earlier could also be written this way:
<PRE>
10 print "Table of Squares":print
30 print "How many values would you like?":input num
50 for i=1 to num : print i, i*i : next i
80 end
</PRE>
<P>
The <CODE>let</CODE> command assigns a value to a variable, like Logo's
<CODE>make</CODE> procedure. Unlike Logo, BASIC does not have the rule that
all inputs are evaluated before applying the command. In particular, the
word after <CODE>let</CODE> must be the name of the variable, not an
expression whose value is the name. Therefore the name is not quoted.
Also, a variable can't have the same name as a procedure, so there is no
need for anything like Logo's use of the colon to indicate a variable
value. (This restricted version of BASIC doesn't have named procedures at
all, like some early microcomputer versions.)
<PRE>
make "x :y + 3 <EM>(Logo)</EM>
let x = y + 3 <EM>(BASIC)</EM>
</PRE>
<P>
In my subset of BASIC, the value of a variable must be a number.
More complete BASIC dialects include string variables (like words in Logo)
and arrays (like Logo's arrays).
<P>
The value to be assigned to a variable can be computed using an arithmetic
expression made up of variables, numbers, the arithmetic operators
<CODE>+</CODE>, <CODE>-</CODE>, <CODE>*</CODE>, and <CODE>/</CODE>, and
parentheses for grouping.
<P>
The <CODE>print</CODE> command is similar to Logo's print procedure in that it
prints a line on the screen. That line can include any number of values.
Here is an example <CODE>print</CODE> command:
<PRE>
print "x = "; x, "y = "; y, "sum = "; x+y
</PRE>
<P>
In this example two kinds of values are printed: arithmetic
values (as in the <CODE>let</CODE> command) and strings. A <EM>string</EM> is any
sequence of characters surrounded by quotation marks.
<P>
Notice that the values in this example are separated by punctuation marks,
either commas or semicolons. When a semicolon is used, the two values are
printed right next to each other, with no space between them. (That's why
each of the strings in this example ends with a space.) When a comma is
used, BASIC prints a tab character between the two values, so that values on
different lines will line up to form columns. (Look again at the table of
squares example at the beginning of this chapter.)
<P>
The <CODE>input</CODE> command is the opposite of <CODE>print</CODE>; it reads values from
the keyboard and assigns them to variables. There is nothing in Logo exactly
like <CODE>input</CODE>. Instead, Logo has <EM>operations</EM> <CODE>readword</CODE> and
<CODE>readlist</CODE> that output the contents of a line; those values can be
assigned to variables using <CODE>make</CODE> or can be used in some other way.
The Logo approach is more flexible, but the early versions of BASIC didn't
have anything like Logo's operations. The <CODE>input</CODE> command will also
accept a string in quotation marks before its list of variables; that string
is printed as a prompt before BASIC reads from the keyboard. (BASIC does not
start a new line after printing the prompt, so the effect is like Logo's
<CODE>type</CODE> command rather than like <CODE>print</CODE>.) Here's an example:
<PRE>
input "Please enter x and y: " x,y
</PRE>
<P>
The user can type the values for x and y on the same line,
separated by spaces, or on separate lines. BASIC keeps reading lines until
it has collected enough numbers for the listed variables. Notice that the
variable names in the <CODE>input</CODE> command must be separated by commas, not
by semicolons.
<P>
The <CODE>for</CODE> and <CODE>next</CODE> commands work together to provide
a numeric iteration capability like Berkeley Logo's <CODE>for</CODE>
procedure. The <CODE>for</CODE> command format includes a variable name, a
starting value, and an ending value. (The step value is always 1.) The
named variable is given the specified starting value. If that value is less
than the ending value, then all of the commands between the <CODE>for</CODE>
command and the matching <CODE>next</CODE> command (the one with the same
named variable) are carried out. Then the variable is increased by 1, and
the process continues until the ending value is reached. <CODE>For</CODE>
and <CODE>next</CODE> pairs with different variables can be nested:
<PRE>
10 input "Input size: " num
20 for i = 1 to num
30 for j = i to num
40 print i;" ";j
50 next j:next i
60 end
Input size: <U>4</U>
1 1
1 2
1 3
1 4
2 2
2 3
2 4
3 3
3 4
4 4
</PRE>
<P>
Notice that the <CODE>next j</CODE> must come before the <CODE>next i</CODE> so
that the <CODE>for</CODE>/<CODE>next</CODE> pairs are properly nested.
<P>
The <CODE>if</CODE> command allows conditional execution, much like Logo's
<CODE>if</CODE> command, but with a different notation. Instead of taking
an instruction list as an input, BASIC's <CODE>if</CODE> uses the keyword
<CODE>then</CODE> to introduce a single conditional command. (If you want
to make more than one command conditional, you must combine <CODE>if</CODE>
with <CODE>goto</CODE>, described next.) The value that controls the
<CODE>if</CODE> must be computed using one of the operators <CODE>=</CODE>,
<CODE><</CODE>, or <CODE>></CODE> for numeric comparison.*
<SMALL><BLOCKQUOTE><SMALL><SUP>*</SUP>Notice that the equal sign has two
meanings in BASIC. In the <CODE>let</CODE> command, it's like Logo's
<CODE>make</CODE>; in the <CODE>if</CODE> command, it's like Logo's
<CODE>equalp</CODE>. In the early 1980s, Logo enthusiasts had fierce
arguments with BASIC fans, and this sort of notational inconsistency was one
of the things that drove us crazy! (More serious concerns were the lack of
operations and of recursion in the microcomputer versions of
BASIC.)</SMALL></BLOCKQUOTE></SMALL>
<P>
The <CODE>goto</CODE> command transfers control to the beginning of a command
line specified by its line number. It can be used with <CODE>if</CODE> to make a
sequence of commands conditional:
<PRE>
10 input x
20 if x > 0 then goto 100
30 print "x is negative."
40 print "x = "; x
50 goto 200
100 print "x is positive."
200 end
</PRE>
<P>
The <CODE>gosub</CODE> and <CODE>return</CODE> commands provide a rudimentary procedure
calling mechanism. I call it "rudimentary" because the procedures have no
inputs, and can only be commands, not operations. Also, the command lines
that make up the procedure are also part of the main program, so you
generally need a <CODE>goto</CODE> in the main program to skip over them:
<PRE>
10 let x=7
20 gosub 100
30 let x=9
40 gosub 100
50 goto 200
100 print x, x*x
110 return
200 end
</PRE>
<P>
Finally, the <CODE>end</CODE> command ends the program. There must be an <CODE>end</CODE>
at the end of a BASIC program, and there should not be one anywhere else.
(In this implementation of BASIC, an <CODE>end</CODE> stops the BASIC program even
if there are more lines after it. It's roughly equivalent to a <CODE>throw</CODE>
to <CODE>toplevel</CODE> in Logo.)
<H2>Using the BASIC Translator</H2>
<P>
To start the translator, run the Logo procedure <CODE>basic</CODE> with no inputs.
You will then see the BASIC prompt, which is the word <CODE>READY</CODE> on a line
by itself.
<P>
At the prompt you can do either of two things. If you type a line starting
with a line number, that line will be entered into your BASIC program. It
is inserted in order by line number. Any previous line with the same number
will be deleted. If the line you type contains <EM>only</EM> a line number,
then the line in the program with that number will be deleted.
<P>
If your line does not start with a number, then it is taken as an
<EM>immediate</EM> command, not as part of the program. This version of
BASIC recognizes only three immediate commands: The word <CODE>run</CODE>
means to run your program, starting from the smallest line number. The word
<CODE>list</CODE> means to print out a listing of the program's lines, in
numeric order. The word <CODE>exit</CODE> returns to the Logo prompt.
<H2>Overview of the Implementation</H2>
<P>
There are two kinds of translators for programming languages: compilers and
interpreters. The difference is that a compiler translates one language
(the <EM>source</EM> language) into another (the <EM>target</EM> language),
leaving the result around so that it can be run repeatedly without being
translated again. An interpreter translates each little piece of source
language into one action in the target language and runs the result, but
does not preserve a complete translated program in the target language.
<P>
Ordinarily, the target language for both compilers and interpreters is
the "native" language of the particular computer you're using, the
language that is wired into the computer hardware. This
<EM>machine language</EM> is the only form in which a program can
actually be run. The BASIC compiler in this chapter is quite unrealistic
in that it uses Logo as the target language, which means that the program
must go through <EM>another</EM> translation, from Logo to machine
language, before it can actually be run. For our purposes, there are
three advantages to using Logo as the target language. First, every kind
of computer has its own machine language, so I'd have to write several
versions of the compiler to satisfy everyone if I compiled BASIC into
machine language. Second, I know you know Logo, so you can understand
the resulting program, whereas you might not be familiar with any machine
language. Third, this approach allows me to
cheat by leaving out a lot of the complexity of a real compiler. Logo is
a "high level" language, which means that it takes care of many details
for us, such as the allocation of specific locations in the computer's
memory to hold each piece of information used by the program. In order to
compile into machine language, I'd have to pay attention to those details.
<P>
Why would anyone want an interpreter, if the compiler translates the program
once and for all, while the interpreter requires retranslation every time
a command is carried out? One reason is that an interpreter is easier to
write, because (just as in the case of a compiler with Logo as the target
language) many of the details can be left out. Another reason is that
traditional compilers work using a <EM>batch</EM> method, which means that you
must first write the entire program with a text editor, then run the
compiler to translate the program into machine language, and finally run
the program. This is okay for a working program that is used often, but not
recompiled often. But when you're creating a program in the first place,
there is a debugging process that requires frequent modifications to the
source language program. If each modification requires a complete
recompilation, the debugging is slow and frustrating. That's why
interpreted languages are often used for teaching--when you're learning
to program, you spend much more time debugging a program than running the
final version.
<P>
The best of both worlds is an <EM>incremental compiler,</EM> a
compiler that can recompile only the changed part when a small change is
made to a large program. For example, Object Logo is a commercial version
of Logo for the Macintosh in which each procedure is compiled when it is
defined. Modifying a procedure requires recompiling that procedure, but not
recompiling the others. Object Logo behaves like an interpreter, because the
user doesn't have to ask explicitly for a procedure to be compiled, but
programs run faster in Object Logo than in most other versions because each
procedure is translated only once, rather than on every invocation.
<P>
The BASIC translator in this chapter is an incremental compiler. Each
numbered line is compiled into a Logo procedure as soon as it is typed in.
If the line number is <CODE>40</CODE> then the resulting procedure will be
named <CODE>basic%40</CODE>. The last step in each of these procedures is
to invoke the procedure for the next line. The compiler maintains a list of
all the currently existing line numbers, in order, so the <CODE>run</CODE>
command is implemented by saying
<PRE>
run (list (word "basic% first :linenumbers))
</PRE>
<P>
Actually, what I just said about each procedure ending with an invocation
of the next one is slightly simplified. Suppose the BASIC program starts
<PRE>
10 let x=3
20 let y=9
30 ...
</PRE>
<P>
and we translate that into
<PRE>
to basic%10
make "%x 3
basic%20
end
to basic%20
make "%y 9
basic%30
end
</PRE>
<P>
Then what happens if the user adds a new line numbered 15? We
would have to recompile line 10 to invoke <CODE>basic%15</CODE> instead of
<CODE>basic%20</CODE>. To avoid that, each line is compiled in a way that
defers the choice of the next line until the program is actually run:
<PRE>
to basic%10
make "%x 3
nextline 10
end
to basic%20
make "%y 9
nextline 20
end
</PRE>
<P>
This solution depends on a procedure <CODE>nextline</CODE> that finds
the next available line number after its argument:
<PRE>
to nextline :num
make "target member :num :linenumbers
if not emptyp :target [make "target butfirst :target]
if not emptyp :target [run (list (word "basic% first :target))]
end
</PRE>
<P>
<CODE>Nextline</CODE> uses the Berkeley Logo primitive <CODE>member</CODE>,
which is like the predicate <CODE>memberp</CODE> except that if the first
input is found as a member of the second, instead of giving
<CODE>true</CODE> as its output, it gives the portion of the second input
starting with the first input:
<PRE>
? <U>show member "the [when in the course of human events]</U>
[the course of human events]
</PRE>
<P>
If the first input is not a member of the second, <CODE>member</CODE>
outputs an empty word or list, depending on the type of the second input.
<P>
The two separate <CODE>emptyp</CODE> tests are used instead of a single <CODE>if</CODE>
because the desired line number might not be in the list at all, or it might
be the last one in the list, in which case the <CODE>butfirst</CODE> invocation
will output an empty list. (Neither of these cases should arise. The first
means that we're running a line that doesn't exist, and the second means
that the BASIC program doesn't end with an <CODE>end</CODE> line. But the procedure
tries to avoid disaster even in these cases.)
<P>
Look again at the definition of <CODE>basic%10</CODE>. You'll see that the
variable named <CODE>x</CODE> in the BASIC program is named <CODE>%x</CODE> in the
Logo translation. The compiler uses this renaming technique to ensure that
the names of variables and procedures in the compiled program don't conflict
with names used in the compiler itself. For example, the compiler uses a
variable named <CODE>linenumbers</CODE> whose value is the list of line numbers.
What if someone writes a BASIC program that says
<PRE>
10 let linenumbers = 100
</PRE>
<P>
This won't be a problem because in the Logo translation, that
variable will be named <CODE>%linenumbers</CODE>.
<P>
The compiler can be divided conceptually into four parts:
<UL>
<LI>The <EM>reader</EM> divides the characters that the user types into
meaningful units. For example, it recognizes that <CODE>let</CODE> is a single
word, but <CODE>x+1</CODE> should be understood as three separate words.
<LI>The <EM>parser</EM> recognizes the form of each of the ten BASIC
commands that this dialect understands. For example, if a command
starts with <CODE>if</CODE>, the parser expects an expression followed by the word
<CODE>then</CODE> and another command.
<LI>The <EM>code generator</EM> constructs the actual translation of
each BASIC command into one or more Logo instructions.
<LI>The <EM>runtime library</EM> contains procedures that are used while the
translated program is running, rather than during the compilation process.
The <CODE>nextline</CODE> procedure discussed earlier is an example.
</UL>
<P>
Real compilers have the same structure, except of course that the code
generator produces machine language instructions rather than Logo
instructions. Also, a professional compiler will include an
<EM>optimizer</EM> that looks for ways to make the compiled program as
efficient as possible.
<H2>The Reader</H2>
<P>
A <EM>reader</EM> is a program that reads a bunch of characters
(typically one line, although not in every language) and divides those
characters into meaningful units. For example, every Logo implementation
includes a reader that interprets square brackets as indications of
list grouping. But some of the rules followed by the Logo reader differ
among implementations. For example, can the hyphen character (<CODE>-</CODE>) be
part of a larger word, or is it always a word by itself? In a context in
which it means subtraction, we'd like it to be a word by itself. For
example, when you say
<PRE>
print :x-3
</PRE>
<P>
as a Logo instruction, you mean to print three less than the
value of the variable named <CODE>x</CODE>, not to print the value of a variable
whose name is the three-letter word <CODE>x-3</CODE>! On the other hand, if you
have a list of telephone numbers like this:
<PRE>
make "phones [555-2368 555-9827 555-8311]
</PRE>
<P>
you'd like the <CODE>first</CODE> of that list to be an entire
phone number, the word <CODE>555-2368</CODE>, not just <CODE>555</CODE>. Some Logo
implementations treat every hyphen as a word by itself; some treat
every hyphen just like a letter, and require that you put spaces around
a minus sign if you mean subtraction. Other implementations, including
Berkeley Logo, use a more complicated rule in which the status of the
hyphen depends on the context in which it appears, so that both of the
examples in this paragraph work as desired.
<P>
In any case, Logo's reader follows rules that are not appropriate for BASIC.
For example, the colon (<CODE>:</CODE>) is a delimiter in BASIC, so it should be
treated as a word by itself; in Logo, the colon is paired with the variable
name that follows it. In both languages, the quotation mark (<CODE>"</CODE>) is
used to mark quoted text, but in Logo it comes only at the beginning of
a word, and the quoted text ends at the next space character, whereas in
BASIC the quoted text continues until a second, matching quotation mark.
For these and other reasons, it's desirable to have
a BASIC-specific reader for use in this project.
<P>
The rules of the BASIC reader are pretty simple. Each invocation of
<CODE>basicread</CODE> reads one line from the keyboard, ending with the Return
or Enter character. Within that line, space characters separate words
but are not part of any word. A quotation mark begins a quoted word that
includes everything up to and including the next matching quotation mark.
Certain characters form words by themselves:
<PRE>
+ - * / = < > ( ) , ; :
</PRE>
<P>
All other characters are treated like letters; that is, they
can be part of multi-character words.
<PRE>
? <U>show basicread</U>
<U>30 print x;y;"foo,baz",z:print hello+4</U>
[30 print x ; y ; "foo,baz" , z : print hello + 4]
</PRE>
<P>
Notice that the comma inside the quotation marks is not made
into a separate word by <CODE>basicread</CODE>. The other punctuation characters,
however, appear in the output sentence as one-character words.
<P>
<CODE>Basicread</CODE> uses the Logo primitive <CODE>readword</CODE> to read a line.
<CODE>Readword</CODE> can be thought of as a reader with one trivial rule: The
only special character is the one that ends a line. Everything else is
considered as part of a single long word. <CODE>Basicread</CODE> examines that
long word character by character, looking for delimiters, and accumulating
a sentence of words separated according to the BASIC rules. The
implementation of <CODE>basicread</CODE> is straightforward; you can read the
procedures at the end of this chapter if you're interested. For now,
I'll just take it for granted and go on to discuss the more interesting
parts of the BASIC compiler.
<H2>The Parser</H2>
<P>
The <EM>parser</EM> is the part of a compiler that figures out the
structure of each piece of the source program. For example, if the BASIC
compiler sees the command
<PRE>
let x = ( 3 * y ) + 7
</PRE>
<P>
it must recognize that this is a <CODE>let</CODE> command, which
must follow the pattern
<PRE>
LET variable = value
</PRE>
<P>
and therefore <CODE>x</CODE> must be the name of a variable, while
<CODE>( 3 * y ) + 7</CODE> must be an expression representing a value. The
expression must be further parsed into its component pieces. Both the
variable name and the expression must be translated into the form they
will take in the compiled (Logo) program, but that's the job of the
code generator.
<P>
In practice, the parser and the code generator are combined into one
step; as each piece of the source program is recognized, it is translated
into a corresponding piece of the object program. So we'll see that most
of the procedures in the BASIC compiler include parsing instructions and
code generation instructions. For example, here is the procedure that
compiles a <CODE>let</CODE> command:
<PRE>
to compile.let :command
make "command butfirst :command
make "var pop "command
make "delimiter pop "command
if not equalp :delimiter "= [(throw "error [Need = in let.])]
make "exp expression
queue "definition (sentence "make (word ""% :var) :exp)
end
</PRE>
<P>
In this procedure, all but the last instruction (the line
starting with <CODE>queue</CODE>) are parsing the source command. The last
line, which we'll come back to later, is generating a Logo <CODE>make</CODE>
instruction, the translation of the BASIC <CODE>let</CODE> in the object
program.
<P>
BASIC was designed to be very easy to parse. The parser can read a command
from left to right, one word at a time; at every moment, it knows exactly
what to expect. The command must begin with one of the small number of
command names that make up the BASIC language. What comes next depends on
that command name; in the case of <CODE>let</CODE>, what comes next is one word
(the variable name), then an equal sign, then an expression. Each
instruction in the <CODE>compile.let</CODE> procedure handles one of these pieces.
First we skip over the word <CODE>let</CODE> by removing it from the front of the
command:
<PRE>
make "command butfirst :command
</PRE>
<P>
Then we read and remember one word, the variable name:
<PRE>
make "var pop "command
</PRE>
<P>
(Remember that the <CODE>pop</CODE> operation removes one member
from the beginning of a list, returning that member. In this case
we are removing the variable name from the entire <CODE>let</CODE> command.)
Then we make sure there's an equal sign:
<PRE>
make "delimiter pop "command
if not equalp :delimiter "= [(throw "error [Need = in let.])]
</PRE>
<P>
And finally we call a subprocedure to read the expression;
as we'll see later, that procedure also translates the expression to
the form it will take in the object program:
<PRE>
make "exp expression
</PRE>
<P>
The parsers for other BASIC commands have essentially the same
structure as this example. They repeatedly invoke <CODE>pop</CODE> to
read one word from the command or <CODE>expression</CODE> to read and
translate an expression. (The <CODE>if</CODE> command is a little more
complicated because it contains another command as a component,
but that inner command is just compiled as if it occurred by itself.
We'll look at that process in more detail when we get to the
code generation part of the compiler.)
<P>
Each compilation procedure expects a single BASIC command as its
input. Remember that a line in a BASIC program can include more
than one command. The compiler uses a procedure named <CODE>split</CODE>
to break up each line into a list of commands:
<PRE>
? <U>show split [30 print x ; y ; "foo,baz" , z : print hello + 4]</U>
[30 [print x ; y ; "foo,baz" , z] [print hello + 4]]
</PRE>
<P>
<CODE>Split</CODE> outputs a list whose first member is a line
number; the remaining members are lists, each containing one BASIC
command. <CODE>Split</CODE> works by looking for colons within the
command line.
<P>
Here is the overall structure of the compiler, but with only the
instructions related to parsing included:
<PRE>
to basic
forever [basicprompt]
end
to basicprompt
print "READY
make "line basicread
if emptyp :line [stop]
ifelse numberp first :line [compile split :line] [immediate :line]
end
to compile :commands
make "number first :commands
ifelse emptyp butfirst :commands ~
[eraseline :number] ~
[makedef (word "basic% :number) butfirst :commands]
end
to makedef :name :commands
...
foreach :commands [run list (word "compile. first ?) ?]
...
end
</PRE>
<P>
<CODE>Basic</CODE> does some initialization (not shown) and then
invokes <CODE>basicprompt</CODE> repeatedly. <CODE>Basicprompt</CODE> calls the
BASIC reader to read a line; if that line starts with a number, then
<CODE>split</CODE> is used to transform the line into a list of commands,
and <CODE>compile</CODE> is invoked with that list as input. <CODE>Compile</CODE>
remembers the line number for later use, and then invokes <CODE>makedef</CODE>
with the list of commands as an input. I've left out most of the
instructions in <CODE>makedef</CODE> because they're concerned with code
generation, but the important part right now is that for each command
in the list, it invokes a procedure named <CODE>compile.</CODE>something
based on the first word of the command, which must be one of the
command names in the BASIC language.
<H2>The Code Generator</H2>
<P>
Each line of the BASIC source program is going to be compiled into one
Logo procedure. (We'll see shortly that the BASIC <CODE>if</CODE> and <CODE>for</CODE>
commands are exceptions.) For example, the line
<PRE>
10 let x = 3 : let y = 4 : print x,y+6
</PRE>
<P>
will be compiled into the Logo procedure
<PRE>
to basic%10
make "%x 3
make "%y 4
type :%x
type char 9
type :%y + 6
print []
nextline 10
end
</PRE>
<P>
Each of the three BASIC commands within the source line
contributes one or more instructions to the object procedure. Each
<CODE>let</CODE> command is translated into a <CODE>make</CODE> instruction; the
<CODE>print</CODE> command is translated into three <CODE>type</CODE> instructions
and a <CODE>print</CODE> instruction. (The last instruction line in the
procedure, the invocation of <CODE>nextline</CODE>, does not come from
any of the BASIC commands, but is automatically part of the translation
of every BASIC command line.)
<P>
To generate this object procedure, the BASIC compiler is going to have
to invoke Logo's <CODE>define</CODE> primitive, this way:
<PRE>
define "basic%10 [[] [make "%x 3] [make "%y 4] ... [nextline 10]]
</PRE>
<P>
Of course, these actual inputs do not appear explicitly in
the compiler! Rather, the inputs to <CODE>define</CODE> are variables that
have the desired values:
<PRE>
define :name :definition
</PRE>
<P>
The variable <CODE>name</CODE> is an input to <CODE>makedef</CODE>, as we've
seen earlier. The variable <CODE>definition</CODE> is created within
<CODE>makedef</CODE>. It starts out as a list containing just the empty
list, because the first sublist of the input to <CODE>define</CODE> is the
list of the names of the desired inputs to <CODE>basic%10</CODE>, but it has
no inputs. The procedures within the compiler that parse each of the
commands on the source line will also generate object code (that is, Logo
instructions) by appending those instructions to the value of
<CODE>definition</CODE> using Logo's <CODE>queue</CODE> command.
<CODE>Queue</CODE> takes two inputs: the name of a variable whose value is
a list, and a new member to be added at the end of the list. Its effect is
to change the value of the variable to be the extended list.
<P>
Look back at the definition of <CODE>compile.let</CODE> above. Earlier we
considered the parsing instructions within that procedure, but deferred
discussion of the last instruction:
<PRE>
queue "definition (sentence "make (word ""% :var) :exp)
</PRE>
<P>
Now we can understand what this does: It generates a Logo
<CODE>make</CODE> instruction and appends that instruction to the object
procedure definition in progress.
<P>
We can now also think about the output from the <CODE>expression</CODE> procedure.
Its job is to parse a BASIC expression and to translate it into the
corresponding Logo expression. This part of the compiler is one of the
least realistic. A real compiler would have to think about such issues as
the precedence of arithmetic operations; for example, an expression like
<CODE>3+x*4</CODE> must be translated into two machine language instructions, first
one that multiplies <CODE>x</CODE> by 4, and then one that adds the result of that
multiplication to 3. But the Logo interpreter already handles that aspect
of arithmetic for us, so all <CODE>expression</CODE> has to do is to translate
variable references like <CODE>x</CODE> into the Logo form <CODE>:%x</CODE>.
<PRE>
? <U>show expression [3 + x * 4]</U>
[3 + :%x * 4]
</PRE>
<P>
(We'll take a closer look at translating arithmetic expressions
in the Pascal compiler found in the third volume of this series,
<A HREF="../v3-toc2.html"><EM>Beyond Programming.</EM></A>)
<P>
We are now ready to look at the complete version of <CODE>makedef</CODE>:
<PRE>
to makedef :name :commands
make "definition [[]]
foreach :commands [run list (word "compile. first ?) ?]
queue "definition (list "nextline :number)
define :name :definition
make "linenumbers insert :number :linenumbers
end
</PRE>
<P>
I hope you'll find this straightforward. First we create
an empty definition. Then, for each BASIC command on the line, we
append to that definition whatever instructions are generated by
the code generating instructions for that command. After all the
BASIC commands have been compiled, we add an invocation of <CODE>nextline</CODE>
to the definition. Now we can actually define the Logo procedure whose
text we've been accumulating. The last instruction updates the list
of line numbers that <CODE>nextline</CODE> uses to find the next BASIC command
line when the compiled program is running.
<P>
In a sense, this is the end of the story. My purpose in this chapter
was to illustrate how <CODE>define</CODE> can be used in a significant project,
and I've done that. But there are a few more points I should explain
about the code generation for some specific BASIC commands, to complete
your understanding of the compiler.
<P>
One such point is about the difference between <CODE>goto</CODE> and
<CODE>gosub</CODE>. Logo doesn't have anything like a <CODE>goto</CODE>
mechanism; both <CODE>goto</CODE> and <CODE>gosub</CODE> must be implemented
by invoking the procedure corresponding to the given line number. The
difference is that in the case of <CODE>goto</CODE>, we want to invoke that
procedure and not come back! The solution is to compile the BASIC command
<PRE>
goto 40
</PRE>
<P>
into the Logo instructions
<PRE>
basic%40 stop
</PRE>
<P>
In effect, we are calling line 40 as a subprocedure, but
when it returns, we're finished. Any additional Logo instructions
generated for the same line after the <CODE>goto</CODE> (including the
invocation of <CODE>nextline</CODE> that's generated automatically for
every source line) will be ignored because of the <CODE>stop</CODE>.*
<SMALL><BLOCKQUOTE><SMALL><SUP>*</SUP>In fact, the Berkeley Logo interpreter
is clever enough to notice that there is a <CODE>stop</CODE> instruction
after the invocation of <CODE>basic%40</CODE>, and it arranges things so
that there is no "return" from that procedure. This makes things a little
more efficient, but doesn't change the meaning of the
program.</SMALL></BLOCKQUOTE></SMALL>
<P>
The next tricky part of the compiler has to do with the <CODE>for</CODE> and
<CODE>next</CODE> commands. Think first about <CODE>next</CODE>. It must
increment the value of the given variable, test that value against a
remembered limit, and, if the limit has not been reached, go to... where?
The <CODE>for</CODE> loop continues with the BASIC command just after the
<CODE>for</CODE> command itself. That might be in the middle of a line, so
<CODE>next</CODE> can't just remember a line number and invoke
<CODE>basic%N</CODE> for line number N. To solve this problem, the
line containing the <CODE>for</CODE> command is split into two Logo
procedures, one containing everything up to and including the
<CODE>for</CODE>, and one for the rest of the line. For example, the line
<PRE>
30 let x = 3 : for i = 1 to 5 : print i,x : next i
</PRE>
<P>
is translated into
<PRE>
to basic%30
make "%x 3
make "%i 1
make "let%i 5
make "next%i [%g1]
%g1
end
to %g1
type :%i
type char 9
type :%x
print []
make "%i :%i + 1
if not greaterp :%i :let%i [run :next%i stop]
nextline 30
end
</PRE>
<P>
The first <CODE>make</CODE> instruction in <CODE>basic%30</CODE> is the
translation of the <CODE>let</CODE> command. The remaining four lines are
the translation of the <CODE>for</CODE> command; it must give an initial
value to the variable <CODE>i</CODE>, remember the limit value 5, and
remember the Logo procedure to be used for looping. That latter procedure
is named <CODE>%g1</CODE> in this example. The percent sign is used for the
usual reason, to ensure that the names created by the compiler don't
conflict with names in the compiler itself. The <CODE>g1</CODE> part is a
<EM>generated symbol,</EM> created by invoking the Berkeley Logo primitive
operation <CODE>gensym</CODE>. Each invocation of <CODE>gensym</CODE>
outputs a new symbol, first <CODE>g1</CODE>, then <CODE>g2</CODE>, and so on.
<P>
The first four instructions in procedure <CODE>%g1</CODE> (three
<CODE>type</CODE>s and a <CODE>print</CODE>) are the translation of the
BASIC <CODE>print</CODE> command. The next two instructions are the
translation of the <CODE>next</CODE> command; the <CODE>make</CODE>
instruction increments <CODE>i</CODE>, and the <CODE>if</CODE> instruction
tests whether the limit has been passed, and if not, invokes the looping
procedure <CODE>%g1</CODE> again. (Why does this say
<CODE>run :next%i</CODE> instead of just <CODE>%g1</CODE>? Remember that
the name <CODE>%g1</CODE> was created during the compilation of the
<CODE>for</CODE> command. When we get around to compiling the
<CODE>next</CODE> command, the code generator has no way to remember which
generated symbol was used by the corresponding <CODE>for</CODE>. Instead it
makes reference to a variable <CODE>next%i</CODE>, named after the variable
given in the <CODE>next</CODE> command itself, whose value is the name of
the procedure to run. Why not just call that procedure itself
<CODE>next%i</CODE> instead of using a generated symbol? The trouble is
that there might be more than one pair of <CODE>for</CODE> and
<CODE>next</CODE> commands in the same BASIC program using the same
variable, and each of them must have its own looping procedure name.)
<P>
There is a slight complication in the <CODE>print</CODE> and
<CODE>input</CODE> commands to deal with quoted character strings. The
trouble is that Logo's idea of a word ends with a space, so it's not easy to
translate
<PRE>
20 print "hi there"
</PRE>
<P>
into a Logo instruction in which the string is explicitly
present in the instruction. Instead, the BASIC compiler creates a
Logo global variable with a generated name, and uses that variable
in the compiled Logo instructions.
<P>
The trickiest compilation problem comes from the <CODE>if</CODE> command, because
it includes another command as part of itself. That included command might
be translated into several Logo instructions, all of which should be
made to depend on the condition that the <CODE>if</CODE> is testing. The solution
is to put the translation of the inner command into a separate procedure,
so that the BASIC command line
<PRE>
50 if x<6 then print x, x*x
</PRE>
<P>
is translated into the two Logo procedures
<PRE>
to basic%50
if :%x < 6 [%g2]
nextline 50
end
to %g2
type :%x
type char 9
type :%x * :%x
print []
end
</PRE>
<P>
Unfortunately, this doesn't quite work if the inner command is
a <CODE>goto</CODE>. If we were to translate
<PRE>
60 if :foo < 10 then goto 200
</PRE>
<P>
into
<PRE>
to basic%60
if :%foo < 10 [%g3]
nextline 60
end
to %g3
basic%200 stop
end
</PRE>
<P>
then the <CODE>stop</CODE> inside <CODE>%g3</CODE> would stop only <CODE>%g3</CODE>
itself, not <CODE>basic%60</CODE> as desired. So the code generator for <CODE>if</CODE>
checks to see whether the result of compiling the inner command is a single
Logo instruction line; if so, that line is used directly in the compiled
Logo <CODE>if</CODE> rather than diverted into a subprocedure:
<PRE>
to basic%60
if :%foo < 10 [basic%200 stop]
nextline 60
end
</PRE>
<P>
How does the code generator for <CODE>if</CODE> divert the result of compiling
the inner command away from the definition of the overall BASIC command
line? Here is the relevant part of the compiler:
<PRE>
to compile.if :command
make "command butfirst :command
make "exp expression
make "delimiter pop "command
if not equalp :delimiter "then [(throw "error [Need then after if.])]
queue "definition (sentence "if :exp (list c.if1))
end
to c.if1
local "definition
make "definition [[]]
run list (word "compile. first :command) :command
ifelse (count :definition) = 2 ~
[output last :definition] ~
[make "newname word "% gensym
define :newname :definition
output (list :newname)]
end
</PRE>
<P>
The first few lines of this are straightforwardly parsing the
part of the BASIC <CODE>if</CODE> command up to the word <CODE>then</CODE>. What
happens next is a little tricky; a subprocedure <CODE>c.if1</CODE> is invoked
to parse and translate the inner command. It has to be a subprocedure
because it creates a local variable named <CODE>definition</CODE>; when the
inner command is compiled, this local variable "steals" the generated
code. If there is only one line of generated code, then <CODE>c.if1</CODE> outputs
that line; if more than one, then <CODE>c.if1</CODE> creates a subprocedure
and outputs an instruction to invoke that subprocedure. This technique
depends on Logo's dynamic scope, so that references to the variable
named <CODE>definition</CODE> in other parts of the compiler (such as, for
example, <CODE>compile.print</CODE> or <CODE>compile.goto</CODE>) will refer to this
local version.
<H2>The Runtime Library</H2>
<P>
We've already seen the most important part of the runtime library: the
procedure <CODE>nextline</CODE> that gets the compiled program from one line
to the next.
<P>
There is only one more procedure needed as runtime support; it's called
<CODE>readvalue</CODE> and it's used by the BASIC <CODE>input</CODE>
command. In <CODE>BASIC</CODE>, data input is independent of lines. If a
single <CODE>input</CODE> command includes two variables, the user can type
the two desired values on separate lines or on a single line. Furthermore,
two <EM>separate</EM> <CODE>input</CODE> commands can read values from a
single line, if there are still values left on the line after the first
<CODE>input</CODE> has been satisfied. <CODE>Readvalue</CODE> uses a global
variable <CODE>readline</CODE> whose value is whatever's still available
from the last data input line, if any. If there is nothing available, it
reads a new line of input.
<P>
A more realistic BASIC implementation would include runtime library
procedures to compute built-in functions (the equivalent to Logo's
primitive operations) such as absolute value or the trigonometric
functions.
<H2>Further Explorations</H2>
<P>
This BASIC compiler leaves out many features of a complete implementation.
In a real BASIC, a string can be the value of a variable, and there are
string operations such as concatenation and substring extraction analogous
to the arithmetic operations for numbers. The BASIC programmer can create
an array of numbers, or an array of strings. In some versions of BASIC,
the programmer can define named subprocedures, just as in Logo. For the
purposes of this chapter, I wanted to make the compiler as simple as possible
and still have a usable language. If you want to extend the compiler, get
a BASIC textbook and start implementing features.
<P>
It's also possible to expand the immediate command capabilities of the
compiler. In most BASIC implementations, for example, you can say
<CODE>list 100-200</CODE> to list only a specified range of lines within the
source program.
<P>
A much harder project would be to replace the code generator in this
compiler with one that generates machine language for your computer.
Instead of using <CODE>define</CODE> to create Logo procedures, your compiler
would then write machine language instructions into a data file.
To do this, you must learn quite a lot about how machine language
programs are run on your computer!
<P>
<TABLE width="100%"><TR><TD><A HREF="../v2-toc2.html">(back to Table of Contents)</A>
<TD align="right"><A HREF="../v2ch5/v2ch5.html"><STRONG>BACK</STRONG></A>
chapter thread <A HREF="../v2ch7/v2ch7.html"><STRONG>NEXT</STRONG></A>
</TABLE>
<H2>Program Listing</H2>
<P>
I haven't discussed every detail of the program. For example, you may
want to trace through what happens when you ask to delete a line from
the BASIC source program. Here is the complete compiler.
<PRE>
to basic
make "linenumbers []
make "readline []
forever [basicprompt]
end
to basicprompt
print []
print "READY
print []
make "line basicread
if emptyp :line [stop]
ifelse numberp first :line [compile split :line] [immediate :line]
end
to compile :commands
make "number first :commands
make :number :line
ifelse emptyp butfirst :commands ~
[eraseline :number] ~
[makedef (word "basic% :number) butfirst :commands]
end
to makedef :name :commands
make "definition [[]]
foreach :commands [run list (word "compile. first ?) ?]
queue "definition (list "nextline :number)
define :name :definition
make "linenumbers insert :number :linenumbers
end
to insert :num :list
if emptyp :list [output (list :num)]
if :num = first :list [output :list]
if :num < first :list [output fput :num :list]
output fput first :list (insert :num butfirst :list)
end
to eraseline :num
make "linenumbers remove :num :linenumbers
end
to immediate :line
if equalp :line [list] [foreach :linenumbers [print thing ?] stop]
if equalp :line [run] [run (list (word "basic% first :linenumbers))
stop]
if equalp :line [exit] [throw "toplevel]
print sentence [Invalid command:] :line
end
;; Compiling each BASIC command
to compile.end :command
queue "definition [stop]
end
to compile.goto :command
queue "definition (list (word "basic% last :command) "stop)
end
to compile.gosub :command
queue "definition (list (word "basic% last :command))
end
to compile.return :command
queue "definition [stop]
end
to compile.print :command
make "command butfirst :command
while [not emptyp :command] [c.print1]
queue "definition [print []]
end
to c.print1
make "exp expression
ifelse equalp first first :exp "" ~
[make "sym gensym
make word "%% :sym butfirst butlast first :exp
queue "definition list "type word ":%% :sym] ~
[queue "definition fput "type :exp]
if emptyp :command [stop]
make "delimiter pop "command
if equalp :delimiter ", [queue "definition [type char 9] stop]
if equalp :delimiter "\; [stop]
(throw "error [Comma or semicolon needed in print.])
end
to compile.input :command
make "command butfirst :command
if equalp first first :command "" ~
[make "sym gensym
make "prompt pop "command
make word "%% :sym butfirst butlast :prompt
queue "definition list "type word ":%% :sym]
while [not emptyp :command] [c.input1]
end
to c.input1
make "var pop "command
queue "definition (list "make (word ""% :var) "readvalue)
if emptyp :command [stop]
make "delimiter pop "command
if not equalp :delimiter ", (throw "error [Comma needed in input.])
end
to compile.let :command
make "command butfirst :command
make "var pop "command
make "delimiter pop "command
if not equalp :delimiter "= [(throw "error [Need = in let.])]
make "exp expression
queue "definition (sentence "make (word ""% :var) :exp)
end
to compile.for :command
make "command butfirst :command
make "var pop "command
make "delimiter pop "command
if not equalp :delimiter "= [(throw "error [Need = after for.])]
make "start expression
make "delimiter pop "command
if not equalp :delimiter "to [(throw "error [Need to after for.])]
make "end expression
queue "definition (sentence "make (word ""% :var) :start)
queue "definition (sentence "make (word ""let% :var) :end)
make "newname word "% gensym
queue "definition (sentence "make (word ""next% :var)
(list (list :newname)))
queue "definition (list :newname)
define :name :definition
make "name :newname
make "definition [[]]
end
to compile.next :command
make "command butfirst :command
make "var pop "command
queue "definition (sentence "make (word ""% :var) (word ":% :var) [+ 1])
queue "definition (sentence [if not greaterp]
(word ":% :var) (word ":let% :var)
(list (list "run (word ":next% :var)
"stop)))
end
to compile.if :command
make "command butfirst :command
make "exp expression
make "delimiter pop "command
if not equalp :delimiter "then [(throw "error [Need then after if.])]
queue "definition (sentence "if :exp (list c.if1))
end
to c.if1
local "definition
make "definition [[]]
run list (word "compile. first :command) :command
ifelse (count :definition) = 2 ~
[output last :definition] ~
[make "newname word "% gensym
define :newname :definition
output (list :newname)]
end
;; Compile an expression for LET, IF, PRINT, or FOR
to expression
make "expr []
make "token expr1
while [not emptyp :token] [queue "expr :token
make "token expr1]
output :expr
end
to expr1
if emptyp :command [output []]
make "token pop "command
if memberp :token [+ - * / = < > ( )] [output :token]
if memberp :token [, \; : then to] [push "command :token output []]
if numberp :token [output :token]
if equalp first :token "" [output :token]
output word ":% :token
end
;; reading input
to basicread
output basicread1 readword [] "
end
to basicread1 :input :output :token
if emptyp :input [if not emptyp :token [push "output :token]
output reverse :output]
if equalp first :input "| | [if not emptyp :token [push "output :token]
output basicread1 (butfirst :input)
:output "]
if equalp first :input "" [if not emptyp :token [push "output :token]
output breadstring butfirst :input
:output "]
if memberp first :input [+ - * / = < > ( ) , \; :] ~
[if not emptyp :token [push "output :token]
output basicread1 (butfirst :input) (fput first :input :output) "]
output basicread1 (butfirst :input) :output (word :token first :input)
end
to breadstring :input :output :string
if emptyp :input [(throw "error [String needs ending quote.])]
if equalp first :input "" ~
[output basicread1 (butfirst :input)
(fput (word "" :string "") :output)
"]
output breadstring (butfirst :input) :output (word :string first :input)
end
to split :line
output fput first :line split1 (butfirst :line) [] []
end
to split1 :input :output :command
if emptyp :input [if not emptyp :command [push "output reverse :command]
output reverse :output]
if equalp first :input ": [if not emptyp :command
[push "output reverse :command]
output split1 (butfirst :input) :output []]
output split1 (butfirst :input) :output (fput first :input :command)
end
;; Runtime library
to nextline :num
make "target member :num :linenumbers
if not emptyp :target [make "target butfirst :target]
if not emptyp :target [run (list (word "basic% first :target))]
end
to readvalue
while [emptyp :readline] [make "readline basicread]
output pop "readline
end
</PRE>
<P><A HREF="../v2-toc2.html">(back to Table of Contents)</A>
<P><A HREF="../v2ch5/v2ch5.html"><STRONG>BACK</STRONG></A>
chapter thread <A HREF="../v2ch7/v2ch7.html"><STRONG>NEXT</STRONG></A>
<P>
<ADDRESS>
<A HREF="../index.html">Brian Harvey</A>,
<CODE>bh@cs.berkeley.edu</CODE>
</ADDRESS>
</BODY>
</HTML>
|