about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Kartik K. Agaram <vc@akkartik.com> 2015-05-23 12:30:58 -0700
committer Kartik K. Agaram <vc@akkartik.com> 2015-05-23 12:35:05 -0700
commit 047296d811b062477715e3435e1b49ae63d54501 (patch)
tree e271c65abd0adafd43f75ad766834e238c7eb89b
parent a95c44f6981946d87e05d0929c0dd3a4894e953e (diff)
download mu-047296d811b062477715e3435e1b49ae63d54501.tar.gz
1434 - support all unicode spaces
-rw-r--r-- 002test.cc 1
-rw-r--r-- 031address.cc 4
-rw-r--r-- 041name.cc 14
-rw-r--r-- 043space.cc 15
-rw-r--r-- 044space_surround.cc 10
-rw-r--r-- 050scenario.cc 26
-rw-r--r-- 060string.mu 79
7 files changed, 128 insertions, 21 deletions
diff --git a/002test.cc b/002test.cc
index 76bf02cf..ea75db82 100644
--- a/002test.cc
+++ b/002test.cc
@@ -88,6 +88,7 @@ long long int to_integer(string n) {
   char* end = NULL;
   // safe because string.c_str() is guaranteed to be null-terminated
   long long int result = strtoll(n.c_str(), &end, /*any base*/0);
+  if (*end != '\0') raise << "tried to convert " << n << " to number\n";
   assert(*end == '\0');
   return result;
 }
diff --git a/031address.cc b/031address.cc
index cee1d0e8..052eec37 100644
--- a/031address.cc
+++ b/031address.cc
@@ -102,14 +102,14 @@ base = canonize(base);
 
 :(code)
 bool has_property(reagent x, string name) {
-  for (long long int i = 0; i < SIZE(x.properties); ++i) {
+  for (long long int i = /*skip name:type*/1; i < SIZE(x.properties); ++i) {
     if (x.properties.at(i).first == name) return true;
   }
   return false;
 }
 
 vector<string> property(const reagent& r, const string& name) {
-  for (long long int p = 0; p != SIZE(r.properties); ++p) {
+  for (long long int p = /*skip name:type*/1; p != SIZE(r.properties); ++p) {
     if (r.properties.at(p).first == name)
       return r.properties.at(p).second;
   }
diff --git a/041name.cc b/041name.cc
index 0b9e2270..300a9c60 100644
--- a/041name.cc
+++ b/041name.cc
@@ -70,8 +70,7 @@ bool disqualified(/*mutable*/ reagent& x) {
   if (is_raw(x)) return true;
   if (isa_literal(x)) return true;
   if (is_integer(x.name)) return true;
-  if (x.name == "default-space")
-    x.initialized = true;
+  // End Disqualified Reagents
   if (x.initialized) return true;
   return false;
 }
@@ -125,12 +124,11 @@ bool is_raw(const reagent& r) {
 
 bool is_special_name(const string& s) {
   if (s == "_") return true;
-  // lexical scopes
-  if (s == "default-space") return true;
   if (s == "0") return true;
   // tests will use these in later layers even though tests will mostly use numeric addresses
   if (s == "screen") return true;
   if (s == "keyboard") return true;
+  // End is_special_name Cases
   return false;
 }
 
@@ -142,14 +140,6 @@ recipe main [
 +name: assign x 1
 -name: assign _ 1
 
-//: one reserved word that we'll need later
-:(scenario convert_names_passes_default_space)
-recipe main [
-  default-space:number, x:number <- copy 0:literal, 1:literal
-]
-+name: assign x 1
--name: assign default-space 1
-
 //: an escape hatch to suppress name conversion that we'll use later
 :(scenario convert_names_passes_raw)
 recipe main [
diff --git a/043space.cc b/043space.cc
index 37683ab8..e1100f89 100644
--- a/043space.cc
+++ b/043space.cc
@@ -25,6 +25,21 @@ recipe main [
 ]
 +mem: storing 34 in location 8
 
+//:: first disable name conversion for 'default-space'
+:(scenario convert_names_passes_default_space)
+recipe main [
+  default-space:number, x:number <- copy 0:literal, 1:literal
+]
++name: assign x 1
+-name: assign default-space 1
+
+:(before "End Disqualified Reagents")
+if (x.name == "default-space")
+  x.initialized = true;
+:(before "End is_special_name Cases")
+if (s == "default-space") return true;
+
+//:: now implement space support
 :(before "End call Fields")
 long long int default_space;
 :(replace "call(recipe_number r) :running_recipe(r)")
diff --git a/044space_surround.cc b/044space_surround.cc
index 54c92973..61521013 100644
--- a/044space_surround.cc
+++ b/044space_surround.cc
@@ -27,11 +27,13 @@ recipe main [
 
 :(replace{} "long long int space_base(const reagent& x)")
 long long int space_base(const reagent& x) {
+//?   cerr << "space_base: " << x.to_string() << '\n'; //? 1
   return space_base(x, space_index(x), Current_routine->calls.front().default_space);
 }
 
 long long int space_base(const reagent& x, long long int space_index, long long int base) {
 //?   trace("foo") << "base of space " << space_index << '\n'; //? 1
+//?   cerr << "space_base sub: " << x.to_string() << '\n'; //? 1
   if (space_index == 0) {
 //?     trace("foo") << "base of space " << space_index << " is " << base << '\n'; //? 1
     return base;
@@ -42,7 +44,8 @@ long long int space_base(const reagent& x, long long int space_index, long long
 }
 
 long long int space_index(const reagent& x) {
-  for (long long int i = 0; i < SIZE(x.properties); ++i) {
+//?   cerr << "space_index: " << x.to_string() << '\n'; //? 1
+  for (long long int i = /*skip name:type*/1; i < SIZE(x.properties); ++i) {
     if (x.properties.at(i).first == "space") {
       assert(SIZE(x.properties.at(i).second) == 1);
       return to_integer(x.properties.at(i).second.at(0));
@@ -50,3 +53,8 @@ long long int space_index(const reagent& x) {
   }
   return 0;
 }
+
+:(scenario permit_space_as_variable_name)
+recipe main [
+  space:number <- copy 0:literal
+]
diff --git a/050scenario.cc b/050scenario.cc
index 1d7366cd..8df08e77 100644
--- a/050scenario.cc
+++ b/050scenario.cc
@@ -252,13 +252,31 @@ void check_type(const string& lhs, istream& in) {
 
 void check_string(long long int address, const string& literal) {
   trace(Primitive_recipe_depth, "run") << "checking string length at " << address;
-  if (Memory[address] != SIZE(literal))
-    raise << "expected location " << address << " to contain length " << SIZE(literal) << " of string [" << literal << "] but saw " << Memory[address] << '\n';
+  if (Memory[address] != SIZE(literal)) {
+    if (Current_scenario && !Hide_warnings)
+      raise << "\nF - " << Current_scenario->name << ": expected location " << address << " to contain length " << SIZE(literal) << " of string [" << literal << "] but saw " << Memory[address] << '\n';
+    else
+      raise << "expected location " << address << " to contain length " << SIZE(literal) << " of string [" << literal << "] but saw " << Memory[address] << '\n';
+    if (!Hide_warnings) {
+      Passed = false;
+      ++Num_failures;
+    }
+    return;
+  }
   ++address;  // now skip length
   for (long long int i = 0; i < SIZE(literal); ++i) {
     trace(Primitive_recipe_depth, "run") << "checking location " << address+i;
-    if (Memory[address+i] != literal.at(i))
-      raise << "expected location " << (address+i) << " to contain " << literal.at(i) << " but saw " << Memory[address+i] << '\n';
+    if (Memory[address+i] != literal.at(i)) {
+      if (Current_scenario && !Hide_warnings)
+        raise << "\nF - " << Current_scenario->name << ": expected location " << (address+i) << " to contain " << literal.at(i) << " but saw " << Memory[address+i] << '\n';
+      else
+        raise << "expected location " << (address+i) << " to contain " << literal.at(i) << " but saw " << Memory[address+i] << '\n';
+      if (!Hide_warnings) {
+        Passed = false;
+        ++Num_failures;
+      }
+      return;
+    }
   }
 }
 
diff --git a/060string.mu b/060string.mu
index f0b3ad18..338748fc 100644
--- a/060string.mu
+++ b/060string.mu
@@ -494,6 +494,69 @@ scenario interpolate-at-end [
   ]
 ]
 
+recipe space? [
+  default-space:array:address:location <- new location:type, 30:literal
+  c:character <- next-ingredient
+  result:boolean <- equal c:character, 32:literal/space
+  jump-if result:boolean, +reply:label
+  result:boolean <- equal c:character, 10:literal/newline
+  jump-if result:boolean, +reply:label
+  result:boolean <- equal c:character, 9:literal/tab
+  jump-if result:boolean, +reply:label
+  result:boolean <- equal c:character, 13:literal/carriage-return
+  # remaining uncommon cases in sorted order
+  # http://unicode.org code-points in unicode-set Z and Pattern_White_Space
+  jump-if result:boolean, +reply:label
+  result:boolean <- equal c:character, 11:literal/ctrl-k
+  jump-if result:boolean, +reply:label
+  result:boolean <- equal c:character, 12:literal/ctrl-l
+  jump-if result:boolean, +reply:label
+  result:boolean <- equal c:character, 133:literal/ctrl-0085
+  jump-if result:boolean, +reply:label
+  result:boolean <- equal c:character, 160:literal/no-break-space
+  jump-if result:boolean, +reply:label
+  result:boolean <- equal c:character, 5760:literal/ogham-space-mark
+  jump-if result:boolean, +reply:label
+  result:boolean <- equal c:character, 8192:literal/en-quad
+  jump-if result:boolean, +reply:label
+  result:boolean <- equal c:character, 8193:literal/em-quad
+  jump-if result:boolean, +reply:label
+  result:boolean <- equal c:character, 8194:literal/en-space
+  jump-if result:boolean, +reply:label
+  result:boolean <- equal c:character, 8195:literal/em-space
+  jump-if result:boolean, +reply:label
+  result:boolean <- equal c:character, 8196:literal/three-per-em-space
+  jump-if result:boolean, +reply:label
+  result:boolean <- equal c:character, 8197:literal/four-per-em-space
+  jump-if result:boolean, +reply:label
+  result:boolean <- equal c:character, 8198:literal/six-per-em-space
+  jump-if result:boolean, +reply:label
+  result:boolean <- equal c:character, 8199:literal/figure-space
+  jump-if result:boolean, +reply:label
+  result:boolean <- equal c:character, 8200:literal/punctuation-space
+  jump-if result:boolean, +reply:label
+  result:boolean <- equal c:character, 8201:literal/thin-space
+  jump-if result:boolean, +reply:label
+  result:boolean <- equal c:character, 8202:literal/hair-space
+  jump-if result:boolean, +reply:label
+  result:boolean <- equal c:character, 8206:literal/left-to-right
+  jump-if result:boolean, +reply:label
+  result:boolean <- equal c:character, 8207:literal/right-to-left
+  jump-if result:boolean, +reply:label
+  result:boolean <- equal c:character, 8232:literal/line-separator
+  jump-if result:boolean, +reply:label
+  result:boolean <- equal c:character, 8233:literal/paragraph-separator
+  jump-if result:boolean, +reply:label
+  result:boolean <- equal c:character, 8239:literal/narrow-no-break-space
+  jump-if result:boolean, +reply:label
+  result:boolean <- equal c:character, 8287:literal/medium-mathematical-space
+  jump-if result:boolean, +reply:label
+  result:boolean <- equal c:character, 12288:literal/ideographic-space
+  jump-if result:boolean, +reply:label
+  +reply
+  reply result:boolean
+]
+
 recipe trim [
   default-space:array:address:location <- new location:type, 30:literal
   s:address:array:character <- next-ingredient
@@ -508,7 +571,7 @@ recipe trim [
       reply result:address:array:character
     }
     curr:character <- index s:address:array:character/deref, start:number
-    whitespace?:boolean <- equal curr:character, 32:literal/space
+    whitespace?:boolean <- space? curr:character
     break-unless whitespace?:boolean
     start:number <- add start:number, 1:literal
     loop
@@ -519,7 +582,7 @@ recipe trim [
     not-at-start?:boolean <- greater-than end:number, start:number
     assert not-at-start?:boolean [end ran up against start]
     curr:character <- index s:address:array:character/deref, end:number
-    whitespace?:boolean <- equal curr:character, 32:literal/space
+    whitespace?:boolean <- space? curr:character
     break-unless whitespace?:boolean
     end:number <- subtract end:number, 1:literal
     loop
@@ -589,3 +652,15 @@ scenario trim-left-right [
     3:string <- [abc]
   ]
 ]
+
+scenario trim-newline-tab [
+  run [
+    1:address:array:character <- new [	abc
+]
+    2:address:array:character <- trim 1:address:array:character
+    3:array:character <- copy 2:address:array:character/deref
+  ]
+  memory-should-contain [
+    3:string <- [abc]
+  ]
+]
e0ffdcd1 ^
33e7c3a7 ^

ac07e589 ^
33e7c3a7 ^
ac07e589 ^
33e7c3a7 ^
e0ffdcd1 ^
33e7c3a7 ^





ac07e589 ^
33e7c3a7 ^

14a38052 ^
33e7c3a7 ^

e0ffdcd1 ^

901ae474 ^
e0ffdcd1 ^
e99038ea ^
e0ffdcd1 ^



901ae474 ^
e99038ea ^
e0ffdcd1 ^

5a2cb154 ^



1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156