#
#
#            Nim's Runtime Library
#        (c) Copyright 2012 Andreas Rumpf
#
#    See the file "copying.txt", included in this
#    distribution, for details about the copyright.
#

## This module provides support to handle the Unicode UTF-8 encoding.

{.deadCodeElim: on.}

include "system/inclrtl"

type
  RuneImpl = int32 # underlying type of Rune
  Rune* = distinct RuneImpl   ## type that can hold any Unicode character
  Rune16* = distinct int16 ## 16 bit Unicode character

{.deprecated: [TRune: Rune, TRune16: Rune16].}

proc `<=%`*(a, b: Rune): bool =
  ## Compares the two runes with ``<=%`` on their integer values.
  return int(a) <=% int(b)

proc `<%`*(a, b: Rune): bool =
  ## Compares the two runes with ``<%`` on their integer values.
  return int(a) <% int(b)

proc `==`*(a, b: Rune): bool =
  ## Returns true if both runes have the same integer value.
  return int(a) == int(b)

# Bit mask with the lowest ``n`` bits set.
template ones(n: untyped): untyped = ((1 shl n)-1)

proc runeLen*(s: string): int {.rtl, extern: "nuc$1".} =
  ## Returns the number of Unicode characters of the string ``s``.
  ##
  ## Only the lead byte of every sequence is inspected: the index is advanced
  ## by the length that byte announces (1 to 6). A byte that matches none of
  ## the lead byte patterns counts as one character of one byte.
  var i = 0
  while i < len(s):
    if ord(s[i]) <=% 127: inc(i)
    elif ord(s[i]) shr 5 == 0b110: inc(i, 2)
    elif ord(s[i]) shr 4 == 0b1110: inc(i, 3)
    elif ord(s[i]) shr 3 == 0b11110: inc(i, 4)
    elif ord(s[i]) shr 2 == 0b111110: inc(i, 5)
    elif ord(s[i]) shr 1 == 0b1111110: inc(i, 6)
    else: inc i
    inc(result)

proc runeLenAt*(s: string, i: Natural): int =
  ## Returns the number of bytes the rune starting at ``s[i]`` takes.
  ##
  ## The length is derived from the lead byte ``s[i]`` alone; a byte that
  ## matches none of the lead byte patterns yields 1.
  if ord(s[i]) <=% 127: result = 1
  elif ord(s[i]) shr 5 == 0b110: result = 2
  elif ord(s[i]) shr 4 == 0b1110: result = 3
  elif ord(s[i]) shr 3 == 0b11110: result = 4
  elif ord(s[i]) shr 2 == 0b111110: result = 5
  elif ord(s[i]) shr 1 == 0b1111110: result = 6
  else: result = 1

# Rune produced by ``fastRuneAt`` when a multi-byte sequence is cut short.
const replRune = Rune(0xFFFD)

template fastRuneAt*(s: string, i: int, result: untyped, doInc = true) =
  ## Returns the Unicode character ``s[i]`` in ``result``. If ``doInc == true``
  ## ``i`` is incremented by the number of bytes that have been processed.
  ##
  ## If the lead byte announces more bytes than remain in ``s``, ``result`` is
  ## set to ``Rune(0xFFFD)`` and only one byte is consumed. A byte matching no
  ## lead byte pattern is returned as is. Continuation bytes are not checked
  ## for the ``0b10`` prefix (see the commented out asserts).
  bind ones
  if ord(s[i]) <=% 127:
    result = Rune(ord(s[i]))
    when doInc: inc(i)
  elif ord(s[i]) shr 5 == 0b110:
    # assert(ord(s[i+1]) shr 6 == 0b10)
    if i <= s.len - 2:
      result = Rune((ord(s[i]) and (ones(5))) shl 6 or
                    (ord(s[i+1]) and ones(6)))
      when doInc: inc(i, 2)
    else:
      result = replRune
      when doInc: inc(i)
  elif ord(s[i]) shr 4 == 0b1110:
    # assert(ord(s[i+1]) shr 6 == 0b10)
    # assert(ord(s[i+2]) shr 6 == 0b10)
    if i <= s.len - 3:
      result = Rune((ord(s[i]) and ones(4)) shl 12 or
                    (ord(s[i+1]) and ones(6)) shl 6 or
                    (ord(s[i+2]) and ones(6)))
      when doInc: inc(i, 3)
    else:
      result = replRune
      when doInc: inc(i)
  elif ord(s[i]) shr 3 == 0b11110:
    # assert(ord(s[i+1]) shr 6 == 0b10)
    # assert(ord(s[i+2]) shr 6 == 0b10)
    # assert(ord(s[i+3]) shr 6 == 0b10)
    if i <= s.len - 4:
      result = Rune((ord(s[i]) and ones(3)) shl 18 or
                    (ord(s[i+1]) and ones(6)) shl 12 or
                    (ord(s[i+2]) and ones(6)) shl 6 or
                    (ord(s[i+3]) and ones(6)))
      when doInc: inc(i, 4)
    else:
      result = replRune
      when doInc: inc(i)
  elif ord(s[i]) shr 2 == 0b111110:
    # assert(ord(s[i+1]) shr 6 == 0b10)
    # assert(ord(s[i+2]) shr 6 == 0b10)
    # assert(ord(s[i+3]) shr 6 == 0b10)
    # assert(ord(s[i+4]) shr 6 == 0b10)
    if i <= s.len - 5:
      result = Rune((ord(s[i]) and ones(2)) shl 24 or
                    (ord(s[i+1]) and ones(6)) shl 18 or
                    (ord(s[i+2]) and ones(6)) shl 12 or
                    (ord(s[i+3]) and ones(6)) shl 6 or
                    (ord(s[i+4]) and ones(6)))
      when doInc: inc(i, 5)
    else:
      result = replRune
      when doInc: inc(i)
  elif ord(s[i]) shr 1 == 0b1111110:
    # assert(ord(s[i+1]) shr 6 == 0b10)
    # assert(ord(s[i+2]) shr 6 == 0b10)
    # assert(ord(s[i+3]) shr 6 == 0b10)
    # assert(ord(s[i+4]) shr 6 == 0b10)
    # assert(ord(s[i+5]) shr 6 == 0b10)
    if i <= s.len - 6:
      result = Rune((ord(s[i]) and ones(1)) shl 30 or
                    (ord(s[i+1]) and ones(6)) shl 24 or
                    (ord(s[i+2]) and ones(6)) shl 18 or
                    (ord(s[i+3]) and ones(6)) shl 12 or
                    (ord(s[i+4]) and ones(6)) shl 6 or
                    (ord(s[i+5]) and ones(6)))
      when doInc: inc(i, 6)
    else:
      result = replRune
      when doInc: inc(i)
  else:
    result = Rune(ord(s[i]))
    when doInc: inc(i)

proc validateUtf8*(s: string): int =
  ## Returns the position of the invalid byte in ``s`` if the string ``s`` does
  ## not hold valid UTF-8 data. Otherwise ``-1`` is returned.
  ##
  ## Sequences of one to four bytes are accepted. Each continuation byte must
  ## carry the ``0b10`` prefix and two-byte lead bytes below ``0xc2`` are
  ## rejected. Any other lead byte is reported as invalid.
  var i = 0
  let L = s.len
  while i < L:
    if ord(s[i]) <=% 127:
      inc(i)
    elif ord(s[i]) shr 5 == 0b110:
      if ord(s[i]) < 0xc2: return i # Catch overlong ascii representations.
      if i+1 < L and ord(s[i+1]) shr 6 == 0b10: inc(i, 2)
      else: return i
    elif ord(s[i]) shr 4 == 0b1110:
      if i+2 < L and ord(s[i+1]) shr 6 == 0b10 and ord(s[i+2]) shr 6 == 0b10:
        inc i, 3
      else: return i
    elif ord(s[i]) shr 3 == 0b11110:
      if i+3 < L and ord(s[i+1]) shr 6 == 0b10 and
                     ord(s[i+2]) shr 6 == 0b10 and
                     ord(s[i+3]) shr 6 == 0b10:
        inc i, 4
      else: return i
    else:
      return i
  return -1

proc runeAt*(s: string, i: Natural): Rune =
  ## Returns the unicode character in ``s`` at byte index ``i``
  fastRuneAt(s, i, result, false)

template fastToUTF8Copy*(c: Rune, s: var string, pos: int, doInc = true) =
  ## Copies UTF-8 representation of `c` into the preallocated string `s`
  ## starting at position `pos`. If `doInc == true`, `pos` is incremented
  ## by the number of bytes that have been processed.
  ##
  ## To be the most efficient, make sure `s` is preallocated
  ## with an additional amount equal to the byte length of
  ## `c`.
  ##
  ## Note that `s` is resized with `setLen` so that it ends right after the
  ## bytes written for `c`.
  var i = RuneImpl(c)
  if i <=% 127:
    s.setLen(pos+1)
    s[pos+0] = chr(i)
    when doInc: inc(pos)
  elif i <=% 0x07FF:
    s.setLen(pos+2)
    s[pos+0] = chr((i shr 6) or 0b110_00000)
    s[pos+1] = chr((i and ones(6)) or 0b10_0000_00)
    when doInc: inc(pos, 2)
  elif i <=% 0xFFFF:
    s.setLen(pos+3)
    s[pos+0] = chr(i shr 12 or 0b1110_0000)
    s[pos+1] = chr(i shr 6 and ones(6) or 0b10_0000_00)
    s[pos+2] = chr(i and ones(6) or 0b10_0000_00)
    when doInc: inc(pos, 3)
  elif i <=% 0x001FFFFF:
    s.setLen(pos+4)
    s[pos+0] = chr(i shr 18 or 0b1111_0000)
    s[pos+1] = chr(i shr 12 and ones(6) or 0b10_0000_00)
    s[pos+2] = chr(i shr 6 and ones(6) or 0b10_0000_00)
    s[pos+3] = chr(i and ones(6) or 0b10_0000_00)
    when doInc: inc(pos, 4)
  elif i <=% 0x03FFFFFF:
    s.setLen(pos+5)
    s[pos+0] = chr(i shr 24 or 0b111110_00)
    s[pos+1] = chr(i shr 18 and ones(6) or 0b10_0000_00)
    s[pos+2] = chr(i shr 12 and ones(6) or 0b10_0000_00)
    s[pos+3] = chr(i shr 6 and ones(6) or 0b10_0000_00)
    s[pos+4] = chr(i and ones(6) or 0b10_0000_00)
    when doInc: inc(pos, 5)
  elif i <=% 0x7FFFFFFF:
    s.setLen(pos+6)
    s[pos+0] = chr(i shr 30 or 0b1111110_0)
    s[pos+1] = chr(i shr 24 and ones(6) or 0b10_0000_00)
    s[pos+2] = chr(i shr 18 and ones(6) or 0b10_0000_00)
    s[pos+3] = chr(i shr 12 and ones(6) or 0b10_0000_00)
    s[pos+4] = chr(i shr 6 and ones(6) or 0b10_0000_00)
    s[pos+5] = chr(i and ones(6) or 0b10_0000_00)
    when doInc: inc(pos, 6)
  else:
    discard # error, exception?

proc toUTF8*(c: Rune): string {.rtl, extern: "nuc$1".} =
  ## Converts a rune into its UTF-8 representation
  result = ""
  fastToUTF8Copy(c, result, 0, false)

proc `$`*(rune: Rune): string =
  ## Converts a Rune to a string
  rune.toUTF8

proc `$`*(runes: seq[Rune]): string =
  ## Converts a sequence of Runes to a string
  result = ""
  for rune in runes: result.add(rune.toUTF8)

proc runeOffset*(s: string, pos:Natural, start: Natural = 0): int =
  ## Returns the byte position of the unicode character
  ## at position ``pos`` in ``s``, counting runes from the optional
  ## start byte position ``start``.
  ## Returns the special value -1 if it runs out of the string.
  ## For ``pos == 0`` the value of ``start`` is returned unchanged.
  ##
  ## Beware: This can lead to unoptimized code and slow execution!
  ## Most problems are solved more efficiently by using an iterator
  ## or conversion to a seq of Rune.
  var
    i = 0
    o = start
  while i < pos:
    # Never hand an offset at or past the end to runeLenAt: that would
    # index outside of ``s`` when ``start`` is not a valid byte position.
    if o >= s.len:
      return -1
    o += runeLenAt(s, o)
    if o >= s.len:
      return -1
    inc i
  return o

proc runeAtPos*(s: string, pos: int): Rune =
  ## Returns the unicode character at position ``pos``.
  ##
  ## The result of ``runeOffset`` is used as is, so ``pos`` has to denote
  ## an existing rune.
  ##
  ## Beware: This can lead to unoptimized code and slow execution!
  ## Most problems are solved more efficiently by using an iterator
  ## or conversion to a seq of Rune.
  fastRuneAt(s, runeOffset(s, pos), result, false)

proc runeStrAtPos*(s: string, pos: Natural): string =
  ## Returns the unicode character at position ``pos`` as UTF8 String.
  ##
  ## The result of ``runeOffset`` is used as is, so ``pos`` has to denote
  ## an existing rune.
  ##
  ## Beware: This can lead to unoptimized code and slow execution!
  ## Most problems are solved more efficiently by using an iterator
  ## or conversion to a seq of Rune.
  let o = runeOffset(s, pos)
  s[o.. (o+runeLenAt(s, o)-1)]

proc runeReverseOffset*(s: string, rev:Positive): (int, int) =
  ## Returns a tuple with the the byte offset of the
  ## unicode character at position ``rev`` in s counting
  ## from the end (starting with 1) and the total
  ## number of runes in the string. Returns a negative value
  ## for offset if there are to few runes in the string to
  ## satisfy the request.
  ##
  ## Beware: This can lead to unoptimized code and slow execution!
  ## Most problems are solve more efficient by using an iterator
  ## or conversion to a seq of Rune.
  var
    a = rev.int
    o = 0
    x = 0
  while o < s.len:
    let r = runeLenAt(s, o)
    o
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
<html>
<head>
<meta http-equiv="content-type" content="text/html; charset=UTF-8">
<title>Mu - subx/020syscalls.cc</title>
<meta name="Generator" content="Vim/8.0">
<meta name="plugin-version" content="vim7.4_v2">
<meta name="syntax" content="cpp">
<meta name="settings" content="number_lines,use_css,pre_wrap,no_foldcolumn,expand_tabs,line_ids,prevent_copy=">
<meta name="colorscheme" content="minimal">
<style type="text/css">
<!--
pre { white-space: pre-wrap; font-family: monospace; color: #aaaaaa; background-color: #080808; }
body { font-size:12pt; font-family: monospace; color: #aaaaaa; background-color: #080808; }
.subxS2Comment a { color:inherit; }
.subxS1Comment a { color:inherit; }
.subxComment a { color:inherit; }
.subxH2Comment a { color:inherit; }
.subxH1Comment a { color:inherit; }
* { font-size:12pt; font-size: 1em; }
.PreProc { color:#800080; }
.LineNr { color:#444444; }
.Constant { color:#00a0a0; }
.Delimiter { color:#800080; }
.Special { color:#c00000; }
.Identifier { color:#c0a020; }
.Normal { color: #aaaaaa; background-color: #080808; padding-bottom: 1px; }
.Comment { color: #8080ff; }
.cSpecial { color: #008000; }
-->
</style>

<script type='text/javascript'>
<!--

/* Scroll the element for the line named in the URL fragment into view.
 * Accepts both "#L12" and a bare "#12"; a fragment without an 'L' gets one
 * prepended so that it matches the "L<n>" element ids of this page.
 * Always returns true.
 */
function JumpToLine()
{
  /* strip off '#' */
  var lineNum = window.location.hash.substring(1);

  if (lineNum.indexOf('L') == -1) {
    lineNum = 'L'+lineNum;
  }
  /* Keep the lookup result local so that it does not leak onto the global
   * object (an undeclared assignment also throws in strict mode).
   */
  var lineElem = document.getElementById(lineNum);
  /* Only scroll when the id exists; an unknown or empty fragment is ignored. */
  if (lineElem) {
    lineElem.scrollIntoView(true);
  }
  return true;
}
/* Re-run JumpToLine whenever the URL fragment changes, but only when the
 * window object exposes an 'onhashchange' property.
 */
if ('onhashchange' in window) {
  window.onhashchange = JumpToLine;
}

-->
</script>
</head>
<body onload='JumpToLine();'>
<a href='https://github.com/akkartik/mu/blob/master/subx/020syscalls.cc'>https://github.com/akkartik/mu/blob/master/subx/020syscalls.cc</a>
<pre id='vimCodeElement'>
<span id="L1" class="LineNr">  1 </span><span class="Delimiter">:(before &quot;End Initialize Op Names&quot;)</span>
<span id="L2" class="LineNr">  2 </span><a href='001help.cc.html#L253'>put_new</a><span class="Delimiter">(</span><span class="Special"><a href='010---vm.cc.html#L378'>Name</a></span><span class="Delimiter">,</span> <span class="Constant">&quot;cd&quot;</span><span class="Delimiter">,</span> <span class="Constant">&quot;software interrupt (int)&quot;</span><span class="Delimiter">);</span>
<span id="L3" class="LineNr">  3 </span>
<span id="L4" class="LineNr">  4 </span><span class="Delimiter">:(before &quot;End Single-Byte Opcodes&quot;)</span>
<span id="L5" class="LineNr">  5 </span><span class="Normal">case</span> <span class="Constant">0xcd</span>: <span class="Delimiter">{</span>  <span class="Comment">// int imm8 (software interrupt)</span>
<span id="L6" class="LineNr">  6 </span>  <a href='003trace.cc.html#L203'>trace</a><span class="Delimiter">(</span><span class="Constant">90</span><span class="Delimiter">,</span> <span class="Constant">&quot;run&quot;</span><span class="Delimiter">)</span> &lt;&lt; <span class="Constant">&quot;syscall&quot;</span> &lt;&lt; end<span class="Delimiter">();</span>
<span id="L7" class="LineNr">  7 </span>  <span class="Normal">uint8_t</span> <a href='031check_operands.cc.html#L11'>code</a> = <a href='010---vm.cc.html#L364'>next</a><span class="Delimiter">();</span>
<span id="L8" class="LineNr">  8 </span>  <span class="Normal">if</span> <span class="Delimiter">(</span><a href='031check_operands.cc.html#L11'>code</a> != <span class="Constant">0x80</span><span class="Delimiter">)</span> <span class="Delimiter">{</span>
<span id="L9" class="LineNr">  9 </span>    <a href='003trace.cc.html#L210'>raise</a> &lt;&lt; <span class="Constant">&quot;Unimplemented interrupt <a href='031check_operands.cc.html#L11'>code</a> &quot;</span> &lt;&lt; <a href='010---vm.cc.html#L412'>HEXBYTE</a> &lt;&lt; <a href='031check_operands.cc.html#L11'>code</a> &lt;&lt; <span class="cSpecial">'\n'</span> &lt;&lt; end<span class="Delimiter">();</span>
<span id="L10" class="LineNr"> 10 </span>    <a href='003trace.cc.html#L210'>raise</a> &lt;&lt; <span class="Constant">&quot;  Only `int 80h` supported for now.\n&quot;</span> &lt;&lt; end<span class="Delimiter">();</span>
<span id="L11" class="LineNr"> 11 </span>    <span class="Identifier">break</span><span class="Delimiter">;</span>
<span id="L12" class="LineNr"> 12 </span>  <span class="Delimiter">}</span>
<span id="L13" class="LineNr"> 13 </span>  <a href='020syscalls.cc.html#L18'>process_int80</a><span class="Delimiter">();</span>
<span id="L14" class="LineNr"> 14 </span>  <span class="Identifier">break</span><span class="Delimiter">;</span>
<span id="L15" class="LineNr"> 15 </span><span class="Delimiter"