diff options
Diffstat (limited to 'nimlib')
52 files changed, 19100 insertions, 0 deletions
diff --git a/nimlib/copying.txt b/nimlib/copying.txt new file mode 100755 index 000000000..be182d65c --- /dev/null +++ b/nimlib/copying.txt @@ -0,0 +1,29 @@ +======================================================= + The Nimrod Runtime Library + Copyright (C) 2004-2009 Andreas Rumpf +======================================================= + +This is the file copying.txt, it applies to the Nimrod Run-Time Library +(lib) and base packages (base) distributed by members of the Nimrod +Development Team. + +The source code of the Nimrod Runtime Libraries and packages are +distributed under the Library GNU General Public License +(see the file lgpl.txt) with the following modification: + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent modules, +and to copy and distribute the resulting executable under terms of your choice, +provided that you also meet, for each linked independent module, the terms +and conditions of the license of that module. An independent module is a module +which is not derived from or based on this library. If you modify this +library, you may extend this exception to your version of the library, but +you are not obligated to do so. If you do not wish to do so, delete this +exception statement from your version. + +If you didn't receive a copy of the file lgpl.txt, contact: + Free Software Foundation + 675 Mass Ave + Cambridge, MA 02139 + USA diff --git a/nimlib/lgpl.txt b/nimlib/lgpl.txt new file mode 100755 index 000000000..f6fa6c9e5 --- /dev/null +++ b/nimlib/lgpl.txt @@ -0,0 +1,502 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 2.1, February 1999 + + Copyright (C) 1991, 1999 Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + +[This is the first released version of the Lesser GPL. It also counts + as the successor of the GNU Library Public License, version 2, hence + the version number 2.1.] + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +Licenses are intended to guarantee your freedom to share and change +free software--to make sure the software is free for all its users. + + This license, the Lesser General Public License, applies to some +specially designated software packages--typically libraries--of the +Free Software Foundation and other authors who decide to use it. You +can use it too, but we suggest you first think carefully about whether +this license or the ordinary General Public License is the better +strategy to use in any particular case, based on the explanations below. + + When we speak of free software, we are referring to freedom of use, +not price. Our General Public Licenses are designed to make sure that +you have the freedom to distribute copies of free software (and charge +for this service if you wish); that you receive source code or can get +it if you want it; that you can change the software and use pieces of +it in new free programs; and that you are informed that you can do +these things. + + To protect your rights, we need to make restrictions that forbid +distributors to deny you these rights or to ask you to surrender these +rights. These restrictions translate to certain responsibilities for +you if you distribute copies of the library or if you modify it. + + For example, if you distribute copies of the library, whether gratis +or for a fee, you must give the recipients all the rights that we gave +you. You must make sure that they, too, receive or can get the source +code. If you link other code with the library, you must provide +complete object files to the recipients, so that they can relink them +with the library after making changes to the library and recompiling +it. And you must show them these terms so they know their rights. + + We protect your rights with a two-step method: (1) we copyright the +library, and (2) we offer you this license, which gives you legal +permission to copy, distribute and/or modify the library. + + To protect each distributor, we want to make it very clear that +there is no warranty for the free library. Also, if the library is +modified by someone else and passed on, the recipients should know +that what they have is not the original version, so that the original +author's reputation will not be affected by problems that might be +introduced by others. + + Finally, software patents pose a constant threat to the existence of +any free program. We wish to make sure that a company cannot +effectively restrict the users of a free program by obtaining a +restrictive license from a patent holder. Therefore, we insist that +any patent license obtained for a version of the library must be +consistent with the full freedom of use specified in this license. + + Most GNU software, including some libraries, is covered by the +ordinary GNU General Public License. This license, the GNU Lesser +General Public License, applies to certain designated libraries, and +is quite different from the ordinary General Public License. We use +this license for certain libraries in order to permit linking those +libraries into non-free programs. + + When a program is linked with a library, whether statically or using +a shared library, the combination of the two is legally speaking a +combined work, a derivative of the original library. The ordinary +General Public License therefore permits such linking only if the +entire combination fits its criteria of freedom. The Lesser General +Public License permits more lax criteria for linking other code with +the library. + + We call this license the "Lesser" General Public License because it +does Less to protect the user's freedom than the ordinary General +Public License. It also provides other free software developers Less +of an advantage over competing non-free programs. These disadvantages +are the reason we use the ordinary General Public License for many +libraries. However, the Lesser license provides advantages in certain +special circumstances. + + For example, on rare occasions, there may be a special need to +encourage the widest possible use of a certain library, so that it becomes +a de-facto standard. To achieve this, non-free programs must be +allowed to use the library. A more frequent case is that a free +library does the same job as widely used non-free libraries. In this +case, there is little to gain by limiting the free library to free +software only, so we use the Lesser General Public License. + + In other cases, permission to use a particular library in non-free +programs enables a greater number of people to use a large body of +free software. For example, permission to use the GNU C Library in +non-free programs enables many more people to use the whole GNU +operating system, as well as its variant, the GNU/Linux operating +system. + + Although the Lesser General Public License is Less protective of the +users' freedom, it does ensure that the user of a program that is +linked with the Library has the freedom and the wherewithal to run +that program using a modified version of the Library. + + The precise terms and conditions for copying, distribution and +modification follow. Pay close attention to the difference between a +"work based on the library" and a "work that uses the library". The +former contains code derived from the library, whereas the latter must +be combined with the library in order to run. + + GNU LESSER GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License Agreement applies to any software library or other +program which contains a notice placed by the copyright holder or +other authorized party saying it may be distributed under the terms of +this Lesser General Public License (also called "this License"). +Each licensee is addressed as "you". + + A "library" means a collection of software functions and/or data +prepared so as to be conveniently linked with application programs +(which use some of those functions and data) to form executables. + + The "Library", below, refers to any such software library or work +which has been distributed under these terms. A "work based on the +Library" means either the Library or any derivative work under +copyright law: that is to say, a work containing the Library or a +portion of it, either verbatim or with modifications and/or translated +straightforwardly into another language. (Hereinafter, translation is +included without limitation in the term "modification".) + + "Source code" for a work means the preferred form of the work for +making modifications to it. For a library, complete source code means +all the source code for all modules it contains, plus any associated +interface definition files, plus the scripts used to control compilation +and installation of the library. + + Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running a program using the Library is not restricted, and output from +such a program is covered only if its contents constitute a work based +on the Library (independent of the use of the Library in a tool for +writing it). Whether that is true depends on what the Library does +and what the program that uses the Library does. + + 1. You may copy and distribute verbatim copies of the Library's +complete source code as you receive it, in any medium, provided that +you conspicuously and appropriately publish on each copy an +appropriate copyright notice and disclaimer of warranty; keep intact +all the notices that refer to this License and to the absence of any +warranty; and distribute a copy of this License along with the +Library. + + You may charge a fee for the physical act of transferring a copy, +and you may at your option offer warranty protection in exchange for a +fee. + + 2. You may modify your copy or copies of the Library or any portion +of it, thus forming a work based on the Library, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) The modified work must itself be a software library. + + b) You must cause the files modified to carry prominent notices + stating that you changed the files and the date of any change. + + c) You must cause the whole of the work to be licensed at no + charge to all third parties under the terms of this License. + + d) If a facility in the modified Library refers to a function or a + table of data to be supplied by an application program that uses + the facility, other than as an argument passed when the facility + is invoked, then you must make a good faith effort to ensure that, + in the event an application does not supply such function or + table, the facility still operates, and performs whatever part of + its purpose remains meaningful. + + (For example, a function in a library to compute square roots has + a purpose that is entirely well-defined independent of the + application. Therefore, Subsection 2d requires that any + application-supplied function or table used by this function must + be optional: if the application does not supply it, the square + root function must still compute square roots.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Library, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Library, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote +it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Library. + +In addition, mere aggregation of another work not based on the Library +with the Library (or with a work based on the Library) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may opt to apply the terms of the ordinary GNU General Public +License instead of this License to a given copy of the Library. To do +this, you must alter all the notices that refer to this License, so +that they refer to the ordinary GNU General Public License, version 2, +instead of to this License. Do not make any other change in +these notices. + + Once this change is made in a given copy, it is irreversible for +that copy, so the ordinary GNU General Public License applies to all +subsequent copies and derivative works made from that copy. + + This option is useful when you wish to copy part of the code of +the Library into a program that is not a library. + + 4. You may copy and distribute the Library (or a portion or +derivative of it, under Section 2) in object code or executable form +under the terms of Sections 1 and 2 above provided that you accompany +it with the complete corresponding machine-readable source code, which +must be distributed under the terms of Sections 1 and 2 above on a +medium customarily used for software interchange. + + If distribution of object code is made by offering access to copy +from a designated place, then offering equivalent access to copy the +source code from the same place satisfies the requirement to +distribute the source code, even though third parties are not +compelled to copy the source along with the object code. + + 5. A program that contains no derivative of any portion of the +Library, but is designed to work with the Library by being compiled or +linked with it, is called a "work that uses the Library". Such a +work, in isolation, is not a derivative work of the Library, and +therefore falls outside the scope of this License. + + However, linking a "work that uses the Library" with the Library +creates an executable that is a derivative of the Library (because it +contains portions of the Library), rather than a "work that uses the +library". The executable is therefore covered by this License. +Section 6 states terms for distribution of such executables. + + When a "work that uses the Library" uses material from a header file +that is part of the Library, the object code for the work may be a +derivative work of the Library even though the source code is not. +Whether this is true is especially significant if the work can be +linked without the Library, or if the work is itself a library. The +threshold for this to be true is not precisely defined by law. + + If such an object file uses only numerical parameters, data +structure layouts and accessors, and small macros and small inline +functions (ten lines or less in length), then the use of the object +file is unrestricted, regardless of whether it is legally a derivative +work. (Executables containing this object code plus portions of the +Library will still fall under Section 6.) + + Otherwise, if the work is a derivative of the Library, you may +distribute the object code for the work under the terms of Section 6. +Any executables containing that work also fall under Section 6, +whether or not they are linked directly with the Library itself. + + 6. As an exception to the Sections above, you may also combine or +link a "work that uses the Library" with the Library to produce a +work containing portions of the Library, and distribute that work +under terms of your choice, provided that the terms permit +modification of the work for the customer's own use and reverse +engineering for debugging such modifications. + + You must give prominent notice with each copy of the work that the +Library is used in it and that the Library and its use are covered by +this License. You must supply a copy of this License. If the work +during execution displays copyright notices, you must include the +copyright notice for the Library among them, as well as a reference +directing the user to the copy of this License. Also, you must do one +of these things: + + a) Accompany the work with the complete corresponding + machine-readable source code for the Library including whatever + changes were used in the work (which must be distributed under + Sections 1 and 2 above); and, if the work is an executable linked + with the Library, with the complete machine-readable "work that + uses the Library", as object code and/or source code, so that the + user can modify the Library and then relink to produce a modified + executable containing the modified Library. (It is understood + that the user who changes the contents of definitions files in the + Library will not necessarily be able to recompile the application + to use the modified definitions.) + + b) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (1) uses at run time a + copy of the library already present on the user's computer system, + rather than copying library functions into the executable, and (2) + will operate properly with a modified version of the library, if + the user installs one, as long as the modified version is + interface-compatible with the version that the work was made with. + + c) Accompany the work with a written offer, valid for at + least three years, to give the same user the materials + specified in Subsection 6a, above, for a charge no more + than the cost of performing this distribution. + + d) If distribution of the work is made by offering access to copy + from a designated place, offer equivalent access to copy the above + specified materials from the same place. + + e) Verify that the user has already received a copy of these + materials or that you have already sent this user a copy. + + For an executable, the required form of the "work that uses the +Library" must include any data and utility programs needed for +reproducing the executable from it. However, as a special exception, +the materials to be distributed need not include anything that is +normally distributed (in either source or binary form) with the major +components (compiler, kernel, and so on) of the operating system on +which the executable runs, unless that component itself accompanies +the executable. + + It may happen that this requirement contradicts the license +restrictions of other proprietary libraries that do not normally +accompany the operating system. Such a contradiction means you cannot +use both them and the Library together in an executable that you +distribute. + + 7. You may place library facilities that are a work based on the +Library side-by-side in a single library together with other library +facilities not covered by this License, and distribute such a combined +library, provided that the separate distribution of the work based on +the Library and of the other library facilities is otherwise +permitted, and provided that you do these two things: + + a) Accompany the combined library with a copy of the same work + based on the Library, uncombined with any other library + facilities. This must be distributed under the terms of the + Sections above. + + b) Give prominent notice with the combined library of the fact + that part of it is a work based on the Library, and explaining + where to find the accompanying uncombined form of the same work. + + 8. You may not copy, modify, sublicense, link with, or distribute +the Library except as expressly provided under this License. Any +attempt otherwise to copy, modify, sublicense, link with, or +distribute the Library is void, and will automatically terminate your +rights under this License. However, parties who have received copies, +or rights, from you under this License will not have their licenses +terminated so long as such parties remain in full compliance. + + 9. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Library or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Library (or any work based on the +Library), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Library or works based on it. + + 10. Each time you redistribute the Library (or any work based on the +Library), the recipient automatically receives a license from the +original licensor to copy, distribute, link with or modify the Library +subject to these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties with +this License. + + 11. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Library at all. For example, if a patent +license would not permit royalty-free redistribution of the Library by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Library. + +If any portion of this section is held invalid or unenforceable under any +particular circumstance, the balance of the section is intended to apply, +and the section as a whole is intended to apply in other circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 12. If the distribution and/or use of the Library is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Library under this License may add +an explicit geographical distribution limitation excluding those countries, +so that distribution is permitted only in or among countries not thus +excluded. In such case, this License incorporates the limitation as if +written in the body of this License. + + 13. The Free Software Foundation may publish revised and/or new +versions of the Lesser General Public License from time to time. +Such new versions will be similar in spirit to the present version, +but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Library +specifies a version number of this License which applies to it and +"any later version", you have the option of following the terms and +conditions either of that version or of any later version published by +the Free Software Foundation. If the Library does not specify a +license version number, you may choose any version ever published by +the Free Software Foundation. + + 14. If you wish to incorporate parts of the Library into other free +programs whose distribution conditions are incompatible with these, +write to the author to ask for permission. For software which is +copyrighted by the Free Software Foundation, write to the Free +Software Foundation; we sometimes make exceptions for this. Our +decision will be guided by the two goals of preserving the free status +of all derivatives of our free software and of promoting the sharing +and reuse of software generally. + + NO WARRANTY + + 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO +WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. +EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR +OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY +KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE +LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME +THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN +WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY +AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU +FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR +CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE +LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING +RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A +FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF +SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Libraries + + If you develop a new library, and you want it to be of the greatest +possible use to the public, we recommend making it free software that +everyone can redistribute and change. You can do so by permitting +redistribution under these terms (or, alternatively, under the terms of the +ordinary General Public License). + + To apply these terms, attach the following notices to the library. It is +safest to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least the +"copyright" line and a pointer to where the full notice is found. + + <one line to give the library's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +Also add information on how to contact you by electronic and paper mail. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the library, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the + library `Frob' (a library for tweaking knobs) written by James Random Hacker. + + <signature of Ty Coon>, 1 April 1990 + Ty Coon, President of Vice + +That's all there is to it! + + diff --git a/nimlib/nimbase.h b/nimlib/nimbase.h new file mode 100755 index 000000000..a0f08f4f3 --- /dev/null +++ b/nimlib/nimbase.h @@ -0,0 +1,425 @@ +/* + + Nimrod's Runtime Library + (c) Copyright 2009 Andreas Rumpf + + See the file "copying.txt", included in this + distribution, for details about the copyright. +*/ + +/* compiler symbols: +__BORLANDC__ +_MSC_VER +__WATCOMC__ +__LCC__ +__GNUC__ +__DMC__ +__POCC__ +__TINYC__ +*/ + + +#ifndef NIMBASE_H +#define NIMBASE_H + +#include <math.h> + +/* calling convention mess ----------------------------------------------- */ +#if defined(__GNUC__) || defined(__LCC__) || defined(__POCC__) \ + || defined(__TINYC__) + /* these should support C99's inline */ + /* the test for __POCC__ has to come before the test for _MSC_VER, + because PellesC defines _MSC_VER too. This is brain-dead. */ +# define N_INLINE(rettype, name) inline rettype name +#elif defined(__BORLANDC__) || defined(_MSC_VER) +/* Borland's compiler is really STRANGE here; note that the __fastcall + keyword cannot be before the return type, but __inline cannot be after + the return type, so we do not handle this mess in the code generator + but rather here. */ +# define N_INLINE(rettype, name) __inline rettype name +#elif defined(__DMC__) +# define N_INLINE(rettype, name) inline rettype name +#elif defined(__WATCOMC__) +# define N_INLINE(rettype, name) __inline rettype name +#else /* others are less picky: */ +# define N_INLINE(rettype, name) rettype __inline name +#endif + +#if defined(__POCC__) || defined(_MSC_VER) +# define HAVE_LRINT 1 +#endif + +#if defined(__POCC__) +# define NIM_CONST /* PCC is really picky with const modifiers */ +# undef _MSC_VER /* Yeah, right PCC defines _MSC_VER even if it is + not that compatible. Well done. */ +#elif defined(__cplusplus) +# define NIM_CONST /* C++ is picky with const modifiers */ +#else +# define NIM_CONST const +#endif + +#define NIM_THREADVAR __thread + +/* --------------- how int64 constants should be declared: ----------- */ +#if defined(__GNUC__) || defined(__LCC__) || \ + defined(__POCC__) || defined(__DMC__) +# define IL64(x) x##LL +#else /* works only without LL */ +# define IL64(x) x +#endif + +/* ---------------- casting without correct aliasing rules ----------- */ + +#if defined(__GNUCC__) +# define NIM_CAST(type, ptr) (((union{type __x__;}*)(ptr))->__x__) +#else +# define NIM_CAST(type, ptr) ((type)(ptr)) +#endif + +/* ------------------------------------------------------------------- */ + +#if defined(WIN32) || defined(_WIN32) /* only Windows has this mess... */ +# define N_CDECL(rettype, name) rettype __cdecl name +# define N_STDCALL(rettype, name) rettype __stdcall name +# define N_SYSCALL(rettype, name) rettype __syscall name +# define N_FASTCALL(rettype, name) rettype __fastcall name +# define N_SAFECALL(rettype, name) rettype __safecall name +/* function pointers with calling convention: */ +# define N_CDECL_PTR(rettype, name) rettype (__cdecl *name) +# define N_STDCALL_PTR(rettype, name) rettype (__stdcall *name) +# define N_SYSCALL_PTR(rettype, name) rettype (__syscall *name) +# define N_FASTCALL_PTR(rettype, name) rettype (__fastcall *name) +# define N_SAFECALL_PTR(rettype, name) rettype (__safecall *name) + +# define N_LIB_EXPORT extern __declspec(dllexport) +# define N_LIB_IMPORT extern __declspec(dllimport) +#else +# define N_CDECL(rettype, name) rettype name +# define N_STDCALL(rettype, name) rettype name +# define N_SYSCALL(rettype, name) rettype name +# define N_FASTCALL(rettype, name) rettype name +# define N_SAFECALL(rettype, name) rettype name +/* function pointers with calling convention: */ +# define N_CDECL_PTR(rettype, name) rettype (*name) +# define N_STDCALL_PTR(rettype, name) rettype (*name) +# define N_SYSCALL_PTR(rettype, name) rettype (*name) +# define N_FASTCALL_PTR(rettype, name) rettype (*name) +# define N_SAFECALL_PTR(rettype, name) rettype (*name) + +# define N_LIB_EXPORT extern +# define N_LIB_IMPORT extern +#endif + +#define N_NOCONV(rettype, name) rettype name +/* specify no calling convention */ +#define N_NOCONV_PTR(rettype, name) rettype (*name) + +#define N_CLOSURE(rettype, name) rettype name +/* specify no calling convention */ +#define N_CLOSURE_PTR(rettype, name) rettype (*name) + + +#if defined(__GNUC__) || defined(__ICC__) +# define N_NOINLINE(rettype, name) rettype __attribute__((noinline)) name +#elif defined(_MSC_VER) +# define N_NOINLINE(rettype, name) __declspec(noinline) rettype name +#else +# define N_NOINLINE(rettype, name) rettype name +#endif + +#define N_NOINLINE_PTR(rettype, name) rettype (*name) + +#if defined(__BORLANDC__) || defined(__WATCOMC__) || \ + defined(__POCC__) || defined(_MSC_VER) +/* these compilers have a fastcall so use it: */ +# define N_NIMCALL(rettype, name) rettype __fastcall name +# define N_NIMCALL_PTR(rettype, name) rettype (__fastcall *name) +#else +# define N_NIMCALL(rettype, name) rettype name /* no modifier */ +# define N_NIMCALL_PTR(rettype, name) rettype (*name) +#endif + +/* ----------------------------------------------------------------------- */ + +/* from float_cast.h: */ + +/* +** Copyright (C) 2001 Erik de Castro Lopo <erikd AT mega-nerd DOT com> +** +** Permission to use, copy, modify, distribute, and sell this file for any +** purpose is hereby granted without fee, provided that the above copyright +** and this permission notice appear in all copies. No representations are +** made about the suitability of this software for any purpose. It is +** provided "as is" without express or implied warranty. +*/ + +/* Version 1.1 */ + + +/*============================================================================ +** On Intel Pentium processors (especially PIII and probably P4), converting +** from float to int is very slow. To meet the C specs, the code produced by +** most C compilers targeting Pentium needs to change the FPU rounding mode +** before the float to int conversion is performed. +** +** Changing the FPU rounding mode causes the FPU pipeline to be flushed. It +** is this flushing of the pipeline which is so slow. +** +** Fortunately the ISO C99 specifications define the functions lrint, lrintf, +** llrint and llrintf which fix this problem as a side effect. +** +** On Unix-like systems, the configure process should have detected the +** presence of these functions. If they weren't found we have to replace them +** here with a standard C cast. +*/ + +/* +** The C99 prototypes for lrint and lrintf are as follows: +** +** long int lrintf (float x); +** long int lrint (double x); +*/ + +#if defined(__LCC__) || (defined(__GNUC__) && defined(WIN32)) +/* Linux' GCC does not seem to have these. Why? */ +# define HAVE_LRINT +# define HAVE_LRINTF +#endif + +#if defined(HAVE_LRINT) && defined(HAVE_LRINTF) + +/* These defines enable functionality introduced with the 1999 ISO C +** standard. They must be defined before the inclusion of math.h to +** engage them. If optimisation is enabled, these functions will be +** inlined. With optimisation switched off, you have to link in the +** maths library using -lm. +*/ + +# define _ISOC9X_SOURCE 1 +# define _ISOC99_SOURCE 1 +# define __USE_ISOC9X 1 +# define __USE_ISOC99 1 + +#elif (defined(WIN32) || defined(_WIN32) || defined(__WIN32__)) \ + && !defined(__BORLANDC__) && !defined(__POCC__) + +/* Win32 doesn't seem to have these functions. +** Therefore implement inline versions of these functions here. +*/ +static N_INLINE(long int, lrint)(double flt) { + long int intgr; + _asm { + fld flt + fistp intgr + }; + return intgr; +} + +static N_INLINE(long int, lrintf)(float flt) { + long int intgr; + _asm { + fld flt + fistp intgr + }; + return intgr; +} + +#else + +# ifndef lrint +# define lrint(dbl) ((long int)(dbl)) +# endif +# ifndef lrintf +# define lrintf(flt) ((long int)(flt)) +# endif + +#endif /* defined(HAVE_LRINT) && defined(HAVE_LRINTF) */ + + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <limits.h> +#include <stddef.h> +#include <signal.h> +#include <setjmp.h> + +/* +#ifndef INF +static unsigned long nimInf[2]={0xffffffff, 0x7fffffff}; +# define INF (*(double*) nimInf) +#endif */ + +/* C99 compiler? */ +#if (defined(__STD_VERSION__) && (__STD_VERSION__ >= 199901)) +# define HAVE_STDINT_H +#endif + +#if defined(__LCC__) || defined(__DMC__) || defined(__POCC__) +# define HAVE_STDINT_H +#endif + +/* bool types (C++ has it): */ +#ifdef __cplusplus +# ifndef NIM_TRUE +# define NIM_TRUE true +# endif +# ifndef NIM_FALSE +# define NIM_FALSE false +# endif +# define NIM_BOOL bool +#else +# ifdef bool +# define NIM_BOOL bool +# else + typedef unsigned char NIM_BOOL; +# endif +# ifndef NIM_TRUE +# define NIM_TRUE ((NIM_BOOL) 1) +# endif +# ifndef NIM_FALSE +# define NIM_FALSE ((NIM_BOOL) 0) +# endif +#endif + +#define NIM_NIL ((void*)0) /* C's NULL is fucked up in some C compilers, so + the generated code does not rely on it anymore */ + +#if defined(__BORLANDC__) || defined(__DMC__) \ + || defined(__WATCOMC__) || defined(_MSC_VER) +typedef signed char NI8; +typedef signed short int NI16; +typedef signed int NI32; +/* XXX: Float128? */ +typedef unsigned char NU8; +typedef unsigned short int NU16; +typedef unsigned __int64 NU64; +typedef __int64 NI64; +typedef unsigned int NU32; +#elif defined(HAVE_STDINT_H) +# include <stdint.h> +typedef int8_t NI8; +typedef int16_t NI16; +typedef int32_t NI32; +typedef int64_t NI64; +typedef uint64_t NU64; +typedef uint8_t NU8; +typedef uint16_t NU16; +typedef uint32_t NU32; +#else +typedef signed char NI8; +typedef signed short int NI16; +typedef signed int NI32; +/* XXX: Float128? */ +typedef unsigned char NU8; +typedef unsigned short int NU16; +typedef unsigned long long int NU64; +typedef long long int NI64; +typedef unsigned int NU32; +#endif + +typedef float NF32; +typedef double NF64; +typedef double NF; + +typedef char NIM_CHAR; +typedef char* NCSTRING; + +#ifdef NIM_BIG_ENDIAN +# define NIM_IMAN 1 +#else +# define NIM_IMAN 0 +#endif + +static N_INLINE(NI32, float64ToInt32)(double val) { + val = val + 68719476736.0*1.5; + /* 2^36 * 1.5, (52-_shiftamt=36) uses limited precisicion to floor */ + return ((NI32*)&val)[NIM_IMAN] >> 16; /* 16.16 fixed point representation */ +} + +static N_INLINE(NI32, float32ToInt32)(float val) { + return float64ToInt32((double)val); +} + +#define float64ToInt64(x) ((NI64) (x)) + +#define zeroMem(a, size) memset(a, 0, size) +#define equalMem(a, b, size) (memcmp(a, b, size) == 0) + +#define STRING_LITERAL(name, str, length) \ + static const struct { \ + TGenericSeq Sup; \ + NIM_CHAR data[length + 1]; \ + } name = {{length, length}, str} + +typedef struct TStringDesc* string; + +/* declared size of a sequence: */ +#if defined(__GNUC__) +# define SEQ_DECL_SIZE /* empty is correct! */ +#else +# define SEQ_DECL_SIZE 1000000 +#endif + +#define ALLOC_0(size) calloc(1, size) +#define DL_ALLOC_0(size) dlcalloc(1, size) + +#define GenericSeqSize sizeof(TGenericSeq) +#define paramCount() cmdCount + +#if defined(WIN32) || defined(_WIN32) || defined(__WIN32__) || defined(__i386__) +# ifndef NAN +static unsigned long nimNaN[2]={0xffffffff, 0x7fffffff}; +# define NAN (*(double*) nimNaN) +# endif +#endif + +#ifndef NAN +# define NAN (0.0 / 0.0) +#endif + +#ifndef INF +# ifdef INFINITY +# define INF INFINITY +# elif defined(HUGE_VAL) +# define INF HUGE_VAL +# else +# define INF (1.0 / 0.0) +# endif +#endif +/* +typedef struct TSafePoint TSafePoint; +struct TSafePoint { + NI exc; + NCSTRING excname; + NCSTRING msg; + TSafePoint* prev; + jmp_buf context; +}; */ + +typedef struct TFrame TFrame; +struct TFrame { + TFrame* prev; + NCSTRING procname; + NI line; + NCSTRING filename; + NI len; +}; + +extern TFrame* framePtr; +/*extern TSafePoint* excHandler; */ + +#if defined(__cplusplus) +struct NimException { + TSafePoint sp; + + NimException(NI aExc, NCSTRING aExcname, NCSTRING aMsg) { + sp.exc = aExc; sp.excname = aExcname; sp.msg = aMsg; + sp.prev = excHandler; + excHandler = &sp; + } +}; +#endif + +#endif diff --git a/nimlib/pure/cgi.nim b/nimlib/pure/cgi.nim new file mode 100755 index 000000000..baae244e7 --- /dev/null +++ b/nimlib/pure/cgi.nim @@ -0,0 +1,375 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2009 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## This module implements helper procs for CGI applictions. Example: +## +## .. code-block:: Nimrod +## +## import strtabs, cgi +## +## # Fill the values when debugging: +## when debug: +## setTestData("name", "Klaus", "password", "123456") +## # read the data into `myData` +## var myData = readData() +## # check that the data's variable names are "name" or "passwort" +## validateData(myData, "name", "password") +## # start generating content: +## writeContentType() +## # generate content: +## write(stdout, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\">\n") +## write(stdout, "<html><head><title>Test</title></head><body>\n") +## writeln(stdout, "your name: " & myData["name"]) +## writeln(stdout, "your password: " & myData["password"]) +## writeln(stdout, "</body></html>") + +import strutils, os, strtabs + +proc URLencode*(s: string): string = + ## Encodes a value to be HTTP safe: This means that characters in the set + ## ``{'A'..'Z', 'a'..'z', '0'..'9', '_'}`` are carried over to the result, + ## a space is converted to ``'+'`` and every other character is encoded as + ## ``'%xx'`` where ``xx`` denotes its hexadecimal value. + result = "" + for i in 0..s.len-1: + case s[i] + of 'a'..'z', 'A'..'Z', '0'..'9', '_': add(result, s[i]) + of ' ': add(result, '+') + else: + add(result, '%') + add(result, toHex(ord(s[i]), 2)) + +proc handleHexChar(c: char, x: var int) {.inline.} = + case c + of '0'..'9': x = (x shl 4) or (ord(c) - ord('0')) + of 'a'..'f': x = (x shl 4) or (ord(c) - ord('a') + 10) + of 'A'..'F': x = (x shl 4) or (ord(c) - ord('A') + 10) + else: assert(false) + +proc URLdecode*(s: string): string = + ## Decodes a value from its HTTP representation: This means that a ``'+'`` + ## is converted to a space, ``'%xx'`` (where ``xx`` denotes a hexadecimal + ## value) is converted to the character with ordinal number ``xx``, and + ## and every other character is carried over. + result = "" + var i = 0 + while i < s.len: + case s[i] + of '%': + var x = 0 + handleHexChar(s[i+1], x) + handleHexChar(s[i+2], x) + inc(i, 2) + add(result, chr(x)) + of '+': add(result, ' ') + else: add(result, s[i]) + inc(i) + +proc addXmlChar(dest: var string, c: Char) {.inline.} = + case c + of '&': add(dest, "&") + of '<': add(dest, "<") + of '>': add(dest, ">") + of '\"': add(dest, """) + else: add(dest, c) + +proc XMLencode*(s: string): string = + ## Encodes a value to be XML safe: + ## * ``"`` is replaced by ``"`` + ## * ``<`` is replaced by ``<`` + ## * ``>`` is replaced by ``>`` + ## * ``&`` is replaced by ``&`` + ## * every other character is carried over. + result = "" + for i in 0..len(s)-1: addXmlChar(result, s[i]) + +type + ECgi* = object of EIO ## the exception that is raised, if a CGI error occurs + TRequestMethod* = enum ## the used request method + methodNone, ## no REQUEST_METHOD environment variable + methodPost, ## query uses the POST method + methodGet ## query uses the GET method + +proc cgiError*(msg: string) {.noreturn.} = + ## raises an ECgi exception with message `msg`. + var e: ref ECgi + new(e) + e.msg = msg + raise e + +proc getEncodedData(allowedMethods: set[TRequestMethod]): string = + case getenv("REQUEST_METHOD") + of "POST": + if methodPost notin allowedMethods: + cgiError("'REQUEST_METHOD' 'POST' is not supported") + var L = parseInt(getenv("CONTENT_LENGTH")) + result = newString(L) + if readBuffer(stdin, addr(result[0]), L) != L: + cgiError("cannot read from stdin") + of "GET": + if methodGet notin allowedMethods: + cgiError("'REQUEST_METHOD' 'GET' is not supported") + result = getenv("QUERY_STRING") + else: + if methodNone notin allowedMethods: + cgiError("'REQUEST_METHOD' must be 'POST' or 'GET'") + +iterator decodeData*(allowedMethods: set[TRequestMethod] = + {methodNone, methodPost, methodGet}): tuple[key, value: string] = + ## Reads and decodes CGI data and yields the (name, value) pairs the + ## data consists of. If the client does not use a method listed in the + ## `allowedMethods` set, an `ECgi` exception is raised. + var enc = getEncodedData(allowedMethods) + if not isNil(enc): + # decode everything in one pass: + var i = 0 + var name = "" + var value = "" + while enc[i] != '\0': + setLen(name, 0) # reuse memory + while true: + case enc[i] + of '\0': break + of '%': + var x = 0 + handleHexChar(enc[i+1], x) + handleHexChar(enc[i+2], x) + inc(i, 2) + add(name, chr(x)) + of '+': add(name, ' ') + of '=', '&': break + else: add(name, enc[i]) + inc(i) + if enc[i] != '=': cgiError("'=' expected") + inc(i) # skip '=' + setLen(value, 0) # reuse memory + while true: + case enc[i] + of '%': + var x = 0 + handleHexChar(enc[i+1], x) + handleHexChar(enc[i+2], x) + inc(i, 2) + add(value, chr(x)) + of '+': add(value, ' ') + of '&', '\0': break + else: add(value, enc[i]) + inc(i) + yield (name, value) + if enc[i] == '&': inc(i) + elif enc[i] == '\0': break + else: cgiError("'&' expected") + +proc readData*(allowedMethods: set[TRequestMethod] = + {methodNone, methodPost, methodGet}): PStringTable = + ## Read CGI data. If the client does not use a method listed in the + ## `allowedMethods` set, an `ECgi` exception is raised. + result = newStringTable() + for name, value in decodeData(allowedMethods): + result[name] = value + +proc validateData*(data: PStringTable, validKeys: openarray[string]) = + ## validates data; raises `ECgi` if this fails. This checks that each variable + ## name of the CGI `data` occurs in the `validKeys` array. + for key, val in pairs(data): + if find(validKeys, key) < 0: + cgiError("unknown variable name: " & key) + +proc getContentLength*(): string = + ## returns contents of the ``CONTENT_LENGTH`` environment variable + return getenv("CONTENT_LENGTH") + +proc getContentType*(): string = + ## returns contents of the ``CONTENT_TYPE`` environment variable + return getenv("CONTENT_Type") + +proc getDocumentRoot*(): string = + ## returns contents of the ``DOCUMENT_ROOT`` environment variable + return getenv("DOCUMENT_ROOT") + +proc getGatewayInterface*(): string = + ## returns contents of the ``GATEWAY_INTERFACE`` environment variable + return getenv("GATEWAY_INTERFACE") + +proc getHttpAccept*(): string = + ## returns contents of the ``HTTP_ACCEPT`` environment variable + return getenv("HTTP_ACCEPT") + +proc getHttpAcceptCharset*(): string = + ## returns contents of the ``HTTP_ACCEPT_CHARSET`` environment variable + return getenv("HTTP_ACCEPT_CHARSET") + +proc getHttpAcceptEncoding*(): string = + ## returns contents of the ``HTTP_ACCEPT_ENCODING`` environment variable + return getenv("HTTP_ACCEPT_ENCODING") + +proc getHttpAcceptLanguage*(): string = + ## returns contents of the ``HTTP_ACCEPT_LANGUAGE`` environment variable + return getenv("HTTP_ACCEPT_LANGUAGE") + +proc getHttpConnection*(): string = + ## returns contents of the ``HTTP_CONNECTION`` environment variable + return getenv("HTTP_CONNECTION") + +proc getHttpCookie*(): string = + ## returns contents of the ``HTTP_COOKIE`` environment variable + return getenv("HTTP_COOKIE") + +proc getHttpHost*(): string = + ## returns contents of the ``HTTP_HOST`` environment variable + return getenv("HTTP_HOST") + +proc getHttpReferer*(): string = + ## returns contents of the ``HTTP_REFERER`` environment variable + return getenv("HTTP_REFERER") + +proc getHttpUserAgent*(): string = + ## returns contents of the ``HTTP_USER_AGENT`` environment variable + return getenv("HTTP_USER_AGENT") + +proc getPathInfo*(): string = + ## returns contents of the ``PATH_INFO`` environment variable + return getenv("PATH_INFO") + +proc getPathTranslated*(): string = + ## returns contents of the ``PATH_TRANSLATED`` environment variable + return getenv("PATH_TRANSLATED") + +proc getQueryString*(): string = + ## returns contents of the ``QUERY_STRING`` environment variable + return getenv("QUERY_STRING") + +proc getRemoteAddr*(): string = + ## returns contents of the ``REMOTE_ADDR`` environment variable + return getenv("REMOTE_ADDR") + +proc getRemoteHost*(): string = + ## returns contents of the ``REMOTE_HOST`` environment variable + return getenv("REMOTE_HOST") + +proc getRemoteIdent*(): string = + ## returns contents of the ``REMOTE_IDENT`` environment variable + return getenv("REMOTE_IDENT") + +proc getRemotePort*(): string = + ## returns contents of the ``REMOTE_PORT`` environment variable + return getenv("REMOTE_PORT") + +proc getRemoteUser*(): string = + ## returns contents of the ``REMOTE_USER`` environment variable + return getenv("REMOTE_USER") + +proc getRequestMethod*(): string = + ## returns contents of the ``REQUEST_METHOD`` environment variable + return getenv("REQUEST_METHOD") + +proc getRequestURI*(): string = + ## returns contents of the ``REQUEST_URI`` environment variable + return getenv("REQUEST_URI") + +proc getScriptFilename*(): string = + ## returns contents of the ``SCRIPT_FILENAME`` environment variable + return getenv("SCRIPT_FILENAME") + +proc getScriptName*(): string = + ## returns contents of the ``SCRIPT_NAME`` environment variable + return getenv("SCRIPT_NAME") + +proc getServerAddr*(): string = + ## returns contents of the ``SERVER_ADDR`` environment variable + return getenv("SERVER_ADDR") + +proc getServerAdmin*(): string = + ## returns contents of the ``SERVER_ADMIN`` environment variable + return getenv("SERVER_ADMIN") + +proc getServerName*(): string = + ## returns contents of the ``SERVER_NAME`` environment variable + return getenv("SERVER_NAME") + +proc getServerPort*(): string = + ## returns contents of the ``SERVER_PORT`` environment variable + return getenv("SERVER_PORT") + +proc getServerProtocol*(): string = + ## returns contents of the ``SERVER_PROTOCOL`` environment variable + return getenv("SERVER_PROTOCOL") + +proc getServerSignature*(): string = + ## returns contents of the ``SERVER_SIGNATURE`` environment variable + return getenv("SERVER_SIGNATURE") + +proc getServerSoftware*(): string = + ## returns contents of the ``SERVER_SOFTWARE`` environment variable + return getenv("SERVER_SOFTWARE") + +proc setTestData*(keysvalues: openarray[string]) = + ## fills the appropriate environment variables to test your CGI application. + ## This can only simulate the 'GET' request method. `keysvalues` should + ## provide embedded (name, value)-pairs. Example: + ## + ## .. code-block:: Nimrod + ## setTestData("name", "Hanz", "password", "12345") + putenv("REQUEST_METHOD", "GET") + var i = 0 + var query = "" + while i < keysvalues.len: + add(query, URLencode(keysvalues[i])) + add(query, '=') + add(query, URLencode(keysvalues[i+1])) + add(query, '&') + inc(i, 2) + putenv("QUERY_STRING", query) + +proc writeContentType*() = + ## call this before starting to send your HTML data to `stdout`. This + ## implements this part of the CGI protocol: + ## + ## .. code-block:: Nimrod + ## write(stdout, "Content-type: text/html\n\n") + ## + ## It also modifies the debug stack traces so that they contain + ## ``<br />`` and are easily readable in a browser. + write(stdout, "Content-type: text/html\n\n") + system.stackTraceNewLine = "<br />\n" + +proc setCookie*(name, value: string) = + ## Sets a cookie. + write(stdout, "Set-Cookie: ", name, "=", value, "\n") + +var + cookies: PStringTable = nil + +proc parseCookies(s: string): PStringTable = + result = newStringTable(modeCaseInsensitive) + var i = 0 + while true: + while s[i] == ' ' or s[i] == '\t': inc(i) + var keystart = i + while s[i] != '=' and s[i] != '\0': inc(i) + var keyend = i-1 + if s[i] == '\0': break + inc(i) # skip '=' + var valstart = i + while s[i] != ';' and s[i] != '\0': inc(i) + result[copy(s, keystart, keyend)] = copy(s, valstart, i-1) + if s[i] == '\0': break + inc(i) # skip ';' + +proc getCookie*(name: string): string = + ## Gets a cookie. If no cookie of `name` exists, "" is returned. + if cookies == nil: cookies = parseCookies(getHttpCookie()) + result = cookies[name] + +proc existsCookie*(name: string): bool = + ## Checks if a cookie of `name` exists. + if cookies == nil: cookies = parseCookies(getHttpCookie()) + result = hasKey(cookies) + + diff --git a/nimlib/pure/complex.nim b/nimlib/pure/complex.nim new file mode 100755 index 000000000..f50ff4bd0 --- /dev/null +++ b/nimlib/pure/complex.nim @@ -0,0 +1,106 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2006 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + + + +## This module implements complex numbers. + +{.push checks:off, line_dir:off, stack_trace:off, debugger:off.} +# the user does not want to trace a part +# of the standard library! + +import + math + +type + TComplex* = tuple[re, im: float] + ## a complex number, consisting of a real and an imaginary part + +proc `==` *(x, y: TComplex): bool = + ## Compare two complex numbers `x` and `y` for equality. + result = x.re == y.re and x.im == y.im + +proc `+` *(x, y: TComplex): TComplex = + ## Add two complex numbers. + result.re = x.re + y.re + result.im = x.im + y.im + +proc `-` *(x, y: TComplex): TComplex = + ## Subtract two complex numbers. + result.re = x.re - y.re + result.im = x.im - y.im + +proc `-` *(z: TComplex): TComplex = + ## Unary minus for complex numbers. + result.re = -z.re + result.im = -z.im + +proc `/` *(x, y: TComplex): TComplex = + ## Divide `x` by `y`. + var + r, den: float + if abs(y.re) < abs(y.im): + r = y.re / y.im + den = y.im + r * y.re + result.re = (x.re * r + x.im) / den + result.im = (x.im * r - x.re) / den + else: + r = y.im / y.re + den = y.re + r * y.im + result.re = (x.re + r * x.im) / den + result.im = (x.im - r * x.re) / den + +proc `*` *(x, y: TComplex): TComplex = + ## Multiply `x` with `y`. + result.re = x.re * y.re - x.im * y.im + result.im = x.im * y.re + x.re * y.im + +proc abs*(z: TComplex): float = + ## Return the distance from (0,0) to `z`. + + # optimized by checking special cases (sqrt is expensive) + var x, y, temp: float + + x = abs(z.re) + y = abs(z.im) + if x == 0.0: + result = y + elif y == 0.0: + result = x + elif x > y: + temp = y / x + result = x * sqrt(1.0 + temp * temp) + else: + temp = x / y + result = y * sqrt(1.0 + temp * temp) + +proc sqrt*(z: TComplex): TComplex = + ## Square root for a complex number `z`. + var x, y, w, r: float + + if z.re == 0.0 and z.im == 0.0: + result = z + else: + x = abs(z.re) + y = abs(z.im) + if x >= y: + r = y / x + w = sqrt(x) * sqrt(0.5 * (1.0 + sqrt(1.0 + r * r))) + else: + r = x / y + w = sqrt(y) * sqrt(0.5 * (r + sqrt(1.0 + r * r))) + if z.re >= 0.0: + result.re = w + result.im = z.im / (w * 2) + else: + if z.im >= 0.0: result.im = w + else: result.im = -w + result.re = z.im / (c.im + c.im) + +{.pop.} diff --git a/nimlib/pure/dynlib.nim b/nimlib/pure/dynlib.nim new file mode 100755 index 000000000..592073e3d --- /dev/null +++ b/nimlib/pure/dynlib.nim @@ -0,0 +1,84 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2009 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## This module implements the ability to access symbols from shared +## libraries. On POSIX this uses the ``dlsym`` mechanism, on +## Windows ``LoadLibrary``. + +type + TLibHandle* = pointer ## a handle to a dynamically loaded library + +proc LoadLib*(path: string): TLibHandle + ## loads a library from `path`. Returns nil if the library could not + ## be loaded. + +proc UnloadLib*(lib: TLibHandle) + ## unloads the library `lib` + +proc symAddr*(lib: TLibHandle, name: string): pointer + ## retrieves the address of a procedure/variable from `lib`. Returns nil + ## if the symbol could not be found. + +proc checkedSymAddr*(lib: TLibHandle, name: string): pointer = + ## retrieves the address of a procedure/variable from `lib`. Raises + ## `EInvalidLibrary` if the symbol could not be found. + result = symAddr(lib, name) + if result == nil: + var e: ref EInvalidLibrary + new(e) + e.msg = "could not find symbol: " & name + raise e + +when defined(posix): + # + # ========================================================================= + # This is an implementation based on the dlfcn interface. + # The dlfcn interface is available in Linux, SunOS, Solaris, IRIX, FreeBSD, + # NetBSD, AIX 4.2, HPUX 11, and probably most other Unix flavors, at least + # as an emulation layer on top of native functions. + # ========================================================================= + # + var + RTLD_NOW {.importc: "RTLD_NOW", header: "<dlfcn.h>".}: int + + proc dlclose(lib: TLibHandle) {.importc, header: "<dlfcn.h>".} + proc dlopen(path: CString, mode: int): TLibHandle {. + importc, header: "<dlfcn.h>".} + proc dlsym(lib: TLibHandle, name: cstring): pointer {. + importc, header: "<dlfcn.h>".} + + proc LoadLib(path: string): TLibHandle = return dlopen(path, RTLD_NOW) + proc UnloadLib(lib: TLibHandle) = dlclose(lib) + proc symAddr(lib: TLibHandle, name: string): pointer = + return dlsym(lib, name) + +elif defined(windows) or defined(dos): + # + # ======================================================================= + # Native Windows Implementation + # ======================================================================= + # + type + THINSTANCE {.importc: "HINSTANCE".} = pointer + + proc FreeLibrary(lib: THINSTANCE) {.importc, header: "<windows.h>", stdcall.} + proc winLoadLibrary(path: cstring): THINSTANCE {. + importc: "LoadLibraryA", header: "<windows.h>", stdcall.} + proc GetProcAddress(lib: THINSTANCE, name: cstring): pointer {. + importc: "GetProcAddress", header: "<windows.h>", stdcall.} + + proc LoadLib(path: string): TLibHandle = + result = cast[TLibHandle](winLoadLibrary(path)) + proc UnloadLib(lib: TLibHandle) = FreeLibrary(cast[THINSTANCE](lib)) + + proc symAddr(lib: TLibHandle, name: string): pointer = + result = GetProcAddress(cast[THINSTANCE](lib), name) + +else: + {.error: "no implementation for dynlib".} diff --git a/nimlib/pure/hashes.nim b/nimlib/pure/hashes.nim new file mode 100755 index 000000000..1593119bd --- /dev/null +++ b/nimlib/pure/hashes.nim @@ -0,0 +1,97 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2008 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## This module implements efficient computations of hash values for diverse +## Nimrod types. + +import + strutils + +type + THash* = int ## a hash value; hash tables using these values should + ## always have a size of a power of two and can use the ``and`` + ## operator instead of ``mod`` for truncation of the hash value. + +proc concHash(h: THash, val: int): THash {.inline.} = + result = h +% val + result = result +% result shl 10 + result = result xor (result shr 6) + +proc finishHash(h: THash): THash {.inline.} = + result = h +% h shl 3 + result = result xor (result shr 11) + result = result +% result shl 15 + +proc hashData*(Data: Pointer, Size: int): THash = + ## hashes an array of bytes of size `size` + var + h: THash + p: cstring + i, s: int + h = 0 + p = cast[cstring](Data) + i = 0 + s = size + while s > 0: + h = concHash(h, ord(p[i])) + Inc(i) + Dec(s) + result = finishHash(h) + +proc hash*(x: Pointer): THash {.inline.} = + ## efficient hashing of pointers + result = (cast[THash](x)) shr 3 # skip the alignment + +proc hash*(x: int): THash {.inline.} = + ## efficient hashing of integers + result = x + +proc hash*(x: int64): THash {.inline.} = + ## efficient hashing of integers + result = toU32(x) + +proc hash*(x: char): THash {.inline.} = + ## efficient hashing of characters + result = ord(x) + +proc hash*(x: string): THash = + ## efficient hashing of strings + var h: THash + h = 0 + for i in 0..x.len-1: + h = concHash(h, ord(x[i])) + result = finishHash(h) + +proc hashIgnoreStyle*(x: string): THash = + ## efficient hashing of strings; style is ignored + var + h: THash + c: Char + h = 0 + for i in 0..x.len-1: + c = x[i] + if c == '_': + continue # skip _ + if c in {'A'..'Z'}: + c = chr(ord(c) + (ord('a') - ord('A'))) # toLower() + h = concHash(h, ord(c)) + result = finishHash(h) + +proc hashIgnoreCase*(x: string): THash = + ## efficient hashing of strings; case is ignored + var + h: THash + c: Char + h = 0 + for i in 0..x.len-1: + c = x[i] + if c in {'A'..'Z'}: + c = chr(ord(c) + (ord('a') - ord('A'))) # toLower() + h = concHash(h, ord(c)) + result = finishHash(h) diff --git a/nimlib/pure/hashtabs.nim b/nimlib/pure/hashtabs.nim new file mode 100755 index 000000000..68d19d63b --- /dev/null +++ b/nimlib/pure/hashtabs.nim @@ -0,0 +1,163 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2009 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## The ``hashtabs`` module implements an efficient generic hash +## table/dictionary data type. + +import + hashes + +const + growthFactor = 2 + startSize = 8 + sham = sizeof(THash)*8-2 # shift amount + mask = 0b11 shl sham + usedSlot = 0b10 shl sham + delSlot = 0b01 shl sham + emptySlot = 0 + +type + TTable*[TKey, TValue] = object + counter: int + data: seq[tuple[key: TKey, val: TValue, h: THash]] + +proc init*(t: var TTable, size = startSize) = + t.counter = 0 + newSeq(t.data, size) + +proc markUsed(h: THash): THash {.inline.} = + return h and not mask or usedSlot + +proc len*(t: TTable): int {.inline.} = + ## returns the number of keys in `t`. + result = t.counter + +proc mustRehash(length, counter: int): bool = + assert(length > counter) + result = (length * 2 < counter * 3) or (length - counter < 4) + +proc nextTry(h, maxHash: THash): THash {.inline.} = + result = ((5 * h) + 1) and maxHash + +template eq(a, b: expr): expr = a == b + +proc rawGet(t: TTable, key: TKey, fullhash: THash): int = + var h = fullhash and high(t.data) + while (t.data[h].h and mask) != 0: + # If it is a deleted entry, the comparison with ``markUsed(fullhash)`` + # fails, so there is no need to check for this explicitely. + if t.data[h].h == markUsed(fullhash) and eq(t.data[h].key, key): return h + h = nextTry(h, high(t.data)) + result = - 1 + +proc `[]`*(t: TTable, key: TKey): TValue = + ## retrieves the value at ``t[key]``. If `key` is not in `t`, + ## `EInvalidValue` is raised. + var index = rawGet(t, key, hash(key)) + if index >= 0: result = t.data[index].val + else: + var e: ref EInvalidValue + new(e) + e.msg = "invalid key: " & $key + raise e + +proc hasKey*(t: TTable, key: TKey): bool = + ## returns true iff `key` is in the table `t`. + result = rawGet(t, key) >= 0 + +proc rawInsert[TKey, TValue]( + data: var seq[tuple[key: TKey, val: TValue, h: THash]], + tup: tuple[key: TKey, val: TValue, h: THash]) = + var h = tup.h and high(data) + while (data[h].h and mask) == usedSlot: h = nextTry(h, high(data)) + data[h] = tup + +proc enlarge(t: var TTable) = + var n: seq[tuple[key: TKey, val: TValue, h: THash]] + newSeq(n, len(t.data) * growthFactor) + for i in 0..high(t.data): + if (t.data[i].h and mask) == usedSlot: rawInsert(n, t.data[i]) + swap(t.data, n) + +proc `[]=`*(t: var TTable, key: TKey, val: TValue) = + ## puts a (key, value)-pair into `t`. + var fullhash = hash(key) + var index = rawGet(t, key, fullhash) + if index >= 0: + t.data[index].val = val + else: + if mustRehash(len(t.data), t.counter): enlarge(t) + rawInsert(t.data, (key, val, markUsed(fullhash))) + inc(t.counter) + +proc add*(t: var TTable, key: TKey, val: TValue) = + ## puts a (key, value)-pair into `t`, but does not check if key already + ## exists. + if mustRehash(len(t.data), t.counter): enlarge(t) + rawInsert(t.data, (key, val, markUsed(hash(key)))) + inc(t.counter) + +proc del*(t: var TTable, key: TKey) = + ## deletes a (key, val)-pair in `t`. + var index = rawGet(t, key) + if index >= 0: + t.data[index].h = delSlot + +proc delAll*(t: var TTable, key: TKey) = + ## deletes all (key, val)-pairs in `t`. + while true: + var index = rawGet(t, key) + if index < 0: break + t.data[index].h = delSlot + +iterator pairs*(t: TTable): tuple[key: TKey, value: TValue] = + ## iterates over any (key, value) pair in the table `t`. + for h in 0..high(t.data): + if (t.data[h].h and mask) == usedSlot: + yield (t.data[h].key, t.data[h].val) + +iterator keys*(t: TTable): TKey = + ## iterate over any key in the table `t`. If key occurs multiple times, it + ## is yielded multiple times. + for h in 0..high(t.data): + if (t.data[h].h and mask) == usedSlot: + yield t.data[h].key + +iterator values*(t: TTable): TValue = + ## iterate over any value in the table `t`. + for h in 0..high(t.data): + if (t.data[h].h and mask) == usedSlot: + yield t.data[h].val + +iterator values*(t: TTable, key: TKey): TValue = + ## iterate over any value associated with `key` in `t`. + var fullhash = hash(key) + var h = fullhash and high(t.data) + while (t.data[h].h and mask) != 0: + # If it is a deleted entry, the comparison with ``markUsed(fullhash)`` + # fails, so there is no need to check for this explicitely. + if t.data[h].h == markUsed(fullhash) and eq(t.data[h].key, key): + yield t.data[h].val + h = nextTry(h, high(t.data)) + +proc `$`*[KeyToStr=`$`, ValueToStr=`$`](t: TTable): string = + ## turns the table into its string representation. `$` must be available + ## for TKey and TValue for this to work. + if t.len == 0: + result = "{:}" + else: + result = "{" + var i = 0 + for k, v in pairs(t): + if i > 0: add(result, ", ") + add(result, KeyToStr(k)) + add(result, ": ") + add(result, ValueToStr(v)) + inc(i) + add(result, "}") diff --git a/nimlib/pure/lexbase.nim b/nimlib/pure/lexbase.nim new file mode 100755 index 000000000..bb207e92a --- /dev/null +++ b/nimlib/pure/lexbase.nim @@ -0,0 +1,166 @@ +# +# +# The Nimrod Compiler +# (c) Copyright 2009 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## This module implements a base object of a lexer with efficient buffer +## handling. Only at line endings checks are necessary if the buffer +## needs refilling. + +import + strutils, streams + +const + EndOfFile* = '\0' ## end of file marker + NewLines* = {'\c', '\L'} + +# Buffer handling: +# buf: +# "Example Text\n ha!" bufLen = 17 +# ^pos = 0 ^ sentinel = 12 +# + +type + TBaseLexer* = object of TObject ## the base lexer. Inherit your lexer from + ## this object. + bufpos*: int ## the current position within the buffer + buf*: cstring ## the buffer itself + bufLen*: int ## length of buffer in characters + input: PStream ## the input stream + LineNumber*: int ## the current line number + sentinel: int + lineStart: int # index of last line start in buffer + fileOpened: bool + +proc open*(L: var TBaseLexer, input: PStream, bufLen: int = 8192) + ## inits the TBaseLexer with a stream to read from + +proc close*(L: var TBaseLexer) + ## closes the base lexer. This closes `L`'s associated stream too. + +proc getCurrentLine*(L: TBaseLexer, marker: bool = true): string + ## retrieves the current line. + +proc getColNumber*(L: TBaseLexer, pos: int): int + ## retrieves the current column. + +proc HandleCR*(L: var TBaseLexer, pos: int): int + ## Call this if you scanned over '\c' in the buffer; it returns the the + ## position to continue the scanning from. `pos` must be the position + ## of the '\c'. +proc HandleLF*(L: var TBaseLexer, pos: int): int + ## Call this if you scanned over '\L' in the buffer; it returns the the + ## position to continue the scanning from. `pos` must be the position + ## of the '\L'. + +# implementation + +const + chrSize = sizeof(char) + +proc close(L: var TBaseLexer) = + dealloc(L.buf) + L.input.close(L.input) + +proc FillBuffer(L: var TBaseLexer) = + var + charsRead, toCopy, s: int # all are in characters, + # not bytes (in case this + # is not the same) + oldBufLen: int + # we know here that pos == L.sentinel, but not if this proc + # is called the first time by initBaseLexer() + assert(L.sentinel < L.bufLen) + toCopy = L.BufLen - L.sentinel - 1 + assert(toCopy >= 0) + if toCopy > 0: + MoveMem(L.buf, addr(L.buf[L.sentinel + 1]), toCopy * chrSize) # "moveMem" handles overlapping regions + charsRead = L.input.readData(L.input, addr(L.buf[toCopy]), + (L.sentinel + 1) * chrSize) div chrSize + s = toCopy + charsRead + if charsRead < L.sentinel + 1: + L.buf[s] = EndOfFile # set end marker + L.sentinel = s + else: + # compute sentinel: + dec(s) # BUGFIX (valgrind) + while true: + assert(s < L.bufLen) + while (s >= 0) and not (L.buf[s] in NewLines): Dec(s) + if s >= 0: + # we found an appropriate character for a sentinel: + L.sentinel = s + break + else: + # rather than to give up here because the line is too long, + # double the buffer's size and try again: + oldBufLen = L.BufLen + L.bufLen = L.BufLen * 2 + L.buf = cast[cstring](realloc(L.buf, L.bufLen * chrSize)) + assert(L.bufLen - oldBuflen == oldBufLen) + charsRead = L.input.ReadData(L.input, addr(L.buf[oldBufLen]), + oldBufLen * chrSize) div chrSize + if charsRead < oldBufLen: + L.buf[oldBufLen + charsRead] = EndOfFile + L.sentinel = oldBufLen + charsRead + break + s = L.bufLen - 1 + +proc fillBaseLexer(L: var TBaseLexer, pos: int): int = + assert(pos <= L.sentinel) + if pos < L.sentinel: + result = pos + 1 # nothing to do + else: + fillBuffer(L) + L.bufpos = 0 # XXX: is this really correct? + result = 0 + L.lineStart = result + +proc HandleCR(L: var TBaseLexer, pos: int): int = + assert(L.buf[pos] == '\c') + inc(L.linenumber) + result = fillBaseLexer(L, pos) + if L.buf[result] == '\L': + result = fillBaseLexer(L, result) + +proc HandleLF(L: var TBaseLexer, pos: int): int = + assert(L.buf[pos] == '\L') + inc(L.linenumber) + result = fillBaseLexer(L, pos) #L.lastNL := result-1; // BUGFIX: was: result; + +proc skip_UTF_8_BOM(L: var TBaseLexer) = + if (L.buf[0] == '\xEF') and (L.buf[1] == '\xBB') and (L.buf[2] == '\xBF'): + inc(L.bufpos, 3) + inc(L.lineStart, 3) + +proc open(L: var TBaseLexer, input: PStream, bufLen: int = 8192) = + assert(bufLen > 0) + assert(input != nil) + L.input = input + L.bufpos = 0 + L.bufLen = bufLen + L.buf = cast[cstring](alloc(bufLen * chrSize)) + L.sentinel = bufLen - 1 + L.lineStart = 0 + L.linenumber = 1 # lines start at 1 + fillBuffer(L) + skip_UTF_8_BOM(L) + +proc getColNumber(L: TBaseLexer, pos: int): int = + result = abs(pos - L.lineStart) + +proc getCurrentLine(L: TBaseLexer, marker: bool = true): string = + var i: int + result = "" + i = L.lineStart + while not (L.buf[i] in {'\c', '\L', EndOfFile}): + add(result, L.buf[i]) + inc(i) + add(result, "\n") + if marker: + add(result, RepeatChar(getColNumber(L, L.bufpos)) & "^\n") + diff --git a/nimlib/pure/logging.nim b/nimlib/pure/logging.nim new file mode 100755 index 000000000..6df39f50b --- /dev/null +++ b/nimlib/pure/logging.nim @@ -0,0 +1,146 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2009 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## This module implements a simple logger. It is based on the following design: +## * Runtime log formating is a bug: Sooner or later ever log file is parsed. +## * Keep it simple: If this library does not fullfill your needs, write your +## own. Trying to support every logging feature just leads to bloat. +## +## Format is:: +## +## DEBUG|INFO|... (2009-11-02 00:00:00)? (Component: )? Message +## +## + +type + TLevel* = enum ## logging level + lvlAll, ## all levels active + lvlDebug, ## debug level (and any above) active + lvlInfo, ## info level (and any above) active + lvlWarn, ## warn level (and any above) active + lvlError, ## error level (and any above) active + lvlFatal ## fatal level (and any above) active + +const + LevelNames*: array [TLevel, string] = [ + "DEBUG", "DEBUG", "INFO", "WARN", "ERROR", "FATAL" + ] + +type + TLogger* = object of TObject ## abstract logger; the base type of all loggers + levelThreshold*: TLevel ## only messages of level >= levelThreshold + ## should be processed + TConsoleLogger* = object of TLogger ## logger that writes the messages to the + ## console + + TFileLogger* = object of TLogger ## logger that writes the messages to a file + f: TFile + + TRollingFileLogger* = object of + TFileLogger ## logger that writes the message to a file + maxlines: int # maximum number of lines + lines: seq[string] + +method log*(L: ref TLogger, level: TLevel, + frmt: string, args: openArray[string]) = + ## override this method in custom loggers. Default implementation does + ## nothing. + nil + +method log*(L: ref TConsoleLogger, level: TLevel, + frmt: string, args: openArray[string]) = + Writeln(stdout, LevelNames[level], " ", frmt % args) + +method log*(L: ref TFileLogger, level: TLevel, + frmt: string, args: openArray[string]) = + Writeln(L.f, LevelNames[level], " ", frmt % args) + +proc defaultFilename*(): string = + ## returns the default filename for a logger + var (path, name, ext) = splitFile(getApplicationFilename()) + result = changeFileExt(path / name & "_" & getDateStr(), "log") + +proc substituteLog*(frmt: string): string = + ## converts $date to the current date + ## converts $time to the current time + ## converts $app to getApplicationFilename() + ## converts + result = "" + var i = 0 + while i < frmt.len: + if frmt[i] != '$': + result.add(frmt[i]) + inc(i) + else: + inc(i) + var v = "" + var app = getApplicationFilename() + while frmt[i] in IdentChars: + v.add(toLower(frmt[i])) + inc(i) + case v + of "date": result.add(getDateStr()) + of "time": result.add(getClockStr()) + of "app": result.add(app) + of "appdir": result.add(app.splitFile.dir) + of "appname": result.add(app.splitFile.name) + + +proc newFileLogger(filename = defaultFilename(), + mode: TFileMode = fmAppend, + levelThreshold = lvlNone): ref TFileLogger = + new(result) + result.levelThreshold = levelThreshold + if not open(result.f, filename, mode): + raiseException(EIO, "cannot open for writing: " & filename) + +proc newRollingFileLogger(filename = defaultFilename(), + mode: TFileMode = fmAppend, + levelThreshold = lvlNone, + maxLines = 1000): ref TFileLogger = + new(result) + result.levelThreshold = levelThreshold + result.maxLines = maxLines + if not open(result.f, filename, mode): + raiseException(EIO, "cannot open for writing: " & filename) + +var + level* = lvlNone + handlers*: seq[ref TLogger] = @[] + +proc logLoop(level: TLevel, msg: string) = + for logger in items(handlers): + if level >= logger.levelThreshold: + log(logger, level, msg) + +template log*(level: TLevel, msg: string) = + ## logs a message of the given level + if level >= logging.Level: + (bind logLoop)(level, frmt, args) + +template debug*(msg: string) = + ## logs a debug message + log(lvlDebug, msg) + +template info*(msg: string) = + ## logs an info message + log(lvlInfo, msg) + +template warn*(msg: string) = + ## logs a warning message + log(lvlWarn, msg) + +template error*(msg: string) = + ## logs an error message + log(lvlError, msg) + +template fatal*(msg: string) = + ## logs a fatal error message and calls ``quit(msg)`` + log(lvlFatal, msg) + diff --git a/nimlib/pure/macros.nim b/nimlib/pure/macros.nim new file mode 100755 index 000000000..677469ed2 --- /dev/null +++ b/nimlib/pure/macros.nim @@ -0,0 +1,249 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2009 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + + +## This module contains the interface to the compiler's abstract syntax +## tree (`AST`:idx:). Macros operate on this tree. + +## .. include:: ../doc/astspec.txt + +#[[[cog +#def toEnum(name, elems): +# body = "" +# counter = 0 +# for e in elems: +# if counter % 4 == 0: p = "\n " +# else: p = "" +# body = body + p + 'n' + e + ', ' +# counter = counter + 1 +# +# return (" TNimrod%s* = enum%s\n TNim%ss* = set[TNimrod%s]\n" % +# (name, body[:-2], name, name)) +# +#enums = eval(open("data/ast.yml").read()) +#cog.out("type\n") +#for key, val in enums.items(): +# if key[-4:] == "Flag": continue +# cog.out(toEnum(key, val)) +#]]] +type + TNimrodNodeKind* = enum + nnkNone, nnkEmpty, nnkIdent, nnkSym, + nnkType, nnkCharLit, nnkIntLit, nnkInt8Lit, + nnkInt16Lit, nnkInt32Lit, nnkInt64Lit, nnkFloatLit, + nnkFloat32Lit, nnkFloat64Lit, nnkStrLit, nnkRStrLit, + nnkTripleStrLit, nnkMetaNode, nnkNilLit, nnkDotCall, + nnkCommand, nnkCall, nnkCallStrLit, nnkExprEqExpr, + nnkExprColonExpr, nnkIdentDefs, nnkVarTuple, nnkInfix, + nnkPrefix, nnkPostfix, nnkPar, nnkCurly, + nnkBracket, nnkBracketExpr, nnkPragmaExpr, nnkRange, + nnkDotExpr, nnkCheckedFieldExpr, nnkDerefExpr, nnkIfExpr, + nnkElifExpr, nnkElseExpr, nnkLambda, nnkAccQuoted, + nnkTableConstr, nnkBind, nnkSymChoice, nnkHiddenStdConv, + nnkHiddenSubConv, nnkHiddenCallConv, nnkConv, nnkCast, + nnkAddr, nnkHiddenAddr, nnkHiddenDeref, nnkObjDownConv, + nnkObjUpConv, nnkChckRangeF, nnkChckRange64, nnkChckRange, + nnkStringToCString, nnkCStringToString, nnkPassAsOpenArray, nnkAsgn, + nnkFastAsgn, nnkGenericParams, nnkFormalParams, nnkOfInherit, + nnkModule, nnkProcDef, nnkMethodDef, nnkConverterDef, + nnkMacroDef, nnkTemplateDef, nnkIteratorDef, nnkOfBranch, + nnkElifBranch, nnkExceptBranch, nnkElse, nnkMacroStmt, + nnkAsmStmt, nnkPragma, nnkIfStmt, nnkWhenStmt, + nnkForStmt, nnkWhileStmt, nnkCaseStmt, nnkVarSection, + nnkConstSection, nnkConstDef, nnkTypeSection, nnkTypeDef, + nnkYieldStmt, nnkTryStmt, nnkFinally, nnkRaiseStmt, + nnkReturnStmt, nnkBreakStmt, nnkContinueStmt, nnkBlockStmt, + nnkDiscardStmt, nnkStmtList, nnkImportStmt, nnkFromStmt, + nnkIncludeStmt, nnkCommentStmt, nnkStmtListExpr, nnkBlockExpr, + nnkStmtListType, nnkBlockType, nnkTypeOfExpr, nnkObjectTy, + nnkTupleTy, nnkRecList, nnkRecCase, nnkRecWhen, + nnkRefTy, nnkPtrTy, nnkVarTy, nnkDistinctTy, + nnkProcTy, nnkEnumTy, nnkEnumFieldDef, nnkReturnToken + TNimNodeKinds* = set[TNimrodNodeKind] + TNimrodTypeKind* = enum + ntyNone, ntyBool, ntyChar, ntyEmpty, + ntyArrayConstr, ntyNil, ntyExpr, ntyStmt, + ntyTypeDesc, ntyGenericInvokation, ntyGenericBody, ntyGenericInst, + ntyGenericParam, ntyDistinct, ntyEnum, ntyOrdinal, + ntyArray, ntyObject, ntyTuple, ntySet, + ntyRange, ntyPtr, ntyRef, ntyVar, + ntySequence, ntyProc, ntyPointer, ntyOpenArray, + ntyString, ntyCString, ntyForward, ntyInt, + ntyInt8, ntyInt16, ntyInt32, ntyInt64, + ntyFloat, ntyFloat32, ntyFloat64, ntyFloat128 + TNimTypeKinds* = set[TNimrodTypeKind] + TNimrodSymKind* = enum + nskUnknown, nskConditional, nskDynLib, nskParam, + nskGenericParam, nskTemp, nskType, nskConst, + nskVar, nskProc, nskMethod, nskIterator, + nskConverter, nskMacro, nskTemplate, nskField, + nskEnumField, nskForVar, nskModule, nskLabel, + nskStub + TNimSymKinds* = set[TNimrodSymKind] +#[[[end]]] + +type + TNimrodIdent* = object of TObject + ## represents a Nimrod identifier in the AST + + TNimrodSymbol {.final.} = object # hidden + TNimrodType {.final.} = object # hidden + + PNimrodType* {.compilerproc.} = ref TNimrodType + ## represents a Nimrod type in the compiler; currently this is not very + ## useful as there is no API to deal with Nimrod types. + + PNimrodSymbol* {.compilerproc.} = ref TNimrodSymbol + ## represents a Nimrod *symbol* in the compiler; a *symbol* is a looked-up + ## *ident*. + + PNimrodNode* = expr + ## represents a Nimrod AST node. Macros operate on this type. + +# Nodes should be reference counted to make the `copy` operation very fast! +# However, this is difficult to achieve: modify(n[0][1]) should propagate to +# its father. How to do this without back references? + +proc `[]`* (n: PNimrodNode, i: int): PNimrodNode {.magic: "NChild".} + ## get `n`'s `i`'th child. + +proc `[]=`* (n: PNimrodNode, i: int, child: PNimrodNode) {.magic: "NSetChild".} + ## set `n`'s `i`'th child to `child`. + +proc `!` *(s: string): TNimrodIdent {.magic: "StrToIdent".} + ## constructs an identifier from the string `s` + +proc `$`*(i: TNimrodIdent): string {.magic: "IdentToStr".} + ## converts a Nimrod identifier to a string + +proc `==`* (a, b: TNimrodIdent): bool {.magic: "EqIdent", noSideEffect.} + ## compares two Nimrod identifiers + +proc `==`* (a, b: PNimrodNode): bool {.magic: "EqNimrodNode", noSideEffect.} + ## compares two Nimrod nodes + +proc len*(n: PNimrodNode): int {.magic: "NLen".} + ## returns the number of children of `n`. + +proc add*(father, child: PNimrodNode) {.magic: "NAdd".} + ## adds the `child` to the `father` node + +proc add*(father: PNimrodNode, children: openArray[PNimrodNode]) {. + magic: "NAddMultiple".} + ## adds each child of `children` to the `father` node + +proc del*(father: PNimrodNode, idx = 0, n = 1) {.magic: "NDel".} + ## deletes `n` children of `father` starting at index `idx`. + +proc kind*(n: PNimrodNode): TNimrodNodeKind {.magic: "NKind".} + ## returns the `kind` of the node `n`. + +proc intVal*(n: PNimrodNode): biggestInt {.magic: "NIntVal".} +proc floatVal*(n: PNimrodNode): biggestFloat {.magic: "NFloatVal".} +proc symbol*(n: PNimrodNode): PNimrodSymbol {.magic: "NSymbol".} +proc ident*(n: PNimrodNode): TNimrodIdent {.magic: "NIdent".} +proc typ*(n: PNimrodNode): PNimrodType {.magic: "NGetType".} +proc strVal*(n: PNimrodNode): string {.magic: "NStrVal".} + +proc `intVal=`*(n: PNimrodNode, val: biggestInt) {.magic: "NSetIntVal".} +proc `floatVal=`*(n: PNimrodNode, val: biggestFloat) {.magic: "NSetFloatVal".} +proc `symbol=`*(n: PNimrodNode, val: PNimrodSymbol) {.magic: "NSetSymbol".} +proc `ident=`*(n: PNimrodNode, val: TNimrodIdent) {.magic: "NSetIdent".} +proc `typ=`*(n: PNimrodNode, typ: PNimrodType) {.magic: "NSetType".} +proc `strVal=`*(n: PNimrodNode, val: string) {.magic: "NSetStrVal".} + +proc newNimNode*(kind: TNimrodNodeKind, + n: PNimrodNode=nil): PNimrodNode {.magic: "NNewNimNode".} + +proc copyNimNode*(n: PNimrodNode): PNimrodNode {.magic: "NCopyNimNode".} +proc copyNimTree*(n: PNimrodNode): PNimrodNode {.magic: "NCopyNimTree".} + +proc error*(msg: string) {.magic: "NError".} + ## writes an error message at compile time + +proc warning*(msg: string) {.magic: "NWarning".} + ## writes a warning message at compile time + +proc hint*(msg: string) {.magic: "NHint".} + ## writes a hint message at compile time + +proc newStrLitNode*(s: string): PNimrodNode {.compileTime.} = + ## creates a string literal node from `s` + result = newNimNode(nnkStrLit) + result.strVal = s + +proc newIntLitNode*(i: biggestInt): PNimrodNode {.compileTime.} = + ## creates a int literal node from `i` + result = newNimNode(nnkIntLit) + result.intVal = i + +proc newFloatLitNode*(f: biggestFloat): PNimrodNode {.compileTime.} = + ## creates a float literal node from `f` + result = newNimNode(nnkFloatLit) + result.floatVal = f + +proc newIdentNode*(i: TNimrodIdent): PNimrodNode {.compileTime.} = + ## creates an identifier node from `i` + result = newNimNode(nnkIdent) + result.ident = i + +proc newIdentNode*(i: string): PNimrodNode {.compileTime.} = + ## creates an identifier node from `i` + result = newNimNode(nnkIdent) + result.ident = !i + +proc toStrLit*(n: PNimrodNode): PNimrodNode {.compileTime.} = + ## converts the AST `n` to the concrete Nimrod code and wraps that + ## in a string literal node + return newStrLitNode(repr(n)) + +proc expectKind*(n: PNimrodNode, k: TNimrodNodeKind) {.compileTime.} = + ## checks that `n` is of kind `k`. If this is not the case, + ## compilation aborts with an error message. This is useful for writing + ## macros that check the AST that is passed to them. + if n.kind != k: error("macro expects a node of kind: " & repr(k)) + +proc expectMinLen*(n: PNimrodNode, min: int) {.compileTime.} = + ## checks that `n` has at least `min` children. If this is not the case, + ## compilation aborts with an error message. This is useful for writing + ## macros that check its number of arguments. + if n.len < min: error("macro expects a node with " & $min & " children") + +proc expectLen*(n: PNimrodNode, len: int) {.compileTime.} = + ## checks that `n` has exactly `len` children. If this is not the case, + ## compilation aborts with an error message. This is useful for writing + ## macros that check its number of arguments. + if n.len != len: error("macro expects a node with " & $len & " children") + +proc newCall*(theProc: TNimrodIdent, + args: openArray[PNimrodNode]): PNimrodNode {.compileTime.} = + ## produces a new call node. `theProc` is the proc that is called with + ## the arguments ``args[0..]``. + result = newNimNode(nnkCall) + result.add(newIdentNode(theProc)) + result.add(args) + +proc newCall*(theProc: string, + args: openArray[PNimrodNode]): PNimrodNode {.compileTime.} = + ## produces a new call node. `theProc` is the proc that is called with + ## the arguments ``args[0..]``. + result = newNimNode(nnkCall) + result.add(newIdentNode(theProc)) + result.add(args) + +proc nestList*(theProc: TNimrodIdent, + x: PNimrodNode): PNimrodNode {.compileTime.} = + ## nests the list `x` into a tree of call expressions: + ## ``[a, b, c]`` is transformed into ``theProc(a, theProc(c, d))`` + var L = x.len + result = newCall(theProc, x[L-2], x[L-1]) + var a = result + for i in countdown(L-3, 0): + a = newCall(theProc, x[i], copyNimTree(a)) + diff --git a/nimlib/pure/math.nim b/nimlib/pure/math.nim new file mode 100755 index 000000000..bca45894c --- /dev/null +++ b/nimlib/pure/math.nim @@ -0,0 +1,249 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2009 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## Basic math routines for Nimrod. +## This module is available for the ECMAScript target. + +{.push debugger:off .} # the user does not want to trace a part + # of the standard library! + +{.push checks:off, line_dir:off, stack_trace:off.} + +when defined(Posix): + {.passl: "-lm".} + +const + PI* = 3.1415926535897932384626433 ## the circle constant PI (Ludolph's number) + E* = 2.71828182845904523536028747 ## Euler's number + +type + TFloatClass* = enum ## describes the class a floating point value belongs to. + ## This is the type that is returned by `classify`. + fcNormal, ## value is an ordinary nonzero floating point value + fcSubnormal, ## value is a subnormal (a very small) floating point value + fcZero, ## value is zero + fcNegZero, ## value is the negative zero + fcNan, ## value is Not-A-Number (NAN) + fcInf, ## value is positive infinity + fcNegInf ## value is negative infinity + +proc classify*(x: float): TFloatClass = + ## classifies a floating point value. Returns `x`'s class as specified by + ## `TFloatClass`. + + # ECMAScript and most C compilers have no classify: + if x == 0.0: + if 1.0/x == Inf: + return fcZero + else: + return fcNegZero + if x*0.5 == x: + if x > 0.0: return fcInf + else: return fcNegInf + if x != x: return fcNan + return fcNormal + # XXX: fcSubnormal is not detected! + + +proc binom*(n, k: int): int {.noSideEffect.} = + ## computes the binomial coefficient + if k <= 0: return 1 + if 2*k > n: return binom(n, n-k) + result = n + for i in countup(2, k): + result = (result * (n + 1 - i)) div i + +proc fac*(n: int): int {.noSideEffect.} = + ## computes the faculty function + result = 1 + for i in countup(2, n): + result = result * i + +proc isPowerOfTwo*(x: int): bool {.noSideEffect.} = + ## returns true, if x is a power of two, false otherwise. + ## Negative numbers are not a power of two. + return (x and -x) == x + +proc nextPowerOfTwo*(x: int): int = + ## returns the nearest power of two, so that + ## result**2 >= x > (result-1)**2. + result = x - 1 + when defined(cpu64): + result = result or (result shr 32) + result = result or (result shr 16) + result = result or (result shr 8) + result = result or (result shr 4) + result = result or (result shr 2) + result = result or (result shr 1) + Inc(result) + +proc countBits32*(n: int32): int {.noSideEffect.} = + ## counts the set bits in `n`. + var v = n + v = v -% ((v shr 1'i32) and 0x55555555'i32) + v = (v and 0x33333333'i32) +% ((v shr 2'i32) and 0x33333333'i32) + result = ((v +% (v shr 4'i32) and 0xF0F0F0F'i32) *% 0x1010101'i32) shr 24'i32 + +proc sum*[T](x: openarray[T]): T {.noSideEffect.} = + ## computes the sum of the elements in `x`. + ## If `x` is empty, 0 is returned. + for i in items(x): result = result + i + +proc mean*(x: openarray[float]): float {.noSideEffect.} = + ## computes the mean of the elements in `x`. + ## If `x` is empty, NaN is returned. + result = sum(x) / toFloat(len(x)) + +proc variance*(x: openarray[float]): float {.noSideEffect.} = + ## computes the mean of the elements in `x`. + ## If `x` is empty, NaN is returned. + result = 0.0 + var m = mean(x) + for i in 0 .. high(x): + var diff = x[i] - m + result = result + diff*diff + result = result / toFloat(len(x)) + +when not defined(ECMAScript): + proc random*(max: int): int + ## returns a random number in the range 0..max-1. The sequence of + ## random number is always the same, unless `randomize` is called + ## which initializes the random number generator with a "random" + ## number, i.e. a tickcount. + proc randomize*() + ## initializes the random number generator with a "random" + ## number, i.e. a tickcount. Note: Does nothing for the ECMAScript target, + ## as ECMAScript does not support this. + + proc sqrt*(x: float): float {.importc: "sqrt", header: "<math.h>".} + ## computes the square root of `x`. + + proc ln*(x: float): float {.importc: "log", header: "<math.h>".} + ## computes ln(x). + proc log10*(x: float): float {.importc: "log10", header: "<math.h>".} + proc log2*(x: float): float = return ln(x) / ln(2.0) + proc exp*(x: float): float {.importc: "exp", header: "<math.h>".} + ## computes e**x. + + proc frexp*(x: float, exponent: var int): float {. + importc: "frexp", header: "<math.h>".} + ## Split a number into mantissa and exponent. + ## `frexp` calculates the mantissa m (a float greater than or equal to 0.5 + ## and less than 1) and the integer value n such that `x` (the original + ## float value) equals m * 2**n. frexp stores n in `exponent` and returns + ## m. + + proc round*(x: float): int {.importc: "lrint", nodecl.} + ## converts a float to an int by rounding. + + proc arccos*(x: float): float {.importc: "acos", header: "<math.h>".} + proc arcsin*(x: float): float {.importc: "asin", header: "<math.h>".} + proc arctan*(x: float): float {.importc: "atan", header: "<math.h>".} + proc arctan2*(y, x: float): float {.importc: "atan2", header: "<math.h>".} + ## Calculate the arc tangent of `y` / `x`. + ## `atan2` returns the arc tangent of `y` / `x`; it produces correct + ## results even when the resulting angle is near pi/2 or -pi/2 + ## (`x` near 0). + + proc cos*(x: float): float {.importc: "cos", header: "<math.h>".} + proc cosh*(x: float): float {.importc: "cosh", header: "<math.h>".} + proc hypot*(x, y: float): float {.importc: "hypot", header: "<math.h>".} + ## same as ``sqrt(x*x + y*y)``. + + proc sinh*(x: float): float {.importc: "sinh", header: "<math.h>".} + proc tan*(x: float): float {.importc: "tan", header: "<math.h>".} + proc tanh*(x: float): float {.importc: "tanh", header: "<math.h>".} + proc pow*(x, y: float): float {.importc: "pow", header: "<math.h>".} + ## computes x to power raised of y. + + # C procs: + proc gettime(dummy: ptr cint): cint {.importc: "time", header: "<time.h>".} + proc srand(seed: cint) {.importc: "srand", nodecl.} + proc rand(): cint {.importc: "rand", nodecl.} + + proc randomize() = srand(gettime(nil)) + proc random(max: int): int = return int(rand()) mod max + +else: + proc mathrandom(): float {.importc: "Math.random", nodecl.} + proc mathfloor(x: float): float {.importc: "Math.floor", nodecl.} + proc random*(max: int): int = return mathfloor(mathrandom() * max) + proc randomize*() = nil + + proc sqrt*(x: float): float {.importc: "Math.sqrt", nodecl.} + proc ln*(x: float): float {.importc: "Math.log", nodecl.} + proc log10*(x: float): float = return ln(x) / ln(10.0) + proc log2*(x: float): float = return ln(x) / ln(2.0) + + proc exp*(x: float): float {.importc: "Math.exp", nodecl.} + proc round*(x: float): int {.importc: "Math.round", nodecl.} + proc pow*(x, y: float): float {.importc: "Math.pow", nodecl.} + + proc frexp*(x: float, exponent: var int): float = + if x == 0.0: + exponent = 0.0 + result = 0.0 + elif x < 0.0: + result = -frexp(-x, exponent) + else: + var ex = mathfloor(log2(x)) + exponent = round(ex) + result = x / pow(2.0, ex) + + proc arccos*(x: float): float {.importc: "Math.acos", nodecl.} + proc arcsin*(x: float): float {.importc: "Math.asin", nodecl.} + proc arctan*(x: float): float {.importc: "Math.atan", nodecl.} + proc arctan2*(y, x: float): float {.importc: "Math.atan2", nodecl.} + + proc cos*(x: float): float {.importc: "Math.cos", nodecl.} + proc cosh*(x: float): float = return (exp(x)+exp(-x))*0.5 + proc hypot*(x, y: float): float = return sqrt(x*x + y*y) + proc sinh*(x: float): float = return (exp(x)-exp(-x))*0.5 + proc tan*(x: float): float {.importc: "Math.tan", nodecl.} + proc tanh*(x: float): float = + var y = exp(2.0*x) + return (y-1.0)/(y+1.0) + + +type + TRunningStat* = object ## an accumulator for statistical data + n*: int ## number of pushed data + sum*, min*, max*, mean*: float ## self-explaining + oldM, oldS, newS: float + +proc push*(s: var TRunningStat, x: float) = + ## pushes a value `x` for processing + inc(s.n) + # See Knuth TAOCP vol 2, 3rd edition, page 232 + if s.n == 1: + s.oldM = x + s.mean = x + s.oldS = 0.0 + else: + s.mean = s.oldM + (x - s.oldM)/toFloat(s.n) + s.newS = s.oldS + (x - s.oldM)*(x - s.mean) + + # set up for next iteration: + s.oldM = s.mean + s.oldS = s.newS + + s.sum = s.sum + x + if s.min > x: s.min = x + if s.max < x: s.max = x + +proc variance*(s: TRunningStat): float = + ## computes the current variance of `s` + if s.n > 1: result = s.newS / (toFloat(s.n - 1)) + +proc standardDeviation*(s: TRunningStat): float = + ## computes the current standard deviation of `s` + result = sqrt(variance(s)) + +{.pop.} +{.pop.} diff --git a/nimlib/pure/md5.nim b/nimlib/pure/md5.nim new file mode 100755 index 000000000..d9bb92949 --- /dev/null +++ b/nimlib/pure/md5.nim @@ -0,0 +1,245 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2009 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## Module for computing MD5 checksums. + +type + MD5State = array[0..3, int32] + MD5Block = array[0..15, int32] + MD5CBits = array[0..7, int8] + MD5Digest* = array[0..15, int8] + MD5Buffer = array[0..63, int8] + MD5Context* {.final.} = object + State: MD5State + Count: array[0..1, int32] + Buffer: MD5Buffer + +const + padding: cstring = "\x80\0\0\0" & + "\0\0\0\0\0\0\0\0" & + "\0\0\0\0\0\0\0\0" & + "\0\0\0\0\0\0\0\0" & + "\0\0\0\0\0\0\0\0" & + "\0\0\0\0\0\0\0\0" & + "\0\0\0\0\0\0\0\0" & + "\0\0\0\0\0\0\0\0" & + "\0\0\0\0" + +proc F(x, y, z: int32): int32 {.inline.} = + Result = (x and y) or ((not x) and z) + +proc G(x, y, z: int32): int32 {.inline.} = + Result = (x and z) or (y and (not z)) + +proc H(x, y, z: int32): int32 {.inline.} = + Result = x xor y xor z + +proc I(x, y, z: int32): int32 {.inline.} = + Result = y xor (x or (not z)) + +proc rot(x: var int32, n: int8) {.inline.} = + x = toU32(x shl ze(n)) or (x shr toU32(32 -% ze(n))) + +proc FF(a: var int32, b, c, d, x: int32, s: int8, ac: int32) = + a = a +% F(b, c, d) +% x +% ac + rot(a, s) + a = a +% b + +proc GG(a: var int32, b, c, d, x: int32, s: int8, ac: int32) = + a = a +% G(b, c, d) +% x +% ac + rot(a, s) + a = a +% b + +proc HH(a: var int32, b, c, d, x: int32, s: int8, ac: int32) = + a = a +% H(b, c, d) +% x +% ac + rot(a, s) + a = a +% b + +proc II(a: var int32, b, c, d, x: int32, s: int8, ac: int32) = + a = a +% I(b, c, d) +% x +% ac + rot(a, s) + a = a +% b + +proc encode(dest: var MD5Block, src: cstring) = + var j = 0 + for i in 0..high(dest): + dest[i] = toU32(ord(src[j]) or + ord(src[j+1]) shl 8 or + ord(src[j+2]) shl 16 or + ord(src[j+3]) shl 24) + inc(j, 4) + +proc decode(dest: var openarray[int8], src: openarray[int32]) = + var i = 0 + for j in 0..high(src): + dest[i] = toU8(src[j] and 0xff'i32) + dest[i+1] = toU8(src[j] shr 8'i32 and 0xff'i32) + dest[i+2] = toU8(src[j] shr 16'i32 and 0xff'i32) + dest[i+3] = toU8(src[j] shr 24'i32 and 0xff'i32) + inc(i, 4) + +proc transform(Buffer: pointer, State: var MD5State) = + var + myBlock: MD5Block + encode(myBlock, cast[cstring](buffer)) + var a = State[0] + var b = State[1] + var c = State[2] + var d = State[3] + FF(a, b, c, d, myBlock[0], 7'i8, 0xD76AA478'i32) + FF(d, a, b, c, myBlock[1], 12'i8, 0xE8C7B756'i32) + FF(c, d, a, b, myBlock[2], 17'i8, 0x242070DB'i32) + FF(b, c, d, a, myBlock[3], 22'i8, 0xC1BDCEEE'i32) + FF(a, b, c, d, myBlock[4], 7'i8, 0xF57C0FAF'i32) + FF(d, a, b, c, myBlock[5], 12'i8, 0x4787C62A'i32) + FF(c, d, a, b, myBlock[6], 17'i8, 0xA8304613'i32) + FF(b, c, d, a, myBlock[7], 22'i8, 0xFD469501'i32) + FF(a, b, c, d, myBlock[8], 7'i8, 0x698098D8'i32) + FF(d, a, b, c, myBlock[9], 12'i8, 0x8B44F7AF'i32) + FF(c, d, a, b, myBlock[10], 17'i8, 0xFFFF5BB1'i32) + FF(b, c, d, a, myBlock[11], 22'i8, 0x895CD7BE'i32) + FF(a, b, c, d, myBlock[12], 7'i8, 0x6B901122'i32) + FF(d, a, b, c, myBlock[13], 12'i8, 0xFD987193'i32) + FF(c, d, a, b, myBlock[14], 17'i8, 0xA679438E'i32) + FF(b, c, d, a, myBlock[15], 22'i8, 0x49B40821'i32) + GG(a, b, c, d, myBlock[1], 5'i8, 0xF61E2562'i32) + GG(d, a, b, c, myBlock[6], 9'i8, 0xC040B340'i32) + GG(c, d, a, b, myBlock[11], 14'i8, 0x265E5A51'i32) + GG(b, c, d, a, myBlock[0], 20'i8, 0xE9B6C7AA'i32) + GG(a, b, c, d, myBlock[5], 5'i8, 0xD62F105D'i32) + GG(d, a, b, c, myBlock[10], 9'i8, 0x02441453'i32) + GG(c, d, a, b, myBlock[15], 14'i8, 0xD8A1E681'i32) + GG(b, c, d, a, myBlock[4], 20'i8, 0xE7D3FBC8'i32) + GG(a, b, c, d, myBlock[9], 5'i8, 0x21E1CDE6'i32) + GG(d, a, b, c, myBlock[14], 9'i8, 0xC33707D6'i32) + GG(c, d, a, b, myBlock[3], 14'i8, 0xF4D50D87'i32) + GG(b, c, d, a, myBlock[8], 20'i8, 0x455A14ED'i32) + GG(a, b, c, d, myBlock[13], 5'i8, 0xA9E3E905'i32) + GG(d, a, b, c, myBlock[2], 9'i8, 0xFCEFA3F8'i32) + GG(c, d, a, b, myBlock[7], 14'i8, 0x676F02D9'i32) + GG(b, c, d, a, myBlock[12], 20'i8, 0x8D2A4C8A'i32) + HH(a, b, c, d, myBlock[5], 4'i8, 0xFFFA3942'i32) + HH(d, a, b, c, myBlock[8], 11'i8, 0x8771F681'i32) + HH(c, d, a, b, myBlock[11], 16'i8, 0x6D9D6122'i32) + HH(b, c, d, a, myBlock[14], 23'i8, 0xFDE5380C'i32) + HH(a, b, c, d, myBlock[1], 4'i8, 0xA4BEEA44'i32) + HH(d, a, b, c, myBlock[4], 11'i8, 0x4BDECFA9'i32) + HH(c, d, a, b, myBlock[7], 16'i8, 0xF6BB4B60'i32) + HH(b, c, d, a, myBlock[10], 23'i8, 0xBEBFBC70'i32) + HH(a, b, c, d, myBlock[13], 4'i8, 0x289B7EC6'i32) + HH(d, a, b, c, myBlock[0], 11'i8, 0xEAA127FA'i32) + HH(c, d, a, b, myBlock[3], 16'i8, 0xD4EF3085'i32) + HH(b, c, d, a, myBlock[6], 23'i8, 0x04881D05'i32) + HH(a, b, c, d, myBlock[9], 4'i8, 0xD9D4D039'i32) + HH(d, a, b, c, myBlock[12], 11'i8, 0xE6DB99E5'i32) + HH(c, d, a, b, myBlock[15], 16'i8, 0x1FA27CF8'i32) + HH(b, c, d, a, myBlock[2], 23'i8, 0xC4AC5665'i32) + II(a, b, c, d, myBlock[0], 6'i8, 0xF4292244'i32) + II(d, a, b, c, myBlock[7], 10'i8, 0x432AFF97'i32) + II(c, d, a, b, myBlock[14], 15'i8, 0xAB9423A7'i32) + II(b, c, d, a, myBlock[5], 21'i8, 0xFC93A039'i32) + II(a, b, c, d, myBlock[12], 6'i8, 0x655B59C3'i32) + II(d, a, b, c, myBlock[3], 10'i8, 0x8F0CCC92'i32) + II(c, d, a, b, myBlock[10], 15'i8, 0xFFEFF47D'i32) + II(b, c, d, a, myBlock[1], 21'i8, 0x85845DD1'i32) + II(a, b, c, d, myBlock[8], 6'i8, 0x6FA87E4F'i32) + II(d, a, b, c, myBlock[15], 10'i8, 0xFE2CE6E0'i32) + II(c, d, a, b, myBlock[6], 15'i8, 0xA3014314'i32) + II(b, c, d, a, myBlock[13], 21'i8, 0x4E0811A1'i32) + II(a, b, c, d, myBlock[4], 6'i8, 0xF7537E82'i32) + II(d, a, b, c, myBlock[11], 10'i8, 0xBD3AF235'i32) + II(c, d, a, b, myBlock[2], 15'i8, 0x2AD7D2BB'i32) + II(b, c, d, a, myBlock[9], 21'i8, 0xEB86D391'i32) + State[0] = State[0] +% a + State[1] = State[1] +% b + State[2] = State[2] +% c + State[3] = State[3] +% d + +proc MD5Init*(c: var MD5Context) = + ## initializes a MD5Context + c.State[0] = 0x67452301'i32 + c.State[1] = 0xEFCDAB89'i32 + c.State[2] = 0x98BADCFE'i32 + c.State[3] = 0x10325476'i32 + c.Count[0] = 0'i32 + c.Count[1] = 0'i32 + ZeroMem(addr(c.Buffer), SizeOf(MD5Buffer)) + +proc MD5Update*(c: var MD5Context, input: cstring, len: int) = + ## updates the MD5Context with the `input` data of length `len` + var input = input + var Index = (c.Count[0] shr 3) and 0x3F + c.Count[0] = c.count[0] +% toU32(len shl 3) + if c.Count[0] < (len shl 3): c.Count[1] = c.count[1] +% 1'i32 + c.Count[1] = c.count[1] +% toU32(len shr 29) + var PartLen = 64 - Index + if len >= PartLen: + CopyMem(addr(c.Buffer[Index]), Input, PartLen) + transform(addr(c.Buffer), c.State) + var i = PartLen + while i + 63 < len: + Transform(addr(Input[I]), c.State) + inc(i, 64) + CopyMem(addr(c.Buffer[0]), addr(Input[i]), len-i) + else: + CopyMem(addr(c.Buffer[Index]), addr(Input[0]), len) + +proc MD5Final*(c: var MD5Context, digest: var MD5Digest) = + ## finishes the MD5Context and stores the result in `digest` + var + Bits: MD5CBits + PadLen: int + decode(bits, c.Count) + var Index = (c.Count[0] shr 3) and 0x3F + if Index < 56: PadLen = 56 - Index + else: PadLen = 120 - Index + MD5Update(c, padding, PadLen) + MD5Update(c, cast[cstring](addr(Bits)), 8) + decode(digest, c.State) + ZeroMem(addr(c), SizeOf(MD5Context)) + +proc toMD5*(s: string): MD5Digest = + ## computes the MD5Digest value for a string `s` + var c: MD5Context + MD5Init(c) + MD5Update(c, cstring(s), len(s)) + MD5Final(c, result) + +proc `$`*(D: MD5Digest): string = + ## converts a MD5Digest value into its string representation + const digits = "0123456789abcdef" + result = "" + for i in 0..15: + add(result, Digits[(D[I] shr 4) and 0xF]) + add(result, Digits[D[I] and 0xF]) + +proc getMD5*(s: string): string = + ## computes an MD5 value of `s` and returns its string representation + var + c: MD5Context + d: MD5Digest + MD5Init(c) + MD5Update(c, cstring(s), len(s)) + MD5Final(c, d) + result = $d + +proc `==`*(D1, D2: MD5Digest): bool = + ## checks if two MD5Digest values are identical + for i in 0..15: + if D1[i] != D2[i]: return false + return true + +when isMainModule: + assert(getMD5("Franz jagt im komplett verwahrlosten Taxi quer durch Bayern") == + "a3cca2b2aa1e3b5b3b5aad99a8529074") + assert(getMD5("Frank jagt im komplett verwahrlosten Taxi quer durch Bayern") == + "7e716d0e702df0505fc72e2b89467910") + assert($toMD5("") == "d41d8cd98f00b204e9800998ecf8427e") + + diff --git a/nimlib/pure/os.nim b/nimlib/pure/os.nim new file mode 100755 index 000000000..afa145e9f --- /dev/null +++ b/nimlib/pure/os.nim @@ -0,0 +1,1147 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2009 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## This module contains basic operating system facilities like +## retrieving environment variables, reading command line arguments, +## working with directories, running shell commands, etc. +{.deadCodeElim: on.} + +{.push debugger: off.} + +import + strutils, times + +when defined(windows): + import winlean +elif defined(posix): + import posix +else: + {.error: "OS module not ported to your operating system!".} + +include "system/ansi_c" + +# copied from excpt.nim, because I don't want to make this template public +template newException(exceptn, message: expr): expr = + block: # open a new scope + var + e: ref exceptn + new(e) + e.msg = message + e + +const + doslike = defined(windows) or defined(OS2) or defined(DOS) + # DOS-like filesystem + +when defined(Nimdoc): # only for proper documentation: + const + CurDir* = '.' + ## The constant string used by the operating system to refer to the + ## current directory. + ## + ## For example: '.' for POSIX or ':' for the classic Macintosh. + + ParDir* = ".." + ## The constant string used by the operating system to refer to the parent + ## directory. + ## + ## For example: ".." for POSIX or "::" for the classic Macintosh. + + DirSep* = '/' + ## The character used by the operating system to separate pathname + ## components, for example, '/' for POSIX or ':' for the classic + ## Macintosh. + + AltSep* = '/' + ## An alternative character used by the operating system to separate + ## pathname components, or the same as `DirSep` if only one separator + ## character exists. This is set to '/' on Windows systems where `DirSep` + ## is a backslash. + + PathSep* = ':' + ## The character conventionally used by the operating system to separate + ## search patch components (as in PATH), such as ':' for POSIX or ';' for + ## Windows. + + FileSystemCaseSensitive* = True + ## True if the file system is case sensitive, false otherwise. Used by + ## `cmpPaths` to compare filenames properly. + + ExeExt* = "" + ## The file extension of native executables. For example: + ## "" for POSIX, "exe" on Windows. + + ScriptExt* = "" + ## The file extension of a script file. For example: "" for POSIX, + ## "bat" on Windows. + +elif defined(macos): + const + curdir* = ':' + pardir* = "::" + dirsep* = ':' + altsep* = dirsep + pathsep* = ',' + FileSystemCaseSensitive* = false + ExeExt* = "" + ScriptExt* = "" + + # MacOS paths + # =========== + # MacOS directory separator is a colon ":" which is the only character not + # allowed in filenames. + # + # A path containing no colon or which begins with a colon is a partial path. + # E.g. ":kalle:petter" ":kalle" "kalle" + # + # All other paths are full (absolute) paths. E.g. "HD:kalle:" "HD:" + # When generating paths, one is safe if one ensures that all partial paths + # begin with a colon, and all full paths end with a colon. + # In full paths the first name (e g HD above) is the name of a mounted + # volume. + # These names are not unique, because, for instance, two diskettes with the + # same names could be inserted. This means that paths on MacOS are not + # waterproof. In case of equal names the first volume found will do. + # Two colons "::" are the relative path to the parent. Three is to the + # grandparent etc. +elif doslike: + const + curdir* = '.' + pardir* = ".." + dirsep* = '\\' # seperator within paths + altsep* = '/' + pathSep* = ';' # seperator between paths + FileSystemCaseSensitive* = false + ExeExt* = "exe" + ScriptExt* = "bat" +elif defined(PalmOS) or defined(MorphOS): + const + dirsep* = '/' + altsep* = dirsep + PathSep* = ';' + pardir* = ".." + FileSystemCaseSensitive* = false + ExeExt* = "" + ScriptExt* = "" +elif defined(RISCOS): + const + dirsep* = '.' + altsep* = '.' + pardir* = ".." # is this correct? + pathSep* = ',' + FileSystemCaseSensitive* = true + ExeExt* = "" + ScriptExt* = "" +else: # UNIX-like operating system + const + curdir* = '.' + pardir* = ".." + dirsep* = '/' + altsep* = dirsep + pathSep* = ':' + FileSystemCaseSensitive* = true + ExeExt* = "" + ScriptExt* = "" + +const + ExtSep* = '.' + ## The character which separates the base filename from the extension; + ## for example, the '.' in ``os.nim``. + +# procs dealing with command line arguments: +proc paramCount*(): int + ## Returns the number of command line arguments given to the + ## application. + +proc paramStr*(i: int): string + ## Returns the `i`-th command line arguments given to the + ## application. + ## + ## `i` should be in the range `1..paramCount()`, else + ## the `EOutOfIndex` exception is raised. + +proc OSError*(msg: string = "") {.noinline.} = + ## raises an EOS exception with the given message ``msg``. + ## If ``msg == ""``, the operating system's error flag + ## (``errno``) is converted to a readable error message. On Windows + ## ``GetLastError`` is checked before ``errno``. + ## If no error flag is set, the message ``unknown OS error`` is used. + if len(msg) == 0: + when defined(Windows): + var err = GetLastError() + if err != 0'i32: + # sigh, why is this is so difficult? + var msgbuf: cstring + if FormatMessageA(0x00000100 or 0x00001000 or 0x00000200, + nil, err, 0, addr(msgbuf), 0, nil) != 0'i32: + var m = $msgbuf + if msgbuf != nil: + LocalFree(msgbuf) + raise newException(EOS, m) + if errno != 0'i32: + raise newException(EOS, $os.strerror(errno)) + else: + raise newException(EOS, "unknown OS error") + else: + raise newException(EOS, msg) + +proc UnixToNativePath*(path: string): string {.noSideEffect.} = + ## Converts an UNIX-like path to a native one. + ## + ## On an UNIX system this does nothing. Else it converts + ## '/', '.', '..' to the appropriate things. + when defined(unix): + result = path + else: + var start: int + if path[0] == '/': + # an absolute path + when doslike: + result = r"C:\" + elif defined(macos): + result = "" # must not start with ':' + else: + result = $dirSep + start = 1 + elif path[0] == '.' and path[1] == '/': + # current directory + result = $curdir + start = 2 + else: + result = "" + start = 0 + + var i = start + while i < len(path): # ../../../ --> :::: + if path[i] == '.' and path[i+1] == '.' and path[i+2] == '/': + # parent directory + when defined(macos): + if result[high(result)] == ':': + add result, ':' + else: + add result, pardir + else: + add result, pardir & dirSep + inc(i, 3) + elif path[i] == '/': + add result, dirSep + inc(i) + else: + add result, path[i] + inc(i) + +proc existsFile*(filename: string): bool = + ## Returns true if the file exists, false otherwise. + when defined(windows): + var a = GetFileAttributesA(filename) + if a != -1'i32: + result = (a and FILE_ATTRIBUTE_DIRECTORY) == 0'i32 + else: + var res: TStat + return stat(filename, res) >= 0'i32 and S_ISREG(res.st_mode) + +proc existsDir*(dir: string): bool = + ## Returns true iff the directory `dir` exists. If `dir` is a file, false + ## is returned. + when defined(windows): + var a = GetFileAttributesA(dir) + if a != -1'i32: + result = (a and FILE_ATTRIBUTE_DIRECTORY) != 0'i32 + else: + var res: TStat + return stat(dir, res) >= 0'i32 and S_ISDIR(res.st_mode) + +proc getLastModificationTime*(file: string): TTime = + ## Returns the `file`'s last modification time. + when defined(posix): + var res: TStat + if stat(file, res) < 0'i32: OSError() + return res.st_mtime + else: + var f: TWIN32_Find_Data + var h = findfirstFileA(file, f) + if h == -1'i32: OSError() + result = winTimeToUnixTime(rdFileTime(f.ftLastWriteTime)) + findclose(h) + +proc getLastAccessTime*(file: string): TTime = + ## Returns the `file`'s last read or write access time. + when defined(posix): + var res: TStat + if stat(file, res) < 0'i32: OSError() + return res.st_atime + else: + var f: TWIN32_Find_Data + var h = findfirstFileA(file, f) + if h == -1'i32: OSError() + result = winTimeToUnixTime(rdFileTime(f.ftLastAccessTime)) + findclose(h) + +proc getCreationTime*(file: string): TTime = + ## Returns the `file`'s creation time. + when defined(posix): + var res: TStat + if stat(file, res) < 0'i32: OSError() + return res.st_ctime + else: + var f: TWIN32_Find_Data + var h = findfirstFileA(file, f) + if h == -1'i32: OSError() + result = winTimeToUnixTime(rdFileTime(f.ftCreationTime)) + findclose(h) + +proc fileNewer*(a, b: string): bool = + ## Returns true if the file `a` is newer than file `b`, i.e. if `a`'s + ## modification time is later than `b`'s. + result = getLastModificationTime(a) - getLastModificationTime(b) > 0 + +proc getCurrentDir*(): string = + ## Returns the current working directory. + const bufsize = 512 # should be enough + result = newString(bufsize) + when defined(windows): + var L = GetCurrentDirectoryA(bufsize, result) + if L == 0'i32: OSError() + setLen(result, L) + else: + if getcwd(result, bufsize) != nil: + setlen(result, c_strlen(result)) + else: + OSError() + +proc setCurrentDir*(newDir: string) {.inline.} = + ## Sets the current working directory; `EOS` is raised if + ## `newDir` cannot been set. + when defined(Windows): + if SetCurrentDirectoryA(newDir) == 0'i32: OSError() + else: + if chdir(newDir) != 0'i32: OSError() + +proc JoinPath*(head, tail: string): string {.noSideEffect.} = + ## Joins two directory names to one. + ## + ## For example on Unix: + ## + ## ..code-block:: nimrod + ## JoinPath("usr", "lib") + ## + ## results in: + ## + ## ..code-block:: nimrod + ## "usr/lib" + ## + ## If head is the empty string, tail is returned. + ## If tail is the empty string, head is returned. + if len(head) == 0: + result = tail + elif head[len(head)-1] in {DirSep, AltSep}: + if tail[0] in {DirSep, AltSep}: + result = head & copy(tail, 1) + else: + result = head & tail + else: + if tail[0] in {DirSep, AltSep}: + result = head & tail + else: + result = head & DirSep & tail + +proc JoinPath*(parts: openarray[string]): string {.noSideEffect.} = + ## The same as `JoinPath(head, tail)`, but works with any number + ## of directory parts. + result = parts[0] + for i in 1..high(parts): + result = JoinPath(result, parts[i]) + +proc `/` * (head, tail: string): string {.noSideEffect.} = + ## The same as ``joinPath(head, tail)`` + return joinPath(head, tail) + +proc SplitPath*(path: string, head, tail: var string) {.noSideEffect, + deprecated.} = + ## **Deprecated since version 0.8.2**: use the version that returns a tuple + ## instead + var + sepPos = -1 + for i in countdown(len(path)-1, 0): + if path[i] in {dirsep, altsep}: + sepPos = i + break + if sepPos >= 0: + head = copy(path, 0, sepPos-1) + tail = copy(path, sepPos+1) + else: + head = "" + tail = path # make a string copy here + +proc SplitPath*(path: string): tuple[head, tail: string] {.noSideEffect.} = + ## Splits a directory into (head, tail), so that + ## ``JoinPath(head, tail) == path``. + ## + ## Examples: + ## .. code-block:: nimrod + ## SplitPath("usr/local/bin") -> ("usr/local", "bin") + ## SplitPath("usr/local/bin/") -> ("usr/local/bin", "") + ## SplitPath("bin") -> ("", "bin") + ## SplitPath("/bin") -> ("", "bin") + ## SplitPath("") -> ("", "") + var + sepPos = -1 + for i in countdown(len(path)-1, 0): + if path[i] in {dirsep, altsep}: + sepPos = i + break + if sepPos >= 0: + result.head = copy(path, 0, sepPos-1) + result.tail = copy(path, sepPos+1) + else: + result.head = "" + result.tail = path + +proc parentDir*(path: string): string {.noSideEffect.} = + ## Returns the parent directory of `path`. + ## + ## This is often the same as the ``head`` result of ``splitPath``. + ## If there is no parent, ``path`` is returned. + ## Example: ``parentDir("/usr/local/bin") == "/usr/local"``. + ## Example: ``parentDir("/usr/local/bin/") == "/usr/local"``. + var + sepPos = -1 + q = 1 + if path[len(path)-1] in {dirsep, altsep}: + q = 2 + for i in countdown(len(path)-q, 0): + if path[i] in {dirsep, altsep}: + sepPos = i + break + if sepPos >= 0: + result = copy(path, 0, sepPos-1) + else: + result = path + +proc `/../` * (head, tail: string): string {.noSideEffect.} = + ## The same as ``parentDir(head) / tail`` + return parentDir(head) / tail + +proc normExt(ext: string): string = + if ext == "" or ext[0] == extSep: result = ext # no copy needed here + else: result = extSep & ext + +proc searchExtPos(s: string): int = + # BUGFIX: do not search until 0! .DS_Store is no file extension! + result = -1 + for i in countdown(len(s)-1, 1): + if s[i] == extsep: + result = i + break + elif s[i] in {dirsep, altsep}: + break # do not skip over path + +proc splitFile*(path: string): tuple[dir, name, ext: string] {.noSideEffect.} = + ## Splits a filename into (dir, filename, extension). + ## `dir` does not end in `DirSep`. + ## `extension` includes the leading dot. + ## + ## Example: + ## + ## .. code-block:: nimrod + ## var (dir, name, ext) = splitFile("usr/local/nimrodc.html") + ## assert dir == "usr/local" + ## assert name == "nimrodc" + ## assert ext == ".html" + ## + ## If `path` has no extension, `ext` is the empty string. + ## If `path` has no directory component, `dir` is the empty string. + ## If `path` has no filename component, `name` and `ext` are empty strings. + if path.len == 0 or path[path.len-1] in {dirSep, altSep}: + result = (path, "", "") + else: + var sepPos = -1 + var dotPos = path.len + for i in countdown(len(path)-1, 0): + if path[i] == ExtSep: + if dotPos == path.len and i > 0: dotPos = i + elif path[i] in {dirsep, altsep}: + sepPos = i + break + result.dir = copy(path, 0, sepPos-1) + result.name = copy(path, sepPos+1, dotPos-1) + result.ext = copy(path, dotPos) + +proc extractDir*(path: string): string {.noSideEffect, deprecated.} = + ## Extracts the directory of a given path. This is almost the + ## same as the `head` result of `splitPath`, except that + ## ``extractDir("/usr/lib/") == "/usr/lib/"``. + ## **Deprecated since version 0.8.2**: Use ``splitFile(path).dir`` instead. + result = splitFile(path).dir + +proc extractFilename*(path: string): string {.noSideEffect.} = + ## Extracts the filename of a given `path`. This is the same as + ## ``name & ext`` from ``splitFile(path)``. + if path.len == 0 or path[path.len-1] in {dirSep, altSep}: + result = "" + else: + result = splitPath(path).tail + +proc expandFilename*(filename: string): string = + ## Returns the full path of `filename`, raises EOS in case of an error. + when defined(windows): + var unused: cstring + result = newString(3072) + var L = GetFullPathNameA(filename, 3072'i32, result, unused) + if L <= 0'i32 or L >= 3072'i32: OSError() + setLen(result, L) + else: + var res = realpath(filename, nil) + if res == nil: OSError() + result = $res + c_free(res) + +proc SplitFilename*(filename: string, name, extension: var string) {. + noSideEffect, deprecated.} = + ## Splits a filename into (name, extension), so that + ## ``name & extension == filename``. + ## + ## Example: After ``SplitFilename("usr/local/nimrodc.html", name, ext)``, + ## `name` is "usr/local/nimrodc" and `ext` is ".html". + ## If the file has no extension, extension is the empty string. + ## **Deprecated since version 0.8.2**: Use ``splitFile(filename)`` instead. + var extPos = searchExtPos(filename) + if extPos >= 0: + name = copy(filename, 0, extPos-1) + extension = copy(filename, extPos) + else: + name = filename # make a string copy here + extension = "" + +proc extractFileExt*(filename: string): string {.noSideEffect, deprecated.} = + ## Extracts the file extension of a given `filename`. This is the + ## same as the `extension` result of `splitFilename`. + ## **Deprecated since version 0.8.2**: Use ``splitFile(filename).ext`` + ## instead. + result = splitFile(filename).ext + +proc extractFileTrunk*(filename: string): string {.noSideEffect, deprecated.} = + ## Extracts the file name of a given `filename`. This removes any + ## directory information and the file extension. + ## **Deprecated since version 0.8.2**: Use ``splitFile(path).name`` instead. + result = splitFile(filename).name + +proc ChangeFileExt*(filename, ext: string): string {.noSideEffect.} = + ## Changes the file extension to `ext`. + ## + ## If the `filename` has no extension, `ext` will be added. + ## If `ext` == "" then any extension is removed. + ## `Ext` should be given without the leading '.', because some + ## filesystems may use a different character. (Although I know + ## of none such beast.) + var extPos = searchExtPos(filename) + if extPos < 0: result = filename & normExt(ext) + else: result = copy(filename, 0, extPos-1) & normExt(ext) + +proc addFileExt*(filename, ext: string): string {.noSideEffect.} = + ## Adds the file extension `ext` to `filename`, unless + ## `filename` already has an extension. + ## + ## `Ext` should be given without the leading '.', because some + ## filesystems may use a different character. + ## (Although I know of none such beast.) + var extPos = searchExtPos(filename) + if extPos < 0: result = filename & normExt(ext) + else: result = filename + +proc AppendFileExt*(filename, ext: string): string {. + noSideEffect, deprecated.} = + ## **Deprecated since version 0.8.2**: Use `addFileExt` instead. + result = addFileExt(filename, ext) + +proc cmpPaths*(pathA, pathB: string): int {.noSideEffect.} = + ## Compares two paths. + ## + ## On a case-sensitive filesystem this is done + ## case-sensitively otherwise case-insensitively. Returns: + ## + ## | 0 iff pathA == pathB + ## | < 0 iff pathA < pathB + ## | > 0 iff pathA > pathB + if FileSystemCaseSensitive: + result = cmp(pathA, pathB) + else: + result = cmpIgnoreCase(pathA, pathB) + +proc sameFile*(path1, path2: string): bool = + ## Returns True if both pathname arguments refer to the same file or + ## directory (as indicated by device number and i-node number). + ## Raises an exception if an os.stat() call on either pathname fails. + when defined(Windows): + var + a, b: TWin32FindData + var resA = findfirstFileA(path1, a) + var resB = findfirstFileA(path2, b) + if resA != -1 and resB != -1: + result = $a.cFileName == $b.cFileName + else: + # work around some ``findfirstFileA`` bugs + result = cmpPaths(path1, path2) == 0 + if resA != -1: findclose(resA) + if resB != -1: findclose(resB) + else: + var + a, b: TStat + if stat(path1, a) < 0'i32 or stat(path2, b) < 0'i32: + result = cmpPaths(path1, path2) == 0 # be consistent with Windows + else: + result = a.st_dev == b.st_dev and a.st_ino == b.st_ino + +proc sameFileContent*(path1, path2: string): bool = + ## Returns True if both pathname arguments refer to files with identical + ## binary content. + const + bufSize = 8192 # 8K buffer + var + a, b: TFile + if not open(a, path1): return false + if not open(b, path2): + close(a) + return false + var bufA = alloc(bufsize) + var bufB = alloc(bufsize) + while True: + var readA = readBuffer(a, bufA, bufsize) + var readB = readBuffer(b, bufB, bufsize) + if readA != readB: + result = false + break + if readA == 0: + result = true + break + result = equalMem(bufA, bufB, readA) + if not result: break + if readA != bufSize: break # end of file + dealloc(bufA) + dealloc(bufB) + close(a) + close(b) + +proc copyFile*(dest, source: string) = + ## Copies a file from `source` to `dest`. If this fails, + ## `EOS` is raised. + when defined(Windows): + if CopyFileA(source, dest, 0'i32) == 0'i32: OSError() + else: + # generic version of copyFile which works for any platform: + const + bufSize = 8192 # 8K buffer + var + d, s: TFile + if not open(s, source): OSError() + if not open(d, dest, fmWrite): + close(s) + OSError() + var + buf: Pointer = alloc(bufsize) + bytesread, byteswritten: int + while True: + bytesread = readBuffer(s, buf, bufsize) + byteswritten = writeBuffer(d, buf, bytesread) + if bytesread != bufSize: break + if bytesread != bytesWritten: OSError() + dealloc(buf) + close(s) + close(d) + +proc moveFile*(dest, source: string) = + ## Moves a file from `source` to `dest`. If this fails, `EOS` is raised. + if crename(source, dest) != 0'i32: OSError() + +proc removeFile*(file: string) = + ## Removes the `file`. If this fails, `EOS` is raised. + if cremove(file) != 0'i32: OSError() + +proc executeShellCommand*(command: string): int {.deprecated.} = + ## **Deprecated since version 0.8.2**: Use `execShellCmd` instead. + result = csystem(command) + +proc execShellCmd*(command: string): int = + ## Executes a shell command. + ## + ## Command has the form 'program args' where args are the command + ## line arguments given to program. The proc returns the error code + ## of the shell when it has finished. The proc does not return until + ## the process has finished. To execute a program without having a + ## shell involved, use the `execProcess` proc of the `osproc` + ## module. + result = csystem(command) + +var + envComputed: bool = false + environment: seq[string] = @[] + +when defined(windows): + # because we support Windows GUI applications, things get really + # messy here... + proc strEnd(cstr: CString, c = 0'i32): CString {. + importc: "strchr", header: "<string.h>".} + + proc getEnvVarsC() = + if not envComputed: + var + env = getEnvironmentStringsA() + e = env + if e == nil: return # an error occured + while True: + var eend = strEnd(e) + add(environment, $e) + e = cast[CString](cast[TAddress](eend)+1) + if eend[1] == '\0': break + envComputed = true + discard FreeEnvironmentStringsA(env) + +else: + var + gEnv {.importc: "gEnv".}: ptr array [0..10_000, CString] + + proc getEnvVarsC() = + # retrieves the variables of char** env of C's main proc + if not envComputed: + var i = 0 + while True: + if gEnv[i] == nil: break + add environment, $gEnv[i] + inc(i) + envComputed = true + +proc findEnvVar(key: string): int = + getEnvVarsC() + var temp = key & '=' + for i in 0..high(environment): + if startsWith(environment[i], temp): return i + return -1 + +proc getEnv*(key: string): string = + ## Returns the value of the environment variable named `key`. + ## + ## If the variable does not exist, "" is returned. To distinguish + ## whether a variable exists or it's value is just "", call + ## `existsEnv(key)`. + var i = findEnvVar(key) + if i >= 0: + return copy(environment[i], find(environment[i], '=')+1) + else: + var env = cgetenv(key) + if env == nil: return "" + result = $env + +proc existsEnv*(key: string): bool = + ## Checks whether the environment variable named `key` exists. + ## Returns true if it exists, false otherwise. + if cgetenv(key) != nil: return true + else: return findEnvVar(key) >= 0 + +proc putEnv*(key, val: string) = + ## Sets the value of the environment variable named `key` to `val`. + ## If an error occurs, `EInvalidEnvVar` is raised. + + # Note: by storing the string in the environment sequence, + # we gurantee that we don't free the memory before the program + # ends (this is needed for POSIX compliance). It is also needed so that + # the process itself may access its modified environment variables! + var indx = findEnvVar(key) + if indx >= 0: + environment[indx] = key & '=' & val + else: + add environment, (key & '=' & val) + indx = high(environment) + when defined(unix): + if cputenv(environment[indx]) != 0'i32: + OSError() + else: + if SetEnvironmentVariableA(key, val) == 0'i32: + OSError() + +iterator iterOverEnvironment*(): tuple[key, value: string] {.deprecated.} = + ## Iterate over all environments variables. In the first component of the + ## tuple is the name of the current variable stored, in the second its value. + ## **Deprecated since version 0.8.2**: Use `envPairs` instead. + getEnvVarsC() + for i in 0..high(environment): + var p = find(environment[i], '=') + yield (copy(environment[i], 0, p-1), copy(environment[i], p+1)) + +iterator envPairs*(): tuple[key, value: string] = + ## Iterate over all environments variables. In the first component of the + ## tuple is the name of the current variable stored, in the second its value. + getEnvVarsC() + for i in 0..high(environment): + var p = find(environment[i], '=') + yield (copy(environment[i], 0, p-1), copy(environment[i], p+1)) + +iterator walkFiles*(pattern: string): string = + ## Iterate over all the files that match the `pattern`. + ## + ## `pattern` is OS dependant, but at least the "\*.ext" + ## notation is supported. + when defined(windows): + var + f: TWin32FindData + res: int + res = findfirstFileA(pattern, f) + if res != -1: + while true: + if f.cFileName[0] != '.': + yield splitFile(pattern).dir / extractFilename($f.cFileName) + if findnextFileA(res, f) == 0'i32: break + findclose(res) + else: # here we use glob + var + f: TGlob + res: int + f.gl_offs = 0 + f.gl_pathc = 0 + f.gl_pathv = nil + res = glob(pattern, 0, nil, addr(f)) + if res == 0: + for i in 0.. f.gl_pathc - 1: + assert(f.gl_pathv[i] != nil) + yield $f.gl_pathv[i] + globfree(addr(f)) + +type + TPathComponent* = enum ## Enumeration specifying a path component. + pcFile, ## path refers to a file + pcLinkToFile, ## path refers to a symbolic link to a file + pcDir, ## path refers to a directory + pcLinkToDir ## path refers to a symbolic link to a directory + +const + pcDirectory* {.deprecated.} = pcDir ## deprecated alias + pcLinkToDirectory* {.deprecated.} = pcLinkToDir ## deprecated alias + +iterator walkDir*(dir: string): tuple[kind: TPathComponent, path: string] = + ## walks over the directory `dir` and yields for each directory or file in + ## `dir`. The component type and full path for each item is returned. + ## Walking is not recursive. + ## Example: This directory structure:: + ## dirA / dirB / fileB1.txt + ## / dirC + ## / fileA1.txt + ## / fileA2.txt + ## + ## and this code: + ## + ## .. code-block:: Nimrod + ## for kind, path in walkDir("dirA"): + ## echo(path) + ## + ## produces this output (though not necessarily in this order!):: + ## dirA/dirB + ## dirA/dirC + ## dirA/fileA1.txt + ## dirA/fileA2.txt + when defined(windows): + var f: TWIN32_Find_Data + var h = findfirstFileA(dir / "*", f) + if h != -1: + while true: + var k = pcFile + if f.cFilename[0] != '.': + if (f.dwFileAttributes and FILE_ATTRIBUTE_DIRECTORY) != 0'i32: + k = pcDir + yield (k, dir / extractFilename($f.cFilename)) + if findnextFileA(h, f) == 0'i32: break + findclose(h) + else: + var d = openDir(dir) + if d != nil: + while true: + var x = readDir(d) + if x == nil: break + var y = $x.d_name + if y != "." and y != "..": + var s: TStat + y = dir / y + if stat(y, s) < 0'i32: break + var k = pcFile + if S_ISDIR(s.st_mode): k = pcDir + if S_ISLNK(s.st_mode): k = succ(k) + yield (k, y) + discard closeDir(d) + +iterator walkDirRec*(dir: string, filter={pcFile, pcDir}): string = + ## walks over the directory `dir` and yields for each file in `dir`. The + ## full path for each file is returned. + ## Walking is recursive. `filter` controls the behaviour of the iterator: + ## + ## --------------------- --------------------------------------------- + ## filter meaning + ## --------------------- --------------------------------------------- + ## ``pcFile`` yield real files + ## ``pcLinkToFile`` yield symbolic links to files + ## ``pcDir`` follow real directories + ## ``pcLinkToDir`` follow symbolic links to directories + ## --------------------- --------------------------------------------- + ## + var stack = @[dir] + while stack.len > 0: + for k,p in walkDir(stack.pop()): + if k in filter: + case k + of pcFile, pcLinkToFile: yield p + of pcDir, pcLinkToDir: stack.add(p) + +proc rawRemoveDir(dir: string) = + when defined(windows): + if RemoveDirectoryA(dir) == 0'i32: OSError() + else: + if rmdir(dir) != 0'i32: OSError() + +proc removeDir*(dir: string) = + ## Removes the directory `dir` including all subdirectories and files + ## in `dir` (recursively). If this fails, `EOS` is raised. + for kind, path in walkDir(dir): + case kind + of pcFile, pcLinkToFile, pcLinkToDir: removeFile(path) + of pcDir: removeDir(path) + rawRemoveDir(dir) + +proc rawCreateDir(dir: string) = + when defined(unix): + if mkdir(dir, 0o711) != 0'i32 and errno != EEXIST: + OSError() + else: + if CreateDirectoryA(dir, nil) == 0'i32 and GetLastError() != 183'i32: + OSError() + +proc createDir*(dir: string) = + ## Creates the directory `dir`. + ## + ## The directory may contain several subdirectories that do not exist yet. + ## The full path is created. If this fails, `EOS` is raised. It does **not** + ## fail if the path already exists because for most usages this does not + ## indicate an error. + for i in 1.. dir.len-1: + if dir[i] in {dirsep, altsep}: rawCreateDir(copy(dir, 0, i-1)) + rawCreateDir(dir) + +proc parseCmdLine*(c: string): seq[string] = + ## Splits a command line into several components; components are separated by + ## whitespace unless the whitespace occurs within ``"`` or ``'`` quotes. + ## This proc is only occassionally useful, better use the `parseopt` module. + result = @[] + var i = 0 + var a = "" + while true: + setLen(a, 0) + while c[i] >= '\1' and c[i] <= ' ': inc(i) # skip whitespace + case c[i] + of '\'', '\"': + var delim = c[i] + inc(i) # skip ' or " + while c[i] != '\0' and c[i] != delim: + add a, c[i] + inc(i) + if c[i] != '\0': inc(i) + of '\0': break + else: + while c[i] > ' ': + add(a, c[i]) + inc(i) + add(result, a) + +type + TFilePermission* = enum ## file access permission; modelled after UNIX + fpUserExec, ## execute access for the file owner + fpUserWrite, ## write access for the file owner + fpUserRead, ## read access for the file owner + fpGroupExec, ## execute access for the group + fpGroupWrite, ## write access for the group + fpGroupRead, ## read access for the group + fpOthersExec, ## execute access for others + fpOthersWrite, ## write access for others + fpOthersRead ## read access for others + +proc getFilePermissions*(filename: string): set[TFilePermission] = + ## retrieves file permissions for `filename`. `OSError` is raised in case of + ## an error. On Windows, only the ``readonly`` flag is checked, every other + ## permission is available in any case. + when defined(posix): + var a: TStat + if stat(filename, a) < 0'i32: OSError() + result = {} + if (a.st_mode and S_IRUSR) != 0'i32: result.incl(fpUserRead) + if (a.st_mode and S_IWUSR) != 0'i32: result.incl(fpUserWrite) + if (a.st_mode and S_IXUSR) != 0'i32: result.incl(fpUserExec) + + if (a.st_mode and S_IRGRP) != 0'i32: result.incl(fpGroupRead) + if (a.st_mode and S_IWGRP) != 0'i32: result.incl(fpGroupWrite) + if (a.st_mode and S_IXGRP) != 0'i32: result.incl(fpGroupExec) + + if (a.st_mode and S_IROTH) != 0'i32: result.incl(fpOthersRead) + if (a.st_mode and S_IWOTH) != 0'i32: result.incl(fpOthersWrite) + if (a.st_mode and S_IXOTH) != 0'i32: result.incl(fpOthersExec) + else: + var res = GetFileAttributesA(filename) + if res == -1'i32: OSError() + if (res and FILE_ATTRIBUTE_READONLY) != 0'i32: + result = {fpUserExec, fpUserRead, fpGroupExec, fpGroupRead, + fpOthersExec, fpOthersRead} + else: + result = {fpUserExec..fpOthersRead} + +proc setFilePermissions*(filename: string, permissions: set[TFilePermission]) = + ## sets the file permissions for `filename`. `OSError` is raised in case of + ## an error. On Windows, only the ``readonly`` flag is changed, depending on + ## ``fpUserWrite``. + when defined(posix): + var p = 0'i32 + if fpUserRead in permissions: p = p or S_IRUSR + if fpUserWrite in permissions: p = p or S_IWUSR + if fpUserExec in permissions: p = p or S_IXUSR + + if fpGroupRead in permissions: p = p or S_IRGRP + if fpGroupWrite in permissions: p = p or S_IWGRP + if fpGroupExec in permissions: p = p or S_IXGRP + + if fpOthersRead in permissions: p = p or S_IROTH + if fpOthersWrite in permissions: p = p or S_IWOTH + if fpOthersExec in permissions: p = p or S_IXOTH + + if chmod(filename, p) != 0: OSError() + else: + var res = GetFileAttributesA(filename) + if res == -1'i32: OSError() + if fpUserWrite in permissions: + res = res and not FILE_ATTRIBUTE_READONLY + else: + res = res or FILE_ATTRIBUTE_READONLY + if SetFileAttributesA(filename, res) != 0'i32: + OSError() + +proc inclFilePermissions*(filename: string, + permissions: set[TFilePermission]) = + ## a convenience procedure for: + ## + ## .. code-block:: nimrod + ## setFilePermissions(filename, getFilePermissions(filename)+permissions) + setFilePermissions(filename, getFilePermissions(filename)+permissions) + +proc exclFilePermissions*(filename: string, + permissions: set[TFilePermission]) = + ## a convenience procedure for: + ## + ## .. code-block:: nimrod + ## setFilePermissions(filename, getFilePermissions(filename)-permissions) + setFilePermissions(filename, getFilePermissions(filename)-permissions) + +proc getHomeDir*(): string = + ## Returns the home directory of the current user. + when defined(windows): return getEnv("USERPROFILE") & "\\" + else: return getEnv("HOME") & "/" + +proc getConfigDir*(): string = + ## Returns the config directory of the current user for applications. + when defined(windows): return getEnv("APPDATA") & "\\" + else: return getEnv("HOME") & "/.config/" + +when defined(windows): + # Since we support GUI applications with Nimrod, we sometimes generate + # a WinMain entry proc. But a WinMain proc has no access to the parsed + # command line arguments. The way to get them differs. Thus we parse them + # ourselves. This has the additional benefit that the program's behaviour + # is always the same -- independent of the used C compiler. + var + ownArgv: seq[string] + + proc paramStr(i: int): string = + if isNil(ownArgv): ownArgv = parseCmdLine($getCommandLineA()) + return ownArgv[i] + + proc paramCount(): int = + if isNil(ownArgv): ownArgv = parseCmdLine($getCommandLineA()) + result = ownArgv.len-1 + +else: + var + cmdCount {.importc: "cmdCount".}: cint + cmdLine {.importc: "cmdLine".}: cstringArray + + proc paramStr(i: int): string = + if i < cmdCount and i >= 0: return $cmdLine[i] + raise newException(EInvalidIndex, "invalid index") + + proc paramCount(): int = return cmdCount-1 + +when defined(linux) or defined(solaris) or defined(bsd) or defined(aix): + proc getApplAux(procPath: string): string = + result = newString(256) + var len = readlink(procPath, result, 256) + if len > 256: + result = newString(len+1) + len = readlink(procPath, result, len) + setlen(result, len) + +when defined(macosx): + # a really hacky solution: since we like to include 2 headers we have to + # define two procs which in reality are the same + proc getExecPath1(c: cstring, size: var int32) {. + importc: "_NSGetExecutablePath", header: "<sys/param.h>".} + proc getExecPath2(c: cstring, size: var int32): bool {. + importc: "_NSGetExecutablePath", header: "<mach-o/dyld.h>".} + +proc getApplicationFilename*(): string = + ## Returns the filename of the application's executable. + + # Linux: /proc/<pid>/exe + # Solaris: + # /proc/<pid>/object/a.out (filename only) + # /proc/<pid>/path/a.out (complete pathname) + # *BSD (and maybe Darwin too): + # /proc/<pid>/file + when defined(windows): + result = newString(256) + var len = getModuleFileNameA(0, result, 256) + setlen(result, int(len)) + elif defined(linux) or defined(aix): + result = getApplAux("/proc/self/exe") + elif defined(solaris): + result = getApplAux("/proc/" & $getpid() & "/path/a.out") + elif defined(bsd): + result = getApplAux("/proc/" & $getpid() & "/file") + elif defined(macosx): + var size: int32 + getExecPath1(nil, size) + result = newString(int(size)) + if getExecPath2(result, size): + result = "" # error! + else: + # little heuristic that may work on other POSIX-like systems: + result = getEnv("_") + if len(result) == 0: + result = ParamStr(0) # POSIX guaranties that this contains the executable + # as it has been executed by the calling process + if len(result) > 0 and result[0] != DirSep: # not an absolute path? + # iterate over any path in the $PATH environment variable + for p in split(getEnv("PATH"), {PathSep}): + var x = joinPath(p, result) + if ExistsFile(x): return x + +proc getApplicationDir*(): string = + ## Returns the directory of the application's executable. + result = splitFile(getApplicationFilename()).dir + +proc sleep*(milsecs: int) = + ## sleeps `milsecs` milliseconds. + when defined(windows): + winlean.sleep(int32(milsecs)) + else: + var a, b: Ttimespec + a.tv_sec = TTime(milsecs div 1000) + a.tv_nsec = (milsecs mod 1000) * 1000 + discard posix.nanosleep(a, b) + +{.pop.} diff --git a/nimlib/pure/osproc.nim b/nimlib/pure/osproc.nim new file mode 100755 index 000000000..d76825531 --- /dev/null +++ b/nimlib/pure/osproc.nim @@ -0,0 +1,543 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2009 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## This module implements an advanced facility for executing OS processes +## and process communication. + +import + strutils, os, strtabs, streams + +when defined(windows): + import winlean +else: + import posix + +type + TProcess = object of TObject + when defined(windows): + FProcessHandle: Thandle + inputHandle, outputHandle, errorHandle: TFileHandle + else: + inputHandle, outputHandle, errorHandle: TFileHandle + id: cint + exitCode: cint + + PProcess* = ref TProcess ## represents an operating system process + + TProcessOption* = enum ## options that can be passed `startProcess` + poEchoCmd, ## echo the command before execution + poUseShell, ## use the shell to execute the command; NOTE: This + ## often creates a security whole! + poStdErrToStdOut, ## merge stdout and stderr to the stdout stream + poParentStreams ## use the parent's streams + +proc execProcess*(command: string, + options: set[TProcessOption] = {poStdErrToStdOut, + poUseShell}): string + ## A convience procedure that executes ``command`` with ``startProcess`` + ## and returns its output as a string. + +proc executeProcess*(command: string, + options: set[TProcessOption] = {poStdErrToStdOut, + poUseShell}): string {. + deprecated.} = + ## **Deprecated since version 0.8.2**: Use `execProcess` instead. + result = execProcess(command, options) + +proc execCmd*(command: string): int + ## Executes ``command`` and returns its error code. Standard input, output, + ## error streams are inherited from the calling process. + +proc executeCommand*(command: string): int {.deprecated.} = + ## **Deprecated since version 0.8.2**: Use `execCmd` instead. + result = execCmd(command) + + +proc startProcess*(command: string, + workingDir: string = "", + args: openarray[string] = [], + env: PStringTable = nil, + options: set[TProcessOption] = {poStdErrToStdOut}): PProcess + ## Starts a process. `Command` is the executable file, `workingDir` is the + ## process's working directory. If ``workingDir == ""`` the current directory + ## is used. `args` are the command line arguments that are passed to the + ## process. On many operating systems, the first command line argument is the + ## name of the executable. `args` should not contain this argument! + ## `env` is the environment that will be passed to the process. + ## If ``env == nil`` the environment is inherited of + ## the parent process. `options` are additional flags that may be passed + ## to `startProcess`. See the documentation of ``TProcessOption`` for the + ## meaning of these flags. + ## + ## Return value: The newly created process object. Nil is never returned, + ## but ``EOS`` is raised in case of an error. + +proc suspend*(p: PProcess) + ## Suspends the process `p`. + +proc resume*(p: PProcess) + ## Resumes the process `p`. + +proc terminate*(p: PProcess) + ## Terminates the process `p`. + +proc running*(p: PProcess): bool + ## Returns true iff the process `p` is still running. Returns immediately. + +proc processID*(p: PProcess): int = + ## returns `p`'s process ID. + return p.id + +proc waitForExit*(p: PProcess): int + ## waits for the process to finish and returns `p`'s error code. + +proc inputStream*(p: PProcess): PStream + ## returns ``p``'s input stream for writing to + +proc outputStream*(p: PProcess): PStream + ## returns ``p``'s output stream for reading from + +proc errorStream*(p: PProcess): PStream + ## returns ``p``'s output stream for reading from + +when defined(macosx) or defined(bsd): + const + CTL_HW = 6 + HW_AVAILCPU = 25 + HW_NCPU = 3 + proc sysctl(x: ptr array[0..3, cint], y: cint, z: pointer, + a: var int, b: pointer, c: int): cint {. + importc: "sysctl", header: "<sys/sysctl.h>".} + +proc countProcessors*(): int = + ## returns the numer of the processors/cores the machine has. + ## Returns 0 if it cannot be determined. + when defined(windows): + var x = getenv("NUMBER_OF_PROCESSORS") + if x.len > 0: result = parseInt(x) + elif defined(macosx) or defined(bsd): + var + mib: array[0..3, cint] + len, numCPU: int + mib[0] = CTL_HW + mib[1] = HW_AVAILCPU + len = sizeof(numCPU) + discard sysctl(addr(mib), 2, addr(numCPU), len, nil, 0) + if numCPU < 1: + mib[1] = HW_NCPU + discard sysctl(addr(mib), 2, addr(numCPU), len, nil, 0) + result = numCPU + elif defined(hpux): + result = mpctl(MPC_GETNUMSPUS, nil, nil) + elif defined(irix): + var SC_NPROC_ONLN {.importc: "_SC_NPROC_ONLN", header: "<unistd.h>".}: cint + result = sysconf(SC_NPROC_ONLN) + else: + result = sysconf(SC_NPROCESSORS_ONLN) + if result <= 0: result = 1 + +proc startProcessAux(cmd: string, options: set[TProcessOption]): PProcess = + var c = parseCmdLine(cmd) + var a: seq[string] = @[] # slicing is not yet implemented :-( + for i in 1 .. c.len-1: add(a, c[i]) + result = startProcess(command=c[0], args=a, options=options) + +proc execProcesses*(cmds: openArray[string], + options = {poStdErrToStdOut, poParentStreams}, + n = countProcessors()): int = + ## executes the commands `cmds` in parallel. Creates `n` processes + ## that execute in parallel. The highest return value of all processes + ## is returned. + assert n > 0 + if n > 1: + var q: seq[PProcess] + newSeq(q, n) + var m = min(n, cmds.len) + for i in 0..m-1: + q[i] = startProcessAux(cmds[i], options=options) + when defined(noBusyWaiting): + var r = 0 + for i in m..high(cmds): + when defined(debugExecProcesses): + var err = "" + var outp = outputStream(q[r]) + while running(q[r]) or not outp.atEnd(outp): + err.add(outp.readLine()) + err.add("\n") + echo(err) + result = max(waitForExit(q[r]), result) + q[r] = startProcessAux(cmds[i], options=options) + r = (r + 1) mod n + else: + var i = m + while i <= high(cmds): + sleep(50) + for r in 0..n-1: + if not running(q[r]): + #echo(outputStream(q[r]).readLine()) + result = max(waitForExit(q[r]), result) + q[r] = startProcessAux(cmds[i], options=options) + inc(i) + if i > high(cmds): break + for i in 0..m-1: + result = max(waitForExit(q[i]), result) + else: + for i in 0..high(cmds): + var p = startProcessAux(cmds[i], options=options) + result = max(waitForExit(p), result) + +when true: + nil +else: + proc startGUIProcess*(command: string, + workingDir: string = "", + args: openarray[string] = [], + env: PStringTable = nil, + x = -1, + y = -1, + width = -1, + height = -1): PProcess + +proc execProcess(command: string, + options: set[TProcessOption] = {poStdErrToStdOut, + poUseShell}): string = + var p = startProcessAux(command, options=options) + var outp = outputStream(p) + result = "" + while running(p) or not outp.atEnd(outp): + result.add(outp.readLine()) + result.add("\n") + +when false: + proc deallocCStringArray(a: cstringArray) = + var i = 0 + while a[i] != nil: + dealloc(a[i]) + inc(i) + dealloc(a) + +when defined(Windows): + # We need to implement a handle stream for Windows: + type + PFileHandleStream = ref TFileHandleStream + TFileHandleStream = object of TStream + handle: THandle + atTheEnd: bool + + proc hsClose(s: PFileHandleStream) = nil # nothing to do here + proc hsAtEnd(s: PFileHandleStream): bool = return s.atTheEnd + + proc hsReadData(s: PFileHandleStream, buffer: pointer, bufLen: int): int = + if s.atTheEnd: return 0 + var br: int32 + var a = winlean.ReadFile(s.handle, buffer, bufLen, br, nil) + # TRUE and zero bytes returned (EOF). + # TRUE and n (>0) bytes returned (good data). + # FALSE and bytes returned undefined (system error). + if a == 0 and br != 0: OSError() + s.atTheEnd = br < bufLen + result = br + + proc hsWriteData(s: PFileHandleStream, buffer: pointer, bufLen: int) = + var bytesWritten: int32 + var a = winlean.writeFile(s.handle, buffer, bufLen, bytesWritten, nil) + if a == 0: OSError() + + proc newFileHandleStream(handle: THandle): PFileHandleStream = + new(result) + result.handle = handle + result.close = hsClose + result.atEnd = hsAtEnd + result.readData = hsReadData + result.writeData = hsWriteData + + proc buildCommandLine(a: string, args: openarray[string]): cstring = + var res = quoteIfContainsWhite(a) + for i in 0..high(args): + res.add(' ') + res.add(quoteIfContainsWhite(args[i])) + result = cast[cstring](alloc0(res.len+1)) + copyMem(result, cstring(res), res.len) + + proc buildEnv(env: PStringTable): cstring = + var L = 0 + for key, val in pairs(env): inc(L, key.len + val.len + 2) + result = cast[cstring](alloc0(L+2)) + L = 0 + for key, val in pairs(env): + var x = key & "=" & val + copyMem(addr(result[L]), cstring(x), x.len+1) # copy \0 + inc(L, x.len+1) + + #proc open_osfhandle(osh: THandle, mode: int): int {. + # importc: "_open_osfhandle", header: "<fcntl.h>".} + + #var + # O_WRONLY {.importc: "_O_WRONLY", header: "<fcntl.h>".}: int + # O_RDONLY {.importc: "_O_RDONLY", header: "<fcntl.h>".}: int + + proc CreatePipeHandles(Rdhandle, WrHandle: var THandle) = + var piInheritablePipe: TSecurityAttributes + piInheritablePipe.nlength = SizeOF(TSecurityAttributes) + piInheritablePipe.lpSecurityDescriptor = nil + piInheritablePipe.Binherithandle = 1 + if CreatePipe(Rdhandle, Wrhandle, piInheritablePipe, 1024) == 0'i32: + OSError() + + proc fileClose(h: THandle) {.inline.} = + if h > 4: discard CloseHandle(h) + + proc startProcess(command: string, + workingDir: string = "", + args: openarray[string] = [], + env: PStringTable = nil, + options: set[TProcessOption] = {poStdErrToStdOut}): PProcess = + var + SI: TStartupInfo + ProcInfo: TProcessInformation + success: int + hi, ho, he: THandle + new(result) + SI.cb = SizeOf(SI) + if poParentStreams notin options: + SI.dwFlags = STARTF_USESTDHANDLES # STARTF_USESHOWWINDOW or + CreatePipeHandles(SI.hStdInput, HI) + CreatePipeHandles(HO, Si.hStdOutput) + if poStdErrToStdOut in options: + SI.hStdError = SI.hStdOutput + HE = HO + else: + CreatePipeHandles(HE, Si.hStdError) + result.inputHandle = hi + result.outputHandle = ho + result.errorHandle = he + else: + SI.hStdError = GetStdHandle(STD_ERROR_HANDLE) + SI.hStdInput = GetStdHandle(STD_INPUT_HANDLE) + SI.hStdOutput = GetStdHandle(STD_OUTPUT_HANDLE) + result.inputHandle = si.hStdInput + result.outputHandle = si.hStdOutput + result.errorHandle = si.hStdError + + var cmdl: cstring + if false: # poUseShell in options: + cmdl = buildCommandLine(getEnv("COMSPEC"), @["/c", command] & args) + else: + cmdl = buildCommandLine(command, args) + var wd: cstring = nil + var e: cstring = nil + if len(workingDir) > 0: wd = workingDir + if env != nil: e = buildEnv(env) + if poEchoCmd in options: echo($cmdl) + success = winlean.CreateProcess(nil, + cmdl, nil, nil, 1, NORMAL_PRIORITY_CLASS, e, wd, SI, ProcInfo) + + if poParentStreams notin options: + FileClose(si.hStdInput) + FileClose(si.hStdOutput) + if poStdErrToStdOut notin options: + FileClose(si.hStdError) + + if e != nil: dealloc(e) + dealloc(cmdl) + if success == 0: OSError() + # Close the handle now so anyone waiting is woken: + discard closeHandle(procInfo.hThread) + result.FProcessHandle = procInfo.hProcess + result.id = procInfo.dwProcessID + + proc suspend(p: PProcess) = + discard SuspendThread(p.FProcessHandle) + + proc resume(p: PProcess) = + discard ResumeThread(p.FProcessHandle) + + proc running(p: PProcess): bool = + var x = waitForSingleObject(p.FProcessHandle, 50) + return x == WAIT_TIMEOUT + + proc terminate(p: PProcess) = + if running(p): + discard TerminateProcess(p.FProcessHandle, 0) + + proc waitForExit(p: PProcess): int = + discard WaitForSingleObject(p.FProcessHandle, Infinite) + var res: int32 + discard GetExitCodeProcess(p.FProcessHandle, res) + result = res + discard CloseHandle(p.FProcessHandle) + + proc inputStream(p: PProcess): PStream = + result = newFileHandleStream(p.inputHandle) + + proc outputStream(p: PProcess): PStream = + result = newFileHandleStream(p.outputHandle) + + proc errorStream(p: PProcess): PStream = + result = newFileHandleStream(p.errorHandle) + + proc execCmd(command: string): int = + var + SI: TStartupInfo + ProcInfo: TProcessInformation + process: THandle + L: int32 + SI.cb = SizeOf(SI) + SI.hStdError = GetStdHandle(STD_ERROR_HANDLE) + SI.hStdInput = GetStdHandle(STD_INPUT_HANDLE) + SI.hStdOutput = GetStdHandle(STD_OUTPUT_HANDLE) + if winlean.CreateProcess(nil, command, nil, nil, 0, + NORMAL_PRIORITY_CLASS, nil, nil, SI, ProcInfo) == 0: + OSError() + else: + Process = ProcInfo.hProcess + discard CloseHandle(ProcInfo.hThread) + if WaitForSingleObject(Process, INFINITE) != -1: + discard GetExitCodeProcess(Process, L) + result = int(L) + else: + result = -1 + discard CloseHandle(Process) + +else: + const + readIdx = 0 + writeIdx = 1 + + proc addCmdArgs(command: string, args: openarray[string]): string = + result = quoteIfContainsWhite(command) + for i in 0 .. high(args): + add(result, " ") + add(result, quoteIfContainsWhite(args[i])) + + proc toCStringArray(b, a: openarray[string]): cstringArray = + result = cast[cstringArray](alloc0((a.len + b.len + 1) * sizeof(cstring))) + for i in 0..high(b): + result[i] = cast[cstring](alloc(b[i].len+1)) + copyMem(result[i], cstring(b[i]), b[i].len+1) + for i in 0..high(a): + result[i+b.len] = cast[cstring](alloc(a[i].len+1)) + copyMem(result[i+b.len], cstring(a[i]), a[i].len+1) + + proc ToCStringArray(t: PStringTable): cstringArray = + result = cast[cstringArray](alloc0((t.len + 1) * sizeof(cstring))) + var i = 0 + for key, val in pairs(t): + var x = key & "=" & val + result[i] = cast[cstring](alloc(x.len+1)) + copyMem(result[i], addr(x[0]), x.len+1) + inc(i) + + proc startProcess(command: string, + workingDir: string = "", + args: openarray[string] = [], + env: PStringTable = nil, + options: set[TProcessOption] = {poStdErrToStdOut}): PProcess = + var + p_stdin, p_stdout, p_stderr: array [0..1, cint] + new(result) + result.exitCode = 3 # for ``waitForExit`` + if pipe(p_stdin) != 0'i32 or pipe(p_stdout) != 0'i32: + OSError("failed to create a pipe") + var Pid = fork() + if Pid < 0: + OSError("failed to fork process") + + if pid == 0: + ## child process: + discard close(p_stdin[writeIdx]) + if dup2(p_stdin[readIdx], readIdx) < 0: OSError() + discard close(p_stdout[readIdx]) + if dup2(p_stdout[writeIdx], writeIdx) < 0: OSError() + if poStdErrToStdOut in options: + if dup2(p_stdout[writeIdx], 2) < 0: OSError() + else: + if pipe(p_stderr) != 0'i32: OSError("failed to create a pipe") + discard close(p_stderr[readIdx]) + if dup2(p_stderr[writeIdx], 2) < 0: OSError() + + if workingDir.len > 0: + os.setCurrentDir(workingDir) + if poUseShell notin options: + var a = toCStringArray([extractFilename(command)], args) + if env == nil: + discard execv(command, a) + else: + discard execve(command, a, ToCStringArray(env)) + else: + var x = addCmdArgs(command, args) + var a = toCStringArray(["sh", "-c"], [x]) + if env == nil: + discard execv("/bin/sh", a) + else: + discard execve("/bin/sh", a, ToCStringArray(env)) + # too risky to raise an exception here: + quit("execve call failed: " & $strerror(errno)) + # Parent process. Copy process information. + if poEchoCmd in options: + echo(command & " " & join(args, " ")) + result.id = pid + + result.inputHandle = p_stdin[writeIdx] + result.outputHandle = p_stdout[readIdx] + if poStdErrToStdOut in options: + result.errorHandle = result.outputHandle + else: + result.errorHandle = p_stderr[readIdx] + discard close(p_stderr[writeIdx]) + discard close(p_stdin[readIdx]) + discard close(p_stdout[writeIdx]) + + proc suspend(p: PProcess) = + discard kill(p.id, SIGSTOP) + + proc resume(p: PProcess) = + discard kill(p.id, SIGCONT) + + proc running(p: PProcess): bool = + result = waitPid(p.id, p.exitCode, WNOHANG) == int(p.id) + + proc terminate(p: PProcess) = + if kill(p.id, SIGTERM) == 0'i32: + if running(p): discard kill(p.id, SIGKILL) + + proc waitForExit(p: PProcess): int = + #if waitPid(p.id, p.exitCode, 0) == int(p.id): + # ``waitPid`` fails if the process is not running anymore. But then + # ``running`` probably set ``p.exitCode`` for us. Since ``p.exitCode`` is + # initialized with 3, wrong success exit codes are prevented. + var oldExitCode = p.exitCode + if waitPid(p.id, p.exitCode, 0) < 0: + # failed, so restore old exitCode + p.exitCode = oldExitCode + result = int(p.exitCode) + + proc inputStream(p: PProcess): PStream = + var f: TFile + if not open(f, p.inputHandle, fmWrite): OSError() + result = newFileStream(f) + + proc outputStream(p: PProcess): PStream = + var f: TFile + if not open(f, p.outputHandle, fmRead): OSError() + result = newFileStream(f) + + proc errorStream(p: PProcess): PStream = + var f: TFile + if not open(f, p.errorHandle, fmRead): OSError() + result = newFileStream(f) + + proc csystem(cmd: cstring): cint {.nodecl, importc: "system".} + + proc execCmd(command: string): int = + result = csystem(command) + +when isMainModule: + var x = execProcess("gcc -v") + echo "ECHO ", x diff --git a/nimlib/pure/parsecfg.nim b/nimlib/pure/parsecfg.nim new file mode 100755 index 000000000..c26dab099 --- /dev/null +++ b/nimlib/pure/parsecfg.nim @@ -0,0 +1,352 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2008 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## The ``parsecfg`` module implements a high performance configuration file +## parser. The configuration file's syntax is similar to the Windows ``.ini`` +## format, but much more powerful, as it is not a line based parser. String +## literals, raw string literals and triple quoted string literals are supported +## as in the Nimrod programming language. + +## This is an example of how a configuration file may look like: +## +## .. include:: doc/mytest.cfg +## :literal: +## The file ``tests/tparscfg.nim`` demonstrates how to use the +## configuration file parser: +## +## .. code-block:: nimrod +## :file: tests/tparscfg.nim + + +import + hashes, strutils, lexbase, streams + +type + TCfgEventKind* = enum ## enumation of all events that may occur when parsing + cfgEof, ## end of file reached + cfgSectionStart, ## a ``[section]`` has been parsed + cfgKeyValuePair, ## a ``key=value`` pair has been detected + cfgOption, ## a ``--key=value`` command line option + cfgError ## an error ocurred during parsing + + TCfgEvent* = object of TObject ## describes a parsing event + case kind*: TCfgEventKind ## the kind of the event + of cfgEof: nil + of cfgSectionStart: + section*: string ## `section` contains the name of the + ## parsed section start (syntax: ``[section]``) + of cfgKeyValuePair, cfgOption: + key*, value*: string ## contains the (key, value) pair if an option + ## of the form ``--key: value`` or an ordinary + ## ``key= value`` pair has been parsed. + ## ``value==""`` if it was not specified in the + ## configuration file. + of cfgError: ## the parser encountered an error: `msg` + msg*: string ## contains the error message. No exceptions + ## are thrown if a parse error occurs. + + TTokKind = enum + tkInvalid, tkEof, + tkSymbol, tkEquals, tkColon, tkBracketLe, tkBracketRi, tkDashDash + TToken {.final.} = object # a token + kind: TTokKind # the type of the token + literal: string # the parsed (string) literal + + TParserState = enum + startState # , commaState # not yet used + TCfgParser* = object of TBaseLexer ## the parser object. + tok: TToken + state: TParserState + filename: string + +proc open*(c: var TCfgParser, input: PStream, filename: string) + ## initializes the parser with an input stream. `Filename` is only used + ## for nice error messages. + +proc close*(c: var TCfgParser) + ## closes the parser `c` and its associated input stream. + +proc next*(c: var TCfgParser): TCfgEvent + ## retrieves the first/next event. This controls the parser. + +proc getColumn*(c: TCfgParser): int + ## get the current column the parser has arrived at. + +proc getLine*(c: TCfgParser): int + ## get the current line the parser has arrived at. + +proc getFilename*(c: TCfgParser): string + ## get the filename of the file that the parser processes. + +proc errorStr*(c: TCfgParser, msg: string): string + ## returns a properly formated error message containing current line and + ## column information. + + +# implementation + +const + SymChars: TCharSet = {'a'..'z', 'A'..'Z', '0'..'9', '_', '\x80'..'\xFF'} + +proc rawGetTok(c: var TCfgParser, tok: var TToken) +proc open(c: var TCfgParser, input: PStream, filename: string) = + lexbase.open(c, input) + c.filename = filename + c.state = startState + c.tok.kind = tkInvalid + c.tok.literal = "" + rawGetTok(c, c.tok) + +proc close(c: var TCfgParser) = + lexbase.close(c) + +proc getColumn(c: TCfgParser): int = + result = getColNumber(c, c.bufPos) + +proc getLine(c: TCfgParser): int = + result = c.linenumber + +proc getFilename(c: TCfgParser): string = + result = c.filename + +proc handleHexChar(c: var TCfgParser, xi: var int) = + case c.buf[c.bufpos] + of '0'..'9': + xi = (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('0')) + inc(c.bufpos) + of 'a'..'f': + xi = (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('a') + 10) + inc(c.bufpos) + of 'A'..'F': + xi = (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('A') + 10) + inc(c.bufpos) + else: + nil + +proc handleDecChars(c: var TCfgParser, xi: var int) = + while c.buf[c.bufpos] in {'0'..'9'}: + xi = (xi * 10) + (ord(c.buf[c.bufpos]) - ord('0')) + inc(c.bufpos) + +proc getEscapedChar(c: var TCfgParser, tok: var TToken) = + inc(c.bufpos) # skip '\' + case c.buf[c.bufpos] + of 'n', 'N': + add(tok.literal, "\n") + Inc(c.bufpos) + of 'r', 'R', 'c', 'C': + add(tok.literal, '\c') + Inc(c.bufpos) + of 'l', 'L': + add(tok.literal, '\L') + Inc(c.bufpos) + of 'f', 'F': + add(tok.literal, '\f') + inc(c.bufpos) + of 'e', 'E': + add(tok.literal, '\e') + Inc(c.bufpos) + of 'a', 'A': + add(tok.literal, '\a') + Inc(c.bufpos) + of 'b', 'B': + add(tok.literal, '\b') + Inc(c.bufpos) + of 'v', 'V': + add(tok.literal, '\v') + Inc(c.bufpos) + of 't', 'T': + add(tok.literal, '\t') + Inc(c.bufpos) + of '\'', '\"': + add(tok.literal, c.buf[c.bufpos]) + Inc(c.bufpos) + of '\\': + add(tok.literal, '\\') + Inc(c.bufpos) + of 'x', 'X': + inc(c.bufpos) + var xi = 0 + handleHexChar(c, xi) + handleHexChar(c, xi) + add(tok.literal, Chr(xi)) + of '0'..'9': + var xi = 0 + handleDecChars(c, xi) + if (xi <= 255): add(tok.literal, Chr(xi)) + else: tok.kind = tkInvalid + else: tok.kind = tkInvalid + +proc HandleCRLF(c: var TCfgParser, pos: int): int = + case c.buf[pos] + of '\c': result = lexbase.HandleCR(c, pos) + of '\L': result = lexbase.HandleLF(c, pos) + else: result = pos + +proc getString(c: var TCfgParser, tok: var TToken, rawMode: bool) = + var pos = c.bufPos + 1 # skip " + var buf = c.buf # put `buf` in a register + tok.kind = tkSymbol + if (buf[pos] == '\"') and (buf[pos + 1] == '\"'): + # long string literal: + inc(pos, 2) # skip "" + # skip leading newline: + pos = HandleCRLF(c, pos) + buf = c.buf + while true: + case buf[pos] + of '\"': + if (buf[pos + 1] == '\"') and (buf[pos + 2] == '\"'): break + add(tok.literal, '\"') + Inc(pos) + of '\c', '\L': + pos = HandleCRLF(c, pos) + buf = c.buf + add(tok.literal, "\n") + of lexbase.EndOfFile: + tok.kind = tkInvalid + break + else: + add(tok.literal, buf[pos]) + Inc(pos) + c.bufpos = pos + 3 # skip the three """ + else: + # ordinary string literal + while true: + var ch = buf[pos] + if ch == '\"': + inc(pos) # skip '"' + break + if ch in {'\c', '\L', lexbase.EndOfFile}: + tok.kind = tkInvalid + break + if (ch == '\\') and not rawMode: + c.bufPos = pos + getEscapedChar(c, tok) + pos = c.bufPos + else: + add(tok.literal, ch) + Inc(pos) + c.bufpos = pos + +proc getSymbol(c: var TCfgParser, tok: var TToken) = + var pos = c.bufpos + var buf = c.buf + while true: + add(tok.literal, buf[pos]) + Inc(pos) + if not (buf[pos] in SymChars): break + c.bufpos = pos + tok.kind = tkSymbol + +proc skip(c: var TCfgParser) = + var pos = c.bufpos + var buf = c.buf + while true: + case buf[pos] + of ' ', '\t': + Inc(pos) + of '#', ';': + while not (buf[pos] in {'\c', '\L', lexbase.EndOfFile}): inc(pos) + of '\c', '\L': + pos = HandleCRLF(c, pos) + buf = c.buf + else: + break # EndOfFile also leaves the loop + c.bufpos = pos + +proc rawGetTok(c: var TCfgParser, tok: var TToken) = + tok.kind = tkInvalid + setlen(tok.literal, 0) + skip(c) + case c.buf[c.bufpos] + of '=': + tok.kind = tkEquals + inc(c.bufpos) + tok.literal = "=" + of '-': + inc(c.bufPos) + if c.buf[c.bufPos] == '-': inc(c.bufPos) + tok.kind = tkDashDash + tok.literal = "--" + of ':': + tok.kind = tkColon + inc(c.bufpos) + tok.literal = ":" + of 'r', 'R': + if c.buf[c.bufPos + 1] == '\"': + Inc(c.bufPos) + getString(c, tok, true) + else: + getSymbol(c, tok) + of '[': + tok.kind = tkBracketLe + inc(c.bufpos) + tok.literal = "]" + of ']': + tok.kind = tkBracketRi + Inc(c.bufpos) + tok.literal = "]" + of '\"': + getString(c, tok, false) + of lexbase.EndOfFile: + tok.kind = tkEof + tok.literal = "[EOF]" + else: getSymbol(c, tok) + +proc errorStr(c: TCfgParser, msg: string): string = + result = `%`("$1($2, $3) Error: $4", + [c.filename, $getLine(c), $getColumn(c), msg]) + +proc getKeyValPair(c: var TCfgParser, kind: TCfgEventKind): TCfgEvent = + if c.tok.kind == tkSymbol: + result.kind = kind + result.key = c.tok.literal + result.value = "" + rawGetTok(c, c.tok) + if c.tok.kind in {tkEquals, tkColon}: + rawGetTok(c, c.tok) + if c.tok.kind == tkSymbol: + result.value = c.tok.literal + else: + result.kind = cfgError + result.msg = errorStr(c, "symbol expected, but found: " & c.tok.literal) + rawGetTok(c, c.tok) + else: + result.kind = cfgError + result.msg = errorStr(c, "symbol expected, but found: " & c.tok.literal) + rawGetTok(c, c.tok) + +proc next(c: var TCfgParser): TCfgEvent = + case c.tok.kind + of tkEof: + result.kind = cfgEof + of tkDashDash: + rawGetTok(c, c.tok) + result = getKeyValPair(c, cfgOption) + of tkSymbol: + result = getKeyValPair(c, cfgKeyValuePair) + of tkBracketLe: + rawGetTok(c, c.tok) + if c.tok.kind == tkSymbol: + result.kind = cfgSectionStart + result.section = c.tok.literal + else: + result.kind = cfgError + result.msg = errorStr(c, "symbol expected, but found: " & c.tok.literal) + rawGetTok(c, c.tok) + if c.tok.kind == tkBracketRi: + rawGetTok(c, c.tok) + else: + result.kind = cfgError + result.msg = errorStr(c, "\']\' expected, but found: " & c.tok.literal) + of tkInvalid, tkEquals, tkColon, tkBracketRi: + result.kind = cfgError + result.msg = errorStr(c, "invalid token: " & c.tok.literal) + rawGetTok(c, c.tok) diff --git a/nimlib/pure/parsecsv.nim b/nimlib/pure/parsecsv.nim new file mode 100755 index 000000000..5970f2090 --- /dev/null +++ b/nimlib/pure/parsecsv.nim @@ -0,0 +1,178 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2009 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## This module implements a simple high performance `CSV`:idx: +## (`comma separated value`:idx:) parser. +## +## Example: How to use the parser +## ============================== +## +## .. code-block:: nimrod +## import os, parsecsv, streams +## var s = newFileStream(ParamStr(1), fmRead) +## if s == nil: quit("cannot open the file" & ParamStr(1)) +## var x: TCsvParser +## open(x, s, ParamStr(1)) +## while readRow(x): +## Echo "new row: " +## for val in items(x.row): +## Echo "##", val, "##" +## close(x) +## + +import + lexbase, streams + +type + TCsvRow* = seq[string] ## a row in a CSV file + TCsvParser* = object of TBaseLexer ## the parser object. + row*: TCsvRow ## the current row + filename: string + sep, quote, esc: char + skipWhite: bool + currRow: int + + EInvalidCsv* = object of EIO ## exception that is raised if + ## a parsing error occurs + +proc raiseEInvalidCsv(filename: string, line, col: int, + msg: string) {.noreturn.} = + var e: ref EInvalidCsv + new(e) + e.msg = filename & "(" & $line & ", " & $col & ") Error: " & msg + raise e + +proc error(my: TCsvParser, pos: int, msg: string) = + raiseEInvalidCsv(my.filename, my.LineNumber, getColNumber(my, pos), msg) + +proc open*(my: var TCsvParser, input: PStream, filename: string, + separator = ',', quote = '"', escape = '\0', + skipInitialSpace = false) = + ## initializes the parser with an input stream. `Filename` is only used + ## for nice error messages. The parser's behaviour can be controlled by + ## the diverse optional parameters: + ## - `separator`: character used to separate fields + ## - `quote`: Used to quote fields containing special characters like + ## `separator`, `quote` or new-line characters. '\0' disables the parsing + ## of quotes. + ## - `escape`: removes any special meaning from the following character; + ## '\0' disables escaping; if escaping is disabled and `quote` is not '\0', + ## two `quote` characters are parsed one literal `quote` character. + ## - `skipInitialSpace`: If true, whitespace immediately following the + ## `separator` is ignored. + lexbase.open(my, input) + my.filename = filename + my.sep = separator + my.quote = quote + my.esc = escape + my.skipWhite = skipInitialSpace + my.row = @[] + my.currRow = 0 + +proc parseField(my: var TCsvParser, a: var string) = + var pos = my.bufpos + var buf = my.buf + if my.skipWhite: + while buf[pos] in {' ', '\t'}: inc(pos) + setLen(a, 0) # reuse memory + if buf[pos] == my.quote and my.quote != '\0': + inc(pos) + while true: + var c = buf[pos] + if c == '\0': + my.bufpos = pos # can continue after exception? + error(my, pos, my.quote & " expected") + break + elif c == my.quote: + if my.esc == '\0' and buf[pos+1] == my.quote: + add(a, my.quote) + inc(pos, 2) + else: + inc(pos) + break + elif c == my.esc: + add(a, buf[pos+1]) + inc(pos, 2) + else: + case c + of '\c': + pos = handleCR(my, pos) + buf = my.buf + add(a, "\n") + of '\l': + pos = handleLF(my, pos) + buf = my.buf + add(a, "\n") + else: + add(a, c) + inc(pos) + else: + while true: + var c = buf[pos] + if c == my.sep: break + if c in {'\c', '\l', '\0'}: break + add(a, c) + inc(pos) + my.bufpos = pos + +proc processedRows*(my: var TCsvParser): int = + ## returns number of the processed rows + return my.currRow + +proc readRow*(my: var TCsvParser, columns = 0): bool = + ## reads the next row; if `columns` > 0, it expects the row to have + ## exactly this many columns. Returns false if the end of the file + ## has been encountered else true. + var col = 0 # current column + var oldpos = my.bufpos + while my.buf[my.bufpos] != '\0': + var oldlen = my.row.len + if oldlen < col+1: + setLen(my.row, col+1) + my.row[col] = "" + parseField(my, my.row[col]) + inc(col) + if my.buf[my.bufpos] == my.sep: + inc(my.bufpos) + else: + case my.buf[my.bufpos] + of '\c', '\l': + # skip empty lines: + while true: + case my.buf[my.bufpos] + of '\c': my.bufpos = handleCR(my, my.bufpos) + of '\l': my.bufpos = handleLF(my, my.bufpos) + else: break + of '\0': nil + else: error(my, my.bufpos, my.sep & " expected") + break + + setlen(my.row, col) + result = col > 0 + if result and col != columns and columns > 0: + error(my, oldpos+1, $columns & " columns expected, but found " & + $col & " columns") + inc(my.currRow) + +proc close*(my: var TCsvParser) {.inline.} = + ## closes the parser `my` and its associated input stream. + lexbase.close(my) + +when isMainModule: + import os + var s = newFileStream(ParamStr(1), fmRead) + if s == nil: quit("cannot open the file" & ParamStr(1)) + var x: TCsvParser + open(x, s, ParamStr(1)) + while readRow(x): + Echo "new row: " + for val in items(x.row): + Echo "##", val, "##" + close(x) + diff --git a/nimlib/pure/parseopt.nim b/nimlib/pure/parseopt.nim new file mode 100755 index 000000000..8f4be98f4 --- /dev/null +++ b/nimlib/pure/parseopt.nim @@ -0,0 +1,152 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2009 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## This module provides the standard Nimrod command line parser. +## It supports one convenience iterator over all command line options and some +## lower-level features. + +{.push debugger: off.} + +import + os, strutils + +type + TCmdLineKind* = enum ## the detected command line token + cmdEnd, ## end of command line reached + cmdArgument, ## argument detected + cmdLongoption, ## a long option ``--option`` detected + cmdShortOption ## a short option ``-c`` detected + TOptParser* = + object of TObject ## this object implements the command line parser + cmd: string + pos: int + inShortState: bool + kind*: TCmdLineKind ## the dected command line token + key*, val*: string ## key and value pair; ``key`` is the option + ## or the argument, ``value`` is not "" if + ## the option was given a value + +proc initOptParser*(cmdline = ""): TOptParser = + ## inits the option parser. If ``cmdline == ""``, the real command line + ## (as provided by the ``OS`` module) is taken. + result.pos = 0 + result.inShortState = false + if cmdline != "": + result.cmd = cmdline + else: + result.cmd = "" + for i in countup(1, ParamCount()): + result.cmd = result.cmd & quoteIfContainsWhite(paramStr(i)) & ' ' + result.kind = cmdEnd + result.key = "" + result.val = "" + +proc init*(cmdline: string = ""): TOptParser {.deprecated.} = + ## **Deprecated since version 0.8.2**: Use `initOptParser` instead. + result = initOptParser(cmdline) + +proc parseWord(s: string, i: int, w: var string, + delim: TCharSet = {'\x09', ' ', '\0'}): int = + result = i + if s[result] == '\"': + inc(result) + while not (s[result] in {'\0', '\"'}): + add(w, s[result]) + inc(result) + if s[result] == '\"': inc(result) + else: + while not (s[result] in delim): + add(w, s[result]) + inc(result) + +proc handleShortOption(p: var TOptParser) = + var i = p.pos + p.kind = cmdShortOption + add(p.key, p.cmd[i]) + inc(i) + p.inShortState = true + while p.cmd[i] in {'\x09', ' '}: + inc(i) + p.inShortState = false + if p.cmd[i] in {':', '='}: + inc(i) + p.inShortState = false + while p.cmd[i] in {'\x09', ' '}: inc(i) + i = parseWord(p.cmd, i, p.val) + if p.cmd[i] == '\0': p.inShortState = false + p.pos = i + +proc next*(p: var TOptParser) = + ## parses the first or next option; ``p.kind`` describes what token has been + ## parsed. ``p.key`` and ``p.val`` are set accordingly. + var i = p.pos + while p.cmd[i] in {'\x09', ' '}: inc(i) + p.pos = i + setlen(p.key, 0) + setlen(p.val, 0) + if p.inShortState: + handleShortOption(p) + return + case p.cmd[i] + of '\0': + p.kind = cmdEnd + of '-': + inc(i) + if p.cmd[i] == '-': + p.kind = cmdLongOption + inc(i) + i = parseWord(p.cmd, i, p.key, {'\0', ' ', '\x09', ':', '='}) + while p.cmd[i] in {'\x09', ' '}: inc(i) + if p.cmd[i] in {':', '='}: + inc(i) + while p.cmd[i] in {'\x09', ' '}: inc(i) + p.pos = parseWord(p.cmd, i, p.val) + else: + p.pos = i + else: + p.pos = i + handleShortOption(p) + else: + p.kind = cmdArgument + p.pos = parseWord(p.cmd, i, p.key) + +proc cmdLineRest*(p: TOptParser): string = + ## retrieves the rest of the command line that has not been parsed yet. + result = strip(copy(p.cmd, p.pos, len(p.cmd) - 1)) + +proc getRestOfCommandLine*(p: TOptParser): string {.deprecated.} = + ## **Deprecated since version 0.8.2**: Use `cmdLineRest` instead. + result = cmdLineRest(p) + +iterator getopt*(): tuple[kind: TCmdLineKind, key, val: string] = + ## This is an convenience iterator for iterating over the command line. + ## This uses the TOptParser object. Example: + ## + ## .. code-block:: nimrod + ## var + ## filename = "" + ## for kind, key, val in getopt(): + ## case kind + ## of cmdArgument: + ## filename = key + ## of cmdLongOption, cmdShortOption: + ## case key + ## of "help", "h": writeHelp() + ## of "version", "v": writeVersion() + ## of cmdEnd: assert(false) # cannot happen + ## if filename == "": + ## # no filename has been given, so we show the help: + ## writeHelp() + var p = initOptParser() + while true: + next(p) + if p.kind == cmdEnd: break + yield (p.kind, p.key, p.val) + +{.pop.} diff --git a/nimlib/pure/parsesql.nim b/nimlib/pure/parsesql.nim new file mode 100755 index 000000000..2109c273a --- /dev/null +++ b/nimlib/pure/parsesql.nim @@ -0,0 +1,1345 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2009 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## The ``parsesql`` module implements a high performance SQL file +## parser. It parses PostgreSQL syntax and the SQL ANSI standard. + +import + hashes, strutils, lexbase, streams + +# ------------------- scanner ------------------------------------------------- + +type + TTokKind = enum ## enumeration of all SQL tokens + tkInvalid, ## invalid token + tkEof, ## end of file reached + tkIdentifier, ## abc + tkQuotedIdentifier, ## "abc" + tkStringConstant, ## 'abc' + tkEscapeConstant, ## e'abc' + tkDollarQuotedConstant, ## $tag$abc$tag$ + tkBitStringConstant, ## B'00011' + tkHexStringConstant, ## x'00011' + tkInteger, + tkNumeric, + tkOperator, ## + - * / < > = ~ ! @ # % ^ & | ` ? + tkSemicolon, ## ';' + tkColon, ## ':' + tkComma, ## ',' + tkParLe, ## '(' + tkParRi, ## ')' + tkBracketLe, ## '[' + tkBracketRi, ## ']' + tkDot ## '.' + + TToken {.final.} = object # a token + kind: TTokKind # the type of the token + literal: string # the parsed (string) literal + + TSqlLexer* = object of TBaseLexer ## the parser object. + filename: string + +const + tokKindToStr: array[TTokKind, string] = [ + "invalid", "[EOF]", "identifier", "quoted identifier", "string constant", + "escape string constant", "dollar quoted constant", "bit string constant", + "hex string constant", "integer constant", "numeric constant", "operator", + ";", ":", ",", "(", ")", "[", "]", "." + ] + +proc open(L: var TSqlLexer, input: PStream, filename: string) = + lexbase.open(L, input) + L.filename = filename + +proc close(L: var TSqlLexer) = + lexbase.close(L) + +proc getColumn(L: TSqlLexer): int = + ## get the current column the parser has arrived at. + result = getColNumber(L, L.bufPos) + +proc getLine(L: TSqlLexer): int = + result = L.linenumber + +proc handleHexChar(c: var TSqlLexer, xi: var int) = + case c.buf[c.bufpos] + of '0'..'9': + xi = (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('0')) + inc(c.bufpos) + of 'a'..'f': + xi = (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('a') + 10) + inc(c.bufpos) + of 'A'..'F': + xi = (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('A') + 10) + inc(c.bufpos) + else: + nil + +proc handleOctChar(c: var TSqlLexer, xi: var int) = + if c.buf[c.bufpos] in {'0'..'7'}: + xi = (xi shl 3) or (ord(c.buf[c.bufpos]) - ord('0')) + inc(c.bufpos) + +proc getEscapedChar(c: var TSqlLexer, tok: var TToken) = + inc(c.bufpos) + case c.buf[c.bufpos] + of 'n', 'N': + add(tok.literal, '\L') + Inc(c.bufpos) + of 'r', 'R', 'c', 'C': + add(tok.literal, '\c') + Inc(c.bufpos) + of 'l', 'L': + add(tok.literal, '\L') + Inc(c.bufpos) + of 'f', 'F': + add(tok.literal, '\f') + inc(c.bufpos) + of 'e', 'E': + add(tok.literal, '\e') + Inc(c.bufpos) + of 'a', 'A': + add(tok.literal, '\a') + Inc(c.bufpos) + of 'b', 'B': + add(tok.literal, '\b') + Inc(c.bufpos) + of 'v', 'V': + add(tok.literal, '\v') + Inc(c.bufpos) + of 't', 'T': + add(tok.literal, '\t') + Inc(c.bufpos) + of '\'', '\"': + add(tok.literal, c.buf[c.bufpos]) + Inc(c.bufpos) + of '\\': + add(tok.literal, '\\') + Inc(c.bufpos) + of 'x', 'X': + inc(c.bufpos) + var xi = 0 + handleHexChar(c, xi) + handleHexChar(c, xi) + add(tok.literal, Chr(xi)) + of '0'..'7': + var xi = 0 + handleOctChar(c, xi) + handleOctChar(c, xi) + handleOctChar(c, xi) + if (xi <= 255): add(tok.literal, Chr(xi)) + else: tok.kind = tkInvalid + else: tok.kind = tkInvalid + +proc HandleCRLF(c: var TSqlLexer, pos: int): int = + case c.buf[pos] + of '\c': result = lexbase.HandleCR(c, pos) + of '\L': result = lexbase.HandleLF(c, pos) + else: result = pos + +proc skip(c: var TSqlLexer) = + var pos = c.bufpos + var buf = c.buf + var nested = 0 + while true: + case buf[pos] + of ' ', '\t': + Inc(pos) + of '-': + if buf[pos+1] == '-': + while not (buf[pos] in {'\c', '\L', lexbase.EndOfFile}): inc(pos) + else: + break + of '/': + if buf[pos+1] == '*': + inc(pos,2) + while true: + case buf[pos] + of '\0': break + of '\c', '\L': + pos = HandleCRLF(c, pos) + buf = c.buf + of '*': + if buf[pos+1] == '/': + inc(pos, 2) + if nested <= 0: break + dec(nested) + else: + inc(pos) + of '/': + if buf[pos+1] == '*': + inc(pos, 2) + inc(nested) + else: + inc(pos) + else: inc(pos) + else: break + of '\c', '\L': + pos = HandleCRLF(c, pos) + buf = c.buf + else: + break # EndOfFile also leaves the loop + c.bufpos = pos + +proc getString(c: var TSqlLexer, tok: var TToken, kind: TTokKind) = + var pos = c.bufPos + 1 + var buf = c.buf + tok.kind = kind + block parseLoop: + while true: + while true: + var ch = buf[pos] + if ch == '\'': + if buf[pos+1] == '\'': + inc(pos, 2) + add(tok.literal, '\'') + else: + inc(pos) + break + elif ch in {'\c', '\L', lexbase.EndOfFile}: + tok.kind = tkInvalid + break parseLoop + elif (ch == '\\') and kind == tkEscapeConstant: + c.bufPos = pos + getEscapedChar(c, tok) + pos = c.bufPos + else: + add(tok.literal, ch) + Inc(pos) + c.bufpos = pos + var line = c.linenumber + skip(c) + if c.linenumber > line: + # a new line whitespace has been parsed, so we check if the string + # continues after the whitespace: + buf = c.buf # may have been reallocated + pos = c.bufpos + if buf[pos] == '\'': inc(pos) + else: break parseLoop + else: break parseLoop + c.bufpos = pos + +proc getDollarString(c: var TSqlLexer, tok: var TToken) = + var pos = c.bufPos + 1 + var buf = c.buf + tok.kind = tkDollarQuotedConstant + var tag = "$" + while buf[pos] in IdentChars: + add(tag, buf[pos]) + inc(pos) + if buf[pos] == '$': inc(pos) + else: + tok.kind = tkInvalid + return + while true: + case buf[pos] + of '\c', '\L': + pos = HandleCRLF(c, pos) + buf = c.buf + add(tok.literal, "\L") + of '\0': + tok.kind = tkInvalid + break + of '$': + inc(pos) + var tag2 = "$" + while buf[pos] in IdentChars: + add(tag2, buf[pos]) + inc(pos) + if buf[pos] == '$': inc(pos) + if tag2 == tag: break + add(tok.literal, tag2) + add(tok.literal, '$') + else: + add(tok.literal, buf[pos]) + inc(pos) + c.bufpos = pos + +proc getSymbol(c: var TSqlLexer, tok: var TToken) = + var pos = c.bufpos + var buf = c.buf + while true: + add(tok.literal, buf[pos]) + Inc(pos) + if not (buf[pos] in {'a'..'z','A'..'Z','0'..'9','_','$', '\128'..'\255'}): + break + c.bufpos = pos + tok.kind = tkIdentifier + +proc getQuotedIdentifier(c: var TSqlLexer, tok: var TToken) = + var pos = c.bufPos + 1 + var buf = c.buf + tok.kind = tkQuotedIdentifier + while true: + var ch = buf[pos] + if ch == '\"': + if buf[pos+1] == '\"': + inc(pos, 2) + add(tok.literal, '\"') + else: + inc(pos) + break + elif ch in {'\c', '\L', lexbase.EndOfFile}: + tok.kind = tkInvalid + break + else: + add(tok.literal, ch) + Inc(pos) + c.bufpos = pos + +proc getBitHexString(c: var TSqlLexer, tok: var TToken, validChars: TCharSet) = + var pos = c.bufPos + 1 + var buf = c.buf + block parseLoop: + while true: + while true: + var ch = buf[pos] + if ch in validChars: + add(tok.literal, ch) + Inc(pos) + elif ch == '\'': + inc(pos) + break + else: + tok.kind = tkInvalid + break parseLoop + c.bufpos = pos + var line = c.linenumber + skip(c) + if c.linenumber > line: + # a new line whitespace has been parsed, so we check if the string + # continues after the whitespace: + buf = c.buf # may have been reallocated + pos = c.bufpos + if buf[pos] == '\'': inc(pos) + else: break parseLoop + else: break parseLoop + c.bufpos = pos + +proc getNumeric(c: var TSqlLexer, tok: var TToken) = + tok.kind = tkInteger + var pos = c.bufPos + var buf = c.buf + while buf[pos] in Digits: + add(tok.literal, buf[pos]) + inc(pos) + if buf[pos] == '.': + tok.kind = tkNumeric + add(tok.literal, buf[pos]) + inc(pos) + while buf[pos] in Digits: + add(tok.literal, buf[pos]) + inc(pos) + if buf[pos] in {'E', 'e'}: + tok.kind = tkNumeric + add(tok.literal, buf[pos]) + inc(pos) + if buf[pos] == '+': + inc(pos) + elif buf[pos] == '-': + add(tok.literal, buf[pos]) + inc(pos) + if buf[pos] in Digits: + while buf[pos] in Digits: + add(tok.literal, buf[pos]) + inc(pos) + else: + tok.kind = tkInvalid + c.bufpos = pos + +proc getOperator(c: var TSqlLexer, tok: var TToken) = + const operators = {'+', '-', '*', '/', '<', '>', '=', '~', '!', '@', '#', '%', + '^', '&', '|', '`', '?'} + tok.kind = tkOperator + var pos = c.bufPos + var buf = c.buf + var trailingPlusMinus = false + while true: + case buf[pos] + of '-': + if buf[pos] == '-': break + if not trailingPlusMinus and buf[pos+1] notin operators and + tok.literal.len > 0: break + of '/': + if buf[pos] == '*': break + of '~', '!', '@', '#', '%', '^', '&', '|', '`', '?': + trailingPlusMinus = true + of '+': + if not trailingPlusMinus and buf[pos+1] notin operators and + tok.literal.len > 0: break + of '*', '<', '>', '=': nil + else: break + add(tok.literal, buf[pos]) + inc(pos) + c.bufpos = pos + +proc getTok(c: var TSqlLexer, tok: var TToken) = + tok.kind = tkInvalid + setlen(tok.literal, 0) + skip(c) + case c.buf[c.bufpos] + of ';': + tok.kind = tkSemiColon + inc(c.bufPos) + add(tok.literal, ';') + of ',': + tok.kind = tkComma + inc(c.bufpos) + add(tok.literal, ',') + of ':': + tok.kind = tkColon + inc(c.bufpos) + add(tok.literal, ':') + of 'e', 'E': + if c.buf[c.bufPos + 1] == '\'': + Inc(c.bufPos) + getString(c, tok, tkEscapeConstant) + else: + getSymbol(c, tok) + of 'b', 'B': + if c.buf[c.bufPos + 1] == '\'': + tok.kind = tkBitStringConstant + getBitHexString(c, tok, {'0'..'1'}) + else: + getSymbol(c, tok) + of 'x', 'X': + if c.buf[c.bufPos + 1] == '\'': + tok.kind = tkHexStringConstant + getBitHexString(c, tok, {'a'..'f','A'..'F','0'..'9'}) + else: + getSymbol(c, tok) + of '$': getDollarString(c, tok) + of '[': + tok.kind = tkBracketLe + inc(c.bufpos) + add(tok.literal, '[') + of ']': + tok.kind = tkBracketRi + Inc(c.bufpos) + add(tok.literal, ']') + of '(': + tok.kind = tkParLe + Inc(c.bufpos) + add(tok.literal, '(') + of ')': + tok.kind = tkParRi + Inc(c.bufpos) + add(tok.literal, ')') + of '.': + if c.buf[c.bufPos + 1] in Digits: + getNumeric(c, tok) + else: + tok.kind = tkDot + inc(c.bufpos) + add(tok.literal, '.') + of '0'..'9': getNumeric(c, tok) + of '\'': getString(c, tok, tkStringConstant) + of '"': getQuotedIdentifier(c, tok) + of lexbase.EndOfFile: + tok.kind = tkEof + tok.literal = "[EOF]" + of 'a', 'c', 'd', 'f'..'w', 'y', 'z', 'A', 'C', 'D', 'F'..'W', 'Y', 'Z', '_', + '\128'..'\255': + getSymbol(c, tok) + of '+', '-', '*', '/', '<', '>', '=', '~', '!', '@', '#', '%', + '^', '&', '|', '`', '?': + getOperator(c, tok) + else: + add(tok.literal, c.buf[c.bufpos]) + inc(c.bufpos) + +proc errorStr(L: TSqlLexer, msg: string): string = + result = "$1($2, $3) Error: $4" % [L.filename, $getLine(L), $getColumn(L), msg] + + +# ----------------------------- parser ---------------------------------------- + +# Operator/Element Associativity Description +# . left table/column name separator +# :: left PostgreSQL-style typecast +# [ ] left array element selection +# - right unary minus +# ^ left exponentiation +# * / % left multiplication, division, modulo +# + - left addition, subtraction +# IS IS TRUE, IS FALSE, IS UNKNOWN, IS NULL +# ISNULL test for null +# NOTNULL test for not null +# (any other) left all other native and user-defined oprs +# IN set membership +# BETWEEN range containment +# OVERLAPS time interval overlap +# LIKE ILIKE SIMILAR string pattern matching +# < > less than, greater than +# = right equality, assignment +# NOT right logical negation +# AND left logical conjunction +# OR left logical disjunction + +type + TSqlNodeKind* = enum ## kind of SQL abstract syntax tree + nkNone, + nkIdent, + nkStringLit, + nkBitStringLit, + nkHexStringLit, + nkIntegerLit, + nkNumericLit, + nkPrimaryKey, + nkForeignKey, + nkNotNull, + + nkStmtList, + nkDot, + nkDotDot, + nkPrefix, + nkInfix, + nkCall, + nkColumnReference, + nkReferences, + nkDefault, + nkCheck, + nkConstraint, + nkUnique, + nkIdentity, + nkColumnDef, ## name, datatype, constraints + nkInsert, + nkUpdate, + nkDelete, + nkSelect, + nkSelectDistinct, + nkSelectColumns, + nkAsgn, + nkFrom, + nkGroup, + nkHaving, + nkOrder, + nkDesc, + nkUnion, + nkIntersect, + nkExcept, + nkColumnList, + nkValueList, + nkWhere, + nkCreateTable, + nkCreateTableIfNotExists, + nkCreateType, + nkCreateTypeIfNotExists, + nkCreateIndex, + nkCreateIndexIfNotExists, + nkEnumDef + +type + EInvalidSql* = object of EBase ## Invalid SQL encountered + PSqlNode* = ref TSqlNode ## an SQL abstract syntax tree node + TSqlNode* = object ## an SQL abstract syntax tree node + case kind*: TSqlNodeKind ## kind of syntax tree + of nkIdent, nkStringLit, nkBitStringLit, nkHexStringLit, + nkIntegerLit, nkNumericLit: + strVal*: string ## AST leaf: the identifier, numeric literal + ## string literal, etc. + else: + sons*: seq[PSqlNode] ## the node's children + + TSqlParser* = object of TSqlLexer ## SQL parser object + tok: TToken + +proc newNode(k: TSqlNodeKind): PSqlNode = + new(result) + result.kind = k + +proc newNode(k: TSqlNodeKind, s: string): PSqlNode = + new(result) + result.kind = k + result.strVal = s + +proc len*(n: PSqlNode): int = + if isNil(n.sons): result = 0 + else: result = n.sons.len + +proc add*(father, n: PSqlNode) = + if isNil(father.sons): father.sons = @[] + add(father.sons, n) + +proc getTok(p: var TSqlParser) = + getTok(p, p.tok) + +proc sqlError(p: TSqlParser, msg: string) = + var e: ref EInvalidSql + new(e) + e.msg = errorStr(p, msg) + raise e + +proc isKeyw(p: TSqlParser, keyw: string): bool = + result = p.tok.kind == tkIdentifier and + cmpIgnoreCase(p.tok.literal, keyw) == 0 + +proc isOpr(p: TSqlParser, opr: string): bool = + result = p.tok.kind == tkOperator and + cmpIgnoreCase(p.tok.literal, opr) == 0 + +proc optKeyw(p: var TSqlParser, keyw: string) = + if p.tok.kind == tkIdentifier and cmpIgnoreCase(p.tok.literal, keyw) == 0: + getTok(p) + +proc expectIdent(p: TSqlParser) = + if p.tok.kind != tkIdentifier and p.tok.kind != tkQuotedIdentifier: + sqlError(p, "identifier expected") + +proc expect(p: TSqlParser, kind: TTokKind) = + if p.tok.kind != kind: + sqlError(p, tokKindToStr[kind] & " expected") + +proc eat(p: var TSqlParser, kind: TTokKind) = + if p.tok.kind == kind: + getTok(p) + else: + sqlError(p, tokKindToStr[kind] & " expected") + +proc eat(p: var TSqlParser, keyw: string) = + if isKeyw(p, keyw): + getTok(p) + else: + sqlError(p, keyw.toUpper() & " expected") + +proc parseDataType(p: var TSqlParser): PSqlNode = + if isKeyw(p, "enum"): + result = newNode(nkEnumDef) + getTok(p) + if p.tok.kind == tkParLe: + getTok(p) + result.add(newNode(nkStringLit, p.tok.literal)) + getTok(p) + while p.tok.kind == tkComma: + getTok(p) + result.add(newNode(nkStringLit, p.tok.literal)) + getTok(p) + eat(p, tkParRi) + else: + expectIdent(p) + result = newNode(nkIdent, p.tok.literal) + getTok(p) + # ignore (12, 13) part: + if p.tok.kind == tkParLe: + getTok(p) + expect(p, tkInteger) + getTok(p) + while p.tok.kind == tkComma: + getTok(p) + expect(p, tkInteger) + getTok(p) + eat(p, tkParRi) + +proc getPrecedence(p: TSqlParser): int = + if isOpr(p, "*") or isOpr(p, "/") or isOpr(p, "%"): + result = 6 + elif isOpr(p, "+") or isOpr(p, "-"): + result = 5 + elif isOpr(p, "=") or isOpr(p, "<") or isOpr(p, ">") or isOpr(p, ">=") or + isOpr(p, "<=") or isOpr(p, "<>") or isOpr(p, "!=") or isKeyw(p, "is") or + isKeyw(p, "like"): + result = 3 + elif isKeyw(p, "and"): + result = 2 + elif isKeyw(p, "or"): + result = 1 + elif p.tok.kind == tkOperator: + # user-defined operator: + result = 0 + else: + result = - 1 + +proc parseExpr(p: var TSqlParser): PSqlNode + +proc identOrLiteral(p: var TSqlParser): PSqlNode = + case p.tok.kind + of tkIdentifier, tkQuotedIdentifier: + result = newNode(nkIdent, p.tok.literal) + getTok(p) + of tkStringConstant, tkEscapeConstant, tkDollarQuotedConstant: + result = newNode(nkStringLit, p.tok.literal) + getTok(p) + of tkBitStringConstant: + result = newNode(nkBitStringLit, p.tok.literal) + getTok(p) + of tkHexStringConstant: + result = newNode(nkHexStringLit, p.tok.literal) + getTok(p) + of tkInteger: + result = newNode(nkIntegerLit, p.tok.literal) + getTok(p) + of tkNumeric: + result = newNode(nkNumericLit, p.tok.literal) + getTok(p) + of tkParLe: + getTok(p) + result = parseExpr(p) + eat(p, tkParRi) + else: + sqlError(p, "expression expected") + getTok(p) # we must consume a token here to prevend endless loops! + +proc primary(p: var TSqlParser): PSqlNode = + if p.tok.kind == tkOperator or isKeyw(p, "not"): + result = newNode(nkPrefix) + result.add(newNode(nkIdent, p.tok.literal)) + getTok(p) + result.add(primary(p)) + return + result = identOrLiteral(p) + while true: + case p.tok.kind + of tkParLe: + var a = result + result = newNode(nkCall) + result.add(a) + getTok(p) + while true: + result.add(parseExpr(p)) + if p.tok.kind == tkComma: getTok(p) + else: break + eat(p, tkParRi) + of tkDot: + getTok(p) + var a = result + if p.tok.kind == tkDot: + getTok(p) + result = newNode(nkDotDot) + else: + result = newNode(nkDot) + result.add(a) + if isOpr(p, "*"): + result.add(newNode(nkIdent, "*")) + elif p.tok.kind in {tkIdentifier, tkQuotedIdentifier}: + result.add(newNode(nkIdent, p.tok.literal)) + else: + sqlError(p, "identifier expected") + getTok(p) + else: break + +proc lowestExprAux(p: var TSqlParser, v: var PSqlNode, limit: int): int = + var + v2, node, opNode: PSqlNode + v = primary(p) # expand while operators have priorities higher than 'limit' + var opPred = getPrecedence(p) + result = opPred + while opPred > limit: + node = newNode(nkInfix) + opNode = newNode(nkIdent, p.tok.literal) + getTok(p) + result = lowestExprAux(p, v2, opPred) + node.add(opNode) + node.add(v) + node.add(v2) + v = node + opPred = getPrecedence(p) + +proc parseExpr(p: var TSqlParser): PSqlNode = + discard lowestExprAux(p, result, - 1) + +proc parseTableName(p: var TSqlParser): PSqlNode = + expectIdent(p) + result = primary(p) + +proc parseColumnReference(p: var TSqlParser): PSqlNode = + result = parseTableName(p) + if p.tok.kind == tkParLe: + getTok(p) + var a = result + result = newNode(nkColumnReference) + result.add(a) + result.add(parseTableName(p)) + while p.tok.kind == tkComma: + getTok(p) + result.add(parseTableName(p)) + eat(p, tkParRi) + +proc parseCheck(p: var TSqlParser): PSqlNode = + getTok(p) + result = newNode(nkCheck) + result.add(parseExpr(p)) + +proc parseConstraint(p: var TSqlParser): PSqlNode = + getTok(p) + result = newNode(nkConstraint) + expectIdent(p) + result.add(newNode(nkIdent, p.tok.literal)) + getTok(p) + eat(p, "check") + result.add(parseExpr(p)) + +proc parseColumnConstraints(p: var TSqlParser, result: PSqlNode) = + while true: + if isKeyw(p, "default"): + getTok(p) + var n = newNode(nkDefault) + n.add(parseExpr(p)) + result.add(n) + elif isKeyw(p, "references"): + getTok(p) + var n = newNode(nkReferences) + n.add(parseColumnReference(p)) + result.add(n) + elif isKeyw(p, "not"): + getTok(p) + eat(p, "null") + result.add(newNode(nkNotNull)) + elif isKeyw(p, "identity"): + getTok(p) + result.add(newNode(nkIdentity)) + elif isKeyw(p, "primary"): + getTok(p) + eat(p, "key") + result.add(newNode(nkPrimaryKey)) + elif isKeyw(p, "check"): + result.add(parseCheck(p)) + elif isKeyw(p, "constraint"): + result.add(parseConstraint(p)) + elif isKeyw(p, "unique"): + result.add(newNode(nkUnique)) + else: + break + +proc parseColumnDef(p: var TSqlParser): PSqlNode = + expectIdent(p) + result = newNode(nkColumnDef) + result.add(newNode(nkIdent, p.tok.literal)) + getTok(p) + result.add(parseDataType(p)) + parseColumnConstraints(p, result) + +proc parseIfNotExists(p: var TSqlParser, k: TSqlNodeKind): PSqlNode = + getTok(p) + if isKeyw(p, "if"): + getTok(p) + eat(p, "not") + eat(p, "exists") + result = newNode(succ(k)) + else: + result = newNode(k) + +proc parseParIdentList(p: var TSqlParser, father: PSqlNode) = + eat(p, tkParLe) + while true: + expectIdent(p) + father.add(newNode(nkIdent, p.tok.literal)) + getTok(p) + if p.tok.kind != tkComma: break + getTok(p) + eat(p, tkParRi) + +proc parseTableConstraint(p: var TSqlParser): PSqlNode = + if isKeyw(p, "primary"): + getTok(p) + eat(p, "key") + result = newNode(nkPrimaryKey) + parseParIdentList(p, result) + elif isKeyw(p, "foreign"): + getTok(p) + eat(p, "key") + result = newNode(nkForeignKey) + parseParIdentList(p, result) + eat(p, "references") + var m = newNode(nkReferences) + m.add(parseColumnReference(p)) + result.add(m) + elif isKeyw(p, "unique"): + getTok(p) + eat(p, "key") + result = newNode(nkUnique) + parseParIdentList(p, result) + elif isKeyw(p, "check"): + result = parseCheck(p) + elif isKeyw(p, "constraint"): + result = parseConstraint(p) + else: + sqlError(p, "column definition expected") + +proc parseTableDef(p: var TSqlParser): PSqlNode = + result = parseIfNotExists(p, nkCreateTable) + expectIdent(p) + result.add(newNode(nkIdent, p.tok.literal)) + getTok(p) + if p.tok.kind == tkParLe: + while true: + getTok(p) + if p.tok.kind == tkIdentifier or p.tok.kind == tkQuotedIdentifier: + result.add(parseColumnDef(p)) + else: + result.add(parseTableConstraint(p)) + if p.tok.kind != tkComma: break + eat(p, tkParRi) + +proc parseTypeDef(p: var TSqlParser): PSqlNode = + result = parseIfNotExists(p, nkCreateType) + expectIdent(p) + result.add(newNode(nkIdent, p.tok.literal)) + getTok(p) + eat(p, "as") + result.add(parseDataType(p)) + +proc parseWhere(p: var TSqlParser): PSqlNode = + getTok(p) + result = newNode(nkWhere) + result.add(parseExpr(p)) + +proc parseIndexDef(p: var TSqlParser): PSqlNode = + result = parseIfNotExists(p, nkCreateIndex) + if isKeyw(p, "primary"): + getTok(p) + eat(p, "key") + result.add(newNode(nkPrimaryKey)) + else: + expectIdent(p) + result.add(newNode(nkIdent, p.tok.literal)) + getTok(p) + eat(p, "on") + expectIdent(p) + result.add(newNode(nkIdent, p.tok.literal)) + getTok(p) + eat(p, tkParLe) + expectIdent(p) + result.add(newNode(nkIdent, p.tok.literal)) + getTok(p) + while p.tok.kind == tkComma: + getTok(p) + expectIdent(p) + result.add(newNode(nkIdent, p.tok.literal)) + getTok(p) + eat(p, tkParRi) + +proc parseInsert(p: var TSqlParser): PSqlNode = + getTok(p) + eat(p, "into") + expectIdent(p) + result = newNode(nkInsert) + result.add(newNode(nkIdent, p.tok.literal)) + getTok(p) + if p.tok.kind == tkParLe: + var n = newNode(nkColumnList) + parseParIdentList(p, n) + else: + result.add(nil) + if isKeyw(p, "default"): + getTok(p) + eat(p, "values") + result.add(newNode(nkDefault)) + else: + eat(p, "values") + eat(p, tkParLe) + var n = newNode(nkValueList) + while true: + n.add(parseExpr(p)) + if p.tok.kind != tkComma: break + getTok(p) + result.add(n) + eat(p, tkParRi) + +proc parseUpdate(p: var TSqlParser): PSqlNode = + getTok(p) + result = newNode(nkUpdate) + result.add(primary(p)) + eat(p, "set") + while true: + var a = newNode(nkAsgn) + expectIdent(p) + a.add(newNode(nkIdent, p.tok.literal)) + getTok(p) + if isOpr(p, "="): getTok(p) + else: sqlError(p, "= expected") + a.add(parseExpr(p)) + result.add(a) + if p.tok.kind != tkComma: break + getTok(p) + if isKeyw(p, "where"): + result.add(parseWhere(p)) + else: + result.add(nil) + +proc parseDelete(p: var TSqlParser): PSqlNode = + getTok(p) + result = newNode(nkDelete) + eat(p, "from") + result.add(primary(p)) + if isKeyw(p, "where"): + result.add(parseWhere(p)) + else: + result.add(nil) + +proc parseSelect(p: var TSqlParser): PSqlNode = + getTok(p) + if isKeyw(p, "distinct"): + getTok(p) + result = newNode(nkSelectDistinct) + elif isKeyw(p, "all"): + getTok(p) + result = newNode(nkSelect) + var a = newNode(nkSelectColumns) + while true: + if isOpr(p, "*"): + a.add(newNode(nkIdent, "*")) + getTok(p) + else: + a.add(parseExpr(p)) + if p.tok.kind != tkComma: break + getTok(p) + result.add(a) + if isKeyw(p, "from"): + var f = newNode(nkFrom) + while true: + getTok(p) + f.add(parseExpr(p)) + if p.tok.kind != tkComma: break + result.add(f) + if isKeyw(p, "where"): + result.add(parseWhere(p)) + if isKeyw(p, "group"): + getTok(p) + eat(p, "by") + var g = newNode(nkGroup) + while true: + g.add(parseExpr(p)) + if p.tok.kind != tkComma: break + getTok(p) + result.add(g) + if isKeyw(p, "having"): + var h = newNode(nkHaving) + while true: + getTok(p) + h.add(parseExpr(p)) + if p.tok.kind != tkComma: break + result.add(h) + if isKeyw(p, "union"): + result.add(newNode(nkUnion)) + getTok(p) + elif isKeyw(p, "intersect"): + result.add(newNode(nkIntersect)) + getTok(p) + elif isKeyw(p, "except"): + result.add(newNode(nkExcept)) + getTok(p) + if isKeyw(p, "order"): + getTok(p) + eat(p, "by") + var n = newNode(nkOrder) + while true: + var e = parseExpr(p) + if isKeyw(p, "asc"): getTok(p) # is default + elif isKeyw(p, "desc"): + getTok(p) + var x = newNode(nkDesc) + x.add(e) + e = x + n.add(e) + if p.tok.kind != tkComma: break + getTok(p) + result.add(n) + +proc parseStmt(p: var TSqlParser): PSqlNode = + if isKeyw(p, "create"): + getTok(p) + optKeyw(p, "cached") + optKeyw(p, "memory") + optKeyw(p, "temp") + optKeyw(p, "global") + optKeyw(p, "local") + optKeyw(p, "temporary") + optKeyw(p, "unique") + optKeyw(p, "hash") + if isKeyw(p, "table"): + result = parseTableDef(p) + elif isKeyw(p, "type"): + result = parseTypeDef(p) + elif isKeyw(p, "index"): + result = parseIndexDef(p) + else: + sqlError(p, "TABLE expected") + elif isKeyw(p, "insert"): + result = parseInsert(p) + elif isKeyw(p, "update"): + result = parseUpdate(p) + elif isKeyw(p, "delete"): + result = parseDelete(p) + elif isKeyw(p, "select"): + result = parseSelect(p) + else: + sqlError(p, "CREATE expected") + +proc open(p: var TSqlParser, input: PStream, filename: string) = + ## opens the parser `p` and assigns the input stream `input` to it. + ## `filename` is only used for error messages. + open(TSqlLexer(p), input, filename) + p.tok.kind = tkInvalid + p.tok.literal = "" + getTok(p) + +proc parse(p: var TSqlParser): PSqlNode = + ## parses the content of `p`'s input stream and returns the SQL AST. + ## Syntax errors raise an `EInvalidSql` exception. + result = newNode(nkStmtList) + while p.tok.kind != tkEof: + var s = parseStmt(p) + eat(p, tkSemiColon) + result.add(s) + if result.len == 1: + result = result.sons[0] + +proc close(p: var TSqlParser) = + ## closes the parser `p`. The associated input stream is closed too. + close(TSqlLexer(p)) + +proc parseSQL*(input: PStream, filename: string): PSqlNode = + ## parses the SQL from `input` into an AST and returns the AST. + ## `filename` is only used for error messages. + ## Syntax errors raise an `EInvalidSql` exception. + var p: TSqlParser + open(p, input, filename) + try: + result = parse(p) + finally: + close(p) + +proc ra(n: PSqlNode, s: var string, indent: int) + +proc rs(n: PSqlNode, s: var string, indent: int, + prefix = "(", suffix = ")", + sep = ", ") = + if n.len > 0: + s.add(prefix) + for i in 0 .. n.len-1: + if i > 0: s.add(sep) + ra(n.sons[i], s, indent) + s.add(suffix) + +proc ra(n: PSqlNode, s: var string, indent: int) = + if n == nil: return + case n.kind + of nkNone: nil + of nkIdent: + if allCharsInSet(n.strVal, {'\33'..'\127'}): + s.add(n.strVal) + else: + s.add("\"" & replace(n.strVal, "\"", "\"\"") & "\"") + of nkStringLit: + s.add(escape(n.strVal, "e'", "'")) + of nkBitStringLit: + s.add("b'" & n.strVal & "'") + of nkHexStringLit: + s.add("x'" & n.strVal & "'") + of nkIntegerLit, nkNumericLit: + s.add(n.strVal) + of nkPrimaryKey: + s.add(" primary key") + rs(n, s, indent) + of nkForeignKey: + s.add(" foreign key") + rs(n, s, indent) + of nkNotNull: + s.add(" not null") + of nkDot: + ra(n.sons[0], s, indent) + s.add(".") + ra(n.sons[1], s, indent) + of nkDotDot: + ra(n.sons[0], s, indent) + s.add(". .") + ra(n.sons[1], s, indent) + of nkPrefix: + s.add('(') + ra(n.sons[0], s, indent) + s.add(' ') + ra(n.sons[1], s, indent) + s.add(')') + of nkInfix: + s.add('(') + ra(n.sons[1], s, indent) + s.add(' ') + ra(n.sons[0], s, indent) + s.add(' ') + ra(n.sons[2], s, indent) + s.add(')') + of nkCall, nkColumnReference: + ra(n.sons[0], s, indent) + s.add('(') + for i in 1..n.len-1: + if i > 1: s.add(", ") + ra(n.sons[i], s, indent) + s.add(')') + of nkReferences: + s.add(" references ") + ra(n.sons[0], s, indent) + of nkDefault: + s.add(" default ") + ra(n.sons[0], s, indent) + of nkCheck: + s.add(" check ") + ra(n.sons[0], s, indent) + of nkConstraint: + s.add(" constraint ") + ra(n.sons[0], s, indent) + s.add(" check ") + ra(n.sons[1], s, indent) + of nkUnique: + s.add(" unique") + rs(n, s, indent) + of nkIdentity: + s.add(" identity") + of nkColumnDef: + s.add("\n ") + rs(n, s, indent, "", "", " ") + of nkStmtList: + for i in 0..n.len-1: + ra(n.sons[i], s, indent) + s.add("\n") + of nkInsert: + assert n.len == 3 + s.add("insert into ") + ra(n.sons[0], s, indent) + ra(n.sons[1], s, indent) + if n.sons[2].kind == nkDefault: + s.add("default values") + else: + s.add("\nvalues ") + ra(n.sons[2], s, indent) + s.add(';') + of nkUpdate: + s.add("update ") + ra(n.sons[0], s, indent) + s.add(" set ") + var L = n.len + for i in 1 .. L-2: + if i > 1: s.add(", ") + var it = n.sons[i] + assert it.kind == nkAsgn + ra(it, s, indent) + ra(n.sons[L-1], s, indent) + s.add(';') + of nkDelete: + s.add("delete from ") + ra(n.sons[0], s, indent) + ra(n.sons[1], s, indent) + s.add(';') + of nkSelect, nkSelectDistinct: + s.add("select ") + if n.kind == nkSelectDistinct: + s.add("distinct ") + rs(n.sons[0], s, indent, "", "", ", ") + for i in 1 .. n.len-1: ra(n.sons[i], s, indent) + s.add(';') + of nkSelectColumns: + assert(false) + of nkAsgn: + ra(n.sons[0], s, indent) + s.add(" = ") + ra(n.sons[1], s, indent) + of nkFrom: + s.add("\nfrom ") + rs(n, s, indent, "", "", ", ") + of nkGroup: + s.add("\ngroup by") + rs(n, s, indent, "", "", ", ") + of nkHaving: + s.add("\nhaving") + rs(n, s, indent, "", "", ", ") + of nkOrder: + s.add("\norder by ") + rs(n, s, indent, "", "", ", ") + of nkDesc: + ra(n.sons[0], s, indent) + s.add(" desc") + of nkUnion: + s.add(" union") + of nkIntersect: + s.add(" intersect") + of nkExcept: + s.add(" except") + of nkColumnList: + rs(n, s, indent) + of nkValueList: + s.add("values ") + rs(n, s, indent) + of nkWhere: + s.add("\nwhere ") + ra(n.sons[0], s, indent) + of nkCreateTable, nkCreateTableIfNotExists: + s.add("create table ") + if n.kind == nkCreateTableIfNotExists: + s.add("if not exists ") + ra(n.sons[0], s, indent) + s.add('(') + for i in 1..n.len-1: + if i > 1: s.add(", ") + ra(n.sons[i], s, indent) + s.add(");") + of nkCreateType, nkCreateTypeIfNotExists: + s.add("create type ") + if n.kind == nkCreateTypeIfNotExists: + s.add("if not exists ") + ra(n.sons[0], s, indent) + s.add(" as ") + ra(n.sons[1], s, indent) + s.add(';') + of nkCreateIndex, nkCreateIndexIfNotExists: + s.add("create index ") + if n.kind == nkCreateIndexIfNotExists: + s.add("if not exists ") + ra(n.sons[0], s, indent) + s.add(" on ") + ra(n.sons[1], s, indent) + s.add('(') + for i in 2..n.len-1: + if i > 2: s.add(", ") + ra(n.sons[i], s, indent) + s.add(");") + of nkEnumDef: + s.add("enum ") + rs(n, s, indent) + +# What I want: +# +#select(columns = [T1.all, T2.name], +# fromm = [T1, T2], +# where = T1.name ==. T2.name, +# orderby = [name]): +# +#for row in dbQuery(db, """select x, y, z +# from a, b +# where a.name = b.name"""): +# + +#select x, y, z: +# fromm: Table1, Table2 +# where: x.name == y.name +#db.select(fromm = [t1, t2], where = t1.name == t2.name): +#for x, y, z in db.select(fromm = a, b where = a.name == b.name): +# writeln x, y, z + +proc renderSQL*(n: PSqlNode): string = + ## Converts an SQL abstract syntax tree to its string representation. + result = "" + ra(n, result, 0) + +when isMainModule: + echo(renderSQL(parseSQL(newStringStream(""" + CREATE TYPE happiness AS ENUM ('happy', 'very happy', 'ecstatic'); + CREATE TABLE holidays ( + num_weeks int, + happiness happiness + ); + CREATE INDEX table1_attr1 ON table1(attr1); + + SELECT * FROM myTab WHERE col1 = 'happy'; + """), "stdin"))) + +# CREATE TYPE happiness AS ENUM ('happy', 'very happy', 'ecstatic'); +# CREATE TABLE holidays ( +# num_weeks int, +# happiness happiness +# ); +# CREATE INDEX table1_attr1 ON table1(attr1) diff --git a/nimlib/pure/parsexml.nim b/nimlib/pure/parsexml.nim new file mode 100755 index 000000000..54f62a9a4 --- /dev/null +++ b/nimlib/pure/parsexml.nim @@ -0,0 +1,635 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2009 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## This module implements a simple high performance `XML`:idx: / `HTML`:idx: +## parser. +## The only encoding that is supported is UTF-8. The parser has been designed +## to be somewhat error correcting, so that even most "wild HTML" found on the +## web can be parsed with it. **Note:** This parser does not check that each +## ``<tag>`` has a corresponding ``</tag>``! These checks have do be +## implemented by the client code for various reasons: +## +## * Old HTML contains tags that have no end tag: ``<br>`` for example. +## * HTML tags are case insensitive, XML tags are case sensitive. Since this +## library can parse both, only the client knows which comparison is to be +## used. +## * Thus the checks would have been very difficult to implement properly with +## little benefit, especially since they are simple to implement in the +## client. The client should use the `errorMsgExpected` proc to generate +## a nice error message that fits the other error messages this library +## creates. +## +## +## Example 1: Retrieve HTML title +## ============================== +## +## The file ``examples/htmltitle.nim`` demonstrates how to use the +## XML parser to accomplish a simple task: To determine the title of an HTML +## document. +## +## .. code-block:: nimrod +## :file: examples/htmltitle.nim +## +## +## Example 2: Retrieve all HTML links +## ================================== +## +## The file ``examples/htmlrefs.nim`` demonstrates how to use the +## XML parser to accomplish another simple task: To determine all the links +## an HTML document contains. +## +## .. code-block:: nimrod +## :file: examples/htmlrefs.nim +## + +import + hashes, strutils, lexbase, streams, unicode + +# the parser treats ``<br />`` as ``<br></br>`` + +type + TXmlEventKind* = enum ## enumation of all events that may occur when parsing + xmlError, ## an error ocurred during parsing + xmlEof, ## end of file reached + xmlCharData, ## character data + xmlWhitespace, ## whitespace has been parsed + xmlComment, ## a comment has been parsed + xmlPI, ## processing instruction (``<?name something ?>``) + xmlElementStart, ## ``<elem>`` + xmlElementEnd, ## ``</elem>`` + xmlElementOpen, ## ``<elem + xmlAttribute, ## ``key = "value"`` pair + xmlElementClose, ## ``>`` + xmlCData, ## ``<![CDATA[`` ... data ... ``]]>`` + xmlEntity, ## &entity; + xmlSpecial ## ``<! ... data ... >`` + + TXmlError* = enum ## enumeration that lists all errors that can occur + errNone, ## no error + errEndOfCDataExpected, ## ``]]>`` expected + errNameExpected, ## name expected + errSemicolonExpected, ## ``;`` expected + errQmGtExpected, ## ``?>`` expected + errGtExpected, ## ``>`` expected + errEqExpected, ## ``=`` expected + errQuoteExpected, ## ``"`` or ``'`` expected + errEndOfCommentExpected ## ``-->`` expected + + TParserState = enum + stateStart, stateNormal, stateAttr, stateEmptyElementTag, stateError + + TXmlParseOption* = enum ## options for the XML parser + reportWhitespace, ## report whitespace + reportComments ## report comments + + TXmlParser* = object of TBaseLexer ## the parser object. + a, b: string + kind: TXmlEventKind + err: TXmlError + state: TParserState + filename: string + options: set[TXmlParseOption] + +const + errorMessages: array [TXmlError, string] = [ + "no error", + "']]>' expected", + "name expected", + "';' expected", + "'?>' expected", + "'>' expected", + "'=' expected", + "'\"' or \"'\" expected", + "'-->' expected" + ] + +proc open*(my: var TXmlParser, input: PStream, filename: string, + options: set[TXmlParseOption] = {}) = + ## initializes the parser with an input stream. `Filename` is only used + ## for nice error messages. The parser's behaviour can be controlled by + ## the `options` parameter: If `options` contains ``reportWhitespace`` + ## a whitespace token is reported as an ``xmlWhitespace`` event. + ## If `options` contains ``reportComments`` a comment token is reported as an + ## ``xmlComment`` event. + lexbase.open(my, input) + my.filename = filename + my.state = stateStart + my.kind = xmlError + my.a = "" + my.b = "" + my.options = options + +proc close*(my: var TXmlParser) {.inline.} = + ## closes the parser `my` and its associated input stream. + lexbase.close(my) + +proc charData*(my: TXmlParser): string {.inline.} = + ## returns the character data for the events: ``xmlCharData``, + ## ``xmlWhitespace``, ``xmlComment``, ``xmlCData``, ``xmlSpecial`` + assert(my.kind in {xmlCharData, xmlWhitespace, xmlComment, xmlCData, + xmlSpecial}) + return my.a + +proc kind*(my: TXmlParser): TXmlEventKind {.inline.} = + ## returns the current event type for the XML parser + return my.kind + +proc elementName*(my: TXmlParser): string {.inline.} = + ## returns the element name for the events: ``xmlElementStart``, + ## ``xmlElementEnd``, ``xmlElementOpen`` + assert(my.kind in {xmlElementStart, xmlElementEnd, xmlElementOpen}) + return my.a + +proc entityName*(my: TXmlParser): string {.inline.} = + ## returns the entity name for the event: ``xmlEntity`` + assert(my.kind == xmlEntity) + return my.a + +proc attrKey*(my: TXmlParser): string {.inline.} = + ## returns the attribute key for the event ``xmlAttribute`` + assert(my.kind == xmlAttribute) + return my.a + +proc attrValue*(my: TXmlParser): string {.inline.} = + ## returns the attribute value for the event ``xmlAttribute`` + assert(my.kind == xmlAttribute) + return my.b + +proc PIName*(my: TXmlParser): string {.inline.} = + ## returns the processing instruction name for the event ``xmlPI`` + assert(my.kind == xmlPI) + return my.a + +proc PIRest*(my: TXmlParser): string {.inline.} = + ## returns the rest of the processing instruction for the event ``xmlPI`` + assert(my.kind == xmlPI) + return my.b + +proc getColumn*(my: TXmlParser): int {.inline.} = + ## get the current column the parser has arrived at. + result = getColNumber(my, my.bufPos) + +proc getLine*(my: TXmlParser): int {.inline.} = + ## get the current line the parser has arrived at. + result = my.linenumber + +proc getFilename*(my: TXmlParser): string {.inline.} = + ## get the filename of the file that the parser processes. + result = my.filename + +proc errorMsg*(my: TXmlParser): string = + ## returns a helpful error message for the event ``xmlError`` + assert(my.kind == xmlError) + result = "$1($2, $3) Error: $4" % [ + my.filename, $getLine(my), $getColumn(my), errorMessages[my.err]] + +proc errorMsgExpected*(my: TXmlParser, tag: string): string = + ## returns an error message "<tag> expected" in the same format as the + ## other error messages + result = "$1($2, $3) Error: $4" % [ + my.filename, $getLine(my), $getColumn(my), "<$1> expected" % tag] + +proc markError(my: var TXmlParser, kind: TXmlError) {.inline.} = + my.err = kind + my.state = stateError + +proc parseCDATA(my: var TXMLParser) = + var pos = my.bufpos + len("<![CDATA[") + var buf = my.buf + while true: + case buf[pos] + of ']': + if buf[pos+1] == ']' and buf[pos+2] == '>': + inc(pos, 3) + break + add(my.a, ']') + inc(pos) + of '\0': + markError(my, errEndOfCDataExpected) + break + of '\c': + pos = lexbase.HandleCR(my, pos) + buf = my.buf + add(my.a, '\L') + of '\L': + pos = lexbase.HandleLF(my, pos) + buf = my.buf + add(my.a, '\L') + else: + add(my.a, buf[pos]) + inc(pos) + my.bufpos = pos # store back + my.kind = xmlCDATA + +proc parseComment(my: var TXMLParser) = + var pos = my.bufpos + len("<!--") + var buf = my.buf + while true: + case buf[pos] + of '-': + if buf[pos+1] == '-' and buf[pos+2] == '>': + inc(pos, 3) + break + if my.options.contains(reportComments): add(my.a, '-') + inc(pos) + of '\0': + markError(my, errEndOfCommentExpected) + break + of '\c': + pos = lexbase.HandleCR(my, pos) + buf = my.buf + if my.options.contains(reportComments): add(my.a, '\L') + of '\L': + pos = lexbase.HandleLF(my, pos) + buf = my.buf + if my.options.contains(reportComments): add(my.a, '\L') + else: + if my.options.contains(reportComments): add(my.a, buf[pos]) + inc(pos) + my.bufpos = pos + my.kind = xmlComment + +proc parseWhitespace(my: var TXmlParser, skip=False) = + var pos = my.bufpos + var buf = my.buf + while true: + case buf[pos] + of ' ', '\t': + if not skip: add(my.a, buf[pos]) + Inc(pos) + of '\c': + # the specification says that CR-LF, CR are to be transformed to LF + pos = lexbase.HandleCR(my, pos) + buf = my.buf + if not skip: add(my.a, '\L') + of '\L': + pos = lexbase.HandleLF(my, pos) + buf = my.buf + if not skip: add(my.a, '\L') + else: + break + my.bufpos = pos + +const + NameStartChar = {'A'..'Z', 'a'..'z', '_', ':', '\128'..'\255'} + NameChar = {'A'..'Z', 'a'..'z', '0'..'9', '.', '-', '_', ':', '\128'..'\255'} + +proc parseName(my: var TXmlParser, dest: var string) = + var pos = my.bufpos + var buf = my.buf + if buf[pos] in nameStartChar: + while true: + add(dest, buf[pos]) + inc(pos) + if buf[pos] notin NameChar: break + my.bufpos = pos + else: + markError(my, errNameExpected) + +proc parseEntity(my: var TXmlParser, dest: var string) = + var pos = my.bufpos+1 + var buf = my.buf + my.kind = xmlCharData + if buf[pos] == '#': + var r: int + inc(pos) + if buf[pos] == 'x': + inc(pos) + while true: + case buf[pos] + of '0'..'9': r = (r shl 4) or (ord(buf[pos]) - ord('0')) + of 'a'..'f': r = (r shl 4) or (ord(buf[pos]) - ord('a') + 10) + of 'A'..'F': r = (r shl 4) or (ord(buf[pos]) - ord('A') + 10) + else: break + inc(pos) + else: + while buf[pos] in {'0'..'9'}: + r = r * 10 + (ord(buf[pos]) - ord('0')) + inc(pos) + add(dest, toUTF8(TRune(r))) + elif buf[pos] == 'l' and buf[pos+1] == 't': + add(dest, '<') + inc(pos, 2) + elif buf[pos] == 'g' and buf[pos+1] == 't': + add(dest, '>') + inc(pos, 2) + elif buf[pos] == 'a' and buf[pos+1] == 'm' and buf[pos+2] == 'p': + add(dest, '&') + inc(pos, 3) + elif buf[pos] == 'a' and buf[pos+1] == 'p' and buf[pos+2] == 'o' and + buf[pos+3] == 's': + add(dest, '\'') + inc(pos, 4) + elif buf[pos] == 'q' and buf[pos+1] == 'u' and buf[pos+2] == 'o' and + buf[pos+3] == 't': + add(dest, '"') + inc(pos, 4) + else: + my.bufpos = pos + parseName(my, dest) + pos = my.bufpos + if my.err != errNameExpected: + my.kind = xmlEntity + else: + add(dest, '&') + if buf[pos] == ';': + inc(pos) + else: + markError(my, errSemiColonExpected) + my.bufpos = pos + +proc parsePI(my: var TXmlParser) = + inc(my.bufpos, "<?".len) + parseName(my, my.a) + var pos = my.bufpos + var buf = my.buf + setLen(my.b, 0) + while true: + case buf[pos] + of '\0': + markError(my, errQmGtExpected) + break + of '?': + if buf[pos+1] == '>': + inc(pos, 2) + break + add(my.b, '?') + inc(pos) + of '\c': + # the specification says that CR-LF, CR are to be transformed to LF + pos = lexbase.HandleCR(my, pos) + buf = my.buf + add(my.b, '\L') + of '\L': + pos = lexbase.HandleLF(my, pos) + buf = my.buf + add(my.b, '\L') + else: + add(my.b, buf[pos]) + inc(pos) + my.bufpos = pos + my.kind = xmlPI + +proc parseSpecial(my: var TXmlParser) = + # things that start with <! + var pos = my.bufpos + 2 + var buf = my.buf + var opentags = 0 + while true: + case buf[pos] + of '\0': + markError(my, errGtExpected) + break + of '<': + inc(opentags) + inc(pos) + add(my.a, '<') + of '>': + if opentags <= 0: + inc(pos) + break + dec(opentags) + inc(pos) + add(my.a, '>') + of '\c': + pos = lexbase.HandleCR(my, pos) + buf = my.buf + add(my.a, '\L') + of '\L': + pos = lexbase.HandleLF(my, pos) + buf = my.buf + add(my.a, '\L') + else: + add(my.a, buf[pos]) + inc(pos) + my.bufpos = pos + my.kind = xmlSpecial + +proc parseTag(my: var TXmlParser) = + inc(my.bufpos) + parseName(my, my.a) + # if we have no name, do not interpret the '<': + if my.a.len == 0: + my.kind = xmlCharData + add(my.a, '<') + return + parseWhitespace(my, skip=True) + if my.buf[my.bufpos] in NameStartChar: + # an attribute follows: + my.kind = xmlElementOpen + my.state = stateAttr + else: + my.kind = xmlElementStart + if my.buf[my.bufpos] == '/' and my.buf[my.bufpos+1] == '>': + inc(my.bufpos, 2) + my.state = stateEmptyElementTag + elif my.buf[my.bufpos] == '>': + inc(my.bufpos) + else: + markError(my, errGtExpected) + +proc parseEndTag(my: var TXmlParser) = + inc(my.bufpos, 2) + parseName(my, my.a) + parseWhitespace(my, skip=True) + if my.buf[my.bufpos] == '>': + inc(my.bufpos) + else: + markError(my, errGtExpected) + my.kind = xmlElementEnd + +proc parseAttribute(my: var TXmlParser) = + my.kind = xmlAttribute + setLen(my.a, 0) + setLen(my.b, 0) + parseName(my, my.a) + # if we have no name, we have '<tag attr= key %&$$%': + if my.a.len == 0: + markError(my, errGtExpected) + return + parseWhitespace(my, skip=True) + if my.buf[my.bufpos] != '=': + markError(my, errEqExpected) + return + inc(my.bufpos) + parseWhitespace(my, skip=True) + + var pos = my.bufpos + var buf = my.buf + if buf[pos] in {'\'', '"'}: + var quote = buf[pos] + var pendingSpace = false + inc(pos) + while true: + case buf[pos] + of '\0': + markError(my, errQuoteExpected) + break + of '&': + if pendingSpace: + add(my.b, ' ') + pendingSpace = false + my.bufpos = pos + parseEntity(my, my.b) + my.kind = xmlAttribute # parseEntity overwrites my.kind! + pos = my.bufpos + of ' ', '\t': + pendingSpace = true + inc(pos) + of '\c': + pos = lexbase.HandleCR(my, pos) + buf = my.buf + pendingSpace = true + of '\L': + pos = lexbase.HandleLF(my, pos) + buf = my.buf + pendingSpace = true + else: + if buf[pos] == quote: + inc(pos) + break + else: + if pendingSpace: + add(my.b, ' ') + pendingSpace = false + add(my.b, buf[pos]) + inc(pos) + else: + markError(my, errQuoteExpected) + my.bufpos = pos + parseWhitespace(my, skip=True) + +proc parseCharData(my: var TXmlParser) = + var pos = my.bufpos + var buf = my.buf + while true: + case buf[pos] + of '\0', '<', '&': break + of '\c': + # the specification says that CR-LF, CR are to be transformed to LF + pos = lexbase.HandleCR(my, pos) + buf = my.buf + add(my.a, '\L') + of '\L': + pos = lexbase.HandleLF(my, pos) + buf = my.buf + add(my.a, '\L') + else: + add(my.a, buf[pos]) + inc(pos) + my.bufpos = pos + my.kind = xmlCharData + +proc rawGetTok(my: var TXmlParser) = + my.kind = xmlError + setLen(my.a, 0) + var pos = my.bufpos + var buf = my.buf + case buf[pos] + of '<': + case buf[pos+1] + of '/': + parseEndTag(my) + of '!': + if buf[pos+2] == '[' and buf[pos+3] == 'C' and buf[pos+4] == 'D' and + buf[pos+5] == 'A' and buf[pos+6] == 'T' and buf[pos+7] == 'A' and + buf[pos+8] == '[': + parseCDATA(my) + elif buf[pos+2] == '-' and buf[pos+3] == '-': + parseComment(my) + else: + parseSpecial(my) + of '?': + parsePI(my) + else: + parseTag(my) + of ' ', '\t', '\c', '\l': + parseWhiteSpace(my) + my.kind = xmlWhitespace + of '\0': + my.kind = xmlEof + of '&': + parseEntity(my, my.a) + else: + parseCharData(my) + assert my.kind != xmlError + +proc getTok(my: var TXmlParser) = + while true: + rawGetTok(my) + case my.kind + of xmlComment: + if my.options.contains(reportComments): break + of xmlWhitespace: + if my.options.contains(reportWhitespace): break + else: break + +proc next*(my: var TXmlParser) = + ## retrieves the first/next event. This controls the parser. + case my.state + of stateNormal: + getTok(my) + of stateStart: + getTok(my) + if my.kind == xmlPI and my.a == "xml": + # just skip the first ``<?xml >`` processing instruction + getTok(my) + my.state = stateNormal + of stateAttr: + # parse an attribute key-value pair: + if my.buf[my.bufpos] == '>': + my.kind = xmlElementClose + inc(my.bufpos) + my.state = stateNormal + elif my.buf[my.bufpos] == '/' and my.buf[my.bufpos+1] == '>': + my.kind = xmlElementClose + inc(my.bufpos, 2) + my.state = stateEmptyElementTag + else: + parseAttribute(my) + # state remains the same + of stateEmptyElementTag: + my.state = stateNormal + my.kind = xmlElementEnd + of stateError: + my.kind = xmlError + my.state = stateNormal + +when isMainModule: + import os + var s = newFileStream(ParamStr(1), fmRead) + if s == nil: quit("cannot open the file" & ParamStr(1)) + var x: TXmlParser + open(x, s, ParamStr(1)) + while true: + next(x) + case x.kind + of xmlError: Echo(x.errorMsg()) + of xmlEof: break + of xmlCharData: echo(x.charData) + of xmlWhitespace: echo("|$1|" % x.charData) + of xmlComment: echo("<!-- $1 -->" % x.charData) + of xmlPI: echo("<? $1 ## $2 ?>" % [x.PIName, x.PIRest]) + of xmlElementStart: echo("<$1>" % x.elementName) + of xmlElementEnd: echo("</$1>" % x.elementName) + + of xmlElementOpen: echo("<$1" % x.elementName) + of xmlAttribute: + echo("Key: " & x.attrKey) + echo("Value: " & x.attrValue) + + of xmlElementClose: echo(">") + of xmlCData: + echo("<![CDATA[$1]]>" % x.charData) + of xmlEntity: + echo("&$1;" % x.entityName) + of xmlSpecial: + echo("SPECIAL: " & x.charData) + close(x) + diff --git a/nimlib/pure/pegs.nim b/nimlib/pure/pegs.nim new file mode 100755 index 000000000..488e42c7d --- /dev/null +++ b/nimlib/pure/pegs.nim @@ -0,0 +1,1365 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2009 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## Simple PEG (Parsing expression grammar) matching. Uses no memorization, but +## uses superoperators and symbol inlining to improve performance. Note: +## Matching performance is hopefully competitive with optimized regular +## expression engines. +## +## .. include:: ../doc/pegdocs.txt +## + +const + useUnicode = true ## change this to deactivate proper UTF-8 support + +import + strutils + +when useUnicode: + import unicode + +const + InlineThreshold = 5 ## number of leaves; -1 to disable inlining + +type + TPegKind = enum + pkEmpty, + pkAny, ## any character (.) + pkAnyRune, ## any Unicode character (_) + pkNewLine, ## CR-LF, LF, CR + pkTerminal, + pkTerminalIgnoreCase, + pkTerminalIgnoreStyle, + pkChar, ## single character to match + pkCharChoice, + pkNonTerminal, + pkSequence, ## a b c ... --> Internal DSL: peg(a, b, c) + pkOrderedChoice, ## a / b / ... --> Internal DSL: a / b or /[a, b, c] + pkGreedyRep, ## a* --> Internal DSL: *a + ## a+ --> (a a*) + pkGreedyRepChar, ## x* where x is a single character (superop) + pkGreedyRepSet, ## [set]* (superop) + pkGreedyAny, ## .* or _* (superop) + pkOption, ## a? --> Internal DSL: ?a + pkAndPredicate, ## &a --> Internal DSL: &a + pkNotPredicate, ## !a --> Internal DSL: !a + pkCapture, ## {a} --> Internal DSL: capture(a) + pkSearch, ## @a --> Internal DSL: @a + pkRule, ## a <- b + pkList ## a, b + TNonTerminalFlag = enum + ntDeclared, ntUsed + TNonTerminal {.final.} = object ## represents a non terminal symbol + name: string ## the name of the symbol + line: int ## the line the symbol has been declared/used in + col: int ## the column the symbol has been declared/used in + flags: set[TNonTerminalFlag] ## the nonterminal's flags + rule: TNode ## the rule that the symbol refers to + TNode {.final.} = object + case kind: TPegKind + of pkEmpty, pkAny, pkAnyRune, pkGreedyAny, pkNewLine: nil + of pkTerminal, pkTerminalIgnoreCase, pkTerminalIgnoreStyle: term: string + of pkChar, pkGreedyRepChar: ch: char + of pkCharChoice, pkGreedyRepSet: charChoice: ref set[char] + of pkNonTerminal: nt: PNonTerminal + else: sons: seq[TNode] + PNonTerminal* = ref TNonTerminal + + TPeg* = TNode ## type that represents a PEG + +proc term*(t: string): TPeg = + ## constructs a PEG from a terminal string + if t.len != 1: + result.kind = pkTerminal + result.term = t + else: + result.kind = pkChar + result.ch = t[0] + +proc termIgnoreCase*(t: string): TPeg = + ## constructs a PEG from a terminal string; ignore case for matching + result.kind = pkTerminalIgnoreCase + result.term = t + +proc termIgnoreStyle*(t: string): TPeg = + ## constructs a PEG from a terminal string; ignore style for matching + result.kind = pkTerminalIgnoreStyle + result.term = t + +proc term*(t: char): TPeg = + ## constructs a PEG from a terminal char + assert t != '\0' + result.kind = pkChar + result.ch = t + +proc charSet*(s: set[char]): TPeg = + ## constructs a PEG from a character set `s` + assert '\0' notin s + result.kind = pkCharChoice + new(result.charChoice) + result.charChoice^ = s + +proc len(a: TPeg): int {.inline.} = return a.sons.len +proc add(d: var TPeg, s: TPeg) {.inline.} = add(d.sons, s) + +proc addChoice(dest: var TPeg, elem: TPeg) = + var L = dest.len-1 + if L >= 0 and dest.sons[L].kind == pkCharChoice: + case elem.kind + of pkCharChoice: + dest.sons[L].charChoice^ = dest.sons[L].charChoice^ + elem.charChoice^ + of pkChar: incl(dest.sons[L].charChoice^, elem.ch) + else: add(dest, elem) + else: add(dest, elem) + +template multipleOp(k: TPegKind, localOpt: expr) = + result.kind = k + result.sons = @[] + for x in items(a): + if x.kind == k: + for y in items(x.sons): + localOpt(result, y) + else: + localOpt(result, x) + if result.len == 1: + result = result.sons[0] + +proc `/`*(a: openArray[TPeg]): TPeg = + ## constructs an ordered choice with the PEGs in `a` + multipleOp(pkOrderedChoice, addChoice) + +proc addSequence(dest: var TPeg, elem: TPeg) = + var L = dest.len-1 + if L >= 0 and dest.sons[L].kind == pkTerminal: + case elem.kind + of pkTerminal: add(dest.sons[L].term, elem.term) + of pkChar: add(dest.sons[L].term, elem.ch) + else: add(dest, elem) + else: add(dest, elem) + +proc sequence*(a: openArray[TPeg]): TPeg = + ## constructs a sequence with all the PEGs from `a` + multipleOp(pkSequence, addSequence) + +proc `?`*(a: TPeg): TPeg = + ## constructs an optional for the PEG `a` + if a.kind in {pkOption, pkGreedyRep, pkGreedyAny, pkGreedyRepChar, + pkGreedyRepSet}: + # a* ? --> a* + # a? ? --> a? + result = a + else: + result.kind = pkOption + result.sons = @[a] + +proc `*`*(a: TPeg): TPeg = + ## constructs a "greedy repetition" for the PEG `a` + case a.kind + of pkGreedyRep, pkGreedyRepChar, pkGreedyRepSet, pkGreedyAny, pkOption: + assert false + # produces endless loop! + of pkChar: + result.kind = pkGreedyRepChar + result.ch = a.ch + of pkCharChoice: + result.kind = pkGreedyRepSet + result.charChoice = a.charChoice # copying a reference suffices! + of pkAny, pkAnyRune: + result.kind = pkGreedyAny + else: + result.kind = pkGreedyRep + result.sons = @[a] + +proc `@`*(a: TPeg): TPeg = + ## constructs a "search" for the PEG `a` + result.kind = pkSearch + result.sons = @[a] + +when false: + proc contains(a: TPeg, k: TPegKind): bool = + if a.kind == k: return true + case a.kind + of pkEmpty, pkAny, pkAnyRune, pkGreedyAny, pkNewLine, pkTerminal, + pkTerminalIgnoreCase, pkTerminalIgnoreStyle, pkChar, pkGreedyRepChar, + pkCharChoice, pkGreedyRepSet: nil + of pkNonTerminal: return true + else: + for i in 0..a.sons.len-1: + if contains(a.sons[i], k): return true + +proc `+`*(a: TPeg): TPeg = + ## constructs a "greedy positive repetition" with the PEG `a` + return sequence(a, *a) + +proc `&`*(a: TPeg): TPeg = + ## constructs an "and predicate" with the PEG `a` + result.kind = pkAndPredicate + result.sons = @[a] + +proc `!`*(a: TPeg): TPeg = + ## constructs a "not predicate" with the PEG `a` + result.kind = pkNotPredicate + result.sons = @[a] + +proc any*: TPeg {.inline.} = + ## constructs the PEG `any character`:idx: (``.``) + result.kind = pkAny + +proc anyRune*: TPeg {.inline.} = + ## constructs the PEG `any rune`:idx: (``_``) + result.kind = pkAnyRune + +proc newLine*: TPeg {.inline.} = + ## constructs the PEG `newline`:idx: (``\n``) + result.kind = pkNewline + +proc capture*(a: TPeg): TPeg = + ## constructs a capture with the PEG `a` + result.kind = pkCapture + result.sons = @[a] + +proc spaceCost(n: TPeg): int = + case n.kind + of pkEmpty: nil + of pkTerminal, pkTerminalIgnoreCase, pkTerminalIgnoreStyle, pkChar, + pkGreedyRepChar, pkCharChoice, pkGreedyRepSet, pkAny, pkAnyRune, + pkNewLine, pkGreedyAny: + result = 1 + of pkNonTerminal: + # we cannot inline a rule with a non-terminal + result = InlineThreshold+1 + else: + for i in 0..n.len-1: + inc(result, spaceCost(n.sons[i])) + if result >= InlineThreshold: break + +proc nonterminal*(n: PNonTerminal): TPeg = + ## constructs a PEG that consists of the nonterminal symbol + assert n != nil + if ntDeclared in n.flags and spaceCost(n.rule) < InlineThreshold: + when false: echo "inlining symbol: ", n.name + result = n.rule # inlining of rule enables better optimizations + else: + result.kind = pkNonTerminal + result.nt = n + +proc newNonTerminal*(name: string, line, column: int): PNonTerminal = + ## constructs a nonterminal symbol + new(result) + result.name = name + result.line = line + result.col = column + +template letters*: expr = + ## expands to ``charset({'A'..'Z', 'a'..'z'})`` + charset({'A'..'Z', 'a'..'z'}) + +template digits*: expr = + ## expands to ``charset({'0'..'9'})`` + charset({'0'..'9'}) + +template whitespace*: expr = + ## expands to ``charset({' ', '\9'..'\13'})`` + charset({' ', '\9'..'\13'}) + +template identChars*: expr = + ## expands to ``charset({'a'..'z', 'A'..'Z', '0'..'9', '_'})`` + charset({'a'..'z', 'A'..'Z', '0'..'9', '_'}) + +template identStartChars*: expr = + ## expands to ``charset({'A'..'Z', 'a'..'z', '_'})`` + charset({'a'..'z', 'A'..'Z', '_'}) + +template ident*: expr = + ## same as ``[a-zA-Z_][a-zA-z_0-9]*``; standard identifier + sequence(charset({'a'..'z', 'A'..'Z', '_'}), + *charset({'a'..'z', 'A'..'Z', '0'..'9', '_'})) + +template natural*: expr = + ## same as ``\d+`` + +digits + +const + MaxSubpatterns* = 10 ## defines the maximum number of subpatterns that + ## can be captured. More subpatterns cannot be captured! + +# ------------------------- debugging ----------------------------------------- + +proc esc(c: char, reserved = {'\0'..'\255'}): string = + case c + of '\b': result = "\\b" + of '\t': result = "\\t" + of '\c': result = "\\c" + of '\L': result = "\\l" + of '\v': result = "\\v" + of '\f': result = "\\f" + of '\e': result = "\\e" + of '\a': result = "\\a" + of '\\': result = "\\\\" + of 'a'..'z', 'A'..'Z', '0'..'9', '_': result = $c + elif c < ' ' or c >= '\128': result = '\\' & $ord(c) + elif c in reserved: result = '\\' & c + else: result = $c + +proc singleQuoteEsc(c: Char): string = return "'" & esc(c, {'\''}) & "'" + +proc singleQuoteEsc(str: string): string = + result = "'" + for c in items(str): add result, esc(c, {'\''}) + add result, '\'' + +proc charSetEscAux(cc: set[char]): string = + const reserved = {'^', '-', ']'} + result = "" + var c1 = 0 + while c1 <= 0xff: + if chr(c1) in cc: + var c2 = c1 + while c2 < 0xff and chr(succ(c2)) in cc: inc(c2) + if c1 == c2: + add result, esc(chr(c1), reserved) + elif c2 == succ(c1): + add result, esc(chr(c1), reserved) & esc(chr(c2), reserved) + else: + add result, esc(chr(c1), reserved) & '-' & esc(chr(c2), reserved) + c1 = c2 + inc(c1) + +proc CharSetEsc(cc: set[char]): string = + if card(cc) >= 128+64: + result = "[^" & CharSetEscAux({'\1'..'\xFF'} - cc) & ']' + else: + result = '[' & CharSetEscAux(cc) & ']' + +proc toStrAux(r: TPeg, res: var string) = + case r.kind + of pkEmpty: add(res, "()") + of pkAny: add(res, '.') + of pkAnyRune: add(res, '_') + of pkNewline: add(res, "\\n") + of pkTerminal: add(res, singleQuoteEsc(r.term)) + of pkTerminalIgnoreCase: + add(res, 'i') + add(res, singleQuoteEsc(r.term)) + of pkTerminalIgnoreStyle: + add(res, 'y') + add(res, singleQuoteEsc(r.term)) + of pkChar: add(res, singleQuoteEsc(r.ch)) + of pkCharChoice: add(res, charSetEsc(r.charChoice^)) + of pkNonTerminal: add(res, r.nt.name) + of pkSequence: + add(res, '(') + toStrAux(r.sons[0], res) + for i in 1 .. high(r.sons): + add(res, ' ') + toStrAux(r.sons[i], res) + add(res, ')') + of pkOrderedChoice: + add(res, '(') + toStrAux(r.sons[0], res) + for i in 1 .. high(r.sons): + add(res, " / ") + toStrAux(r.sons[i], res) + add(res, ')') + of pkGreedyRep: + toStrAux(r.sons[0], res) + add(res, '*') + of pkGreedyRepChar: + add(res, singleQuoteEsc(r.ch)) + add(res, '*') + of pkGreedyRepSet: + add(res, charSetEsc(r.charChoice^)) + add(res, '*') + of pkGreedyAny: + add(res, ".*") + of pkOption: + toStrAux(r.sons[0], res) + add(res, '?') + of pkAndPredicate: + add(res, '&') + toStrAux(r.sons[0], res) + of pkNotPredicate: + add(res, '!') + toStrAux(r.sons[0], res) + of pkSearch: + add(res, '@') + toStrAux(r.sons[0], res) + of pkCapture: + add(res, '{') + toStrAux(r.sons[0], res) + add(res, '}') + of pkRule: + toStrAux(r.sons[0], res) + add(res, " <- ") + toStrAux(r.sons[1], res) + of pkList: + for i in 0 .. high(r.sons): + toStrAux(r.sons[i], res) + add(res, "\n") + +proc `$` *(r: TPeg): string = + ## converts a PEG to its string representation + result = "" + toStrAux(r, result) + +# --------------------- core engine ------------------------------------------- + +type + TMatchClosure {.final.} = object + matches: array[0..maxSubpatterns-1, tuple[first, last: int]] + ml: int + +when not useUnicode: + type + TRune = char + template fastRuneAt(s, i, ch: expr) = + ch = s[i] + inc(i) + template runeLenAt(s, i: expr): expr = 1 + +proc m(s: string, p: TPeg, start: int, c: var TMatchClosure): int = + ## this implements a simple PEG interpreter. Thanks to superoperators it + ## has competitive performance nevertheless. + ## Returns -1 if it does not match, else the length of the match + case p.kind + of pkEmpty: result = 0 # match of length 0 + of pkAny: + if s[start] != '\0': result = 1 + else: result = -1 + of pkAnyRune: + if s[start] != '\0': + result = runeLenAt(s, start) + else: + result = -1 + of pkGreedyAny: + result = len(s) - start + of pkNewLine: + if s[start] == '\L': result = 1 + elif s[start] == '\C': + if s[start+1] == '\L': result = 2 + else: result = 1 + else: result = -1 + of pkTerminal: + result = len(p.term) + for i in 0..result-1: + if p.term[i] != s[start+i]: + result = -1 + break + of pkTerminalIgnoreCase: + var + i = 0 + a, b: TRune + result = start + while i < len(p.term): + fastRuneAt(p.term, i, a) + fastRuneAt(s, result, b) + if toLower(a) != toLower(b): + result = -1 + break + dec(result, start) + of pkTerminalIgnoreStyle: + var + i = 0 + a, b: TRune + result = start + while i < len(p.term): + while true: + fastRuneAt(p.term, i, a) + if a != TRune('_'): break + while true: + fastRuneAt(s, result, b) + if b != TRune('_'): break + if toLower(a) != toLower(b): + result = -1 + break + dec(result, start) + of pkChar: + if p.ch == s[start]: result = 1 + else: result = -1 + of pkCharChoice: + if contains(p.charChoice^, s[start]): result = 1 + else: result = -1 + of pkNonTerminal: + var oldMl = c.ml + when false: echo "enter: ", p.nt.name + result = m(s, p.nt.rule, start, c) + when false: echo "leave: ", p.nt.name + if result < 0: c.ml = oldMl + of pkSequence: + var oldMl = c.ml + result = 0 + for i in 0..high(p.sons): + var x = m(s, p.sons[i], start+result, c) + if x < 0: + c.ml = oldMl + result = -1 + break + else: inc(result, x) + of pkOrderedChoice: + var oldMl = c.ml + for i in 0..high(p.sons): + result = m(s, p.sons[i], start, c) + if result >= 0: break + c.ml = oldMl + of pkSearch: + var oldMl = c.ml + result = 0 + while start+result < s.len: + var x = m(s, p.sons[0], start+result, c) + if x >= 0: + inc(result, x) + return + inc(result) + result = -1 + c.ml = oldMl + of pkGreedyRep: + result = 0 + while true: + var x = m(s, p.sons[0], start+result, c) + # if x == 0, we have an endless loop; so the correct behaviour would be + # not to break. But endless loops can be easily introduced: + # ``(comment / \w*)*`` is such an example. Breaking for x == 0 does the + # expected thing in this case. + if x <= 0: break + inc(result, x) + of pkGreedyRepChar: + result = 0 + var ch = p.ch + while ch == s[start+result]: inc(result) + of pkGreedyRepSet: + result = 0 + while contains(p.charChoice^, s[start+result]): inc(result) + of pkOption: + result = max(0, m(s, p.sons[0], start, c)) + of pkAndPredicate: + var oldMl = c.ml + result = m(s, p.sons[0], start, c) + if result >= 0: result = 0 # do not consume anything + else: c.ml = oldMl + of pkNotPredicate: + var oldMl = c.ml + result = m(s, p.sons[0], start, c) + if result < 0: result = 0 + else: + c.ml = oldMl + result = -1 + of pkCapture: + var idx = c.ml # reserve a slot for the subpattern + inc(c.ml) + result = m(s, p.sons[0], start, c) + if result >= 0: + if idx < maxSubpatterns: + c.matches[idx] = (start, start+result-1) + #else: silently ignore the capture + else: + c.ml = idx + of pkRule, pkList: assert false + +proc match*(s: string, pattern: TPeg, matches: var openarray[string], + start = 0): bool = + ## returns ``true`` if ``s[start..]`` matches the ``pattern`` and + ## the captured substrings in the array ``matches``. If it does not + ## match, nothing is written into ``matches`` and ``false`` is + ## returned. + var c: TMatchClosure + result = m(s, pattern, start, c) == len(s) + if result: + for i in 0..c.ml-1: + matches[i] = copy(s, c.matches[i][0], c.matches[i][1]) + +proc match*(s: string, pattern: TPeg, start = 0): bool = + ## returns ``true`` if ``s`` matches the ``pattern`` beginning from ``start``. + var c: TMatchClosure + result = m(s, pattern, start, c) == len(s) + +proc matchLen*(s: string, pattern: TPeg, matches: var openarray[string], + start = 0): int = + ## the same as ``match``, but it returns the length of the match, + ## if there is no match, -1 is returned. Note that a match length + ## of zero can happen. It's possible that a suffix of `s` remains + ## that does not belong to the match. + var c: TMatchClosure + result = m(s, pattern, start, c) + if result >= 0: + for i in 0..c.ml-1: + matches[i] = copy(s, c.matches[i][0], c.matches[i][1]) + +proc matchLen*(s: string, pattern: TPeg, start = 0): int = + ## the same as ``match``, but it returns the length of the match, + ## if there is no match, -1 is returned. Note that a match length + ## of zero can happen. It's possible that a suffix of `s` remains + ## that does not belong to the match. + var c: TMatchClosure + result = m(s, pattern, start, c) + +proc find*(s: string, pattern: TPeg, matches: var openarray[string], + start = 0): int = + ## returns the starting position of ``pattern`` in ``s`` and the captured + ## substrings in the array ``matches``. If it does not match, nothing + ## is written into ``matches`` and -1 is returned. + for i in 0 .. s.len-1: + if matchLen(s, pattern, matches, i) >= 0: return i + return -1 + # could also use the pattern here: (!P .)* P + +proc find*(s: string, pattern: TPeg, start = 0): int = + ## returns the starting position of ``pattern`` in ``s``. If it does not + ## match, -1 is returned. + for i in 0 .. s.len-1: + if matchLen(s, pattern, i) >= 0: return i + return -1 + +template `=~`*(s: string, pattern: TPeg): expr = + ## This calls ``match`` with an implicit declared ``matches`` array that + ## can be used in the scope of the ``=~`` call: + ## + ## .. code-block:: nimrod + ## + ## if line =~ peg"\s* {\w+} \s* '=' \s* {\w+}": + ## # matches a key=value pair: + ## echo("Key: ", matches[0]) + ## echo("Value: ", matches[1]) + ## elif line =~ peg"\s*{'#'.*}": + ## # matches a comment + ## # note that the implicit ``matches`` array is different from the + ## # ``matches`` array of the first branch + ## echo("comment: ", matches[0]) + ## else: + ## echo("syntax error") + ## + when not definedInScope(matches): + var matches: array[0..maxSubpatterns-1, string] + match(s, pattern, matches) + +# ------------------------- more string handling ------------------------------ + +proc contains*(s: string, pattern: TPeg, start = 0): bool = + ## same as ``find(s, pattern, start) >= 0`` + return find(s, pattern, start) >= 0 + +proc contains*(s: string, pattern: TPeg, matches: var openArray[string], + start = 0): bool = + ## same as ``find(s, pattern, matches, start) >= 0`` + return find(s, pattern, matches, start) >= 0 + +proc startsWith*(s: string, prefix: TPeg): bool = + ## returns true if `s` starts with the pattern `prefix` + result = matchLen(s, prefix) >= 0 + +proc endsWith*(s: string, suffix: TPeg): bool = + ## returns true if `s` ends with the pattern `prefix` + for i in 0 .. s.len-1: + if matchLen(s, suffix, i) == s.len - i: return true + +proc replace*(s: string, sub: TPeg, by: string): string = + ## Replaces `sub` in `s` by the string `by`. Captures can be accessed in `by` + ## with the notation ``$i`` and ``$#`` (see strutils.`%`). Examples: + ## + ## .. code-block:: nimrod + ## "var1=key; var2=key2".replace(peg"{\ident}'='{\ident}", "$1<-$2$2") + ## + ## Results in: + ## + ## .. code-block:: nimrod + ## + ## "var1<-keykey; val2<-key2key2" + result = "" + var i = 0 + var caps: array[0..maxSubpatterns-1, string] + while i < s.len: + var x = matchLen(s, sub, caps, i) + if x <= 0: + add(result, s[i]) + inc(i) + else: + addf(result, by, caps) + inc(i, x) + # copy the rest: + add(result, copy(s, i)) + +proc parallelReplace*(s: string, subs: openArray[ + tuple[pattern: TPeg, repl: string]]): string = + ## Returns a modified copy of `s` with the substitutions in `subs` + ## applied in parallel. + result = "" + var i = 0 + var caps: array[0..maxSubpatterns-1, string] + while i < s.len: + block searchSubs: + for j in 0..high(subs): + var x = matchLen(s, subs[j][0], caps, i) + if x > 0: + addf(result, subs[j][1], caps) + inc(i, x) + break searchSubs + add(result, s[i]) + inc(i) + # copy the rest: + add(result, copy(s, i)) + +proc transformFile*(infile, outfile: string, + subs: openArray[tuple[pattern: TPeg, repl: string]]) = + ## reads in the file `infile`, performs a parallel replacement (calls + ## `parallelReplace`) and writes back to `outfile`. Calls ``quit`` if an + ## error occurs. This is supposed to be used for quick scripting. + var x = readFile(infile) + if not isNil(x): + var f: TFile + if open(f, outfile, fmWrite): + write(f, x.parallelReplace(subs)) + close(f) + else: + quit("cannot open for writing: " & outfile) + else: + quit("cannot open for reading: " & infile) + +iterator split*(s: string, sep: TPeg): string = + ## Splits the string `s` into substrings. + ## + ## Substrings are separated by the PEG `sep`. + ## Examples: + ## + ## .. code-block:: nimrod + ## for word in split("00232this02939is39an22example111", peg"\d+"): + ## writeln(stdout, word) + ## + ## Results in: + ## + ## .. code-block:: nimrod + ## "this" + ## "is" + ## "an" + ## "example" + ## + var + first = 0 + last = 0 + while last < len(s): + var x = matchLen(s, sep, last) + if x > 0: inc(last, x) + first = last + while last < len(s): + inc(last) + x = matchLen(s, sep, last) + if x > 0: break + if first < last: + yield copy(s, first, last-1) + +proc split*(s: string, sep: TPeg): seq[string] {.noSideEffect.} = + ## Splits the string `s` into substrings. + accumulateResult(split(s, sep)) + +# ------------------- scanner ------------------------------------------------- + +type + TModifier = enum + modNone, + modVerbatim, + modIgnoreCase, + modIgnoreStyle + TTokKind = enum ## enumeration of all tokens + tkInvalid, ## invalid token + tkEof, ## end of file reached + tkAny, ## . + tkAnyRune, ## _ + tkIdentifier, ## abc + tkStringLit, ## "abc" or 'abc' + tkCharSet, ## [^A-Z] + tkParLe, ## '(' + tkParRi, ## ')' + tkCurlyLe, ## '{' + tkCurlyRi, ## '}' + tkArrow, ## '<-' + tkBar, ## '/' + tkStar, ## '*' + tkPlus, ## '+' + tkAmp, ## '&' + tkNot, ## '!' + tkOption, ## '?' + tkAt, ## '@' + tkBuiltin, ## \identifier + tkEscaped ## \\ + + TToken {.final.} = object ## a token + kind: TTokKind ## the type of the token + modifier: TModifier + literal: string ## the parsed (string) literal + charset: set[char] ## if kind == tkCharSet + + TPegLexer = object ## the lexer object. + bufpos: int ## the current position within the buffer + buf: cstring ## the buffer itself + LineNumber: int ## the current line number + lineStart: int ## index of last line start in buffer + colOffset: int ## column to add + filename: string + +const + tokKindToStr: array[TTokKind, string] = [ + "invalid", "[EOF]", ".", "_", "identifier", "string literal", + "character set", "(", ")", "{", "}", "<-", "/", "*", "+", "&", "!", "?", + "@", "built-in", "escaped" + ] + +proc HandleCR(L: var TPegLexer, pos: int): int = + assert(L.buf[pos] == '\c') + inc(L.linenumber) + result = pos+1 + if L.buf[result] == '\L': inc(result) + L.lineStart = result + +proc HandleLF(L: var TPegLexer, pos: int): int = + assert(L.buf[pos] == '\L') + inc(L.linenumber) + result = pos+1 + L.lineStart = result + +proc init(L: var TPegLexer, input, filename: string, line = 1, col = 0) = + L.buf = input + L.bufpos = 0 + L.lineNumber = line + L.colOffset = col + L.lineStart = 0 + L.filename = filename + +proc getColumn(L: TPegLexer): int {.inline.} = + result = abs(L.bufpos - L.lineStart) + L.colOffset + +proc getLine(L: TPegLexer): int {.inline.} = + result = L.linenumber + +proc errorStr(L: TPegLexer, msg: string, line = -1, col = -1): string = + var line = if line < 0: getLine(L) else: line + var col = if col < 0: getColumn(L) else: col + result = "$1($2, $3) Error: $4" % [L.filename, $line, $col, msg] + +proc handleHexChar(c: var TPegLexer, xi: var int) = + case c.buf[c.bufpos] + of '0'..'9': + xi = (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('0')) + inc(c.bufpos) + of 'a'..'f': + xi = (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('a') + 10) + inc(c.bufpos) + of 'A'..'F': + xi = (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('A') + 10) + inc(c.bufpos) + else: nil + +proc getEscapedChar(c: var TPegLexer, tok: var TToken) = + inc(c.bufpos) + case c.buf[c.bufpos] + of 'r', 'R', 'c', 'C': + add(tok.literal, '\c') + Inc(c.bufpos) + of 'l', 'L': + add(tok.literal, '\L') + Inc(c.bufpos) + of 'f', 'F': + add(tok.literal, '\f') + inc(c.bufpos) + of 'e', 'E': + add(tok.literal, '\e') + Inc(c.bufpos) + of 'a', 'A': + add(tok.literal, '\a') + Inc(c.bufpos) + of 'b', 'B': + add(tok.literal, '\b') + Inc(c.bufpos) + of 'v', 'V': + add(tok.literal, '\v') + Inc(c.bufpos) + of 't', 'T': + add(tok.literal, '\t') + Inc(c.bufpos) + of 'x', 'X': + inc(c.bufpos) + var xi = 0 + handleHexChar(c, xi) + handleHexChar(c, xi) + if xi == 0: tok.kind = tkInvalid + else: add(tok.literal, Chr(xi)) + of '0'..'9': + var val = ord(c.buf[c.bufpos]) - ord('0') + Inc(c.bufpos) + var i = 1 + while (i <= 3) and (c.buf[c.bufpos] in {'0'..'9'}): + val = val * 10 + ord(c.buf[c.bufpos]) - ord('0') + inc(c.bufpos) + inc(i) + if val > 0 and val <= 255: add(tok.literal, chr(val)) + else: tok.kind = tkInvalid + of '\0'..'\31': + tok.kind = tkInvalid + elif c.buf[c.bufpos] in strutils.letters: + tok.kind = tkInvalid + else: + add(tok.literal, c.buf[c.bufpos]) + Inc(c.bufpos) + +proc skip(c: var TPegLexer) = + var pos = c.bufpos + var buf = c.buf + while true: + case buf[pos] + of ' ', '\t': + Inc(pos) + of '#': + while not (buf[pos] in {'\c', '\L', '\0'}): inc(pos) + of '\c': + pos = HandleCR(c, pos) + buf = c.buf + of '\L': + pos = HandleLF(c, pos) + buf = c.buf + else: + break # EndOfFile also leaves the loop + c.bufpos = pos + +proc getString(c: var TPegLexer, tok: var TToken) = + tok.kind = tkStringLit + var pos = c.bufPos + 1 + var buf = c.buf + var quote = buf[pos-1] + while true: + case buf[pos] + of '\\': + c.bufpos = pos + getEscapedChar(c, tok) + pos = c.bufpos + of '\c', '\L', '\0': + tok.kind = tkInvalid + break + elif buf[pos] == quote: + inc(pos) + break + else: + add(tok.literal, buf[pos]) + Inc(pos) + c.bufpos = pos + +proc getCharSet(c: var TPegLexer, tok: var TToken) = + tok.kind = tkCharSet + tok.charset = {} + var pos = c.bufPos + 1 + var buf = c.buf + var caret = false + if buf[pos] == '^': + inc(pos) + caret = true + while true: + var ch: char + case buf[pos] + of ']': + inc(pos) + break + of '\\': + c.bufpos = pos + getEscapedChar(c, tok) + pos = c.bufpos + ch = tok.literal[tok.literal.len-1] + of '\C', '\L', '\0': + tok.kind = tkInvalid + break + else: + ch = buf[pos] + Inc(pos) + incl(tok.charset, ch) + if buf[pos] == '-': + if buf[pos+1] == ']': + incl(tok.charset, '-') + inc(pos) + else: + inc(pos) + var ch2: char + case buf[pos] + of '\\': + c.bufpos = pos + getEscapedChar(c, tok) + pos = c.bufpos + ch2 = tok.literal[tok.literal.len-1] + of '\C', '\L', '\0': + tok.kind = tkInvalid + break + else: + ch2 = buf[pos] + Inc(pos) + for i in ord(ch)+1 .. ord(ch2): + incl(tok.charset, chr(i)) + c.bufpos = pos + if caret: tok.charset = {'\1'..'\xFF'} - tok.charset + +proc getSymbol(c: var TPegLexer, tok: var TToken) = + var pos = c.bufpos + var buf = c.buf + while true: + add(tok.literal, buf[pos]) + Inc(pos) + if buf[pos] notin strutils.IdentChars: break + c.bufpos = pos + tok.kind = tkIdentifier + +proc getBuiltin(c: var TPegLexer, tok: var TToken) = + if c.buf[c.bufpos+1] in strutils.Letters: + inc(c.bufpos) + getSymbol(c, tok) + tok.kind = tkBuiltin + else: + tok.kind = tkEscaped + getEscapedChar(c, tok) # may set tok.kind to tkInvalid + +proc getTok(c: var TPegLexer, tok: var TToken) = + tok.kind = tkInvalid + tok.modifier = modNone + setlen(tok.literal, 0) + skip(c) + case c.buf[c.bufpos] + of '{': + tok.kind = tkCurlyLe + inc(c.bufpos) + add(tok.literal, '{') + of '}': + tok.kind = tkCurlyRi + inc(c.bufpos) + add(tok.literal, '}') + of '[': + getCharset(c, tok) + of '(': + tok.kind = tkParLe + Inc(c.bufpos) + add(tok.literal, '(') + of ')': + tok.kind = tkParRi + Inc(c.bufpos) + add(tok.literal, ')') + of '.': + tok.kind = tkAny + inc(c.bufpos) + add(tok.literal, '.') + of '_': + tok.kind = tkAnyRune + inc(c.bufpos) + add(tok.literal, '_') + of '\\': + getBuiltin(c, tok) + of '\'', '"': getString(c, tok) + of '\0': + tok.kind = tkEof + tok.literal = "[EOF]" + of 'a'..'z', 'A'..'Z', '\128'..'\255': + getSymbol(c, tok) + if c.buf[c.bufpos] in {'\'', '"'}: + case tok.literal + of "i": tok.modifier = modIgnoreCase + of "y": tok.modifier = modIgnoreStyle + of "v": tok.modifier = modVerbatim + else: nil + setLen(tok.literal, 0) + getString(c, tok) + if tok.modifier == modNone: tok.kind = tkInvalid + of '+': + tok.kind = tkPlus + inc(c.bufpos) + add(tok.literal, '+') + of '*': + tok.kind = tkStar + inc(c.bufpos) + add(tok.literal, '+') + of '<': + if c.buf[c.bufpos+1] == '-': + inc(c.bufpos, 2) + tok.kind = tkArrow + add(tok.literal, "<-") + else: + add(tok.literal, '<') + of '/': + tok.kind = tkBar + inc(c.bufpos) + add(tok.literal, '/') + of '?': + tok.kind = tkOption + inc(c.bufpos) + add(tok.literal, '?') + of '!': + tok.kind = tkNot + inc(c.bufpos) + add(tok.literal, '!') + of '&': + tok.kind = tkAmp + inc(c.bufpos) + add(tok.literal, '!') + of '@': + tok.kind = tkAt + inc(c.bufpos) + add(tok.literal, '@') + else: + add(tok.literal, c.buf[c.bufpos]) + inc(c.bufpos) + +proc arrowIsNextTok(c: TPegLexer): bool = + # the only look ahead we need + var pos = c.bufpos + while c.buf[pos] in {'\t', ' '}: inc(pos) + result = c.buf[pos] == '<' and c.buf[pos+1] == '-' + +# ----------------------------- parser ---------------------------------------- + +type + EInvalidPeg* = object of EBase ## raised if an invalid PEG has been detected + TPegParser = object of TPegLexer ## the PEG parser object + tok: TToken + nonterms: seq[PNonTerminal] + modifier: TModifier + +proc getTok(p: var TPegParser) = getTok(p, p.tok) + +proc pegError(p: TPegParser, msg: string, line = -1, col = -1) = + var e: ref EInvalidPeg + new(e) + e.msg = errorStr(p, msg, line, col) + raise e + +proc eat(p: var TPegParser, kind: TTokKind) = + if p.tok.kind == kind: getTok(p) + else: pegError(p, tokKindToStr[kind] & " expected") + +proc parseExpr(p: var TPegParser): TPeg + +proc getNonTerminal(p: TPegParser, name: string): PNonTerminal = + for i in 0..high(p.nonterms): + result = p.nonterms[i] + if cmpIgnoreStyle(result.name, name) == 0: return + # forward reference: + result = newNonTerminal(name, getLine(p), getColumn(p)) + add(p.nonterms, result) + +proc modifiedTerm(s: string, m: TModifier): TPeg = + case m + of modNone, modVerbatim: result = term(s) + of modIgnoreCase: result = termIgnoreCase(s) + of modIgnoreStyle: result = termIgnoreStyle(s) + +proc primary(p: var TPegParser): TPeg = + case p.tok.kind + of tkAmp: + getTok(p) + return &primary(p) + of tkNot: + getTok(p) + return !primary(p) + of tkAt: + getTok(p) + return @primary(p) + else: nil + case p.tok.kind + of tkIdentifier: + if not arrowIsNextTok(p): + var nt = getNonTerminal(p, p.tok.literal) + incl(nt.flags, ntUsed) + result = nonTerminal(nt) + getTok(p) + else: + pegError(p, "expression expected, but found: " & p.tok.literal) + of tkStringLit: + var m = p.tok.modifier + if m == modNone: m = p.modifier + result = modifiedTerm(p.tok.literal, m) + getTok(p) + of tkCharSet: + if '\0' in p.tok.charset: + pegError(p, "binary zero ('\\0') not allowed in character class") + result = charset(p.tok.charset) + getTok(p) + of tkParLe: + getTok(p) + result = parseExpr(p) + eat(p, tkParRi) + of tkCurlyLe: + getTok(p) + result = capture(parseExpr(p)) + eat(p, tkCurlyRi) + of tkAny: + result = any() + getTok(p) + of tkAnyRune: + result = anyRune() + getTok(p) + of tkBuiltin: + case p.tok.literal + of "n": result = newLine() + of "d": result = charset({'0'..'9'}) + of "D": result = charset({'\1'..'\xff'} - {'0'..'9'}) + of "s": result = charset({' ', '\9'..'\13'}) + of "S": result = charset({'\1'..'\xff'} - {' ', '\9'..'\13'}) + of "w": result = charset({'a'..'z', 'A'..'Z', '_'}) + of "W": result = charset({'\1'..'\xff'} - {'a'..'z', 'A'..'Z', '_'}) + of "ident": result = pegs.ident + else: pegError(p, "unknown built-in: " & p.tok.literal) + getTok(p) + of tkEscaped: + result = term(p.tok.literal[0]) + getTok(p) + else: + pegError(p, "expression expected, but found: " & p.tok.literal) + getTok(p) # we must consume a token here to prevent endless loops! + while true: + case p.tok.kind + of tkOption: + result = ?result + getTok(p) + of tkStar: + result = *result + getTok(p) + of tkPlus: + result = +result + getTok(p) + else: break + +proc seqExpr(p: var TPegParser): TPeg = + result = primary(p) + while true: + case p.tok.kind + of tkAmp, tkNot, tkAt, tkStringLit, tkCharset, tkParLe, tkCurlyLe, + tkAny, tkAnyRune, tkBuiltin, tkEscaped: + result = sequence(result, primary(p)) + of tkIdentifier: + if not arrowIsNextTok(p): + result = sequence(result, primary(p)) + else: break + else: break + +proc parseExpr(p: var TPegParser): TPeg = + result = seqExpr(p) + while p.tok.kind == tkBar: + getTok(p) + result = result / seqExpr(p) + +proc parseRule(p: var TPegParser): PNonTerminal = + if p.tok.kind == tkIdentifier and arrowIsNextTok(p): + result = getNonTerminal(p, p.tok.literal) + if ntDeclared in result.flags: + pegError(p, "attempt to redefine: " & result.name) + result.line = getLine(p) + result.col = getColumn(p) + getTok(p) + eat(p, tkArrow) + result.rule = parseExpr(p) + incl(result.flags, ntDeclared) # NOW inlining may be attempted + else: + pegError(p, "rule expected, but found: " & p.tok.literal) + +proc rawParse(p: var TPegParser): TPeg = + ## parses a rule or a PEG expression + if p.tok.kind == tkBuiltin: + case p.tok.literal + of "i": + p.modifier = modIgnoreCase + getTok(p) + of "y": + p.modifier = modIgnoreStyle + getTok(p) + else: nil + if p.tok.kind == tkIdentifier and arrowIsNextTok(p): + result = parseRule(p).rule + while p.tok.kind != tkEof: + discard parseRule(p) + else: + result = parseExpr(p) + if p.tok.kind != tkEof: + pegError(p, "EOF expected, but found: " & p.tok.literal) + for i in 0..high(p.nonterms): + var nt = p.nonterms[i] + if ntDeclared notin nt.flags: + pegError(p, "undeclared identifier: " & nt.name, nt.line, nt.col) + elif ntUsed notin nt.flags and i > 0: + pegError(p, "unused rule: " & nt.name, nt.line, nt.col) + +proc parsePeg*(input: string, filename = "pattern", line = 1, col = 0): TPeg = + var p: TPegParser + init(TPegLexer(p), input, filename, line, col) + p.tok.kind = tkInvalid + p.tok.modifier = modNone + p.tok.literal = "" + p.tok.charset = {} + p.nonterms = @[] + getTok(p) + result = rawParse(p) + +proc peg*(pattern: string): TPeg = + ## constructs a TPeg object from the `pattern`. The short name has been + ## chosen to encourage its use as a raw string modifier:: + ## + ## peg"{\ident} \s* '=' \s* {.*}" + result = parsePeg(pattern, "pattern") + +when isMainModule: + assert match("(a b c)", peg"'(' @ ')'") + assert match("W_HI_Le", peg"\y 'while'") + assert(not match("W_HI_L", peg"\y 'while'")) + assert(not match("W_HI_Le", peg"\y v'while'")) + assert match("W_HI_Le", peg"y'while'") + + assert($ +digits == $peg"\d+") + assert "0158787".match(peg"\d+") + assert "ABC 0232".match(peg"\w+\s+\d+") + assert "ABC".match(peg"\d+ / \w+") + + for word in split("00232this02939is39an22example111", peg"\d+"): + writeln(stdout, word) + + assert matchLen("key", ident) == 3 + + var pattern = sequence(ident, *whitespace, term('='), *whitespace, ident) + assert matchLen("key1= cal9", pattern) == 11 + + var ws = newNonTerminal("ws", 1, 1) + ws.rule = *whitespace + + var expr = newNonTerminal("expr", 1, 1) + expr.rule = sequence(capture(ident), *sequence( + nonterminal(ws), term('+'), nonterminal(ws), nonterminal(expr))) + + var c: TMatchClosure + var s = "a+b + c +d+e+f" + assert m(s, expr.rule, 0, c) == len(s) + var a = "" + for i in 0..c.ml-1: + a.add(copy(s, c.matches[i][0], c.matches[i][1])) + assert a == "abcdef" + #echo expr.rule + + #const filename = "lib/devel/peg/grammar.txt" + #var grammar = parsePeg(newFileStream(filename, fmRead), filename) + #echo "a <- [abc]*?".match(grammar) + assert find("_____abc_______", term("abc")) == 5 + assert match("_______ana", peg"A <- 'ana' / . A") + assert match("abcs%%%", peg"A <- ..A / .A / '%'") + + if "abc" =~ peg"{'a'}'bc' 'xyz' / {\ident}": + assert matches[0] == "abc" + else: + assert false + + var g2 = peg"""S <- A B / C D + A <- 'a'+ + B <- 'b'+ + C <- 'c'+ + D <- 'd'+ + """ + assert($g2 == "((A B) / (C D))") + assert match("cccccdddddd", g2) + assert("var1=key; var2=key2".replace(peg"{\ident}'='{\ident}", "$1<-$2$2") == + "var1<-keykey; var2<-key2key2") + assert "var1=key; var2=key2".endsWith(peg"{\ident}'='{\ident}") + + if "aaaaaa" =~ peg"'aa' !. / ({'a'})+": + assert matches[0] == "a" + else: + assert false diff --git a/nimlib/pure/re.nim b/nimlib/pure/re.nim new file mode 100755 index 000000000..1328f5f1f --- /dev/null +++ b/nimlib/pure/re.nim @@ -0,0 +1,354 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2009 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## Regular expression support for Nimrod. Consider using the pegs module +## instead. + +{.compile: "tre/tre_all.c".} + +from strutils import addf + +type + TRegExDesc {.pure, final.} = object + re_nsub: int # Number of parenthesized subexpressions. + value: pointer # For internal use only. + + TRegEx* = ref TRegExDesc ## a compiled regular expression + EInvalidRegEx* = object of EInvalidValue + ## is raised if the pattern is no valid regular expression. + + TRegMatch {.pure.} = object + so, eo: cint + +const + MaxSubpatterns* = 10 + ## defines the maximum number of subpatterns that can be captured. + ## More subpatterns cannot be captured! + +proc regnexec(preg: ptr TRegExDesc, s: cstring, len, nmatch: int, + pmatch: ptr array [0..maxSubpatterns-1, TRegMatch], + eflags: cint): cint {.importc.} +proc regncomp(preg: ptr TRegExDesc, regex: cstring, n: int, + cflags: cint): cint {.importc.} +proc regfree(preg: ptr TRegExDesc) {.importc.} + +const + # POSIX regcomp() flags + REG_EXTENDED = 1 + REG_ICASE = (REG_EXTENDED shl 1) + REG_NEWLINE = (REG_ICASE shl 1) + REG_NOSUB = (REG_NEWLINE shl 1) + # Extra regcomp() flags + REG_BASIC = 0 + REG_LITERAL = (REG_NOSUB shl 1) + REG_RIGHT_ASSOC = (REG_LITERAL shl 1) + REG_UNGREEDY = (REG_RIGHT_ASSOC shl 1) + + # POSIX regexec() flags + REG_NOTBOL = 1 + REG_NOTEOL = (REG_NOTBOL shl 1) + + # Extra regexec() flags + REG_APPROX_MATCHER = (REG_NOTEOL shl 1) + REG_BACKTRACKING_MATCHER = (REG_APPROX_MATCHER shl 1) + + ErrorMessages = [ + "No error", + "No match", + "Invalid regexp", + "Unknown collating element", + "Unknown character class name", + "Trailing backslash", + "Invalid back reference", + "Missing ']'", + "Missing ')'", + "Missing '}'", + "Invalid contents of {}", + "Invalid character range", + "Out of memory", + "Invalid use of repetition operators" + ] + +proc finalizeRegEx(x: TRegEx) = regfree(addr(x^)) + +proc re*(s: string): TRegEx = + ## Constructor of regular expressions. Note that Nimrod's + ## extended raw string literals supports this syntax ``re"[abc]"`` as + ## a short form for ``re(r"[abc]")``. + new(result, finalizeRegEx) + var err = int(regncomp(addr(result^), s, s.len, + cint(REG_EXTENDED or REG_NEWLINE))) + if err != 0: + var e: ref EInvalidRegEx + new(e) + e.msg = ErrorMessages[err] + raise e + +proc xre*(pattern: string): TRegEx = + ## deletes whitespace from a pattern that is not escaped or in a character + ## class. Then it constructs a regular expresion object via `re`. + ## This is modelled after Perl's ``/x`` modifier. + var p = "" + var i = 0 + while i < pattern.len: + case pattern[i] + of ' ', '\t': + inc i + of '\\': + add p, '\\' + add p, pattern[i+1] + inc i, 2 + of '[': + while pattern[i] != ']' and pattern[i] != '\0': + add p, pattern[i] + inc i + else: + add p, pattern[i] + inc i + result = re(p) + +proc rawmatch(s: string, pattern: TRegEx, matches: var openarray[string], + start: int): tuple[first, last: int] = + var + rawMatches: array [0..maxSubpatterns-1, TRegMatch] + cs = cstring(s) + res = int(regnexec(addr(pattern^), cast[cstring](addr(cs[start])), + s.len-start, maxSubpatterns, addr(rawMatches), cint(0))) + if res == 0: + for i in 0..min(matches.len, int(pattern.re_nsub))-1: + var a = int(rawMatches[i].so) + var b = int(rawMatches[i].eo) + echo "a: ", a, " b: ", b + if a >= 0 and b >= 0: + matches[i] = copy(s, a+start, b - 1 + start) + else: + matches[i] = "" + return (int(rawMatches[0].so), int(rawMatches[0].eo)-1) + return (-1, -1) + +proc match*(s: string, pattern: TRegEx, matches: var openarray[string], + start = 0): bool = + ## returns ``true`` if ``s[start..]`` matches the ``pattern`` and + ## the captured substrings in the array ``matches``. If it does not + ## match, nothing is written into ``matches`` and ``false`` is + ## returned. + result = rawmatch(s, pattern, matches, start).first == 0 + +proc match*(s: string, pattern: TRegEx, start: int = 0): bool = + ## returns ``true`` if ``s`` matches the ``pattern`` beginning + ## from ``start``. + var matches: array [0..0, string] + result = rawmatch(s, pattern, matches, start).first == 0 + +proc matchLen*(s: string, pattern: TRegEx, matches: var openarray[string], + start = 0): int = + ## the same as ``match``, but it returns the length of the match, + ## if there is no match, -1 is returned. Note that a match length + ## of zero can happen. + var (a, b) = rawmatch(s, pattern, matches, start) + result = a - b + 1 + +proc matchLen*(s: string, pattern: TRegEx, start = 0): int = + ## the same as ``match``, but it returns the length of the match, + ## if there is no match, -1 is returned. Note that a match length + ## of zero can happen. + var matches: array [0..0, string] + var (a, b) = rawmatch(s, pattern, matches, start) + result = a - b + 1 + +proc find*(s: string, pattern: TRegEx, matches: var openarray[string], + start = 0): int = + ## returns ``true`` if ``pattern`` occurs in ``s`` and the captured + ## substrings in the array ``matches``. If it does not match, nothing + ## is written into ``matches``. + result = rawmatch(s, pattern, matches, start).first + if result >= 0: inc(result, start) + +proc find*(s: string, pattern: TRegEx, start = 0): int = + ## returns ``true`` if ``pattern`` occurs in ``s``. + var matches: array [0..0, string] + result = rawmatch(s, pattern, matches, start).first + if result >= 0: inc(result, start) + +template `=~`*(s: string, pattern: TRegEx): expr = + ## This calls ``match`` with an implicit declared ``matches`` array that + ## can be used in the scope of the ``=~`` call: + ## + ## .. code-block:: nimrod + ## + ## if line =~ r"\s*(\w+)\s*\=\s*(\w+)": + ## # matches a key=value pair: + ## echo("Key: ", matches[1]) + ## echo("Value: ", matches[2]) + ## elif line =~ r"\s*(\#.*)": + ## # matches a comment + ## # note that the implicit ``matches`` array is different from the + ## # ``matches`` array of the first branch + ## echo("comment: ", matches[1]) + ## else: + ## echo("syntax error") + ## + when not definedInScope(matches): + var matches: array[0..maxSubPatterns-1, string] + match(s, pattern, matches) + +# ------------------------- more string handling ------------------------------ + +proc contains*(s: string, pattern: TRegEx, start = 0): bool = + ## same as ``find(s, pattern, start) >= 0`` + return find(s, pattern, start) >= 0 + +proc contains*(s: string, pattern: TRegEx, matches: var openArray[string], + start = 0): bool = + ## same as ``find(s, pattern, matches, start) >= 0`` + return find(s, pattern, matches, start) >= 0 + +proc startsWith*(s: string, prefix: TRegEx): bool = + ## returns true if `s` starts with the pattern `prefix` + result = matchLen(s, prefix) >= 0 + +proc endsWith*(s: string, suffix: TRegEx): bool = + ## returns true if `s` ends with the pattern `prefix` + for i in 0 .. s.len-1: + if matchLen(s, suffix, i) == s.len - i: return true + +proc replace*(s: string, sub: TRegEx, by: string): string = + ## Replaces `sub` in `s` by the string `by`. Captures can be accessed in `by` + ## with the notation ``$i`` and ``$#`` (see strutils.`%`). Examples: + ## + ## .. code-block:: nimrod + ## "var1=key; var2=key2".replace(re"(\w+)'='(\w+)", "$1<-$2$2") + ## + ## Results in: + ## + ## .. code-block:: nimrod + ## + ## "var1<-keykey; val2<-key2key2" + result = "" + var i = 0 + var caps: array[0..maxSubpatterns-1, string] + while i < s.len: + var x = matchLen(s, sub, caps, i) + if x <= 0: + add(result, s[i]) + inc(i) + else: + addf(result, by, caps) + inc(i, x) + # copy the rest: + add(result, copy(s, i)) + +proc parallelReplace*(s: string, subs: openArray[ + tuple[pattern: TRegEx, repl: string]]): string = + ## Returns a modified copy of `s` with the substitutions in `subs` + ## applied in parallel. + result = "" + var i = 0 + var caps: array[0..maxSubpatterns-1, string] + while i < s.len: + block searchSubs: + for j in 0..high(subs): + var x = matchLen(s, subs[j][0], caps, i) + if x > 0: + addf(result, subs[j][1], caps) + inc(i, x) + break searchSubs + add(result, s[i]) + inc(i) + # copy the rest: + add(result, copy(s, i)) + +proc transformFile*(infile, outfile: string, + subs: openArray[tuple[pattern: TRegEx, repl: string]]) = + ## reads in the file `infile`, performs a parallel replacement (calls + ## `parallelReplace`) and writes back to `outfile`. Calls ``quit`` if an + ## error occurs. This is supposed to be used for quick scripting. + var x = readFile(infile) + if not isNil(x): + var f: TFile + if open(f, outfile, fmWrite): + write(f, x.parallelReplace(subs)) + close(f) + else: + quit("cannot open for writing: " & outfile) + else: + quit("cannot open for reading: " & infile) + +iterator split*(s: string, sep: TRegEx): string = + ## Splits the string `s` into substrings. + ## + ## Substrings are separated by the regular expression `sep`. + ## Examples: + ## + ## .. code-block:: nimrod + ## for word in split("00232this02939is39an22example111", re"\d+"): + ## writeln(stdout, word) + ## + ## Results in: + ## + ## .. code-block:: nimrod + ## "this" + ## "is" + ## "an" + ## "example" + ## + var + first = 0 + last = 0 + while last < len(s): + var x = matchLen(s, sep, last) + if x > 0: inc(last, x) + first = last + while last < len(s): + inc(last) + x = matchLen(s, sep, last) + if x > 0: break + if first < last: + yield copy(s, first, last-1) + +proc split*(s: string, sep: TRegEx): seq[string] = + ## Splits the string `s` into substrings. + accumulateResult(split(s, sep)) + +const ## common regular expressions + reIdentifier* = r"\b[a-zA-Z_]+[a-zA-Z_0-9]*\b" ## describes an identifier + reNatural* = r"\b\d+\b" ## describes a natural number + reInteger* = r"\b[-+]?\d+\b" ## describes an integer + reHex* = r"\b0[xX][0-9a-fA-F]+\b" ## describes a hexadecimal number + reBinary* = r"\b0[bB][01]+\b" ## describes a binary number (example: 0b11101) + reOctal* = r"\b0[oO][0-7]+\b" ## describes an octal number (example: 0o777) + reFloat* = r"\b[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?\b" + ## describes a floating point number + reEmail* = r"\b[a-zA-Z0-9!#$%&'*+/=?^_`{|}~\-]+(?:\. &" & + r"[a-zA-Z0-9!#$%&'*+/=?^_`{|}~-]+)" & + r"*@(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])?\.)+" & + r"(?:[a-zA-Z]{2}|com|org|" & + r"net|gov|mil|biz|info|mobi|name|aero|jobs|museum)\b" + ## describes a common email address + reURL* = r"\b(http(s)?|ftp|gopher|telnet|file|notes|ms\-help):" & + r"((//)|(\\\\))+[\w\d:#@%/;$()~_?\+\-\=\\\.\&]*\b" + ## describes an URL + +when isMainModule: + echo matchLen("key", re"[a-zA-Z_][a-zA-Z_0-9]*") + + var pattern = re"[a-zA-Z_][a-zA-Z_0-9]*\s*=\s*[a-zA-Z_][a-zA-Z_0-9]*" + echo matchLen("key1= cal9", pattern, 2) + + echo find("_____abc_______", re("abc"), 3) + #echo "var1=key; var2=key2".replace(peg"{\ident}'='{\ident}", "$1<-$2$2") + #echo "var1=key; var2=key2".endsWith(peg"{\ident}'='{\ident}") + + if "abc" =~ re"(a)bc xyz|([a-z]+)": + echo matches[0] + else: + echo "BUG" + +# for word in split("00232this02939is39an22example111", peg"\d+"): +# writeln(stdout, word) diff --git a/nimlib/pure/regexprs.nim b/nimlib/pure/regexprs.nim new file mode 100755 index 000000000..cff3152cf --- /dev/null +++ b/nimlib/pure/regexprs.nim @@ -0,0 +1,177 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2009 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## Regular expression support for Nimrod. +## Currently this module is implemented by providing a wrapper around the +## `PRCE (Perl-Compatible Regular Expressions) <http://www.pcre.org>`_ +## C library. This means that your application will depend on the PRCE +## library's licence when using this module, which should not be a problem +## though. +## PRCE's licence follows: +## +## .. include:: ../doc/regexprs.txt +## + +# This is not just a convenient wrapper for the pcre library; the +# API will stay the same if the implementation should change. + +import + pcre, strutils + +type + EInvalidRegEx* = object of EInvalidValue + ## is raised if the pattern is no valid regular expression. + +const + MaxSubpatterns* = 10 + ## defines the maximum number of subpatterns that can be captured. + ## More subpatterns cannot be captured! + +proc match*(s, pattern: string, matches: var openarray[string], + start: int = 0): bool + ## returns ``true`` if ``s[start..]`` matches the ``pattern`` and + ## the captured substrings in the array ``matches``. If it does not + ## match, nothing is written into ``matches`` and ``false`` is + ## returned. + +proc match*(s, pattern: string, start: int = 0): bool + ## returns ``true`` if ``s`` matches the ``pattern`` beginning from ``start``. + +proc matchLen*(s, pattern: string, matches: var openarray[string], + start: int = 0): int + ## the same as ``match``, but it returns the length of the match, + ## if there is no match, -1 is returned. Note that a match length + ## of zero can happen. + +proc find*(s, pattern: string, matches: var openarray[string], + start: int = 0): bool + ## returns ``true`` if ``pattern`` occurs in ``s`` and the captured + ## substrings in the array ``matches``. If it does not match, nothing + ## is written into ``matches``. + +proc find*(s, pattern: string, start: int = 0): bool + ## returns ``true`` if ``pattern`` occurs in ``s``. + +proc rawCompile(pattern: string, flags: cint): PPcre = + var + msg: CString + offset: int + com = pcreCompile(pattern, flags, addr(msg), addr(offset), nil) + if com == nil: + var e: ref EInvalidRegEx + new(e) + e.msg = $msg & "\n" & pattern & "\n" & repeatChar(offset) & "^\n" + raise e + return com + +proc matchOrFind(s: string, pattern: PPcre, matches: var openarray[string], + start: cint): cint = + var + rawMatches: array [0..maxSubpatterns * 3 - 1, cint] + res = int(pcreExec(pattern, nil, s, len(s), start, 0, + cast[ptr cint](addr(rawMatches)), maxSubpatterns * 3)) + dealloc(pattern) + if res < 0: return res + for i in 0..res-1: + var + a = rawMatches[i * 2] + b = rawMatches[i * 2 + 1] + if a >= 0'i32: matches[i] = copy(s, a, int(b)-1) + else: matches[i] = "" + return res + +proc matchOrFind(s: string, pattern: PPcre, start: cint): cint = + var + rawMatches: array [0..maxSubpatterns * 3 - 1, cint] + res = pcreExec(pattern, nil, s, len(s), start, 0, + cast[ptr cint](addr(rawMatches)), maxSubpatterns * 3) + dealloc(pattern) + return res + +proc match(s, pattern: string, matches: var openarray[string], + start: int = 0): bool = + return matchOrFind(s, rawCompile(pattern, PCRE_ANCHORED), + matches, start) >= 0'i32 + +proc matchLen(s, pattern: string, matches: var openarray[string], + start: int = 0): int = + return matchOrFind(s, rawCompile(pattern, PCRE_ANCHORED), matches, start) + +proc find(s, pattern: string, matches: var openarray[string], + start: int = 0): bool = + return matchOrFind(s, rawCompile(pattern, PCRE_MULTILINE), + matches, start) >= 0'i32 + +proc match(s, pattern: string, start: int = 0): bool = + return matchOrFind(s, rawCompile(pattern, PCRE_ANCHORED), start) >= 0'i32 + +proc find(s, pattern: string, start: int = 0): bool = + return matchOrFind(s, rawCompile(pattern, PCRE_MULTILINE), start) >= 0'i32 + +template `=~` *(s, pattern: expr): expr = + ## This calls ``match`` with an implicit declared ``matches`` array that + ## can be used in the scope of the ``=~`` call: + ## + ## .. code-block:: nimrod + ## + ## if line =~ r"\s*(\w+)\s*\=\s*(\w+)": + ## # matches a key=value pair: + ## echo("Key: ", matches[1]) + ## echo("Value: ", matches[2]) + ## elif line =~ r"\s*(\#.*)": + ## # matches a comment + ## # note that the implicit ``matches`` array is different from the + ## # ``matches`` array of the first branch + ## echo("comment: ", matches[1]) + ## else: + ## echo("syntax error") + ## + when not definedInScope(matches): + var matches: array[0..maxSubPatterns-1, string] + match(s, pattern, matches) + + +const ## common regular expressions + reIdentifier* = r"\b[a-zA-Z_][a-zA-Z_0-9]*\b" ## describes an identifier + reNatural* = r"\b\d+\b" ## describes a natural number + reInteger* = r"\b[-+]?\d+\b" ## describes an integer + reHex* = r"\b0[xX][0-9a-fA-F]+\b" ## describes a hexadecimal number + reBinary* = r"\b0[bB][01]+\b" ## describes a binary number (example: 0b11101) + reOctal* = r"\b0[oO][0-7]+\b" ## describes an octal number (example: 0o777) + reFloat* = r"\b[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?\b" + ## describes a floating point number + reEmail* = r"\b[a-zA-Z0-9!#$%&'*+/=?^_`{|}~\-]+(?:\.[a-zA-Z0-9!#$%&'*+/=?^_`{|}~-]+)" & + r"*@(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])?\.)+(?:[a-zA-Z]{2}|com|org|" & + r"net|gov|mil|biz|info|mobi|name|aero|jobs|museum)\b" + ## describes a common email address + reURL* = r"\b(http(s)?|ftp|gopher|telnet|file|notes|ms\-help):" & + r"((//)|(\\\\))+[\w\d:#@%/;$()~_?\+\-\=\\\.\&]*\b" + ## describes an URL + +proc verbose*(pattern: string): string {.noSideEffect.} = + ## deletes whitespace from a pattern that is not escaped or in a character + ## class. This is modelled after Perl's ``/x`` modifier. + result = "" + var i = 0 + while i < pattern.len: + case pattern[i] + of ' ', '\t': + inc i + of '\\': + add result, '\\' + add result, pattern[i+1] + inc i, 2 + of '[': + while pattern[i] != ']' and pattern[i] != '\0': + add result, pattern[i] + inc i + else: + add result, pattern[i] + inc i + diff --git a/nimlib/pure/streams.nim b/nimlib/pure/streams.nim new file mode 100755 index 000000000..f4d2911fc --- /dev/null +++ b/nimlib/pure/streams.nim @@ -0,0 +1,245 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2009 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## This module provides a stream interface and two implementations thereof: +## the `PFileStream` and the `PStringStream` which implement the stream +## interface for Nimrod file objects (`TFile`) and strings. Other modules +## may provide other implementations for this standard stream interface. + +proc newEIO(msg: string): ref EIO = + new(result) + result.msg = msg + +type + PStream* = ref TStream + TStream* = object of TObject ## Stream interface that supports + ## writing or reading. + close*: proc (s: PStream) + atEnd*: proc (s: PStream): bool + setPosition*: proc (s: PStream, pos: int) + getPosition*: proc (s: PStream): int + readData*: proc (s: PStream, buffer: pointer, bufLen: int): int + writeData*: proc (s: PStream, buffer: pointer, bufLen: int) + +proc write*[T](s: PStream, x: T) = + ## generic write procedure. Writes `x` to the stream `s`. Implementation: + ## + ## .. code-block:: Nimrod + ## + ## s.writeData(s, addr(x), sizeof(x)) + var x = x + s.writeData(s, addr(x), sizeof(x)) + +proc write*(s: PStream, x: string) = + ## writes the string `x` to the the stream `s`. No length field or + ## terminating zero is written. + s.writeData(s, cstring(x), x.len) + +proc read[T](s: PStream, result: var T) = + ## generic read procedure. Reads `result` from the stream `s`. + if s.readData(s, addr(result), sizeof(T)) != sizeof(T): + raise newEIO("cannot read from stream") + +proc readChar*(s: PStream): char = + ## reads a char from the stream `s`. Raises `EIO` if an error occured. + ## Returns '\0' as an EOF marker. + discard s.readData(s, addr(result), sizeof(result)) + +proc readBool*(s: PStream): bool = + ## reads a bool from the stream `s`. Raises `EIO` if an error occured. + read(s, result) + +proc readInt8*(s: PStream): int8 = + ## reads an int8 from the stream `s`. Raises `EIO` if an error occured. + read(s, result) + +proc readInt16*(s: PStream): int16 = + ## reads an int16 from the stream `s`. Raises `EIO` if an error occured. + read(s, result) + +proc readInt32*(s: PStream): int32 = + ## reads an int32 from the stream `s`. Raises `EIO` if an error occured. + read(s, result) + +proc readInt64*(s: PStream): int64 = + ## reads an int64 from the stream `s`. Raises `EIO` if an error occured. + read(s, result) + +proc readFloat32*(s: PStream): float32 = + ## reads a float32 from the stream `s`. Raises `EIO` if an error occured. + read(s, result) + +proc readFloat64*(s: PStream): float64 = + ## reads a float64 from the stream `s`. Raises `EIO` if an error occured. + read(s, result) + +proc readStr*(s: PStream, length: int): string = + ## reads a string of length `length` from the stream `s`. Raises `EIO` if + ## an error occured. + result = newString(length) + var L = s.readData(s, addr(result[0]), length) + if L != length: setLen(result, L) + +proc readLine*(s: PStream): string = + ## Reads a line from a stream `s`. Note: This is not very efficient. Raises + ## `EIO` if an error occured. + result = "" + while not s.atEnd(s): + var c = readChar(s) + if c == '\c': + c = readChar(s) + break + elif c == '\L' or c == '\0': break + result.add(c) + +type + PStringStream* = ref TStringStream ## a stream that encapsulates a string + TStringStream* = object of TStream + data*: string + pos: int + +proc ssAtEnd(s: PStringStream): bool = + return s.pos >= s.data.len + +proc ssSetPosition(s: PStringStream, pos: int) = + s.pos = min(pos, s.data.len-1) + +proc ssGetPosition(s: PStringStream): int = + return s.pos + +proc ssReadData(s: PStringStream, buffer: pointer, bufLen: int): int = + result = min(bufLen, s.data.len - s.pos) + if result > 0: + copyMem(buffer, addr(s.data[s.pos]), result) + inc(s.pos, result) + +proc ssWriteData(s: PStringStream, buffer: pointer, bufLen: int) = + if bufLen > 0: + setLen(s.data, s.data.len + bufLen) + copyMem(addr(s.data[s.pos]), buffer, bufLen) + inc(s.pos, bufLen) + +proc ssClose(s: PStringStream) = + s.data = nil + +proc newStringStream*(s: string = ""): PStringStream = + ## creates a new stream from the string `s`. + new(result) + result.data = s + result.pos = 0 + result.close = ssClose + result.atEnd = ssAtEnd + result.setPosition = ssSetPosition + result.getPosition = ssGetPosition + result.readData = ssReadData + result.writeData = ssWriteData + +type + PFileStream* = ref TFileStream ## a stream that encapsulates a `TFile` + TFileStream* = object of TStream + f: TFile + +proc fsClose(s: PFileStream) = close(s.f) +proc fsAtEnd(s: PFileStream): bool = return EndOfFile(s.f) +proc fsSetPosition(s: PFileStream, pos: int) = setFilePos(s.f, pos) +proc fsGetPosition(s: PFileStream): int = return int(getFilePos(s.f)) + +proc fsReadData(s: PFileStream, buffer: pointer, bufLen: int): int = + result = readBuffer(s.f, buffer, bufLen) + +proc fsWriteData(s: PFileStream, buffer: pointer, bufLen: int) = + if writeBuffer(s.f, buffer, bufLen) != bufLen: + raise newEIO("cannot write to stream") + +proc newFileStream*(f: TFile): PFileStream = + ## creates a new stream from the file `f`. + new(result) + result.f = f + result.close = fsClose + result.atEnd = fsAtEnd + result.setPosition = fsSetPosition + result.getPosition = fsGetPosition + result.readData = fsReadData + result.writeData = fsWriteData + +proc newFileStream*(filename: string, mode: TFileMode): PFileStream = + ## creates a new stream from the file named `filename` with the mode `mode`. + ## If the file cannot be opened, nil is returned. + var f: TFile + if Open(f, filename, mode): result = newFileStream(f) + + +when true: + nil +else: + type + TFileHandle* = cint ## Operating system file handle + PFileHandleStream* = ref TFileHandleStream + TFileHandleStream* = object of TStream + handle*: TFileHandle + pos: int + + proc newEOS(msg: string): ref EOS = + new(result) + result.msg = msg + + proc hsGetPosition(s: PFileHandleStream): int = + return s.pos + + when defined(windows): + # do not import windows as this increases compile times: + nil + else: + import posix + + proc hsSetPosition(s: PFileHandleStream, pos: int) = + discard lseek(s.handle, pos, SEEK_SET) + + proc hsClose(s: PFileHandleStream) = discard close(s.handle) + proc hsAtEnd(s: PFileHandleStream): bool = + var pos = hsGetPosition(s) + var theEnd = lseek(s.handle, 0, SEEK_END) + result = pos >= theEnd + hsSetPosition(s, pos) # set position back + + proc hsReadData(s: PFileHandleStream, buffer: pointer, bufLen: int): int = + result = posix.read(s.handle, buffer, bufLen) + inc(s.pos, result) + + proc hsWriteData(s: PFileHandleStream, buffer: pointer, bufLen: int) = + if posix.write(s.handle, buffer, bufLen) != bufLen: + raise newEIO("cannot write to stream") + inc(s.pos, bufLen) + + proc newFileHandleStream*(handle: TFileHandle): PFileHandleStream = + new(result) + result.handle = handle + result.pos = 0 + result.close = hsClose + result.atEnd = hsAtEnd + result.setPosition = hsSetPosition + result.getPosition = hsGetPosition + result.readData = hsReadData + result.writeData = hsWriteData + + proc newFileHandleStream*(filename: string, + mode: TFileMode): PFileHandleStream = + when defined(windows): + nil + else: + var flags: cint + case mode + of fmRead: flags = posix.O_RDONLY + of fmWrite: flags = O_WRONLY or int(O_CREAT) + of fmReadWrite: flags = O_RDWR or int(O_CREAT) + of fmReadWriteExisting: flags = O_RDWR + of fmAppend: flags = O_WRONLY or int(O_CREAT) or O_APPEND + var handle = open(filename, flags) + if handle < 0: raise newEOS("posix.open() call failed") + result = newFileHandleStream(handle) diff --git a/nimlib/pure/strtabs.nim b/nimlib/pure/strtabs.nim new file mode 100755 index 000000000..10cd0b933 --- /dev/null +++ b/nimlib/pure/strtabs.nim @@ -0,0 +1,198 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2008 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## The ``strtabs`` module implements an efficient hash table that is a mapping +## from strings to strings. Supports a case-sensitive, case-insensitive and +## style-insensitive mode. An efficient string substitution operator ``%`` +## for the string table is also provided. + +import + os, hashes, strutils + +type + TStringTableMode* = enum ## describes the tables operation mode + modeCaseSensitive, ## the table is case sensitive + modeCaseInsensitive, ## the table is case insensitive + modeStyleInsensitive ## the table is style insensitive + TKeyValuePair = tuple[key, val: string] + TKeyValuePairSeq = seq[TKeyValuePair] + TStringTable* = object of TObject + counter: int + data: TKeyValuePairSeq + mode: TStringTableMode + + PStringTable* = ref TStringTable ## use this type to declare string tables + +proc newStringTable*(keyValuePairs: openarray[string], + mode: TStringTableMode = modeCaseSensitive): PStringTable + ## creates a new string table with given key value pairs. + ## Example:: + ## var mytab = newStringTable("key1", "val1", "key2", "val2", + ## modeCaseInsensitive) + +proc newStringTable*(mode: TStringTableMode = modeCaseSensitive): PStringTable + ## creates a new string table that is empty. + +proc `[]=`*(t: PStringTable, key, val: string) + ## puts a (key, value)-pair into `t`. + +proc `[]`*(t: PStringTable, key: string): string + ## retrieves the value at ``t[key]``. If `key` is not in `t`, "" is returned + ## and no exception is raised. One can check with ``hasKey`` whether the key + ## exists. + +proc hasKey*(t: PStringTable, key: string): bool + ## returns true iff `key` is in the table `t`. + +proc len*(t: PStringTable): int = + ## returns the number of keys in `t`. + result = t.counter + +iterator pairs*(t: PStringTable): tuple[key, value: string] = + ## iterates over any (key, value) pair in the table `t`. + for h in 0..high(t.data): + if not isNil(t.data[h].key): + yield (t.data[h].key, t.data[h].val) + +type + TFormatFlag* = enum ## flags for the `%` operator + useEnvironment, ## use environment variable if the ``$key`` + ## is not found in the table + useEmpty, ## use the empty string as a default, thus it + ## won't throw an exception if ``$key`` is not + ## in the table + useKey ## do not replace ``$key`` if it is not found + ## in the table (or in the environment) + +proc `%`*(f: string, t: PStringTable, flags: set[TFormatFlag] = {}): string + ## The `%` operator for string tables. + +# implementation + +const + growthFactor = 2 + startSize = 64 + +proc newStringTable(mode: TStringTableMode = modeCaseSensitive): PStringTable = + new(result) + result.mode = mode + result.counter = 0 + newSeq(result.data, startSize) + +proc newStringTable(keyValuePairs: openarray[string], + mode: TStringTableMode = modeCaseSensitive): PStringTable = + result = newStringTable(mode) + var i = 0 + while i < high(keyValuePairs): + result[keyValuePairs[i]] = keyValuePairs[i + 1] + inc(i, 2) + +proc myhash(t: PStringTable, key: string): THash = + case t.mode + of modeCaseSensitive: result = hashes.hash(key) + of modeCaseInsensitive: result = hashes.hashIgnoreCase(key) + of modeStyleInsensitive: result = hashes.hashIgnoreStyle(key) + +proc myCmp(t: PStringTable, a, b: string): bool = + case t.mode + of modeCaseSensitive: result = cmp(a, b) == 0 + of modeCaseInsensitive: result = cmpIgnoreCase(a, b) == 0 + of modeStyleInsensitive: result = cmpIgnoreStyle(a, b) == 0 + +proc mustRehash(length, counter: int): bool = + assert(length > counter) + result = (length * 2 < counter * 3) or (length - counter < 4) + +proc nextTry(h, maxHash: THash): THash = + result = ((5 * h) + 1) and maxHash + +proc RawGet(t: PStringTable, key: string): int = + var h: THash + h = myhash(t, key) and high(t.data) # start with real hash value + while not isNil(t.data[h].key): + if mycmp(t, t.data[h].key, key): + return h + h = nextTry(h, high(t.data)) + result = - 1 + +proc `[]`(t: PStringTable, key: string): string = + var index: int + index = RawGet(t, key) + if index >= 0: result = t.data[index].val + else: result = "" + +proc hasKey(t: PStringTable, key: string): bool = + result = rawGet(t, key) >= 0 + +proc RawInsert(t: PStringTable, data: var TKeyValuePairSeq, key, val: string) = + var h: THash + h = myhash(t, key) and high(data) + while not isNil(data[h].key): + h = nextTry(h, high(data)) + data[h].key = key + data[h].val = val + +proc Enlarge(t: PStringTable) = + var n: TKeyValuePairSeq + newSeq(n, len(t.data) * growthFactor) + for i in countup(0, high(t.data)): + if not isNil(t.data[i].key): RawInsert(t, n, t.data[i].key, t.data[i].val) + swap(t.data, n) + +proc `[]=`(t: PStringTable, key, val: string) = + var index = RawGet(t, key) + if index >= 0: + t.data[index].val = val + else: + if mustRehash(len(t.data), t.counter): Enlarge(t) + RawInsert(t, t.data, key, val) + inc(t.counter) + +proc RaiseFormatException(s: string) = + var e: ref EInvalidValue + new(e) + e.msg = "format string: key not found: " & s + raise e + +proc getValue(t: PStringTable, flags: set[TFormatFlag], key: string): string = + if hasKey(t, key): return t[key] + if useEnvironment in flags: result = os.getEnv(key) + else: result = "" + if result.len == 0: + if useKey in flags: result = '$' & key + elif not (useEmpty in flags): raiseFormatException(key) + +proc `%`(f: string, t: PStringTable, flags: set[TFormatFlag] = {}): string = + const + PatternChars = {'a'..'z', 'A'..'Z', '0'..'9', '_', '\x80'..'\xFF'} + result = "" + var i = 0 + while i < len(f): + if f[i] == '$': + case f[i+1] + of '$': + add(result, '$') + inc(i, 2) + of '{': + var j = i + 1 + while j < f.len and f[j] != '}': inc(j) + add(result, getValue(t, flags, copy(f, i+2, j-1))) + i = j + 1 + of 'a'..'z', 'A'..'Z', '\x80'..'\xFF', '_': + var j = i + 1 + while j < f.len and f[j] in PatternChars: inc(j) + add(result, getValue(t, flags, copy(f, i+1, j-1))) + i = j + else: + add(result, f[i]) + inc(i) + else: + add(result, f[i]) + inc(i) + diff --git a/nimlib/pure/strutils.nim b/nimlib/pure/strutils.nim new file mode 100755 index 000000000..d7fd69f61 --- /dev/null +++ b/nimlib/pure/strutils.nim @@ -0,0 +1,973 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2009 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## This module contains various string utility routines. +## See the module `regexprs` for regular expression support. + +{.deadCodeElim: on.} + +{.push debugger:off .} # the user does not want to trace a part + # of the standard library! + +# copied from excpt.nim, because I don't want to make this template public +template newException(exceptn, message: expr): expr = + block: # open a new scope + var + e: ref exceptn + new(e) + e.msg = message + e + + +type + TCharSet* = set[char] # for compatibility with Nim + +const + Whitespace* = {' ', '\t', '\v', '\r', '\l', '\f'} + ## All the characters that count as whitespace. + + Letters* = {'A'..'Z', 'a'..'z'} + ## the set of letters + + Digits* = {'0'..'9'} + ## the set of digits + + IdentChars* = {'a'..'z', 'A'..'Z', '0'..'9', '_'} + ## the set of characters an identifier can consist of + + IdentStartChars* = {'a'..'z', 'A'..'Z', '_'} + ## the set of characters an identifier can start with + +proc `%` *(formatstr: string, a: openarray[string]): string {.noSideEffect.} + ## The `substitution`:idx: operator performs string substitutions in + ## `formatstr` and returns a modified `formatstr`. This is often called + ## `string interpolation`:idx:. + ## + ## This is best explained by an example: + ## + ## .. code-block:: nimrod + ## "$1 eats $2." % ["The cat", "fish"] + ## + ## Results in: + ## + ## .. code-block:: nimrod + ## "The cat eats fish." + ## + ## The substitution variables (the thing after the ``$``) are enumerated + ## from 1 to ``a.len``. + ## The notation ``$#`` can be used to refer to the next substitution variable: + ## + ## .. code-block:: nimrod + ## "$# eats $#." % ["The cat", "fish"] + ## + ## Substitution variables can also be words (that is + ## ``[A-Za-z_]+[A-Za-z0-9_]*``) in which case the arguments in `a` with even + ## indices are keys and with odd indices are the corresponding values. + ## An example: + ## + ## .. code-block:: nimrod + ## "$animal eats $food." % ["animal", "The cat", "food", "fish"] + ## + ## Results in: + ## + ## .. code-block:: nimrod + ## "The cat eats fish." + ## + ## The variables are compared with `cmpIgnoreStyle`. `EInvalidValue` is + ## raised if an ill-formed format string has been passed to the `%` operator. + +proc `%` *(formatstr, a: string): string {.noSideEffect.} + ## This is the same as ``formatstr % [a]``. + +proc addf*(s: var string, formatstr: string, a: openarray[string]) + ## The same as ``add(s, formatstr % a)``, but more efficient. + +proc strip*(s: string, leading = true, trailing = true): string {.noSideEffect.} + ## Strips whitespace from `s` and returns the resulting string. + ## If `leading` is true, leading whitespace is stripped. + ## If `trailing` is true, trailing whitespace is stripped. + +proc toLower*(s: string): string {.noSideEffect, procvar.} + ## Converts `s` into lower case. This works only for the letters A-Z. + ## See `unicode.toLower` for a version that works for any Unicode character. + +proc toLower*(c: Char): Char {.noSideEffect, procvar.} + ## Converts `c` into lower case. This works only for the letters A-Z. + ## See `unicode.toLower` for a version that works for any Unicode character. + +proc toUpper*(s: string): string {.noSideEffect, procvar.} + ## Converts `s` into upper case. This works only for the letters a-z. + ## See `unicode.toUpper` for a version that works for any Unicode character. + +proc toUpper*(c: Char): Char {.noSideEffect, procvar.} + ## Converts `c` into upper case. This works only for the letters a-z. + ## See `unicode.toUpper` for a version that works for any Unicode character. + +proc capitalize*(s: string): string {.noSideEffect, procvar.} + ## Converts the first character of `s` into upper case. + ## This works only for the letters a-z. + +proc normalize*(s: string): string {.noSideEffect, procvar.} + ## Normalizes the string `s`. That means to convert it to lower case and + ## remove any '_'. This is needed for Nimrod identifiers for example. + +proc find*(s, sub: string, start: int = 0): int {.noSideEffect.} + ## Searches for `sub` in `s` starting at position `start`. Searching is + ## case-sensitive. If `sub` is not in `s`, -1 is returned. + +proc find*(s: string, sub: char, start: int = 0): int {.noSideEffect.} + ## Searches for `sub` in `s` starting at position `start`. Searching is + ## case-sensitive. If `sub` is not in `s`, -1 is returned. + +proc find*(s: string, chars: set[char], start: int = 0): int {.noSideEffect.} + ## Searches for `chars` in `s` starting at position `start`. If `s` contains + ## none of the characters in `chars`, -1 is returned. + +proc replaceStr*(s, sub, by: string): string {.noSideEffect, deprecated.} + ## Replaces `sub` in `s` by the string `by`. + ## **Deprecated since version 0.8.0**: Use `replace` instead. + +proc replaceStr*(s: string, sub, by: char): string {.noSideEffect, deprecated.} + ## optimized version for characters. + ## **Deprecated since version 0.8.0**: Use `replace` instead. + +proc deleteStr*(s: var string, first, last: int) {.deprecated.} + ## Deletes in `s` the characters at position `first`..`last`. This modifies + ## `s` itself, it does not return a copy. + ## **Deprecated since version 0.8.0**: Use `delete` instead. + +proc toOctal*(c: char): string + ## Converts a character `c` to its octal representation. The resulting + ## string may not have a leading zero. Its length is always exactly 3. + +iterator split*(s: string, seps: set[char] = Whitespace): string = + ## Splits the string `s` into substrings. + ## + ## Substrings are separated by a substring containing only `seps`. + ## Examples: + ## + ## .. code-block:: nimrod + ## for word in split(" this is an example "): + ## writeln(stdout, word) + ## + ## Results in: + ## + ## .. code-block:: nimrod + ## "this" + ## "is" + ## "an" + ## "example" + ## + ## for word in split(";;this;is;an;;example;;;", {';'}): + ## writeln(stdout, word) + ## + ## produces the same output. + var last = 0 + assert(not ('\0' in seps)) + while last < len(s): + while s[last] in seps: inc(last) + var first = last + while last < len(s) and s[last] not_in seps: inc(last) # BUGFIX! + if first <= last-1: + yield copy(s, first, last-1) + +iterator split*(s: string, sep: char): string = + ## Splits the string `s` into substrings. + ## + ## Substrings are separated by the character `sep`. + ## Example: + ## + ## .. code-block:: nimrod + ## for word in split(";;this;is;an;;example;;;", ';'): + ## writeln(stdout, word) + ## + ## Results in: + ## + ## .. code-block:: nimrod + ## "" + ## "" + ## "this" + ## "is" + ## "an" + ## "" + ## "example" + ## "" + ## "" + ## "" + ## + var last = 0 + assert('\0' != sep) + if len(s) > 0: + # `<=` is correct here for the edge cases! + while last <= len(s): + var first = last + while last < len(s) and s[last] != sep: inc(last) + yield copy(s, first, last-1) + inc(last) + +iterator splitLines*(s: string): string = + ## Splits the string `s` into its containing lines. Every newline + ## combination (CR, LF, CR-LF) is supported. The result strings contain + ## no trailing ``\n``. + ## + ## Example: + ## + ## .. code-block:: nimrod + ## for line in lines("\nthis\nis\nan\n\nexample\n"): + ## writeln(stdout, line) + ## + ## Results in: + ## + ## .. code-block:: nimrod + ## "" + ## "this" + ## "is" + ## "an" + ## "" + ## "example" + ## "" + var first = 0 + var last = 0 + while true: + while s[last] notin {'\0', '\c', '\l'}: inc(last) + yield copy(s, first, last-1) + # skip newlines: + if s[last] == '\l': inc(last) + elif s[last] == '\c': + inc(last) + if s[last] == '\l': inc(last) + else: break # was '\0' + first = last + +proc splitLinesSeq*(s: string): seq[string] {.noSideEffect, deprecated.} = + ## The same as `splitLines`, but is a proc that returns a sequence + ## of substrings. + ## **Deprecated since version 0.8.0**: Use `splitLines` instead. + accumulateResult(splitLines(s)) + +proc splitSeq*(s: string, seps: set[char] = Whitespace): seq[string] {. + noSideEffect, deprecated.} = + ## The same as `split`, but is a proc that returns a sequence of substrings. + ## **Deprecated since version 0.8.0**: Use `split` instead. + accumulateResult(split(s, seps)) + +proc splitSeq*(s: string, sep: char): seq[string] {.noSideEffect, + deprecated.} = + ## The same as `split`, but is a proc that returns a sequence of substrings. + ## **Deprecated since version 0.8.0**: Use `split` instead. + accumulateResult(split(s, sep)) + +proc splitLines*(s: string): seq[string] {.noSideEffect.} = + ## The same as the `splitLines` iterator, but is a proc that returns a + ## sequence of substrings. + accumulateResult(splitLines(s)) + +proc split*(s: string, seps: set[char] = Whitespace): seq[string] {. + noSideEffect.} = + ## The same as the `split` iterator, but is a proc that returns a + ## sequence of substrings. + accumulateResult(split(s, seps)) + +proc split*(s: string, sep: char): seq[string] {.noSideEffect.} = + ## The same as the `split` iterator, but is a proc that returns a sequence + ## of substrings. + accumulateResult(split(s, sep)) + +proc cmpIgnoreCase*(a, b: string): int {.noSideEffect.} + ## Compares two strings in a case insensitive manner. Returns: + ## + ## | 0 iff a == b + ## | < 0 iff a < b + ## | > 0 iff a > b + +proc cmpIgnoreStyle*(a, b: string): int {.noSideEffect.} + ## Compares two strings normalized (i.e. case and + ## underscores do not matter). Returns: + ## + ## | 0 iff a == b + ## | < 0 iff a < b + ## | > 0 iff a > b + +proc contains*(s: string, c: char): bool {.noSideEffect.} + ## Same as ``find(s, c) >= 0``. + +proc contains*(s, sub: string): bool {.noSideEffect.} + ## Same as ``find(s, sub) >= 0``. + +proc contains*(s: string, chars: set[char]): bool {.noSideEffect.} + ## Same as ``find(s, chars) >= 0``. + +proc toHex*(x: BiggestInt, len: int): string {.noSideEffect.} + ## Converts `x` to its hexadecimal representation. The resulting string + ## will be exactly `len` characters long. No prefix like ``0x`` + ## is generated. `x` is treated as an unsigned value. + +proc intToStr*(x: int, minchars: int = 1): string + ## Converts `x` to its decimal representation. The resulting string + ## will be minimally `minchars` characters long. This is achieved by + ## adding leading zeros. + +proc ParseInt*(s: string): int {.noSideEffect, procvar.} + ## Parses a decimal integer value contained in `s`. If `s` is not + ## a valid integer, `EInvalidValue` is raised. + +proc ParseBiggestInt*(s: string): biggestInt {.noSideEffect, procvar.} + ## Parses a decimal integer value contained in `s`. If `s` is not + ## a valid integer, `EInvalidValue` is raised. + +proc ParseFloat*(s: string): float {.noSideEffect, procvar.} + ## Parses a decimal floating point value contained in `s`. If `s` is not + ## a valid floating point number, `EInvalidValue` is raised. ``NAN``, + ## ``INF``, ``-INF`` are also supported (case insensitive comparison). + +# the stringify and format operators: +proc toString*[Ty](x: Ty): string {.deprecated.} + ## This generic proc is the same as the stringify operator `$`. + ## + ## **Deprecated since version 0.8.2:** Use `$` instead. + +proc repeatChar*(count: int, c: Char = ' '): string + ## Returns a string of length `count` consisting only of + ## the character `c`. + +proc startsWith*(s, prefix: string): bool {.noSideEffect.} + ## Returns true iff ``s`` starts with ``prefix``. + ## If ``prefix == ""`` true is returned. + +proc endsWith*(s, suffix: string): bool {.noSideEffect.} + ## Returns true iff ``s`` ends with ``suffix``. + ## If ``suffix == ""`` true is returned. + +proc addSep*(dest: var string, sep = ", ", startLen = 0) {.noSideEffect, + inline.} = + ## A shorthand for: + ## + ## .. code-block:: nimrod + ## if dest.len > startLen: add(dest, sep) + ## + ## This is often useful for generating some code where the items need to + ## be *separated* by `sep`. `sep` is only added if `dest` is longer than + ## `startLen`. The following example creates a string describing + ## an array of integers: + ## + ## .. code-block:: nimrod + ## var arr = "[" + ## for x in items([2, 3, 5, 7, 11]): + ## addSep(arr, startLen=len("[")) + ## add(arr, $x) + ## add(arr, "]") + if dest.len > startLen: add(dest, sep) + +proc allCharsInSet*(s: string, theSet: TCharSet): bool = + ## returns true iff each character of `s` is in the set `theSet`. + for c in items(s): + if c notin theSet: return false + return true + +proc quoteIfContainsWhite*(s: string): string = + ## returns ``'"' & s & '"'`` if `s` contains a space and does not + ## start with a quote, else returns `s` + if find(s, {' ', '\t'}) >= 0 and s[0] != '"': + result = '"' & s & '"' + else: + result = s + +proc startsWith(s, prefix: string): bool = + var i = 0 + while true: + if prefix[i] == '\0': return true + if s[i] != prefix[i]: return false + inc(i) + +proc endsWith(s, suffix: string): bool = + var + i = 0 + j = len(s) - len(suffix) + while true: + if suffix[i] == '\0': return true + if s[i+j] != suffix[i]: return false + inc(i) + +when false: + proc abbrev(s: string, possibilities: openarray[string]): int = + ## returns the index of the first item in `possibilities` if not + ## ambiguous; -1 if no item has been found; -2 if multiple items + ## match. + result = -1 # none found + for i in 0..possibilities.len-1: + if possibilities[i].startsWith(s): + if result >= 0: return -2 # ambiguous + result = i + +proc repeatChar(count: int, c: Char = ' '): string = + result = newString(count) + for i in 0..count-1: + result[i] = c + +proc intToStr(x: int, minchars: int = 1): string = + result = $abs(x) + for i in 1 .. minchars - len(result): + result = '0' & result + if x < 0: + result = '-' & result + +proc toString[Ty](x: Ty): string = return $x + +proc toOctal(c: char): string = + result = newString(3) + var val = ord(c) + for i in countdown(2, 0): + result[i] = Chr(val mod 8 + ord('0')) + val = val div 8 + +proc `%`(formatstr: string, a: string): string = + return formatstr % [a] + +proc findNormalized(x: string, inArray: openarray[string]): int = + var i = 0 + while i < high(inArray): + if cmpIgnoreStyle(x, inArray[i]) == 0: return i + inc(i, 2) # incrementing by 1 would probably result in a + # security whole ... + return -1 + +proc addf(s: var string, formatstr: string, a: openarray[string]) = + const PatternChars = {'a'..'z', 'A'..'Z', '0'..'9', '\128'..'\255', '_'} + var i = 0 + var num = 0 + while i < len(formatstr): + if formatstr[i] == '$': + case formatstr[i+1] # again we use the fact that strings + # are zero-terminated here + of '#': + add s, a[num] + inc i, 2 + inc num + of '$': + add s, '$' + inc(i, 2) + of '1'..'9': + var j = 0 + inc(i) # skip $ + while formatstr[i] in {'0'..'9'}: + j = j * 10 + ord(formatstr[i]) - ord('0') + inc(i) + num = j + add s, a[j - 1] + of '{': + var j = i+1 + while formatstr[j] notin {'\0', '}'}: inc(j) + var x = findNormalized(copy(formatstr, i+2, j-1), a) + if x >= 0 and x < high(a): add s, a[x+1] + else: raise newException(EInvalidValue, "invalid format string") + i = j+1 + of 'a'..'z', 'A'..'Z', '\128'..'\255', '_': + var j = i+1 + while formatstr[j] in PatternChars: inc(j) + var x = findNormalized(copy(formatstr, i+1, j-1), a) + if x >= 0 and x < high(a): add s, a[x+1] + else: raise newException(EInvalidValue, "invalid format string") + i = j + else: raise newException(EInvalidValue, "invalid format string") + else: + add s, formatstr[i] + inc(i) + +proc `%`(formatstr: string, a: openarray[string]): string = + result = "" + addf(result, formatstr, a) + +proc cmpIgnoreCase(a, b: string): int = + # makes usage of the fact that strings are zero-terminated + for i in 0..len(a)-1: + var aa = toLower(a[i]) + var bb = toLower(b[i]) + result = ord(aa) - ord(bb) + if result != 0: break + +{.push checks: off, line_trace: off .} # this is a hot-spot in the compiler! + # thus we compile without checks here + +proc cmpIgnoreStyle(a, b: string): int = + var i = 0 + var j = 0 + while True: + while a[i] == '_': inc(i) + while b[j] == '_': inc(j) # BUGFIX: typo + var aa = toLower(a[i]) + var bb = toLower(b[j]) + result = ord(aa) - ord(bb) + if result != 0 or aa == '\0': break + inc(i) + inc(j) + +{.pop.} + +# --------------------------------------------------------------------------- + +proc join*(a: openArray[string], sep: string): string = + ## concatenates all strings in `a` separating them with `sep`. + if len(a) > 0: + var L = sep.len * (a.len-1) + for i in 0..high(a): inc(L, a[i].len) + result = newString(L) + setLen(result, 0) + add(result, a[0]) + for i in 1..high(a): + add(result, sep) + add(result, a[i]) + else: + result = "" + +proc join*(a: openArray[string]): string = + ## concatenates all strings in `a`. + if len(a) > 0: + var L = 0 + for i in 0..high(a): inc(L, a[i].len) + result = newString(L) + setLen(result, 0) + for i in 0..high(a): add(result, a[i]) + else: + result = "" + +proc strip(s: string, leading = true, trailing = true): string = + const + chars: set[Char] = Whitespace + var + first = 0 + last = len(s)-1 + if leading: + while s[first] in chars: inc(first) + if trailing: + while last >= 0 and s[last] in chars: dec(last) + result = copy(s, first, last) + +proc toLower(c: Char): Char = + if c in {'A'..'Z'}: + result = chr(ord(c) + (ord('a') - ord('A'))) + else: + result = c + +proc toLower(s: string): string = + result = newString(len(s)) + for i in 0..len(s) - 1: + result[i] = toLower(s[i]) + +proc toUpper(c: Char): Char = + if c in {'a'..'z'}: + result = Chr(Ord(c) - (Ord('a') - Ord('A'))) + else: + result = c + +proc toUpper(s: string): string = + result = newString(len(s)) + for i in 0..len(s) - 1: + result[i] = toUpper(s[i]) + +proc capitalize(s: string): string = + result = toUpper(s[0]) & copy(s, 1) + +proc normalize(s: string): string = + result = "" + for i in 0..len(s) - 1: + if s[i] in {'A'..'Z'}: + add result, Chr(Ord(s[i]) + (Ord('a') - Ord('A'))) + elif s[i] != '_': + add result, s[i] + +type + TSkipTable = array[Char, int] + +proc preprocessSub(sub: string, a: var TSkipTable) = + var m = len(sub) + for i in 0..0xff: a[chr(i)] = m+1 + for i in 0..m-1: a[sub[i]] = m-i + +proc findAux(s, sub: string, start: int, a: TSkipTable): int = + # fast "quick search" algorithm: + var + m = len(sub) + n = len(s) + # search: + var j = start + while j <= n - m: + block match: + for k in 0..m-1: + if sub[k] != s[k+j]: break match + return j + inc(j, a[s[j+m]]) + return -1 + +proc find(s, sub: string, start: int = 0): int = + var a: TSkipTable + preprocessSub(sub, a) + result = findAux(s, sub, start, a) + +proc find(s: string, sub: char, start: int = 0): int = + for i in start..len(s)-1: + if sub == s[i]: return i + return -1 + +proc find(s: string, chars: set[char], start: int = 0): int = + for i in start..s.len-1: + if s[i] in chars: return i + return -1 + +proc contains(s: string, chars: set[char]): bool = + return find(s, chars) >= 0 + +proc contains(s: string, c: char): bool = + return find(s, c) >= 0 + +proc contains(s, sub: string): bool = + return find(s, sub) >= 0 + +proc replace*(s, sub, by: string): string = + ## Replaces `sub` in `s` by the string `by`. + var a: TSkipTable + result = "" + preprocessSub(sub, a) + var i = 0 + while true: + var j = findAux(s, sub, i, a) + if j < 0: break + add result, copy(s, i, j - 1) + add result, by + i = j + len(sub) + # copy the rest: + add result, copy(s, i) + +proc replace*(s: string, sub, by: char): string = + ## optimized version for characters. + result = newString(s.len) + var i = 0 + while i < s.len: + if s[i] == sub: result[i] = by + else: result[i] = s[i] + inc(i) + +proc delete*(s: var string, first, last: int) = + ## Deletes in `s` the characters at position `first`..`last`. This modifies + ## `s` itself, it does not return a copy. + var + i = first + # example: "abc___uvwxyz\0" (___ is to be deleted) + # --> first == 3, last == 5 + # s[first..] = s[last+1..] + while last+i+1 < len(s): + s[i] = s[last+i+1] + inc(i) + setlen(s, len(s)-(last-first+1)) + +proc replaceStr(s, sub, by: string): string = return replace(s, sub, by) +proc replaceStr(s: string, sub, by: char): string = return replace(s, sub, by) +proc deleteStr*(s: var string, first, last: int) = delete(s, first, last) + +# parsing numbers: + +proc toHex(x: BiggestInt, len: int): string = + const + HexChars = "0123456789ABCDEF" + var + shift: BiggestInt + result = newString(len) + for j in countdown(len-1, 0): + result[j] = HexChars[toU32(x shr shift) and 0xF'i32] + shift = shift + 4 + +{.push overflowChecks: on.} +# this must be compiled with overflow checking turned on: +proc rawParseInt(s: string, index: var int): BiggestInt = + # index contains the start position at proc entry; end position will be + # an index before the proc returns; index = -1 on error (no number at all) + # the problem here is that integers have an asymmetrical range: there is + # one more valid negative than prositive integer. Thus we perform the + # computation as a negative number and then change the sign at the end. + var + i = index # a local i is more efficient than accessing a var parameter + sign: BiggestInt = -1 + if s[i] == '+': + inc(i) + elif s[i] == '-': + inc(i) + sign = 1 + if s[i] in {'0'..'9'}: + result = 0 + while s[i] in {'0'..'9'}: + result = result * 10 - (ord(s[i]) - ord('0')) + inc(i) + while s[i] == '_': + inc(i) # underscores are allowed and ignored + result = result * sign + if s[i] == '\0': + index = i # store index back + else: + index = -1 # BUGFIX: error! + else: + index = -1 + +{.pop.} # overflowChecks + +proc parseInt(s: string): int = + var + index = 0 + res = rawParseInt(s, index) + if index == -1: + raise newException(EInvalidValue, "invalid integer: " & s) + elif (sizeof(int) <= 4) and + ((res < low(int)) or (res > high(int))): + raise newException(EOverflow, "overflow") + else: + result = int(res) # convert to smaller integer type + +proc ParseBiggestInt(s: string): biggestInt = + var index = 0 + result = rawParseInt(s, index) + if index == -1: + raise newException(EInvalidValue, "invalid integer: " & s) + +proc ParseFloat(s: string): float = + var + esign = 1.0 + sign = 1.0 + i = 0 + exponent: int + flags: int + result = 0.0 + if s[i] == '+': inc(i) + elif s[i] == '-': + sign = -1.0 + inc(i) + if s[i] == 'N' or s[i] == 'n': + if s[i+1] == 'A' or s[i+1] == 'a': + if s[i+2] == 'N' or s[i+2] == 'n': + if s[i+3] == '\0': return NaN + raise newException(EInvalidValue, "invalid float: " & s) + if s[i] == 'I' or s[i] == 'i': + if s[i+1] == 'N' or s[i+1] == 'n': + if s[i+2] == 'F' or s[i+2] == 'f': + if s[i+3] == '\0': return Inf*sign + raise newException(EInvalidValue, "invalid float: " & s) + while s[i] in {'0'..'9'}: + # Read integer part + flags = flags or 1 + result = result * 10.0 + toFloat(ord(s[i]) - ord('0')) + inc(i) + while s[i] == '_': inc(i) + # Decimal? + if s[i] == '.': + var hd = 1.0 + inc(i) + while s[i] in {'0'..'9'}: + # Read fractional part + flags = flags or 2 + result = result * 10.0 + toFloat(ord(s[i]) - ord('0')) + hd = hd * 10.0 + inc(i) + while s[i] == '_': inc(i) + result = result / hd # this complicated way preserves precision + # Again, read integer and fractional part + if flags == 0: + raise newException(EInvalidValue, "invalid float: " & s) + # Exponent? + if s[i] in {'e', 'E'}: + inc(i) + if s[i] == '+': + inc(i) + elif s[i] == '-': + esign = -1.0 + inc(i) + if s[i] notin {'0'..'9'}: + raise newException(EInvalidValue, "invalid float: " & s) + while s[i] in {'0'..'9'}: + exponent = exponent * 10 + ord(s[i]) - ord('0') + inc(i) + while s[i] == '_': inc(i) + # Calculate Exponent + var hd = 1.0 + for j in 1..exponent: + hd = hd * 10.0 + if esign > 0.0: result = result * hd + else: result = result / hd + # Not all characters are read? + if s[i] != '\0': raise newException(EInvalidValue, "invalid float: " & s) + # evaluate sign + result = result * sign + +proc toOct*(x: BiggestInt, len: int): string = + ## converts `x` into its octal representation. The resulting string is + ## always `len` characters long. No leading ``0o`` prefix is generated. + var + mask: BiggestInt = 7 + shift: BiggestInt = 0 + assert(len > 0) + result = newString(len) + for j in countdown(len-1, 0): + result[j] = chr(int((x and mask) shr shift) + ord('0')) + shift = shift + 3 + mask = mask shl 3 + +proc toBin*(x: BiggestInt, len: int): string = + ## converts `x` into its binary representation. The resulting string is + ## always `len` characters long. No leading ``0b`` prefix is generated. + var + mask: BiggestInt = 1 + shift: BiggestInt = 0 + assert(len > 0) + result = newString(len) + for j in countdown(len-1, 0): + result[j] = chr(int((x and mask) shr shift) + ord('0')) + shift = shift + 1 + mask = mask shl 1 + +proc escape*(s: string, prefix = "\"", suffix = "\""): string = + ## Escapes a string `s`. This does these operations (at the same time): + ## * replaces any ``\`` by ``\\`` + ## * replaces any ``'`` by ``\'`` + ## * replaces any ``"`` by ``\"`` + ## * replaces any other character in the set ``{'\0'..'\31', '\128'..'\255'}`` + ## by ``\xHH`` where ``HH`` is its hexadecimal value. + ## The procedure has been designed so that its output is usable for many + ## different common syntaxes. The resulting string is prefixed with + ## ``prefix`` and suffixed with ``suffix``. Both may be empty strings. + result = prefix + for c in items(s): + case c + of '\0'..'\31', '\128'..'\255': + add(result, '\\') + add(result, toHex(ord(c), 2)) + of '\\': add(result, "\\\\") + of '\'': add(result, "\\'") + of '\"': add(result, "\\\"") + else: add(result, c) + add(result, suffix) + +proc validEmailAddress*(s: string): bool = + ## returns true if `s` seems to be a valid e-mail address. + ## The checking also uses a domain list. + const + chars = Letters + Digits + {'!','#','$','%','&', + '\'','*','+','/','=','?','^','_','`','{','}','|','~','-','.'} + var i = 0 + if s[i] notin chars or s[i] == '.': return false + while s[i] in chars: + if s[i] == '.' and s[i+1] == '.': return false + inc(i) + if s[i] != '@': return false + var j = len(s)-1 + if s[j] notin letters: return false + while j >= i and s[j] in letters: dec(j) + inc(i) # skip '@' + while s[i] in {'0'..'9', 'a'..'z', '-', '.'}: inc(i) + if s[i] != '\0': return false + + var x = copy(s, j+1) + if len(x) == 2 and x[0] in Letters and x[1] in Letters: return true + case toLower(x) + of "com", "org", "net", "gov", "mil", "biz", "info", "mobi", "name", + "aero", "jobs", "museum": return true + return false + +proc validIdentifier*(s: string): bool = + ## returns true if `s` is a valid identifier. A valid identifier starts + ## with a character of the set `IdentStartChars` and is followed by any + ## number of characters of the set `IdentChars`. + if s[0] in IdentStartChars: + for i in 1..s.len-1: + if s[i] notin IdentChars: return false + return true + +proc editDistance*(a, b: string): int = + ## returns the edit distance between `a` and `b`. This uses the Levenshtein + ## distance algorithm with only a linear memory overhead. This implementation + ## is highly optimized! + var len1 = a.len + var len2 = b.len + if len1 > len2: + # make `b` the longer string + return editDistance(b, a) + + # strip common prefix: + var s = 0 + while a[s] == b[s] and a[s] != '\0': + inc(s) + dec(len1) + dec(len2) + # strip common suffix: + while len1 > 0 and len2 > 0 and a[s+len1-1] == b[s+len2-1]: + dec(len1) + dec(len2) + # trivial cases: + if len1 == 0: return len2 + if len2 == 0: return len1 + + # another special case: + if len1 == 1: + for j in s..len2-1: + if a[s] == b[j]: return len2 - 1 + return len2 + + inc(len1) + inc(len2) + var half = len1 shr 1 + # initalize first row: + #var row = cast[ptr array[0..high(int) div 8, int]](alloc(len2 * sizeof(int))) + var row: seq[int] + newSeq(row, len2) + var e = s + len2 - 1 # end marker + for i in 1..len2 - half - 1: row[i] = i + row[0] = len1 - half - 1 + for i in 1 .. len1 - 1: + var char1 = a[i + s - 1] + var char2p: int + var D, x: int + var p: int + if i >= len1 - half: + # skip the upper triangle: + var offset = i - len1 + half + char2p = offset + p = offset + var c3 = row[p] + ord(char1 != b[s + char2p]) + inc(p) + inc(char2p) + x = row[p] + 1 + D = x + if x > c3: x = c3 + row[p] = x + inc(p) + else: + p = 1 + char2p = 0 + D = i + x = i + if i <= half + 1: + # skip the lower triangle: + e = len2 + i - half - 2 + # main: + while p <= e: + dec(D) + var c3 = D + ord(char1 != b[char2p + s]) + inc(char2p) + inc(x) + if x > c3: x = c3 + D = row[p] + 1 + if x > D: x = D + row[p] = x + inc(p) + # lower triangle sentinel: + if i <= half: + dec(D) + var c3 = D + ord(char1 != b[char2p + s]) + inc(x) + if x > c3: x = c3 + row[p] = x + result = row[e] + #dealloc(row) + +{.pop.} diff --git a/nimlib/pure/terminal.nim b/nimlib/pure/terminal.nim new file mode 100755 index 000000000..42bd80cb4 --- /dev/null +++ b/nimlib/pure/terminal.nim @@ -0,0 +1,310 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2009 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## This module contains a few procedures to control the *terminal* +## (also called *console*). On UNIX, the implementation simply uses ANSI escape +## sequences and does not depend on any other module, on Windows it uses the +## Windows API. +## Changing the style is permanent even after program termination! Use the +## code ``system.addQuitProc(resetAttributes)`` to restore the defaults. + +when defined(windows): + import windows, os + + var + conHandle: THandle + # = createFile("CONOUT$", GENERIC_WRITE, 0, nil, OPEN_ALWAYS, 0, 0) + + block: + var hTemp = GetStdHandle(STD_OUTPUT_HANDLE()) + if DuplicateHandle(GetCurrentProcess(), hTemp, GetCurrentProcess(), + addr(conHandle), 0, 1, DUPLICATE_SAME_ACCESS) == 0: + OSError() + + proc getCursorPos(): tuple [x,y: int] = + var c: TCONSOLE_SCREEN_BUFFER_INFO + if GetConsoleScreenBufferInfo(conHandle, addr(c)) == 0: OSError() + return (int(c.dwCursorPosition.x), int(c.dwCursorPosition.y)) + + proc getAttributes(): int16 = + var c: TCONSOLE_SCREEN_BUFFER_INFO + # workaround Windows bugs: try several times + if GetConsoleScreenBufferInfo(conHandle, addr(c)) != 0: + return c.wAttributes + else: + OSError() + return 0x70'i16 # ERROR: return white background, black text + + var + oldAttr = getAttributes() + +proc setCursorPos*(x, y: int) = + ## sets the terminal's cursor to the (x,y) position. (0,0) is the + ## upper left of the screen. + when defined(windows): + var c: TCoord + c.x = int16(x) + c.y = int16(y) + if SetConsoleCursorPosition(conHandle, c) == 0: OSError() + else: + stdout.write("\e[" & $y & ';' & $x & 'f') + +proc setCursorXPos*(x: int) = + ## sets the terminal's cursor to the x position. The y position is + ## not changed. + when defined(windows): + var scrbuf: TCONSOLE_SCREEN_BUFFER_INFO + var hStdout = conHandle + if GetConsoleScreenBufferInfo(hStdout, addr(scrbuf)) == 0: OSError() + var origin = scrbuf.dwCursorPosition + origin.x = int16(x) + if SetConsoleCursorPosition(conHandle, origin) == 0: OSError() + else: + stdout.write("\e[" & $x & 'G') + +when defined(windows): + proc setCursorYPos*(y: int) = + ## sets the terminal's cursor to the y position. The x position is + ## not changed. **Warning**: This is not supported on UNIX! + when defined(windows): + var scrbuf: TCONSOLE_SCREEN_BUFFER_INFO + var hStdout = conHandle + if GetConsoleScreenBufferInfo(hStdout, addr(scrbuf)) == 0: OSError() + var origin = scrbuf.dwCursorPosition + origin.y = int16(y) + if SetConsoleCursorPosition(conHandle, origin) == 0: OSError() + else: + nil + +proc CursorUp*(count=1) = + ## Moves the cursor up by `count` rows. + when defined(windows): + var p = getCursorPos() + dec(p.y, count) + setCursorPos(p.x, p.y) + else: + stdout.write("\e[" & $count & 'A') + +proc CursorDown*(count=1) = + ## Moves the cursor down by `count` rows. + when defined(windows): + var p = getCursorPos() + inc(p.y, count) + setCursorPos(p.x, p.y) + else: + stdout.write("\e[" & $count & 'B') + +proc CursorForward*(count=1) = + ## Moves the cursor forward by `count` columns. + when defined(windows): + var p = getCursorPos() + inc(p.x, count) + setCursorPos(p.x, p.y) + else: + stdout.write("\e[" & $count & 'C') + +proc CursorBackward*(count=1) = + ## Moves the cursor backward by `count` columns. + when defined(windows): + var p = getCursorPos() + dec(p.x, count) + setCursorPos(p.x, p.y) + else: + stdout.write("\e[" & $count & 'D') + +when true: + nil +else: + proc EraseLineEnd* = + ## Erases from the current cursor position to the end of the current line. + when defined(windows): + nil + else: + stdout.write("\e[K") + + proc EraseLineStart* = + ## Erases from the current cursor position to the start of the current line. + when defined(windows): + nil + else: + stdout.write("\e[1K") + + proc EraseDown* = + ## Erases the screen from the current line down to the bottom of the screen. + when defined(windows): + nil + else: + stdout.write("\e[J") + + proc EraseUp* = + ## Erases the screen from the current line up to the top of the screen. + when defined(windows): + nil + else: + stdout.write("\e[1J") + +proc EraseLine* = + ## Erases the entire current line. + when defined(windows): + var scrbuf: TCONSOLE_SCREEN_BUFFER_INFO + var numwrote: DWORD + var hStdout = conHandle + if GetConsoleScreenBufferInfo(hStdout, addr(scrbuf)) == 0: OSError() + var origin = scrbuf.dwCursorPosition + origin.x = 0'i16 + if SetConsoleCursorPosition(conHandle, origin) == 0: OSError() + var ht = scrbuf.dwSize.Y - origin.Y + var wt = scrbuf.dwSize.X - origin.X + if FillConsoleOutputCharacter(hStdout,' ', ht*wt, + origin, addr(numwrote)) == 0: + OSError() + if FillConsoleOutputAttribute(hStdout, scrbuf.wAttributes, ht * wt, + scrbuf.dwCursorPosition, addr(numwrote)) == 0: + OSError() + else: + stdout.write("\e[2K") + setCursorXPos(0) + +proc EraseScreen* = + ## Erases the screen with the background colour and moves the cursor to home. + when defined(windows): + var scrbuf: TCONSOLE_SCREEN_BUFFER_INFO + var numwrote: DWORD + var origin: TCoord # is inititalized to 0, 0 + var hStdout = conHandle + if GetConsoleScreenBufferInfo(hStdout, addr(scrbuf)) == 0: OSError() + if FillConsoleOutputCharacter(hStdout, ' ', scrbuf.dwSize.X*scrbuf.dwSize.Y, + origin, addr(numwrote)) == 0: + OSError() + if FillConsoleOutputAttribute(hStdout, scrbuf.wAttributes, + scrbuf.dwSize.X * scrbuf.dwSize.Y, + origin, addr(numwrote)) == 0: + OSError() + setCursorXPos(0) + else: + stdout.write("\e[2J") + +proc ResetAttributes* {.noconv.} = + ## resets all attributes; it is advisable to register this as a quit proc + ## with ``system.addQuitProc(resetAttributes)``. + when defined(windows): + discard SetConsoleTextAttribute(conHandle, oldAttr) + else: + stdout.write("\e[0m") + +type + TStyle* = enum ## different styles for text output + styleBright = 1, ## bright text + styleDim, ## dim text + styleUnknown, ## unknown + styleUnderscore = 4, ## underscored text + styleBlink, ## blinking/bold text + styleReverse, ## unknown + styleHidden ## hidden text + +when not defined(windows): + var + gFG = 0 + gBG = 0 + +proc WriteStyled*(txt: string, style: set[TStyle] = {styleBright}) = + ## writes the text `txt` in a given `style`. + when defined(windows): + var a = 0'i16 + if styleBright in style: a = a or int16(FOREGROUND_INTENSITY) + if styleBlink in style: a = a or int16(BACKGROUND_INTENSITY) + if styleReverse in style: a = a or 0x4000'i16 # COMMON_LVB_REVERSE_VIDEO + if styleUnderscore in style: a = a or 0x8000'i16 # COMMON_LVB_UNDERSCORE + var old = getAttributes() + discard SetConsoleTextAttribute(conHandle, old or a) + stdout.write(txt) + discard SetConsoleTextAttribute(conHandle, old) + else: + for s in items(style): + stdout.write("\e[" & $ord(s) & 'm') + stdout.write(txt) + resetAttributes() + if gFG != 0: + stdout.write("\e[" & $ord(gFG) & 'm') + if gBG != 0: + stdout.write("\e[" & $ord(gBG) & 'm') + +type + TForegroundColor* = enum ## terminal's foreground colors + fgBlack = 30, ## black + fgRed, ## red + fgGreen, ## green + fgYellow, ## yellow + fgBlue, ## blue + fgMagenta, ## magenta + fgCyan, ## cyan + fgWhite ## white + + TBackgroundColor* = enum ## terminal's background colors + bgBlack = 40, ## black + bgRed, ## red + bgGreen, ## green + bgYellow, ## yellow + bgBlue, ## blue + bgMagenta, ## magenta + bgCyan, ## cyan + bgWhite ## white + +proc setForegroundColor*(fg: TForegroundColor, bright=false) = + ## sets the terminal's foreground color + when defined(windows): + var old = getAttributes() and not 0x0007 + if bright: + old = old or FOREGROUND_INTENSITY + const lookup: array [TForegroundColor, int] = [ + 0, + (FOREGROUND_RED), + (FOREGROUND_GREEN), + (FOREGROUND_RED or FOREGROUND_GREEN), + (FOREGROUND_BLUE), + (FOREGROUND_RED or FOREGROUND_BLUE), + (FOREGROUND_BLUE or FOREGROUND_GREEN), + (FOREGROUND_BLUE or FOREGROUND_GREEN or FOREGROUND_RED)] + discard SetConsoleTextAttribute(conHandle, toU16(old or lookup[fg])) + else: + gFG = ord(fg) + if bright: inc(gFG, 60) + stdout.write("\e[" & $gFG & 'm') + +proc setBackgroundColor*(bg: TBackgroundColor, bright=false) = + ## sets the terminal's background color + when defined(windows): + var old = getAttributes() and not 0x0070 + if bright: + old = old or BACKGROUND_INTENSITY + const lookup: array [TBackgroundColor, int] = [ + 0, + (BACKGROUND_RED), + (BACKGROUND_GREEN), + (BACKGROUND_RED or BACKGROUND_GREEN), + (BACKGROUND_BLUE), + (BACKGROUND_RED or BACKGROUND_BLUE), + (BACKGROUND_BLUE or BACKGROUND_GREEN), + (BACKGROUND_BLUE or BACKGROUND_GREEN or BACKGROUND_RED)] + discard SetConsoleTextAttribute(conHandle, toU16(old or lookup[bg])) + else: + gBG = ord(bg) + if bright: inc(gBG, 60) + stdout.write("\e[" & $gBG & 'm') + +when isMainModule: + system.addQuitProc(resetAttributes) + write(stdout, "never mind") + eraseLine() + #setCursorPos(2, 2) + writeStyled("styled text ", {styleBright, styleBlink, styleUnderscore}) + setBackGroundColor(bgCyan, true) + setForeGroundColor(fgBlue) + writeln(stdout, "ordinary text") + diff --git a/nimlib/pure/times.nim b/nimlib/pure/times.nim new file mode 100755 index 000000000..8c21b6027 --- /dev/null +++ b/nimlib/pure/times.nim @@ -0,0 +1,307 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2009 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + + +## This module contains routines and types for dealing with time. +## This module is available for the ECMAScript target. + +{.push debugger:off .} # the user does not want to trace a part + # of the standard library! + +import + strutils + +type + TMonth* = enum ## represents a month + mJan, mFeb, mMar, mApr, mMay, mJun, mJul, mAug, mSep, mOct, mNov, mDec + TWeekDay* = enum ## represents a weekday + dMon, dTue, dWed, dThu, dFri, dSat, dSun + +when defined(posix): + type + TTime* = distinct int ## distinct type that represents a time +elif defined(windows): + when defined(vcc): + # newest version of Visual C++ defines time_t to be of 64 bits + type TTime* = distinct int64 + else: + type TTime* = distinct int32 +elif defined(ECMAScript): + type + TTime* {.final.} = object + getDay: proc (): int + getFullYear: proc (): int + getHours: proc (): int + getMilliseconds: proc (): int + getMinutes: proc (): int + getMonth: proc (): int + getSeconds: proc (): int + getTime: proc (): int + getTimezoneOffset: proc (): int + getUTCDate: proc (): int + getUTCFullYear: proc (): int + getUTCHours: proc (): int + getUTCMilliseconds: proc (): int + getUTCMinutes: proc (): int + getUTCMonth: proc (): int + getUTCSeconds: proc (): int + getYear: proc (): int + parse: proc (s: cstring): TTime + setDate: proc (x: int) + setFullYear: proc (x: int) + setHours: proc (x: int) + setMilliseconds: proc (x: int) + setMinutes: proc (x: int) + setMonth: proc (x: int) + setSeconds: proc (x: int) + setTime: proc (x: int) + setUTCDate: proc (x: int) + setUTCFullYear: proc (x: int) + setUTCHours: proc (x: int) + setUTCMilliseconds: proc (x: int) + setUTCMinutes: proc (x: int) + setUTCMonth: proc (x: int) + setUTCSeconds: proc (x: int) + setYear: proc (x: int) + toGMTString: proc (): cstring + toLocaleString: proc (): cstring + UTC: proc (): int + +type + TTimeInfo* = object of TObject ## represents a time in different parts + second*: range[0..61] ## The number of seconds after the minute, + ## normally in the range 0 to 59, but can + ## be up to 61 to allow for leap seconds. + minute*: range[0..59] ## The number of minutes after the hour, + ## in the range 0 to 59. + hour*: range[0..23] ## The number of hours past midnight, + ## in the range 0 to 23. + monthday*: range[1..31] ## The day of the month, in the range 1 to 31. + month*: TMonth ## The current month. + year*: int ## The current year. + weekday*: TWeekDay ## The current day of the week. + yearday*: range[0..365] ## The number of days since January 1, + ## in the range 0 to 365. + ## Always 0 if the target is ECMAScript. + +proc getTime*(): TTime ## gets the current calendar time +proc getLocalTime*(t: TTime): TTimeInfo + ## converts the calendar time `t` to broken-time representation, + ## expressed relative to the user's specified time zone. +proc getGMTime*(t: TTime): TTimeInfo + ## converts the calendar time `t` to broken-down time representation, + ## expressed in Coordinated Universal Time (UTC). + +proc TimeInfoToTime*(timeInfo: TTimeInfo): TTime + ## converts a broken-down time structure, expressed as local time, to + ## calendar time representation. The function ignores the specified + ## contents of the structure members `weekday` and `yearday` and recomputes + ## them from the other information in the broken-down time structure. + +proc `$` *(timeInfo: TTimeInfo): string + ## converts a `TTimeInfo` object to a string representation. +proc `$` *(time: TTime): string + ## converts a calendar time to a string representation. + +proc getDateStr*(): string + ## gets the current date as a string of the format + ## ``YYYY-MM-DD``. +proc getClockStr*(): string + ## gets the current clock time as a string of the format ``HH:MM:SS``. + +proc `-` *(a, b: TTime): int64 + ## computes the difference of two calendar times. Result is in seconds. + +proc `<` * (a, b: TTime): bool = + ## returns true iff ``a < b``, that is iff a happened before b. + result = a - b < 0 + +proc `<=` * (a, b: TTime): bool = + ## returns true iff ``a <= b``. + result = a - b <= 0 + +proc getStartMilsecs*(): int + ## get the miliseconds from the start of the program + + +when not defined(ECMAScript): + # C wrapper: + type + structTM {.importc: "struct tm", final.} = object + second {.importc: "tm_sec".}, + minute {.importc: "tm_min".}, + hour {.importc: "tm_hour".}, + monthday {.importc: "tm_mday".}, + month {.importc: "tm_mon".}, + year {.importc: "tm_year".}, + weekday {.importc: "tm_wday".}, + yearday {.importc: "tm_yday".}, + isdst {.importc: "tm_isdst".}: cint + + PTimeInfo = ptr structTM + PTime = ptr TTime + + TClock {.importc: "clock_t".} = range[low(int)..high(int)] + + proc localtime(timer: PTime): PTimeInfo {. + importc: "localtime", header: "<time.h>".} + proc gmtime(timer: PTime): PTimeInfo {.importc: "gmtime", header: "<time.h>".} + proc timec(timer: PTime): TTime {.importc: "time", header: "<time.h>".} + proc mktime(t: structTM): TTime {.importc: "mktime", header: "<time.h>".} + proc asctime(tblock: structTM): CString {. + importc: "asctime", header: "<time.h>".} + proc ctime(time: PTime): CString {.importc: "ctime", header: "<time.h>".} + # strftime(s: CString, maxsize: int, fmt: CString, t: tm): int {. + # importc: "strftime", header: "<time.h>".} + proc clock(): TClock {.importc: "clock", header: "<time.h>".} + proc difftime(a, b: TTime): float {.importc: "difftime", header: "<time.h>".} + + var + clocksPerSec {.importc: "CLOCKS_PER_SEC", nodecl.}: int + + + # our own procs on top of that: + proc tmToTimeInfo(tm: structTM): TTimeInfo = + const + weekDays: array [0..6, TWeekDay] = [ + dSun, dMon, dTue, dWed, dThu, dFri, dSat] + result.second = int(tm.second) + result.minute = int(tm.minute) + result.hour = int(tm.hour) + result.monthday = int(tm.monthday) + result.month = TMonth(tm.month) + result.year = tm.year + 1900'i32 + result.weekday = weekDays[int(tm.weekDay)] + result.yearday = int(tm.yearday) + + proc timeInfoToTM(t: TTimeInfo): structTM = + const + weekDays: array [TWeekDay, int] = [1, 2, 3, 4, 5, 6, 0] + result.second = t.second + result.minute = t.minute + result.hour = t.hour + result.monthday = t.monthday + result.month = ord(t.month) + result.year = t.year - 1900 + result.weekday = weekDays[t.weekDay] + result.yearday = t.yearday + result.isdst = -1 + + proc `-` (a, b: TTime): int64 = + return toBiggestInt(difftime(a, b)) + + proc getStartMilsecs(): int = return clock() div (clocksPerSec div 1000) + proc getTime(): TTime = return timec(nil) + proc getLocalTime(t: TTime): TTimeInfo = + var a = t + result = tmToTimeInfo(localtime(addr(a))^) + # copying is needed anyway to provide reentrancity; thus + # the convertion is not expensive + + proc getGMTime(t: TTime): TTimeInfo = + var a = t + result = tmToTimeInfo(gmtime(addr(a))^) + # copying is needed anyway to provide reentrancity; thus + # the convertion is not expensive + + proc TimeInfoToTime(timeInfo: TTimeInfo): TTime = + var cTimeInfo = timeInfo # for C++ we have to make a copy, + # because the header of mktime is broken in my version of libc + return mktime(timeInfoToTM(cTimeInfo)) + + proc toStringTillNL(p: cstring): string = + result = "" + var i = 0 + while p[i] != '\0' and p[i] != '\10' and p[i] != '\13': + add(result, p[i]) + inc(i) + return result + + proc `$`(timeInfo: TTimeInfo): string = + # BUGFIX: asctime returns a newline at the end! + var p = asctime(timeInfoToTM(timeInfo)) + result = toStringTillNL(p) + + proc `$`(time: TTime): string = + # BUGFIX: ctime returns a newline at the end! + var a = time + return toStringTillNL(ctime(addr(a))) + + const + epochDiff = 116444736000000000'i64 + rateDiff = 10000000'i64 # 100 nsecs + + proc unixTimeToWinTime*(t: TTime): int64 = + ## converts a UNIX `TTime` (``time_t``) to a Windows file time + result = int64(t) * rateDiff + epochDiff + + proc winTimeToUnixTime*(t: int64): TTime = + ## converts a Windows time to a UNIX `TTime` (``time_t``) + result = TTime((t - epochDiff) div rateDiff) + +else: + proc getTime(): TTime {.importc: "new Date", nodecl.} + + const + weekDays: array [0..6, TWeekDay] = [ + dSun, dMon, dTue, dWed, dThu, dFri, dSat] + + proc getLocalTime(t: TTime): TTimeInfo = + result.second = t.getSeconds() + result.minute = t.getMinutes() + result.hour = t.getHours() + result.monthday = t.getDate() + result.month = TMonth(t.getMonth()) + result.year = t.getFullYear() + result.weekday = weekDays[t.getDay()] + result.yearday = 0 + + proc getGMTime(t: TTime): TTimeInfo = + result.second = t.getUTCSeconds() + result.minute = t.getUTCMinutes() + result.hour = t.getUTCHours() + result.monthday = t.getUTCDate() + result.month = TMonth(t.getUTCMonth()) + result.year = t.getUTCFullYear() + result.weekday = weekDays[t.getDay()] + result.yearday = 0 + + proc TimeInfoToTime*(timeInfo: TTimeInfo): TTime = + result = getTime() + result.setSeconds(timeInfo.second) + result.setMinutes(timeInfo.minute) + result.setHours(timeInfo.hour) + result.setMonth(ord(timeInfo.month)) + result.setFullYear(timeInfo.year) + result.setDate(timeInfo.monthday) + + proc `$`(timeInfo: TTimeInfo): string = return $(TimeInfoToTIme(timeInfo)) + proc `$`(time: TTime): string = $time.toLocaleString() + + proc `-` (a, b: TTime): int64 = + return a.getTime() - b.getTime() + + var + startMilsecs = getTime() + + proc getStartMilsecs(): int = + ## get the miliseconds from the start of the program + return int(getTime() - startMilsecs) + +proc getDateStr(): string = + var ti = getLocalTime(getTime()) + result = $ti.year & '-' & intToStr(ord(ti.month)+1, 2) & + '-' & intToStr(ti.monthDay, 2) + +proc getClockStr(): string = + var ti = getLocalTime(getTime()) + result = intToStr(ti.hour, 2) & ':' & intToStr(ti.minute, 2) & + ':' & intToStr(ti.second, 2) + +{.pop.} diff --git a/nimlib/pure/unicode.nim b/nimlib/pure/unicode.nim new file mode 100755 index 000000000..2a53d7660 --- /dev/null +++ b/nimlib/pure/unicode.nim @@ -0,0 +1,1178 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2009 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## This module provides support to handle the Unicode UTF-8 encoding. + +{.deadCodeElim: on.} + +type + irune = int # underlying type of TRune + TRune* = distinct irune ## type that can hold any Unicode character + TRune16* = distinct int16 ## 16 bit Unicode character + +proc `<=%`*(a, b: TRune): bool {.borrow.} +proc `<%`*(a, b: TRune): bool {.borrow.} +proc `==`*(a, b: TRune): bool {.borrow.} + +template ones(n: expr): expr = ((1 shl n)-1) + +proc runeLen*(s: string): int = + ## returns the number of Unicode characters of the string `s`. + var i = 0 + while i < len(s): + if ord(s[i]) <=% 127: inc(i) + elif ord(s[i]) shr 5 == 0b110: inc(i, 2) + elif ord(s[i]) shr 4 == 0b1110: inc(i, 3) + elif ord(s[i]) shr 3 == 0b11110: inc(i, 4) + else: assert(false) + inc(result) + +proc runeLenAt*(s: string, i: int): int = + ## returns the number of bytes the rune starting at ``s[i]`` takes. + if ord(s[i]) <=% 127: result = 1 + elif ord(s[i]) shr 5 == 0b110: result = 2 + elif ord(s[i]) shr 4 == 0b1110: result = 3 + elif ord(s[i]) shr 3 == 0b11110: result = 4 + else: assert(false) + +template fastRuneAt*(s: string, i: int, result: expr, doInc = true) = + ## Returns the unicode character ``s[i]`` in `result`. If ``doInc == true`` + ## `i` is incremented by the number of bytes that have been processed. + if ord(s[i]) <=% 127: + result = TRune(ord(s[i])) + when doInc: inc(i) + elif ord(s[i]) shr 5 == 0b110: + assert(ord(s[i+1]) shr 6 == 0b10) + result = TRune((ord(s[i]) and (bind ones(5))) shl 6 or + (ord(s[i+1]) and ones(6))) + when doInc: inc(i, 2) + elif ord(s[i]) shr 4 == 0b1110: + assert(ord(s[i+1]) shr 6 == 0b10) + assert(ord(s[i+2]) shr 6 == 0b10) + result = TRune((ord(s[i]) and ones(4)) shl 12 or + (ord(s[i+1]) and ones(6)) shl 6 or + (ord(s[i+2]) and ones(6))) + when doInc: inc(i, 3) + elif ord(s[i]) shr 3 == 0b11110: + assert(ord(s[i+1]) shr 6 == 0b10) + assert(ord(s[i+2]) shr 6 == 0b10) + assert(ord(s[i+3]) shr 6 == 0b10) + result = TRune((ord(s[i]) and ones(3)) shl 18 or + (ord(s[i+1]) and ones(6)) shl 12 or + (ord(s[i+2]) and ones(6)) shl 6 or + (ord(s[i+3]) and ones(6))) + when doInc: inc(i, 4) + else: + assert(false) + +proc runeAt*(s: string, i: int): TRune = + ## returns the unicode character in `s` at byte index `i` + fastRuneAt(s, i, result, false) + +proc toUTF8*(c: TRune): string = + ## converts a rune into its UTF8 representation + var i = irune(c) + if i <=% 127: + result = newString(1) + result[0] = chr(i) + elif i <=% 0x07FF: + result = newString(2) + result[0] = chr(i shr 6 or 0b110_0000) + result[1] = chr(i and ones(6) or 0b10_000000) + elif i <=% 0xFFFF: + result = newString(3) + result[0] = chr(i shr 12 or 0b1110_0000) + result[1] = chr(i shr 6 and ones(6) or 0b10_0000_00) + result[2] = chr(i and ones(6) or 0b10_0000_00) + elif i <=% 0x0010FFFF: + result = newString(4) + result[0] = chr(i shr 18 or 0b1111_0000) + result[1] = chr(i shr 12 and ones(6) or 0b10_0000_00) + result[2] = chr(i shr 6 and ones(6) or 0b10_0000_00) + result[3] = chr(i and ones(6) or 0b10_0000_00) + else: + assert false + +const + alphaRanges = [ + 0x00d8, 0x00f6, # Ø - ö + 0x00f8, 0x01f5, # ø - ǵ + 0x0250, 0x02a8, # ɐ - ʨ + 0x038e, 0x03a1, # Ύ - Ρ + 0x03a3, 0x03ce, # Σ - ώ + 0x03d0, 0x03d6, # ϐ - ϖ + 0x03e2, 0x03f3, # Ϣ - ϳ + 0x0490, 0x04c4, # Ґ - ӄ + 0x0561, 0x0587, # ա - և + 0x05d0, 0x05ea, # א - ת + 0x05f0, 0x05f2, # װ - ײ + 0x0621, 0x063a, # ء - غ + 0x0640, 0x064a, # ـ - ي + 0x0671, 0x06b7, # ٱ - ڷ + 0x06ba, 0x06be, # ں - ھ + 0x06c0, 0x06ce, # ۀ - ێ + 0x06d0, 0x06d3, # ې - ۓ + 0x0905, 0x0939, # अ - ह + 0x0958, 0x0961, # क़ - ॡ + 0x0985, 0x098c, # অ - ঌ + 0x098f, 0x0990, # এ - ঐ + 0x0993, 0x09a8, # ও - ন + 0x09aa, 0x09b0, # প - র + 0x09b6, 0x09b9, # শ - হ + 0x09dc, 0x09dd, # ড় - ঢ় + 0x09df, 0x09e1, # য় - ৡ + 0x09f0, 0x09f1, # ৰ - ৱ + 0x0a05, 0x0a0a, # ਅ - ਊ + 0x0a0f, 0x0a10, # ਏ - ਐ + 0x0a13, 0x0a28, # ਓ - ਨ + 0x0a2a, 0x0a30, # ਪ - ਰ + 0x0a32, 0x0a33, # ਲ - ਲ਼ + 0x0a35, 0x0a36, # ਵ - ਸ਼ + 0x0a38, 0x0a39, # ਸ - ਹ + 0x0a59, 0x0a5c, # ਖ਼ - ੜ + 0x0a85, 0x0a8b, # અ - ઋ + 0x0a8f, 0x0a91, # એ - ઑ + 0x0a93, 0x0aa8, # ઓ - ન + 0x0aaa, 0x0ab0, # પ - ર + 0x0ab2, 0x0ab3, # લ - ળ + 0x0ab5, 0x0ab9, # વ - હ + 0x0b05, 0x0b0c, # ଅ - ଌ + 0x0b0f, 0x0b10, # ଏ - ଐ + 0x0b13, 0x0b28, # ଓ - ନ + 0x0b2a, 0x0b30, # ପ - ର + 0x0b32, 0x0b33, # ଲ - ଳ + 0x0b36, 0x0b39, # ଶ - ହ + 0x0b5c, 0x0b5d, # ଡ଼ - ଢ଼ + 0x0b5f, 0x0b61, # ୟ - ୡ + 0x0b85, 0x0b8a, # அ - ஊ + 0x0b8e, 0x0b90, # எ - ஐ + 0x0b92, 0x0b95, # ஒ - க + 0x0b99, 0x0b9a, # ங - ச + 0x0b9e, 0x0b9f, # ஞ - ட + 0x0ba3, 0x0ba4, # ண - த + 0x0ba8, 0x0baa, # ந - ப + 0x0bae, 0x0bb5, # ம - வ + 0x0bb7, 0x0bb9, # ஷ - ஹ + 0x0c05, 0x0c0c, # అ - ఌ + 0x0c0e, 0x0c10, # ఎ - ఐ + 0x0c12, 0x0c28, # ఒ - న + 0x0c2a, 0x0c33, # ప - ళ + 0x0c35, 0x0c39, # వ - హ + 0x0c60, 0x0c61, # ౠ - ౡ + 0x0c85, 0x0c8c, # ಅ - ಌ + 0x0c8e, 0x0c90, # ಎ - ಐ + 0x0c92, 0x0ca8, # ಒ - ನ + 0x0caa, 0x0cb3, # ಪ - ಳ + 0x0cb5, 0x0cb9, # ವ - ಹ + 0x0ce0, 0x0ce1, # ೠ - ೡ + 0x0d05, 0x0d0c, # അ - ഌ + 0x0d0e, 0x0d10, # എ - ഐ + 0x0d12, 0x0d28, # ഒ - ന + 0x0d2a, 0x0d39, # പ - ഹ + 0x0d60, 0x0d61, # ൠ - ൡ + 0x0e01, 0x0e30, # ก - ะ + 0x0e32, 0x0e33, # า - ำ + 0x0e40, 0x0e46, # เ - ๆ + 0x0e5a, 0x0e5b, # ๚ - ๛ + 0x0e81, 0x0e82, # ກ - ຂ + 0x0e87, 0x0e88, # ງ - ຈ + 0x0e94, 0x0e97, # ດ - ທ + 0x0e99, 0x0e9f, # ນ - ຟ + 0x0ea1, 0x0ea3, # ມ - ຣ + 0x0eaa, 0x0eab, # ສ - ຫ + 0x0ead, 0x0eae, # ອ - ຮ + 0x0eb2, 0x0eb3, # າ - ຳ + 0x0ec0, 0x0ec4, # ເ - ໄ + 0x0edc, 0x0edd, # ໜ - ໝ + 0x0f18, 0x0f19, # ༘ - ༙ + 0x0f40, 0x0f47, # ཀ - ཇ + 0x0f49, 0x0f69, # ཉ - ཀྵ + 0x10d0, 0x10f6, # ა - ჶ + 0x1100, 0x1159, # ᄀ - ᅙ + 0x115f, 0x11a2, # ᅟ - ᆢ + 0x11a8, 0x11f9, # ᆨ - ᇹ + 0x1e00, 0x1e9b, # Ḁ - ẛ + 0x1f50, 0x1f57, # ὐ - ὗ + 0x1f80, 0x1fb4, # ᾀ - ᾴ + 0x1fb6, 0x1fbc, # ᾶ - ᾼ + 0x1fc2, 0x1fc4, # ῂ - ῄ + 0x1fc6, 0x1fcc, # ῆ - ῌ + 0x1fd0, 0x1fd3, # ῐ - ΐ + 0x1fd6, 0x1fdb, # ῖ - Ί + 0x1fe0, 0x1fec, # ῠ - Ῥ + 0x1ff2, 0x1ff4, # ῲ - ῴ + 0x1ff6, 0x1ffc, # ῶ - ῼ + 0x210a, 0x2113, # ℊ - ℓ + 0x2115, 0x211d, # ℕ - ℝ + 0x2120, 0x2122, # ℠ - ™ + 0x212a, 0x2131, # K - ℱ + 0x2133, 0x2138, # ℳ - ℸ + 0x3041, 0x3094, # ぁ - ゔ + 0x30a1, 0x30fa, # ァ - ヺ + 0x3105, 0x312c, # ㄅ - ㄬ + 0x3131, 0x318e, # ㄱ - ㆎ + 0x3192, 0x319f, # ㆒ - ㆟ + 0x3260, 0x327b, # ㉠ - ㉻ + 0x328a, 0x32b0, # ㊊ - ㊰ + 0x32d0, 0x32fe, # ㋐ - ㋾ + 0x3300, 0x3357, # ㌀ - ㍗ + 0x3371, 0x3376, # ㍱ - ㍶ + 0x337b, 0x3394, # ㍻ - ㎔ + 0x3399, 0x339e, # ㎙ - ㎞ + 0x33a9, 0x33ad, # ㎩ - ㎭ + 0x33b0, 0x33c1, # ㎰ - ㏁ + 0x33c3, 0x33c5, # ㏃ - ㏅ + 0x33c7, 0x33d7, # ㏇ - ㏗ + 0x33d9, 0x33dd, # ㏙ - ㏝ + 0x4e00, 0x9fff, # 一 - 鿿 + 0xac00, 0xd7a3, # 가 - 힣 + 0xf900, 0xfb06, # 豈 - st + 0xfb13, 0xfb17, # ﬓ - ﬗ + 0xfb1f, 0xfb28, # ײַ - ﬨ + 0xfb2a, 0xfb36, # שׁ - זּ + 0xfb38, 0xfb3c, # טּ - לּ + 0xfb40, 0xfb41, # נּ - סּ + 0xfb43, 0xfb44, # ףּ - פּ + 0xfb46, 0xfbb1, # צּ - ﮱ + 0xfbd3, 0xfd3d, # ﯓ - ﴽ + 0xfd50, 0xfd8f, # ﵐ - ﶏ + 0xfd92, 0xfdc7, # ﶒ - ﷇ + 0xfdf0, 0xfdf9, # ﷰ - ﷹ + 0xfe70, 0xfe72, # ﹰ - ﹲ + 0xfe76, 0xfefc, # ﹶ - ﻼ + 0xff66, 0xff6f, # ヲ - ッ + 0xff71, 0xff9d, # ア - ン + 0xffa0, 0xffbe, # ᅠ - ᄒ + 0xffc2, 0xffc7, # ᅡ - ᅦ + 0xffca, 0xffcf, # ᅧ - ᅬ + 0xffd2, 0xffd7, # ᅭ - ᅲ + 0xffda, 0xffdc] # ᅳ - ᅵ + + alphaSinglets = [ + 0x00aa, # ª + 0x00b5, # µ + 0x00ba, # º + 0x03da, # Ϛ + 0x03dc, # Ϝ + 0x03de, # Ϟ + 0x03e0, # Ϡ + 0x06d5, # ە + 0x09b2, # ল + 0x0a5e, # ਫ਼ + 0x0a8d, # ઍ + 0x0ae0, # ૠ + 0x0b9c, # ஜ + 0x0cde, # ೞ + 0x0e4f, # ๏ + 0x0e84, # ຄ + 0x0e8a, # ຊ + 0x0e8d, # ຍ + 0x0ea5, # ລ + 0x0ea7, # ວ + 0x0eb0, # ະ + 0x0ebd, # ຽ + 0x1fbe, # ι + 0x207f, # ⁿ + 0x20a8, # ₨ + 0x2102, # ℂ + 0x2107, # ℇ + 0x2124, # ℤ + 0x2126, # Ω + 0x2128, # ℨ + 0xfb3e, # מּ + 0xfe74] # ﹴ + + spaceRanges = [ + 0x0009, 0x000a, # tab and newline + 0x0020, 0x0020, # space + 0x00a0, 0x00a0, # + 0x2000, 0x200b, # - + 0x2028, 0x2029, # - 0x3000, 0x3000, # + 0xfeff, 0xfeff] # + + toupperRanges = [ + 0x0061, 0x007a, 468, # a-z A-Z + 0x00e0, 0x00f6, 468, # à-ö À-Ö + 0x00f8, 0x00fe, 468, # ø-þ Ø-Þ + 0x0256, 0x0257, 295, # ɖ-ɗ Ɖ-Ɗ + 0x0258, 0x0259, 298, # ɘ-ə Ǝ-Ə + 0x028a, 0x028b, 283, # ʊ-ʋ Ʊ-Ʋ + 0x03ad, 0x03af, 463, # έ-ί Έ-Ί + 0x03b1, 0x03c1, 468, # α-ρ Α-Ρ + 0x03c3, 0x03cb, 468, # σ-ϋ Σ-Ϋ + 0x03cd, 0x03ce, 437, # ύ-ώ Ύ-Ώ + 0x0430, 0x044f, 468, # а-я А-Я + 0x0451, 0x045c, 420, # ё-ќ Ё-Ќ + 0x045e, 0x045f, 420, # ў-џ Ў-Џ + 0x0561, 0x0586, 452, # ա-ֆ Ա-Ֆ + 0x1f00, 0x1f07, 508, # ἀ-ἇ Ἀ-Ἇ + 0x1f10, 0x1f15, 508, # ἐ-ἕ Ἐ-Ἕ + 0x1f20, 0x1f27, 508, # ἠ-ἧ Ἠ-Ἧ + 0x1f30, 0x1f37, 508, # ἰ-ἷ Ἰ-Ἷ + 0x1f40, 0x1f45, 508, # ὀ-ὅ Ὀ-Ὅ + 0x1f60, 0x1f67, 508, # ὠ-ὧ Ὠ-Ὧ + 0x1f70, 0x1f71, 574, # ὰ-ά Ὰ-Ά + 0x1f72, 0x1f75, 586, # ὲ-ή Ὲ-Ή + 0x1f76, 0x1f77, 600, # ὶ-ί Ὶ-Ί + 0x1f78, 0x1f79, 628, # ὸ-ό Ὸ-Ό + 0x1f7a, 0x1f7b, 612, # ὺ-ύ Ὺ-Ύ + 0x1f7c, 0x1f7d, 626, # ὼ-ώ Ὼ-Ώ + 0x1f80, 0x1f87, 508, # ᾀ-ᾇ ᾈ-ᾏ + 0x1f90, 0x1f97, 508, # ᾐ-ᾗ ᾘ-ᾟ + 0x1fa0, 0x1fa7, 508, # ᾠ-ᾧ ᾨ-ᾯ + 0x1fb0, 0x1fb1, 508, # ᾰ-ᾱ Ᾰ-Ᾱ + 0x1fd0, 0x1fd1, 508, # ῐ-ῑ Ῐ-Ῑ + 0x1fe0, 0x1fe1, 508, # ῠ-ῡ Ῠ-Ῡ + 0x2170, 0x217f, 484, # ⅰ-ⅿ Ⅰ-Ⅿ + 0x24d0, 0x24e9, 474, # ⓐ-ⓩ Ⓐ-Ⓩ + 0xff41, 0xff5a, 468] # a-z A-Z + + toupperSinglets = [ + 0x00ff, 621, # ÿ Ÿ + 0x0101, 499, # ā Ā + 0x0103, 499, # ă Ă + 0x0105, 499, # ą Ą + 0x0107, 499, # ć Ć + 0x0109, 499, # ĉ Ĉ + 0x010b, 499, # ċ Ċ + 0x010d, 499, # č Č + 0x010f, 499, # ď Ď + 0x0111, 499, # đ Đ + 0x0113, 499, # ē Ē + 0x0115, 499, # ĕ Ĕ + 0x0117, 499, # ė Ė + 0x0119, 499, # ę Ę + 0x011b, 499, # ě Ě + 0x011d, 499, # ĝ Ĝ + 0x011f, 499, # ğ Ğ + 0x0121, 499, # ġ Ġ + 0x0123, 499, # ģ Ģ + 0x0125, 499, # ĥ Ĥ + 0x0127, 499, # ħ Ħ + 0x0129, 499, # ĩ Ĩ + 0x012b, 499, # ī Ī + 0x012d, 499, # ĭ Ĭ + 0x012f, 499, # į Į + 0x0131, 268, # ı I + 0x0133, 499, # ij IJ + 0x0135, 499, # ĵ Ĵ + 0x0137, 499, # ķ Ķ + 0x013a, 499, # ĺ Ĺ + 0x013c, 499, # ļ Ļ + 0x013e, 499, # ľ Ľ + 0x0140, 499, # ŀ Ŀ + 0x0142, 499, # ł Ł + 0x0144, 499, # ń Ń + 0x0146, 499, # ņ Ņ + 0x0148, 499, # ň Ň + 0x014b, 499, # ŋ Ŋ + 0x014d, 499, # ō Ō + 0x014f, 499, # ŏ Ŏ + 0x0151, 499, # ő Ő + 0x0153, 499, # œ Œ + 0x0155, 499, # ŕ Ŕ + 0x0157, 499, # ŗ Ŗ + 0x0159, 499, # ř Ř + 0x015b, 499, # ś Ś + 0x015d, 499, # ŝ Ŝ + 0x015f, 499, # ş Ş + 0x0161, 499, # š Š + 0x0163, 499, # ţ Ţ + 0x0165, 499, # ť Ť + 0x0167, 499, # ŧ Ŧ + 0x0169, 499, # ũ Ũ + 0x016b, 499, # ū Ū + 0x016d, 499, # ŭ Ŭ + 0x016f, 499, # ů Ů + 0x0171, 499, # ű Ű + 0x0173, 499, # ų Ų + 0x0175, 499, # ŵ Ŵ + 0x0177, 499, # ŷ Ŷ + 0x017a, 499, # ź Ź + 0x017c, 499, # ż Ż + 0x017e, 499, # ž Ž + 0x017f, 200, # ſ S + 0x0183, 499, # ƃ Ƃ + 0x0185, 499, # ƅ Ƅ + 0x0188, 499, # ƈ Ƈ + 0x018c, 499, # ƌ Ƌ + 0x0192, 499, # ƒ Ƒ + 0x0199, 499, # ƙ Ƙ + 0x01a1, 499, # ơ Ơ + 0x01a3, 499, # ƣ Ƣ + 0x01a5, 499, # ƥ Ƥ + 0x01a8, 499, # ƨ Ƨ + 0x01ad, 499, # ƭ Ƭ + 0x01b0, 499, # ư Ư + 0x01b4, 499, # ƴ Ƴ + 0x01b6, 499, # ƶ Ƶ + 0x01b9, 499, # ƹ Ƹ + 0x01bd, 499, # ƽ Ƽ + 0x01c5, 499, # Dž DŽ + 0x01c6, 498, # dž DŽ + 0x01c8, 499, # Lj LJ + 0x01c9, 498, # lj LJ + 0x01cb, 499, # Nj NJ + 0x01cc, 498, # nj NJ + 0x01ce, 499, # ǎ Ǎ + 0x01d0, 499, # ǐ Ǐ + 0x01d2, 499, # ǒ Ǒ + 0x01d4, 499, # ǔ Ǔ + 0x01d6, 499, # ǖ Ǖ + 0x01d8, 499, # ǘ Ǘ + 0x01da, 499, # ǚ Ǚ + 0x01dc, 499, # ǜ Ǜ + 0x01df, 499, # ǟ Ǟ + 0x01e1, 499, # ǡ Ǡ + 0x01e3, 499, # ǣ Ǣ + 0x01e5, 499, # ǥ Ǥ + 0x01e7, 499, # ǧ Ǧ + 0x01e9, 499, # ǩ Ǩ + 0x01eb, 499, # ǫ Ǫ + 0x01ed, 499, # ǭ Ǭ + 0x01ef, 499, # ǯ Ǯ + 0x01f2, 499, # Dz DZ + 0x01f3, 498, # dz DZ + 0x01f5, 499, # ǵ Ǵ + 0x01fb, 499, # ǻ Ǻ + 0x01fd, 499, # ǽ Ǽ + 0x01ff, 499, # ǿ Ǿ + 0x0201, 499, # ȁ Ȁ + 0x0203, 499, # ȃ Ȃ + 0x0205, 499, # ȅ Ȅ + 0x0207, 499, # ȇ Ȇ + 0x0209, 499, # ȉ Ȉ + 0x020b, 499, # ȋ Ȋ + 0x020d, 499, # ȍ Ȍ + 0x020f, 499, # ȏ Ȏ + 0x0211, 499, # ȑ Ȑ + 0x0213, 499, # ȓ Ȓ + 0x0215, 499, # ȕ Ȕ + 0x0217, 499, # ȗ Ȗ + 0x0253, 290, # ɓ Ɓ + 0x0254, 294, # ɔ Ɔ + 0x025b, 297, # ɛ Ɛ + 0x0260, 295, # ɠ Ɠ + 0x0263, 293, # ɣ Ɣ + 0x0268, 291, # ɨ Ɨ + 0x0269, 289, # ɩ Ɩ + 0x026f, 289, # ɯ Ɯ + 0x0272, 287, # ɲ Ɲ + 0x0283, 282, # ʃ Ʃ + 0x0288, 282, # ʈ Ʈ + 0x0292, 281, # ʒ Ʒ + 0x03ac, 462, # ά Ά + 0x03cc, 436, # ό Ό + 0x03d0, 438, # ϐ Β + 0x03d1, 443, # ϑ Θ + 0x03d5, 453, # ϕ Φ + 0x03d6, 446, # ϖ Π + 0x03e3, 499, # ϣ Ϣ + 0x03e5, 499, # ϥ Ϥ + 0x03e7, 499, # ϧ Ϧ + 0x03e9, 499, # ϩ Ϩ + 0x03eb, 499, # ϫ Ϫ + 0x03ed, 499, # ϭ Ϭ + 0x03ef, 499, # ϯ Ϯ + 0x03f0, 414, # ϰ Κ + 0x03f1, 420, # ϱ Ρ + 0x0461, 499, # ѡ Ѡ + 0x0463, 499, # ѣ Ѣ + 0x0465, 499, # ѥ Ѥ + 0x0467, 499, # ѧ Ѧ + 0x0469, 499, # ѩ Ѩ + 0x046b, 499, # ѫ Ѫ + 0x046d, 499, # ѭ Ѭ + 0x046f, 499, # ѯ Ѯ + 0x0471, 499, # ѱ Ѱ + 0x0473, 499, # ѳ Ѳ + 0x0475, 499, # ѵ Ѵ + 0x0477, 499, # ѷ Ѷ + 0x0479, 499, # ѹ Ѹ + 0x047b, 499, # ѻ Ѻ + 0x047d, 499, # ѽ Ѽ + 0x047f, 499, # ѿ Ѿ + 0x0481, 499, # ҁ Ҁ + 0x0491, 499, # ґ Ґ + 0x0493, 499, # ғ Ғ + 0x0495, 499, # ҕ Ҕ + 0x0497, 499, # җ Җ + 0x0499, 499, # ҙ Ҙ + 0x049b, 499, # қ Қ + 0x049d, 499, # ҝ Ҝ + 0x049f, 499, # ҟ Ҟ + 0x04a1, 499, # ҡ Ҡ + 0x04a3, 499, # ң Ң + 0x04a5, 499, # ҥ Ҥ + 0x04a7, 499, # ҧ Ҧ + 0x04a9, 499, # ҩ Ҩ + 0x04ab, 499, # ҫ Ҫ + 0x04ad, 499, # ҭ Ҭ + 0x04af, 499, # ү Ү + 0x04b1, 499, # ұ Ұ + 0x04b3, 499, # ҳ Ҳ + 0x04b5, 499, # ҵ Ҵ + 0x04b7, 499, # ҷ Ҷ + 0x04b9, 499, # ҹ Ҹ + 0x04bb, 499, # һ Һ + 0x04bd, 499, # ҽ Ҽ + 0x04bf, 499, # ҿ Ҿ + 0x04c2, 499, # ӂ Ӂ + 0x04c4, 499, # ӄ Ӄ + 0x04c8, 499, # ӈ Ӈ + 0x04cc, 499, # ӌ Ӌ + 0x04d1, 499, # ӑ Ӑ + 0x04d3, 499, # ӓ Ӓ + 0x04d5, 499, # ӕ Ӕ + 0x04d7, 499, # ӗ Ӗ + 0x04d9, 499, # ә Ә + 0x04db, 499, # ӛ Ӛ + 0x04dd, 499, # ӝ Ӝ + 0x04df, 499, # ӟ Ӟ + 0x04e1, 499, # ӡ Ӡ + 0x04e3, 499, # ӣ Ӣ + 0x04e5, 499, # ӥ Ӥ + 0x04e7, 499, # ӧ Ӧ + 0x04e9, 499, # ө Ө + 0x04eb, 499, # ӫ Ӫ + 0x04ef, 499, # ӯ Ӯ + 0x04f1, 499, # ӱ Ӱ + 0x04f3, 499, # ӳ Ӳ + 0x04f5, 499, # ӵ Ӵ + 0x04f9, 499, # ӹ Ӹ + 0x1e01, 499, # ḁ Ḁ + 0x1e03, 499, # ḃ Ḃ + 0x1e05, 499, # ḅ Ḅ + 0x1e07, 499, # ḇ Ḇ + 0x1e09, 499, # ḉ Ḉ + 0x1e0b, 499, # ḋ Ḋ + 0x1e0d, 499, # ḍ Ḍ + 0x1e0f, 499, # ḏ Ḏ + 0x1e11, 499, # ḑ Ḑ + 0x1e13, 499, # ḓ Ḓ + 0x1e15, 499, # ḕ Ḕ + 0x1e17, 499, # ḗ Ḗ + 0x1e19, 499, # ḙ Ḙ + 0x1e1b, 499, # ḛ Ḛ + 0x1e1d, 499, # ḝ Ḝ + 0x1e1f, 499, # ḟ Ḟ + 0x1e21, 499, # ḡ Ḡ + 0x1e23, 499, # ḣ Ḣ + 0x1e25, 499, # ḥ Ḥ + 0x1e27, 499, # ḧ Ḧ + 0x1e29, 499, # ḩ Ḩ + 0x1e2b, 499, # ḫ Ḫ + 0x1e2d, 499, # ḭ Ḭ + 0x1e2f, 499, # ḯ Ḯ + 0x1e31, 499, # ḱ Ḱ + 0x1e33, 499, # ḳ Ḳ + 0x1e35, 499, # ḵ Ḵ + 0x1e37, 499, # ḷ Ḷ + 0x1e39, 499, # ḹ Ḹ + 0x1e3b, 499, # ḻ Ḻ + 0x1e3d, 499, # ḽ Ḽ + 0x1e3f, 499, # ḿ Ḿ + 0x1e41, 499, # ṁ Ṁ + 0x1e43, 499, # ṃ Ṃ + 0x1e45, 499, # ṅ Ṅ + 0x1e47, 499, # ṇ Ṇ + 0x1e49, 499, # ṉ Ṉ + 0x1e4b, 499, # ṋ Ṋ + 0x1e4d, 499, # ṍ Ṍ + 0x1e4f, 499, # ṏ Ṏ + 0x1e51, 499, # ṑ Ṑ + 0x1e53, 499, # ṓ Ṓ + 0x1e55, 499, # ṕ Ṕ + 0x1e57, 499, # ṗ Ṗ + 0x1e59, 499, # ṙ Ṙ + 0x1e5b, 499, # ṛ Ṛ + 0x1e5d, 499, # ṝ Ṝ + 0x1e5f, 499, # ṟ Ṟ + 0x1e61, 499, # ṡ Ṡ + 0x1e63, 499, # ṣ Ṣ + 0x1e65, 499, # ṥ Ṥ + 0x1e67, 499, # ṧ Ṧ + 0x1e69, 499, # ṩ Ṩ + 0x1e6b, 499, # ṫ Ṫ + 0x1e6d, 499, # ṭ Ṭ + 0x1e6f, 499, # ṯ Ṯ + 0x1e71, 499, # ṱ Ṱ + 0x1e73, 499, # ṳ Ṳ + 0x1e75, 499, # ṵ Ṵ + 0x1e77, 499, # ṷ Ṷ + 0x1e79, 499, # ṹ Ṹ + 0x1e7b, 499, # ṻ Ṻ + 0x1e7d, 499, # ṽ Ṽ + 0x1e7f, 499, # ṿ Ṿ + 0x1e81, 499, # ẁ Ẁ + 0x1e83, 499, # ẃ Ẃ + 0x1e85, 499, # ẅ Ẅ + 0x1e87, 499, # ẇ Ẇ + 0x1e89, 499, # ẉ Ẉ + 0x1e8b, 499, # ẋ Ẋ + 0x1e8d, 499, # ẍ Ẍ + 0x1e8f, 499, # ẏ Ẏ + 0x1e91, 499, # ẑ Ẑ + 0x1e93, 499, # ẓ Ẓ + 0x1e95, 499, # ẕ Ẕ + 0x1ea1, 499, # ạ Ạ + 0x1ea3, 499, # ả Ả + 0x1ea5, 499, # ấ Ấ + 0x1ea7, 499, # ầ Ầ + 0x1ea9, 499, # ẩ Ẩ + 0x1eab, 499, # ẫ Ẫ + 0x1ead, 499, # ậ Ậ + 0x1eaf, 499, # ắ Ắ + 0x1eb1, 499, # ằ Ằ + 0x1eb3, 499, # ẳ Ẳ + 0x1eb5, 499, # ẵ Ẵ + 0x1eb7, 499, # ặ Ặ + 0x1eb9, 499, # ẹ Ẹ + 0x1ebb, 499, # ẻ Ẻ + 0x1ebd, 499, # ẽ Ẽ + 0x1ebf, 499, # ế Ế + 0x1ec1, 499, # ề Ề + 0x1ec3, 499, # ể Ể + 0x1ec5, 499, # ễ Ễ + 0x1ec7, 499, # ệ Ệ + 0x1ec9, 499, # ỉ Ỉ + 0x1ecb, 499, # ị Ị + 0x1ecd, 499, # ọ Ọ + 0x1ecf, 499, # ỏ Ỏ + 0x1ed1, 499, # ố Ố + 0x1ed3, 499, # ồ Ồ + 0x1ed5, 499, # ổ Ổ + 0x1ed7, 499, # ỗ Ỗ + 0x1ed9, 499, # ộ Ộ + 0x1edb, 499, # ớ Ớ + 0x1edd, 499, # ờ Ờ + 0x1edf, 499, # ở Ở + 0x1ee1, 499, # ỡ Ỡ + 0x1ee3, 499, # ợ Ợ + 0x1ee5, 499, # ụ Ụ + 0x1ee7, 499, # ủ Ủ + 0x1ee9, 499, # ứ Ứ + 0x1eeb, 499, # ừ Ừ + 0x1eed, 499, # ử Ử + 0x1eef, 499, # ữ Ữ + 0x1ef1, 499, # ự Ự + 0x1ef3, 499, # ỳ Ỳ + 0x1ef5, 499, # ỵ Ỵ + 0x1ef7, 499, # ỷ Ỷ + 0x1ef9, 499, # ỹ Ỹ + 0x1f51, 508, # ὑ Ὑ + 0x1f53, 508, # ὓ Ὓ + 0x1f55, 508, # ὕ Ὕ + 0x1f57, 508, # ὗ Ὗ + 0x1fb3, 509, # ᾳ ᾼ + 0x1fc3, 509, # ῃ ῌ + 0x1fe5, 507, # ῥ Ῥ + 0x1ff3, 509] # ῳ ῼ + + tolowerRanges = [ + 0x0041, 0x005a, 532, # A-Z a-z + 0x00c0, 0x00d6, 532, # À-Ö à-ö + 0x00d8, 0x00de, 532, # Ø-Þ ø-þ + 0x0189, 0x018a, 705, # Ɖ-Ɗ ɖ-ɗ + 0x018e, 0x018f, 702, # Ǝ-Ə ɘ-ə + 0x01b1, 0x01b2, 717, # Ʊ-Ʋ ʊ-ʋ + 0x0388, 0x038a, 537, # Έ-Ί έ-ί + 0x038e, 0x038f, 563, # Ύ-Ώ ύ-ώ + 0x0391, 0x03a1, 532, # Α-Ρ α-ρ + 0x03a3, 0x03ab, 532, # Σ-Ϋ σ-ϋ + 0x0401, 0x040c, 580, # Ё-Ќ ё-ќ + 0x040e, 0x040f, 580, # Ў-Џ ў-џ + 0x0410, 0x042f, 532, # А-Я а-я + 0x0531, 0x0556, 548, # Ա-Ֆ ա-ֆ + 0x10a0, 0x10c5, 548, # Ⴀ-Ⴥ ა-ჵ + 0x1f08, 0x1f0f, 492, # Ἀ-Ἇ ἀ-ἇ + 0x1f18, 0x1f1d, 492, # Ἐ-Ἕ ἐ-ἕ + 0x1f28, 0x1f2f, 492, # Ἠ-Ἧ ἠ-ἧ + 0x1f38, 0x1f3f, 492, # Ἰ-Ἷ ἰ-ἷ + 0x1f48, 0x1f4d, 492, # Ὀ-Ὅ ὀ-ὅ + 0x1f68, 0x1f6f, 492, # Ὠ-Ὧ ὠ-ὧ + 0x1f88, 0x1f8f, 492, # ᾈ-ᾏ ᾀ-ᾇ + 0x1f98, 0x1f9f, 492, # ᾘ-ᾟ ᾐ-ᾗ + 0x1fa8, 0x1faf, 492, # ᾨ-ᾯ ᾠ-ᾧ + 0x1fb8, 0x1fb9, 492, # Ᾰ-Ᾱ ᾰ-ᾱ + 0x1fba, 0x1fbb, 426, # Ὰ-Ά ὰ-ά + 0x1fc8, 0x1fcb, 414, # Ὲ-Ή ὲ-ή + 0x1fd8, 0x1fd9, 492, # Ῐ-Ῑ ῐ-ῑ + 0x1fda, 0x1fdb, 400, # Ὶ-Ί ὶ-ί + 0x1fe8, 0x1fe9, 492, # Ῠ-Ῡ ῠ-ῡ + 0x1fea, 0x1feb, 388, # Ὺ-Ύ ὺ-ύ + 0x1ff8, 0x1ff9, 372, # Ὸ-Ό ὸ-ό + 0x1ffa, 0x1ffb, 374, # Ὼ-Ώ ὼ-ώ + 0x2160, 0x216f, 516, # Ⅰ-Ⅿ ⅰ-ⅿ + 0x24b6, 0x24cf, 526, # Ⓐ-Ⓩ ⓐ-ⓩ + 0xff21, 0xff3a, 532] # A-Z a-z + + tolowerSinglets = [ + 0x0100, 501, # Ā ā + 0x0102, 501, # Ă ă + 0x0104, 501, # Ą ą + 0x0106, 501, # Ć ć + 0x0108, 501, # Ĉ ĉ + 0x010a, 501, # Ċ ċ + 0x010c, 501, # Č č + 0x010e, 501, # Ď ď + 0x0110, 501, # Đ đ + 0x0112, 501, # Ē ē + 0x0114, 501, # Ĕ ĕ + 0x0116, 501, # Ė ė + 0x0118, 501, # Ę ę + 0x011a, 501, # Ě ě + 0x011c, 501, # Ĝ ĝ + 0x011e, 501, # Ğ ğ + 0x0120, 501, # Ġ ġ + 0x0122, 501, # Ģ ģ + 0x0124, 501, # Ĥ ĥ + 0x0126, 501, # Ħ ħ + 0x0128, 501, # Ĩ ĩ + 0x012a, 501, # Ī ī + 0x012c, 501, # Ĭ ĭ + 0x012e, 501, # Į į + 0x0130, 301, # İ i + 0x0132, 501, # IJ ij + 0x0134, 501, # Ĵ ĵ + 0x0136, 501, # Ķ ķ + 0x0139, 501, # Ĺ ĺ + 0x013b, 501, # Ļ ļ + 0x013d, 501, # Ľ ľ + 0x013f, 501, # Ŀ ŀ + 0x0141, 501, # Ł ł + 0x0143, 501, # Ń ń + 0x0145, 501, # Ņ ņ + 0x0147, 501, # Ň ň + 0x014a, 501, # Ŋ ŋ + 0x014c, 501, # Ō ō + 0x014e, 501, # Ŏ ŏ + 0x0150, 501, # Ő ő + 0x0152, 501, # Œ œ + 0x0154, 501, # Ŕ ŕ + 0x0156, 501, # Ŗ ŗ + 0x0158, 501, # Ř ř + 0x015a, 501, # Ś ś + 0x015c, 501, # Ŝ ŝ + 0x015e, 501, # Ş ş + 0x0160, 501, # Š š + 0x0162, 501, # Ţ ţ + 0x0164, 501, # Ť ť + 0x0166, 501, # Ŧ ŧ + 0x0168, 501, # Ũ ũ + 0x016a, 501, # Ū ū + 0x016c, 501, # Ŭ ŭ + 0x016e, 501, # Ů ů + 0x0170, 501, # Ű ű + 0x0172, 501, # Ų ų + 0x0174, 501, # Ŵ ŵ + 0x0176, 501, # Ŷ ŷ + 0x0178, 379, # Ÿ ÿ + 0x0179, 501, # Ź ź + 0x017b, 501, # Ż ż + 0x017d, 501, # Ž ž + 0x0181, 710, # Ɓ ɓ + 0x0182, 501, # Ƃ ƃ + 0x0184, 501, # Ƅ ƅ + 0x0186, 706, # Ɔ ɔ + 0x0187, 501, # Ƈ ƈ + 0x018b, 501, # Ƌ ƌ + 0x0190, 703, # Ɛ ɛ + 0x0191, 501, # Ƒ ƒ + 0x0193, 705, # Ɠ ɠ + 0x0194, 707, # Ɣ ɣ + 0x0196, 711, # Ɩ ɩ + 0x0197, 709, # Ɨ ɨ + 0x0198, 501, # Ƙ ƙ + 0x019c, 711, # Ɯ ɯ + 0x019d, 713, # Ɲ ɲ + 0x01a0, 501, # Ơ ơ + 0x01a2, 501, # Ƣ ƣ + 0x01a4, 501, # Ƥ ƥ + 0x01a7, 501, # Ƨ ƨ + 0x01a9, 718, # Ʃ ʃ + 0x01ac, 501, # Ƭ ƭ + 0x01ae, 718, # Ʈ ʈ + 0x01af, 501, # Ư ư + 0x01b3, 501, # Ƴ ƴ + 0x01b5, 501, # Ƶ ƶ + 0x01b7, 719, # Ʒ ʒ + 0x01b8, 501, # Ƹ ƹ + 0x01bc, 501, # Ƽ ƽ + 0x01c4, 502, # DŽ dž + 0x01c5, 501, # Dž dž + 0x01c7, 502, # LJ lj + 0x01c8, 501, # Lj lj + 0x01ca, 502, # NJ nj + 0x01cb, 501, # Nj nj + 0x01cd, 501, # Ǎ ǎ + 0x01cf, 501, # Ǐ ǐ + 0x01d1, 501, # Ǒ ǒ + 0x01d3, 501, # Ǔ ǔ + 0x01d5, 501, # Ǖ ǖ + 0x01d7, 501, # Ǘ ǘ + 0x01d9, 501, # Ǚ ǚ + 0x01db, 501, # Ǜ ǜ + 0x01de, 501, # Ǟ ǟ + 0x01e0, 501, # Ǡ ǡ + 0x01e2, 501, # Ǣ ǣ + 0x01e4, 501, # Ǥ ǥ + 0x01e6, 501, # Ǧ ǧ + 0x01e8, 501, # Ǩ ǩ + 0x01ea, 501, # Ǫ ǫ + 0x01ec, 501, # Ǭ ǭ + 0x01ee, 501, # Ǯ ǯ + 0x01f1, 502, # DZ dz + 0x01f2, 501, # Dz dz + 0x01f4, 501, # Ǵ ǵ + 0x01fa, 501, # Ǻ ǻ + 0x01fc, 501, # Ǽ ǽ + 0x01fe, 501, # Ǿ ǿ + 0x0200, 501, # Ȁ ȁ + 0x0202, 501, # Ȃ ȃ + 0x0204, 501, # Ȅ ȅ + 0x0206, 501, # Ȇ ȇ + 0x0208, 501, # Ȉ ȉ + 0x020a, 501, # Ȋ ȋ + 0x020c, 501, # Ȍ ȍ + 0x020e, 501, # Ȏ ȏ + 0x0210, 501, # Ȑ ȑ + 0x0212, 501, # Ȓ ȓ + 0x0214, 501, # Ȕ ȕ + 0x0216, 501, # Ȗ ȗ + 0x0386, 538, # Ά ά + 0x038c, 564, # Ό ό + 0x03e2, 501, # Ϣ ϣ + 0x03e4, 501, # Ϥ ϥ + 0x03e6, 501, # Ϧ ϧ + 0x03e8, 501, # Ϩ ϩ + 0x03ea, 501, # Ϫ ϫ + 0x03ec, 501, # Ϭ ϭ + 0x03ee, 501, # Ϯ ϯ + 0x0460, 501, # Ѡ ѡ + 0x0462, 501, # Ѣ ѣ + 0x0464, 501, # Ѥ ѥ + 0x0466, 501, # Ѧ ѧ + 0x0468, 501, # Ѩ ѩ + 0x046a, 501, # Ѫ ѫ + 0x046c, 501, # Ѭ ѭ + 0x046e, 501, # Ѯ ѯ + 0x0470, 501, # Ѱ ѱ + 0x0472, 501, # Ѳ ѳ + 0x0474, 501, # Ѵ ѵ + 0x0476, 501, # Ѷ ѷ + 0x0478, 501, # Ѹ ѹ + 0x047a, 501, # Ѻ ѻ + 0x047c, 501, # Ѽ ѽ + 0x047e, 501, # Ѿ ѿ + 0x0480, 501, # Ҁ ҁ + 0x0490, 501, # Ґ ґ + 0x0492, 501, # Ғ ғ + 0x0494, 501, # Ҕ ҕ + 0x0496, 501, # Җ җ + 0x0498, 501, # Ҙ ҙ + 0x049a, 501, # Қ қ + 0x049c, 501, # Ҝ ҝ + 0x049e, 501, # Ҟ ҟ + 0x04a0, 501, # Ҡ ҡ + 0x04a2, 501, # Ң ң + 0x04a4, 501, # Ҥ ҥ + 0x04a6, 501, # Ҧ ҧ + 0x04a8, 501, # Ҩ ҩ + 0x04aa, 501, # Ҫ ҫ + 0x04ac, 501, # Ҭ ҭ + 0x04ae, 501, # Ү ү + 0x04b0, 501, # Ұ ұ + 0x04b2, 501, # Ҳ ҳ + 0x04b4, 501, # Ҵ ҵ + 0x04b6, 501, # Ҷ ҷ + 0x04b8, 501, # Ҹ ҹ + 0x04ba, 501, # Һ һ + 0x04bc, 501, # Ҽ ҽ + 0x04be, 501, # Ҿ ҿ + 0x04c1, 501, # Ӂ ӂ + 0x04c3, 501, # Ӄ ӄ + 0x04c7, 501, # Ӈ ӈ + 0x04cb, 501, # Ӌ ӌ + 0x04d0, 501, # Ӑ ӑ + 0x04d2, 501, # Ӓ ӓ + 0x04d4, 501, # Ӕ ӕ + 0x04d6, 501, # Ӗ ӗ + 0x04d8, 501, # Ә ә + 0x04da, 501, # Ӛ ӛ + 0x04dc, 501, # Ӝ ӝ + 0x04de, 501, # Ӟ ӟ + 0x04e0, 501, # Ӡ ӡ + 0x04e2, 501, # Ӣ ӣ + 0x04e4, 501, # Ӥ ӥ + 0x04e6, 501, # Ӧ ӧ + 0x04e8, 501, # Ө ө + 0x04ea, 501, # Ӫ ӫ + 0x04ee, 501, # Ӯ ӯ + 0x04f0, 501, # Ӱ ӱ + 0x04f2, 501, # Ӳ ӳ + 0x04f4, 501, # Ӵ ӵ + 0x04f8, 501, # Ӹ ӹ + 0x1e00, 501, # Ḁ ḁ + 0x1e02, 501, # Ḃ ḃ + 0x1e04, 501, # Ḅ ḅ + 0x1e06, 501, # Ḇ ḇ + 0x1e08, 501, # Ḉ ḉ + 0x1e0a, 501, # Ḋ ḋ + 0x1e0c, 501, # Ḍ ḍ + 0x1e0e, 501, # Ḏ ḏ + 0x1e10, 501, # Ḑ ḑ + 0x1e12, 501, # Ḓ ḓ + 0x1e14, 501, # Ḕ ḕ + 0x1e16, 501, # Ḗ ḗ + 0x1e18, 501, # Ḙ ḙ + 0x1e1a, 501, # Ḛ ḛ + 0x1e1c, 501, # Ḝ ḝ + 0x1e1e, 501, # Ḟ ḟ + 0x1e20, 501, # Ḡ ḡ + 0x1e22, 501, # Ḣ ḣ + 0x1e24, 501, # Ḥ ḥ + 0x1e26, 501, # Ḧ ḧ + 0x1e28, 501, # Ḩ ḩ + 0x1e2a, 501, # Ḫ ḫ + 0x1e2c, 501, # Ḭ ḭ + 0x1e2e, 501, # Ḯ ḯ + 0x1e30, 501, # Ḱ ḱ + 0x1e32, 501, # Ḳ ḳ + 0x1e34, 501, # Ḵ ḵ + 0x1e36, 501, # Ḷ ḷ + 0x1e38, 501, # Ḹ ḹ + 0x1e3a, 501, # Ḻ ḻ + 0x1e3c, 501, # Ḽ ḽ + 0x1e3e, 501, # Ḿ ḿ + 0x1e40, 501, # Ṁ ṁ + 0x1e42, 501, # Ṃ ṃ + 0x1e44, 501, # Ṅ ṅ + 0x1e46, 501, # Ṇ ṇ + 0x1e48, 501, # Ṉ ṉ + 0x1e4a, 501, # Ṋ ṋ + 0x1e4c, 501, # Ṍ ṍ + 0x1e4e, 501, # Ṏ ṏ + 0x1e50, 501, # Ṑ ṑ + 0x1e52, 501, # Ṓ ṓ + 0x1e54, 501, # Ṕ ṕ + 0x1e56, 501, # Ṗ ṗ + 0x1e58, 501, # Ṙ ṙ + 0x1e5a, 501, # Ṛ ṛ + 0x1e5c, 501, # Ṝ ṝ + 0x1e5e, 501, # Ṟ ṟ + 0x1e60, 501, # Ṡ ṡ + 0x1e62, 501, # Ṣ ṣ + 0x1e64, 501, # Ṥ ṥ + 0x1e66, 501, # Ṧ ṧ + 0x1e68, 501, # Ṩ ṩ + 0x1e6a, 501, # Ṫ ṫ + 0x1e6c, 501, # Ṭ ṭ + 0x1e6e, 501, # Ṯ ṯ + 0x1e70, 501, # Ṱ ṱ + 0x1e72, 501, # Ṳ ṳ + 0x1e74, 501, # Ṵ ṵ + 0x1e76, 501, # Ṷ ṷ + 0x1e78, 501, # Ṹ ṹ + 0x1e7a, 501, # Ṻ ṻ + 0x1e7c, 501, # Ṽ ṽ + 0x1e7e, 501, # Ṿ ṿ + 0x1e80, 501, # Ẁ ẁ + 0x1e82, 501, # Ẃ ẃ + 0x1e84, 501, # Ẅ ẅ + 0x1e86, 501, # Ẇ ẇ + 0x1e88, 501, # Ẉ ẉ + 0x1e8a, 501, # Ẋ ẋ + 0x1e8c, 501, # Ẍ ẍ + 0x1e8e, 501, # Ẏ ẏ + 0x1e90, 501, # Ẑ ẑ + 0x1e92, 501, # Ẓ ẓ + 0x1e94, 501, # Ẕ ẕ + 0x1ea0, 501, # Ạ ạ + 0x1ea2, 501, # Ả ả + 0x1ea4, 501, # Ấ ấ + 0x1ea6, 501, # Ầ ầ + 0x1ea8, 501, # Ẩ ẩ + 0x1eaa, 501, # Ẫ ẫ + 0x1eac, 501, # Ậ ậ + 0x1eae, 501, # Ắ ắ + 0x1eb0, 501, # Ằ ằ + 0x1eb2, 501, # Ẳ ẳ + 0x1eb4, 501, # Ẵ ẵ + 0x1eb6, 501, # Ặ ặ + 0x1eb8, 501, # Ẹ ẹ + 0x1eba, 501, # Ẻ ẻ + 0x1ebc, 501, # Ẽ ẽ + 0x1ebe, 501, # Ế ế + 0x1ec0, 501, # Ề ề + 0x1ec2, 501, # Ể ể + 0x1ec4, 501, # Ễ ễ + 0x1ec6, 501, # Ệ ệ + 0x1ec8, 501, # Ỉ ỉ + 0x1eca, 501, # Ị ị + 0x1ecc, 501, # Ọ ọ + 0x1ece, 501, # Ỏ ỏ + 0x1ed0, 501, # Ố ố + 0x1ed2, 501, # Ồ ồ + 0x1ed4, 501, # Ổ ổ + 0x1ed6, 501, # Ỗ ỗ + 0x1ed8, 501, # Ộ ộ + 0x1eda, 501, # Ớ ớ + 0x1edc, 501, # Ờ ờ + 0x1ede, 501, # Ở ở + 0x1ee0, 501, # Ỡ ỡ + 0x1ee2, 501, # Ợ ợ + 0x1ee4, 501, # Ụ ụ + 0x1ee6, 501, # Ủ ủ + 0x1ee8, 501, # Ứ ứ + 0x1eea, 501, # Ừ ừ + 0x1eec, 501, # Ử ử + 0x1eee, 501, # Ữ ữ + 0x1ef0, 501, # Ự ự + 0x1ef2, 501, # Ỳ ỳ + 0x1ef4, 501, # Ỵ ỵ + 0x1ef6, 501, # Ỷ ỷ + 0x1ef8, 501, # Ỹ ỹ + 0x1f59, 492, # Ὑ ὑ + 0x1f5b, 492, # Ὓ ὓ + 0x1f5d, 492, # Ὕ ὕ + 0x1f5f, 492, # Ὗ ὗ + 0x1fbc, 491, # ᾼ ᾳ + 0x1fcc, 491, # ῌ ῃ + 0x1fec, 493, # Ῥ ῥ + 0x1ffc, 491] # ῼ ῳ + + toTitleSinglets = [ + 0x01c4, 501, # DŽ Dž + 0x01c6, 499, # dž Dž + 0x01c7, 501, # LJ Lj + 0x01c9, 499, # lj Lj + 0x01ca, 501, # NJ Nj + 0x01cc, 499, # nj Nj + 0x01f1, 501, # DZ Dz + 0x01f3, 499] # dz Dz + +proc binarySearch(c: irune, tab: openArray[iRune], len, stride: int): int = + var n = len + var t = 0 + while n > 1: + var m = n div 2 + var p = t + m*stride + if c >= tab[p]: + t = p + n = n-m + else: + n = m + if n != 0 and c >= tab[t]: + return t + return -1 + +proc toLower*(c: TRune): TRune = + ## Converts `c` into lower case. This works for any Unicode character. + ## If possible, prefer `toLower` over `toUpper`. + var c = irune(c) + var p = binarySearch(c, tolowerRanges, len(toLowerRanges) div 3, 3) + if p >= 0 and c >= tolowerRanges[p] and c <= tolowerRanges[p+1]: + return TRune(c + tolowerRanges[p+2] - 500) + p = binarySearch(c, toLowerSinglets, len(toLowerSinglets) div 2, 2) + if p >= 0 and c == toLowerSinglets[p]: + return TRune(c + toLowerSinglets[p+1] - 500) + return TRune(c) + +proc toUpper*(c: TRune): TRune = + ## Converts `c` into upper case. This works for any Unicode character. + ## If possible, prefer `toLower` over `toUpper`. + var c = irune(c) + var p = binarySearch(c, toUpperRanges, len(toUpperRanges) div 3, 3) + if p >= 0 and c >= toUpperRanges[p] and c <= toUpperRanges[p+1]: + return TRune(c + toUpperRanges[p+2] - 500) + p = binarySearch(c, toUpperSinglets, len(toUpperSinglets) div 2, 2) + if p >= 0 and c == toUpperSinglets[p]: + return TRune(c + toUpperSinglets[p+1] - 500) + return TRune(c) + +proc toTitle*(c: TRune): TRune = + var c = irune(c) + var p = binarySearch(c, toTitleSinglets, len(toTitleSinglets) div 2, 2) + if p >= 0 and c == toTitleSinglets[p]: + return TRune(c + toTitleSinglets[p+1] - 500) + return TRune(c) + +proc isLower*(c: TRune): bool = + ## returns true iff `c` is a lower case Unicode character + ## If possible, prefer `isLower` over `isUpper`. + var c = irune(c) + # Note: toUpperRanges is correct here! + var p = binarySearch(c, toUpperRanges, len(toUpperRanges) div 3, 3) + if p >= 0 and c >= toUpperRanges[p] and c <= toUpperRanges[p+1]: + return true + p = binarySearch(c, toUpperSinglets, len(toUpperSinglets) div 2, 2) + if p >= 0 and c == toUpperSinglets[p]: + return true + +proc isUpper*(c: TRune): bool = + ## returns true iff `c` is a upper case Unicode character + ## If possible, prefer `isLower` over `isUpper`. + var c = irune(c) + # Note: toLowerRanges is correct here! + var p = binarySearch(c, toLowerRanges, len(toLowerRanges) div 3, 3) + if p >= 0 and c >= toLowerRanges[p] and c <= toLowerRanges[p+1]: + return true + p = binarySearch(c, toLowerSinglets, len(toLowerSinglets) div 2, 2) + if p >= 0 and c == toLowerSinglets[p]: + return true + +proc isAlpha*(c: TRune): bool = + ## returns true iff `c` is an *alpha* Unicode character (i.e. a letter) + if isUpper(c) or isLower(c): + return true + var c = irune(c) + var p = binarySearch(c, alphaRanges, len(alphaRanges) div 2, 2) + if p >= 0 and c >= alphaRanges[p] and c <= alphaRanges[p+1]: + return true + p = binarySearch(c, alphaSinglets, len(alphaSinglets), 1) + if p >= 0 and c == alphaSinglets[p]: + return true + +proc isTitle*(c: TRune): bool = + return isUpper(c) and isLower(c) + +proc isWhiteSpace*(c: TRune): bool = + ## returns true iff `c` is a Unicode whitespace character + var c = irune(c) + var p = binarySearch(c, spaceRanges, len(spaceRanges) div 2, 2) + if p >= 0 and c >= spaceRanges[p] and c <= spaceRanges[p+1]: + return true + +iterator runes*(s: string): TRune = + ## iterates over any unicode character of the string `s`. + var + i = 0 + result: TRune + while i < len(s): + fastRuneAt(s, i, result, true) + yield result + +proc cmpRunesIgnoreCase*(a, b: string): int = + ## compares two UTF8 strings and ignores the case. Returns: + ## + ## | 0 iff a == b + ## | < 0 iff a < b + ## | > 0 iff a > b + var i = 0 + var j = 0 + var ar, br: TRune + while i < a.len and j < b.len: + # slow path: + fastRuneAt(a, i, ar) + fastRuneAt(b, j, br) + result = irune(toLower(ar)) - irune(toLower(br)) + if result != 0: return + result = a.len - b.len + diff --git a/nimlib/pure/variants.nim b/nimlib/pure/variants.nim new file mode 100755 index 000000000..f661f81a6 --- /dev/null +++ b/nimlib/pure/variants.nim @@ -0,0 +1,181 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2009 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## This module implements Nimrod's support for the ``variant`` datatype. +## `TVariant` shows how the flexibility of dynamic typing is achieved +## within a static type system. + +type + TVarType* = enum + vtNone, + vtBool, + vtChar, + vtEnum, + vtInt, + vtFloat, + vtString, + vtSet, + vtSeq, + vtDict + TVariant* {.final.} = object of TObject + case vtype: TVarType + of vtNone: nil + of vtBool, vtChar, vtEnum, vtInt: vint: int64 + of vtFloat: vfloat: float64 + of vtString: vstring: string + of vtSet, vtSeq: q: seq[TVariant] + of vtDict: d: seq[tuple[key, val: TVariant]] + +iterator objectFields*[T](x: T, skipInherited: bool): tuple[ + key: string, val: TVariant] {.magic: "ObjectFields"} + +proc `<>`*(x: ordinal): TVariant = + result.kind = vtEnum + result.vint = x + +proc `<>`*(x: biggestInt): TVariant = + result.kind = vtInt + result.vint = x + +proc `<>`*(x: char): TVariant = + result.kind = vtChar + result.vint = ord(x) + +proc `<>`*(x: bool): TVariant = + result.kind = vtBool + result.vint = ord(x) + +proc `<>`*(x: biggestFloat): TVariant = + result.kind = vtFloat + result.vfloat = x + +proc `<>`*(x: string): TVariant = + result.kind = vtString + result.vstring = x + +proc `<>`*[T](x: openArray[T]): TVariant = + result.kind = vtSeq + newSeq(result.q, x.len) + for i in 0..x.len-1: result.q[i] = <>x[i] + +proc `<>`*[T](x: set[T]): TVariant = + result.kind = vtSet + result.q = @[] + for a in items(x): result.q.add(<>a) + +proc `<>`* [T: object](x: T): TVariant {.magic: "ToVariant".} + ## this converts a value to a variant ("boxing") + +proc `><`*[T](v: TVariant, typ: T): T {.magic: "FromVariant".} + +[<>5, <>67, <>"hallo"] +myVar><int + + +proc `==`* (x, y: TVariant): bool = + if x.vtype == y.vtype: + case x.vtype + of vtNone: result = true + of vtBool, vtChar, vtEnum, vtInt: result = x.vint == y.vint + of vtFloat: result = x.vfloat == y.vfloat + of vtString: result = x.vstring == y.vstring + of vtSet: + # complicated! We check that each a in x also occurs in y and that the + # counts are identical: + if x.q.len == y.q.len: + for a in items(x.q): + block inner: + for b in items(y.q): + if a == b: break inner + return false + result = true + of vtSeq: + if x.q.len == y.q.len: + for i in 0..x.q.len-1: + if x.q[i] != y.q[i]: return false + result = true + of vtDict: + # it is an ordered dict: + if x.d.len == y.d.len: + for i in 0..x.d.len-1: + if x.d[i].key != y.d[i].key: return false + if x.d[i].val != y.d[i].val: return false + result = true + +proc `[]`* (a, b: TVariant): TVariant = + case a.vtype + of vtSeq: + if b.vtype in {vtBool, vtChar, vtEnum, vtInt}: + result = a.q[b.vint] + else: + variantError() + of vtDict: + for i in 0..a.d.len-1: + if a.d[i].key == b: return a.d[i].val + if b.vtype in {vtBool, vtChar, vtEnum, vtInt}: + result = a.d[b.vint].val + variantError() + else: variantError() + +proc `[]=`* (a, b, c: TVariant) = + case a.vtype + of vtSeq: + if b.vtype in {vtBool, vtChar, vtEnum, vtInt}: + a.q[b.vint] = b + else: + variantError() + of vtDict: + for i in 0..a.d.len-1: + if a.d[i].key == b: + a.d[i].val = c + return + if b.vtype in {vtBool, vtChar, vtEnum, vtInt}: + a.d[b.vint].val = c + variantError() + else: variantError() + +proc `[]`* (a: TVariant, b: int): TVariant {.inline} = return a[<>b] +proc `[]`* (a: TVariant, b: string): TVariant {.inline} = return a[<>b] +proc `[]=`* (a: TVariant, b: int, c: TVariant) {.inline} = a[<>b] = c +proc `[]=`* (a: TVariant, b: string, c: TVariant) {.inline} = a[<>b] = c + +proc `+`* (x, y: TVariant): TVariant = + case x.vtype + of vtBool, vtChar, vtEnum, vtInt: + if y.vtype == x.vtype: + result.vtype = x.vtype + result.vint = x.vint + y.vint + else: + case y.vtype + of vtBool, vtChar, vtEnum, vtInt: + + + + vint: int64 + of vtFloat: vfloat: float64 + of vtString: vstring: string + of vtSet, vtSeq: q: seq[TVariant] + of vtDict: d: seq[tuple[key, val: TVariant]] + +proc `-`* (x, y: TVariant): TVariant +proc `*`* (x, y: TVariant): TVariant +proc `/`* (x, y: TVariant): TVariant +proc `div`* (x, y: TVariant): TVariant +proc `mod`* (x, y: TVariant): TVariant +proc `&`* (x, y: TVariant): TVariant +proc `$`* (x: TVariant): string = + # uses JS notation + +proc parseVariant*(s: string): TVariant +proc `<`* (x, y: TVariant): bool +proc `<=`* (x, y: TVariant): bool + +proc hash*(x: TVariant): int = + + diff --git a/nimlib/pure/xmlgen.nim b/nimlib/pure/xmlgen.nim new file mode 100755 index 000000000..79a782252 --- /dev/null +++ b/nimlib/pure/xmlgen.nim @@ -0,0 +1,406 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2009 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## This module implements a simple `XML`:idx: and `HTML`:idx: code +## generator. Each commonly used HTML tag has a corresponding macro +## that generates a string with its HTML representation. +## +## Example: +## +## .. code-block:: nimrod +## var nim = "Nimrod" +## echo h1(a(href="http://force7.de/nimrod", nim)) +## +## Writes the string:: +## +## <h1><a href="http://force7.de/nimrod">Nimrod</a></h1> +## + +import + macros, strutils + +const + coreAttr* = " id class title style " + eventAttr* = " onclick ondblclick onmousedown onmouseup " & + "onmouseover onmousemove onmouseout onkeypress onkeydown onkeyup " + commonAttr* = coreAttr & eventAttr + +proc getIdent(e: PNimrodNode): string {.compileTime.} = + case e.kind + of nnkIdent: result = normalize($e.ident) + of nnkAccQuoted: result = getIdent(e[0]) + else: error("cannot extract identifier from node: " & toStrLit(e).strVal) + +proc delete[T](s: var seq[T], attr: T): bool = + var idx = find(s, attr) + if idx >= 0: + var L = s.len + s[idx] = s[L-1] + setLen(s, L-1) + result = true + +proc xmlCheckedTag*(e: PNimrodNode, tag: string, + optAttr = "", reqAttr = "", + isLeaf = false): PNimrodNode {.compileTime.} = + ## use this procedure to define a new XML tag + + # copy the attributes; when iterating over them these lists + # will be modified, so that each attribute is only given one value + var req = splitSeq(reqAttr) + var opt = splitSeq(optAttr) + result = newNimNode(nnkBracket, e) + result.add(newStrLitNode("<")) + result.add(newStrLitNode(tag)) + # first pass over attributes: + for i in 1..e.len-1: + if e[i].kind == nnkExprEqExpr: + var name = getIdent(e[i][0]) + if delete(req, name) or delete(opt, name): + result.add(newStrLitNode(" ")) + result.add(newStrLitNode(name)) + result.add(newStrLitNode("=\"")) + result.add(e[i][1]) + result.add(newStrLitNode("\"")) + else: + error("invalid attribute for '" & tag & "' element: " & name) + # check each required attribute exists: + if req.len > 0: + error(req[0] & " attribute for '" & tag & "' element expected") + if isLeaf: + for i in 1..e.len-1: + if e[i].kind != nnkExprEqExpr: + error("element " & tag & " cannot be nested") + result.add(newStrLitNode(" />")) + else: + result.add(newStrLitNode(">")) + # second pass over elements: + for i in 1..e.len-1: + if e[i].kind != nnkExprEqExpr: result.add(e[i]) + result.add(newStrLitNode("</")) + result.add(newStrLitNode(tag)) + result.add(newStrLitNode(">")) + result = NestList(!"&", result) + + +macro a*(e: expr): expr = + ## generates the HTML ``a`` element. + result = xmlCheckedTag(e, "a", "href charset type hreflang rel rev " & + "accesskey tabindex" & commonAttr) + +macro acronym*(e: expr): expr = + ## generates the HTML ``acronym`` element. + result = xmlCheckedTag(e, "acronym", commonAttr) + +macro address*(e: expr): expr = + ## generates the HTML ``address`` element. + result = xmlCheckedTag(e, "address", commonAttr) + +macro area*(e: expr): expr = + ## generates the HTML ``area`` element. + result = xmlCheckedTag(e, "area", "shape coords href nohref" & + " accesskey tabindex" & commonAttr, "alt", true) + +macro b*(e: expr): expr = + ## generates the HTML ``b`` element. + result = xmlCheckedTag(e, "b", commonAttr) + +macro base*(e: expr): expr = + ## generates the HTML ``base`` element. + result = xmlCheckedTag(e, "base", "", "href", true) + +macro big*(e: expr): expr = + ## generates the HTML ``big`` element. + result = xmlCheckedTag(e, "big", commonAttr) + +macro blockquote*(e: expr): expr = + ## generates the HTML ``blockquote`` element. + result = xmlCheckedTag(e, "blockquote", " cite" & commonAttr) + +macro body*(e: expr): expr = + ## generates the HTML ``body`` element. + result = xmlCheckedTag(e, "body", commonAttr) + +macro br*(e: expr): expr = + ## generates the HTML ``br`` element. + result = xmlCheckedTag(e, "br", "", "", true) + +macro button*(e: expr): expr = + ## generates the HTML ``button`` element. + result = xmlCheckedTag(e, "button", "accesskey tabindex " & + "disabled name type value" & commonAttr) + +macro caption*(e: expr): expr = + ## generates the HTML ``caption`` element. + result = xmlCheckedTag(e, "caption", commonAttr) + +macro cite*(e: expr): expr = + ## generates the HTML ``cite`` element. + result = xmlCheckedTag(e, "cite", commonAttr) + +macro code*(e: expr): expr = + ## generates the HTML ``code`` element. + result = xmlCheckedTag(e, "code", commonAttr) + +macro col*(e: expr): expr = + ## generates the HTML ``col`` element. + result = xmlCheckedTag(e, "col", "span align valign" & commonAttr, "", true) + +macro colgroup*(e: expr): expr = + ## generates the HTML ``colgroup`` element. + result = xmlCheckedTag(e, "colgroup", "span align valign" & commonAttr) + +macro dd*(e: expr): expr = + ## generates the HTML ``dd`` element. + result = xmlCheckedTag(e, "dd", commonAttr) + +macro del*(e: expr): expr = + ## generates the HTML ``del`` element. + result = xmlCheckedTag(e, "del", "cite datetime" & commonAttr) + +macro dfn*(e: expr): expr = + ## generates the HTML ``dfn`` element. + result = xmlCheckedTag(e, "dfn", commonAttr) + +macro `div`*(e: expr): expr = + ## generates the HTML ``div`` element. + result = xmlCheckedTag(e, "div", commonAttr) + +macro dl*(e: expr): expr = + ## generates the HTML ``dl`` element. + result = xmlCheckedTag(e, "dl", commonAttr) + +macro dt*(e: expr): expr = + ## generates the HTML ``dt`` element. + result = xmlCheckedTag(e, "dt", commonAttr) + +macro em*(e: expr): expr = + ## generates the HTML ``em`` element. + result = xmlCheckedTag(e, "em", commonAttr) + +macro fieldset*(e: expr): expr = + ## generates the HTML ``fieldset`` element. + result = xmlCheckedTag(e, "fieldset", commonAttr) + +macro form*(e: expr): expr = + ## generates the HTML ``form`` element. + result = xmlCheckedTag(e, "form", "method encype accept accept-charset" & + commonAttr, "action") + +macro h1*(e: expr): expr = + ## generates the HTML ``h1`` element. + result = xmlCheckedTag(e, "h1", commonAttr) + +macro h2*(e: expr): expr = + ## generates the HTML ``h2`` element. + result = xmlCheckedTag(e, "h2", commonAttr) + +macro h3*(e: expr): expr = + ## generates the HTML ``h3`` element. + result = xmlCheckedTag(e, "h3", commonAttr) + +macro h4*(e: expr): expr = + ## generates the HTML ``h4`` element. + result = xmlCheckedTag(e, "h4", commonAttr) + +macro h5*(e: expr): expr = + ## generates the HTML ``h5`` element. + result = xmlCheckedTag(e, "h5", commonAttr) + +macro h6*(e: expr): expr = + ## generates the HTML ``h6`` element. + result = xmlCheckedTag(e, "h6", commonAttr) + +macro head*(e: expr): expr = + ## generates the HTML ``head`` element. + result = xmlCheckedTag(e, "head", "profile") + +macro html*(e: expr): expr = + ## generates the HTML ``html`` element. + result = xmlCheckedTag(e, "html", "", "xmlns") + +macro hr*(e: expr): expr = + ## generates the HTML ``hr`` element. + result = xmlCheckedTag(e, "hr", commonAttr, "", true) + +macro i*(e: expr): expr = + ## generates the HTML ``i`` element. + result = xmlCheckedTag(e, "i", commonAttr) + +macro img*(e: expr): expr = + ## generates the HTML ``img`` element. + result = xmlCheckedTag(e, "img", "longdesc height width", "src alt", true) + +macro input*(e: expr): expr = + ## generates the HTML ``input`` element. + result = xmlCheckedTag(e, "input", "name type value checked maxlength src" & + " alt accept disabled readonly accesskey tabindex" & commonAttr, "", true) + +macro ins*(e: expr): expr = + ## generates the HTML ``ins`` element. + result = xmlCheckedTag(e, "ins", "cite datetime" & commonAttr) + +macro kbd*(e: expr): expr = + ## generates the HTML ``kbd`` element. + result = xmlCheckedTag(e, "kbd", commonAttr) + +macro label*(e: expr): expr = + ## generates the HTML ``label`` element. + result = xmlCheckedTag(e, "label", "for accesskey" & commonAttr) + +macro legend*(e: expr): expr = + ## generates the HTML ``legend`` element. + result = xmlCheckedTag(e, "legend", "accesskey" & commonAttr) + +macro li*(e: expr): expr = + ## generates the HTML ``li`` element. + result = xmlCheckedTag(e, "li", commonAttr) + +macro link*(e: expr): expr = + ## generates the HTML ``link`` element. + result = xmlCheckedTag(e, "link", "href charset hreflang type rel rev media" & + commonAttr, "", true) + +macro map*(e: expr): expr = + ## generates the HTML ``map`` element. + result = xmlCheckedTag(e, "map", "class title" & eventAttr, "id", false) + +macro meta*(e: expr): expr = + ## generates the HTML ``meta`` element. + result = xmlCheckedTag(e, "meta", "name http-equiv scheme", "content", true) + +macro noscript*(e: expr): expr = + ## generates the HTML ``noscript`` element. + result = xmlCheckedTag(e, "noscript", commonAttr) + +macro `object`*(e: expr): expr = + ## generates the HTML ``object`` element. + result = xmlCheckedTag(e, "object", "classid data codebase declare type " & + "codetype archive standby width height name tabindex" & commonAttr) + +macro ol*(e: expr): expr = + ## generates the HTML ``ol`` element. + result = xmlCheckedTag(e, "ol", commonAttr) + +macro optgroup*(e: expr): expr = + ## generates the HTML ``optgroup`` element. + result = xmlCheckedTag(e, "optgroup", "disabled" & commonAttr, "label", false) + +macro option*(e: expr): expr = + ## generates the HTML ``option`` element. + result = xmlCheckedTag(e, "option", "selected value" & commonAttr) + +macro p*(e: expr): expr = + ## generates the HTML ``p`` element. + result = xmlCheckedTag(e, "p", commonAttr) + +macro param*(e: expr): expr = + ## generates the HTML ``param`` element. + result = xmlCheckedTag(e, "param", "value id type valuetype", "name", true) + +macro pre*(e: expr): expr = + ## generates the HTML ``pre`` element. + result = xmlCheckedTag(e, "pre", commonAttr) + +macro q*(e: expr): expr = + ## generates the HTML ``q`` element. + result = xmlCheckedTag(e, "q", "cite" & commonAttr) + +macro samp*(e: expr): expr = + ## generates the HTML ``samp`` element. + result = xmlCheckedTag(e, "samp", commonAttr) + +macro script*(e: expr): expr = + ## generates the HTML ``script`` element. + result = xmlCheckedTag(e, "script", "src charset defer", "type", false) + +macro select*(e: expr): expr = + ## generates the HTML ``select`` element. + result = xmlCheckedTag(e, "select", "name size multiple disabled tabindex" & + commonAttr) + +macro small*(e: expr): expr = + ## generates the HTML ``small`` element. + result = xmlCheckedTag(e, "small", commonAttr) + +macro span*(e: expr): expr = + ## generates the HTML ``span`` element. + result = xmlCheckedTag(e, "span", commonAttr) + +macro strong*(e: expr): expr = + ## generates the HTML ``strong`` element. + result = xmlCheckedTag(e, "strong", commonAttr) + +macro style*(e: expr): expr = + ## generates the HTML ``style`` element. + result = xmlCheckedTag(e, "style", "media title", "type") + +macro sub*(e: expr): expr = + ## generates the HTML ``sub`` element. + result = xmlCheckedTag(e, "sub", commonAttr) + +macro sup*(e: expr): expr = + ## generates the HTML ``sup`` element. + result = xmlCheckedTag(e, "sup", commonAttr) + +macro table*(e: expr): expr = + ## generates the HTML ``table`` element. + result = xmlCheckedTag(e, "table", "summary border cellpadding cellspacing" & + " frame rules width" & commonAttr) + +macro tbody*(e: expr): expr = + ## generates the HTML ``tbody`` element. + result = xmlCheckedTag(e, "tbody", "align valign" & commonAttr) + +macro td*(e: expr): expr = + ## generates the HTML ``td`` element. + result = xmlCheckedTag(e, "td", "colspan rowspan abbr axis headers scope" & + " align valign" & commonAttr) + +macro textarea*(e: expr): expr = + ## generates the HTML ``textarea`` element. + result = xmlCheckedTag(e, "textarea", " name disabled readonly accesskey" & + " tabindex" & commonAttr, "rows cols", false) + +macro tfoot*(e: expr): expr = + ## generates the HTML ``tfoot`` element. + result = xmlCheckedTag(e, "tfoot", "align valign" & commonAttr) + +macro th*(e: expr): expr = + ## generates the HTML ``th`` element. + result = xmlCheckedTag(e, "th", "colspan rowspan abbr axis headers scope" & + " align valign" & commonAttr) + +macro thead*(e: expr): expr = + ## generates the HTML ``thead`` element. + result = xmlCheckedTag(e, "thead", "align valign" & commonAttr) + +macro title*(e: expr): expr = + ## generates the HTML ``title`` element. + result = xmlCheckedTag(e, "title") + +macro tr*(e: expr): expr = + ## generates the HTML ``tr`` element. + result = xmlCheckedTag(e, "tr", "align valign" & commonAttr) + +macro tt*(e: expr): expr = + ## generates the HTML ``tt`` element. + result = xmlCheckedTag(e, "tt", commonAttr) + +macro ul*(e: expr): expr = + ## generates the HTML ``ul`` element. + result = xmlCheckedTag(e, "ul", commonAttr) + +macro `var`*(e: expr): expr = + ## generates the HTML ``var`` element. + result = xmlCheckedTag(e, "var", commonAttr) + +when isMainModule: + var nim = "Nimrod" + echo h1(a(href="http://force7.de/nimrod", nim)) + diff --git a/nimlib/readme.txt b/nimlib/readme.txt new file mode 100755 index 000000000..2b2c4a03c --- /dev/null +++ b/nimlib/readme.txt @@ -0,0 +1,2 @@ +This directory contains a fixed system library and some other libraries for +bootstrapping Nimrod with Nim, the old Pascal version of the compiler. diff --git a/nimlib/system.nim b/nimlib/system.nim new file mode 100755 index 000000000..174d739cd --- /dev/null +++ b/nimlib/system.nim @@ -0,0 +1,1531 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2009 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## The compiler depends on the System module to work properly and the System +## module depends on the compiler. Most of the routines listed here use +## special compiler magic. +## Each module implicitly imports the System module; it may not be listed +## explicitly. Because of this there cannot be a user-defined module named +## ``system``. + +{.push hints: off.} + +type + int* {.magic: Int.} ## default integer type; bitwidth depends on + ## architecture, but is always the same as a pointer + int8* {.magic: Int8.} ## signed 8 bit integer type + int16* {.magic: Int16.} ## signed 16 bit integer type + int32* {.magic: Int32.} ## signed 32 bit integer type + int64* {.magic: Int64.} ## signed 64 bit integer type + float* {.magic: Float.} ## default floating point type + float32* {.magic: Float32.} ## 32 bit floating point type + float64* {.magic: Float64.} ## 64 bit floating point type +type # we need to start a new type section here, so that ``0`` can have a type + bool* {.magic: Bool.} = enum ## built-in boolean type + false = 0, true = 1 + +type + char* {.magic: Char.} ## built-in 8 bit character type (unsigned) + string* {.magic: String.} ## built-in string type + cstring* {.magic: Cstring.} ## built-in cstring (*compatible string*) type + pointer* {.magic: Pointer.} ## built-in pointer type + Ordinal* {.magic: Ordinal.}[T] + +type + `nil` {.magic: "Nil".} + expr* {.magic: Expr.} ## meta type to denote an expression (for templates) + stmt* {.magic: Stmt.} ## meta type to denote a statement (for templates) + typeDesc* {.magic: TypeDesc.} ## meta type to denote + ## a type description (for templates) + +proc defined*[T](x: T): bool {.magic: "Defined", noSideEffect.} + ## Special comile-time procedure that checks whether `x` is + ## defined. `x` has to be an identifier or a qualified identifier. + ## This can be used to check whether a library provides a certain + ## feature or not: + ## + ## .. code-block:: Nimrod + ## when not defined(strutils.toUpper): + ## # provide our own toUpper proc here, because strutils is + ## # missing it. + +proc definedInScope*[T](x: T): bool {. + magic: "DefinedInScope", noSideEffect.} + ## Special comile-time procedure that checks whether `x` is + ## defined in the current scope. `x` has to be an identifier. + +# these require compiler magic: +proc `not` *(x: bool): bool {.magic: "Not", noSideEffect.} + ## Boolean not; returns true iff ``x == false``. + +proc `and`*(x, y: bool): bool {.magic: "And", noSideEffect.} + ## Boolean ``and``; returns true iff ``x == y == true``. + ## Evaluation is short-circuited: This means that if ``x`` is false, + ## ``y`` will not even be evaluated. +proc `or`*(x, y: bool): bool {.magic: "Or", noSideEffect.} + ## Boolean ``or``; returns true iff ``not (not x and not y)``. + ## Evaluation is short-circuited: This means that if ``x`` is true, + ## ``y`` will not even be evaluated. +proc `xor`*(x, y: bool): bool {.magic: "Xor", noSideEffect.} + ## Boolean `exclusive or`; returns true iff ``x != y``. + +proc new*[T](a: var ref T) {.magic: "New", noSideEffect.} + ## creates a new object of type ``T`` and returns a safe (traced) + ## reference to it in ``a``. + +proc new*[T](a: var ref T, finalizer: proc (x: ref T)) {. + magic: "NewFinalize", noSideEffect.} + ## creates a new object of type ``T`` and returns a safe (traced) + ## reference to it in ``a``. When the garbage collector frees the object, + ## `finalizer` is called. The `finalizer` may not keep a reference to the + ## object pointed to by `x`. The `finalizer` cannot prevent the GC from + ## freeing the object. Note: The `finalizer` refers to the type `T`, not to + ## the object! This means that for each object of type `T` the finalizer + ## will be called! + +# for low and high the return type T may not be correct, but +# we handle that with compiler magic in SemLowHigh() +proc high*[T](x: T): T {.magic: "High", noSideEffect.} + ## returns the highest possible index of an array, a sequence, a string or + ## the highest possible value of an ordinal value `x`. As a special + ## semantic rule, `x` may also be a type identifier. + +proc low*[T](x: T): T {.magic: "Low", noSideEffect.} + ## returns the lowest possible index of an array, a sequence, a string or + ## the lowest possible value of an ordinal value `x`. As a special + ## semantic rule, `x` may also be a type identifier. + +type + range*{.magic: "Range".} [T] ## Generic type to construct range types. + array*{.magic: "Array".}[I, T] ## Generic type to construct + ## fixed-length arrays. + openarray*{.magic: "OpenArray".}[T] ## Generic type to construct open arrays. + ## Open arrays are implemented as a + ## pointer to the array data and a + ## length field. + seq*{.magic: "Seq".}[T] ## Generic type to construct sequences. + set*{.magic: "Set".}[T] ## Generic type to construct bit sets. + +when not defined(EcmaScript) and not defined(NimrodVM): + type + TGenericSeq {.compilerproc, pure.} = object + len, space: int + PGenericSeq {.exportc.} = ptr TGenericSeq + # len and space without counting the terminating zero: + NimStringDesc {.compilerproc, final.} = object of TGenericSeq + data: array[0..100_000_000, char] + NimString = ptr NimStringDesc + + include "system/hti" + +type + Byte* = Int8 ## this is an alias for ``int8``, that is a signed + ## int 8 bits wide. + + Natural* = range[0..high(int)] + ## is an int type ranging from zero to the maximum value + ## of an int. This type is often useful for documentation and debugging. + + Positive* = range[1..high(int)] + ## is an int type ranging from one to the maximum value + ## of an int. This type is often useful for documentation and debugging. + + TObject* {.exportc: "TNimObject".} = + object ## the root of Nimrod's object hierarchy. Objects should + ## inherit from TObject or one of its descendants. However, + ## objects that have no ancestor are allowed. + PObject* = ref TObject ## reference to TObject + + E_Base* {.compilerproc.} = object of TObject ## base exception class; + ## each exception has to + ## inherit from `E_Base`. + name*: cstring ## The exception's name is its Nimrod identifier. + ## This field is filled automatically in the + ## ``raise`` statement. + msg* {.exportc: "message".}: cstring ## the exception's message. Not + ## providing an + ## exception message is bad style. + + EAsynch* = object of E_Base ## Abstract exception class for + ## *asynchronous exceptions* (interrupts). + ## This is rarely needed: Most + ## exception types inherit from `ESynch` + ESynch* = object of E_Base ## Abstract exception class for + ## *synchronous exceptions*. Most exceptions + ## should be inherited (directly or indirectly) + ## from ESynch. + ESystem* = object of ESynch ## Abstract class for exceptions that the runtime + ## system raises. + EIO* = object of ESystem ## raised if an IO error occured. + EOS* = object of ESystem ## raised if an operating system service failed. + EInvalidLibrary* = object of EOS ## raised if a dynamic library + ## could not be loaded. + ERessourceExhausted* = object of ESystem ## raised if a ressource request + ## could not be fullfilled. + EArithmetic* = object of ESynch ## raised if any kind of arithmetic + ## error occured. + EDivByZero* {.compilerproc.} = + object of EArithmetic ## is the exception class for integer divide-by-zero + ## errors. + EOverflow* {.compilerproc.} = + object of EArithmetic ## is the exception class for integer calculations + ## whose results are too large to fit in the + ## provided bits. + + EAccessViolation* {.compilerproc.} = + object of ESynch ## the exception class for invalid memory access errors + + EAssertionFailed* {.compilerproc.} = + object of ESynch ## is the exception class for Assert + ## procedures that is raised if the + ## assertion proves wrong + + EControlC* = object of EAsynch ## is the exception class for Ctrl+C + ## key presses in console applications. + + EInvalidValue* = object of ESynch ## is the exception class for string + ## and object conversion errors. + + EOutOfMemory* = object of ESystem ## is the exception class for + ## unsuccessful attempts to allocate + ## memory. + + EInvalidIndex* = object of ESynch ## is raised if an array index is out + ## of bounds. + EInvalidField* = object of ESynch ## is raised if a record field is not + ## accessible because its dicriminant's + ## value does not fit. + + EOutOfRange* = object of ESynch ## is raised if a range check error + ## occured. + + EStackOverflow* = object of ESystem ## is raised if the hardware stack + ## used for subroutine calls overflowed. + + ENoExceptionToReraise* = object of ESynch ## is raised if there is no + ## exception to reraise. + + EInvalidObjectAssignment* = + object of ESynch ## is raised if an object gets assigned to its + ## farther's object. + + EInvalidObjectConversion* = + object of ESynch ## is raised if an object is converted to an incompatible + ## object type. + + TResult* = enum Failure, Success + +proc sizeof*[T](x: T): natural {.magic: "SizeOf", noSideEffect.} + ## returns the size of ``x`` in bytes. Since this is a low-level proc, + ## its usage is discouraged - using ``new`` for the most cases suffices + ## that one never needs to know ``x``'s size. As a special semantic rule, + ## ``x`` may also be a type identifier (``sizeof(int)`` is valid). + +proc succ*[T](x: ordinal[T], y = 1): T {.magic: "Succ", noSideEffect.} + ## returns the ``y``-th successor of the value ``x``. ``T`` has to be + ## an ordinal type. If such a value does not exist, ``EOutOfRange`` is raised + ## or a compile time error occurs. + +proc pred*[T](x: ordinal[T], y = 1): T {.magic: "Pred", noSideEffect.} + ## returns the ``y``-th predecessor of the value ``x``. ``T`` has to be + ## an ordinal type. If such a value does not exist, ``EOutOfRange`` is raised + ## or a compile time error occurs. + +proc inc*[T](x: var ordinal[T], y = 1) {.magic: "Inc", noSideEffect.} + ## increments the ordinal ``x`` by ``y``. If such a value does not + ## exist, ``EOutOfRange`` is raised or a compile time error occurs. This is a + ## short notation for: ``x = succ(x, y)``. + +proc dec*[T](x: var ordinal[T], y = 1) {.magic: "Dec", noSideEffect.} + ## decrements the ordinal ``x`` by ``y``. If such a value does not + ## exist, ``EOutOfRange`` is raised or a compile time error occurs. This is a + ## short notation for: ``x = pred(x, y)``. + +proc newSeq*[T](s: var seq[T], len: int) {.magic: "NewSeq", noSideEffect.} + ## creates a new sequence of type ``seq[T]`` with length ``len``. + ## This is equivalent to ``s = @[]; setlen(s, len)``, but more + ## efficient since no reallocation is needed. + +proc len*[T](x: openarray[T]): int {.magic: "LengthOpenArray", noSideEffect.} +proc len*(x: string): int {.magic: "LengthStr", noSideEffect.} +proc len*(x: cstring): int {.magic: "LengthStr", noSideEffect.} +proc len*[I, T](x: array[I, T]): int {.magic: "LengthArray", noSideEffect.} +proc len*[T](x: seq[T]): int {.magic: "LengthSeq", noSideEffect.} + ## returns the length of an array, a sequence or a string. + ## This is rougly the same as ``high(T)-low(T)+1``, but its resulting type is + ## always an int. + +# set routines: +proc incl*[T](x: var set[T], y: T) {.magic: "Incl", noSideEffect.} + ## includes element ``y`` to the set ``x``. This is the same as + ## ``x = x + {y}``, but it might be more efficient. + +proc excl*[T](x: var set[T], y: T) {.magic: "Excl", noSideEffect.} + ## excludes element ``y`` to the set ``x``. This is the same as + ## ``x = x - {y}``, but it might be more efficient. + +proc card*[T](x: set[T]): int {.magic: "Card", noSideEffect.} + ## returns the cardinality of the set ``x``, i.e. the number of elements + ## in the set. + +proc ord*[T](x: T): int {.magic: "Ord", noSideEffect.} + ## returns the internal int value of an ordinal value ``x``. + +proc chr*(u: range[0..255]): char {.magic: "Chr", noSideEffect.} + ## converts an int in the range 0..255 to a character. + +# -------------------------------------------------------------------------- +# built-in operators + +proc ze*(x: int8): int {.magic: "Ze8ToI", noSideEffect.} + ## zero extends a smaller integer type to ``int``. This treats `x` as + ## unsigned. +proc ze*(x: int16): int {.magic: "Ze16ToI", noSideEffect.} + ## zero extends a smaller integer type to ``int``. This treats `x` as + ## unsigned. + +proc ze64*(x: int8): int64 {.magic: "Ze8ToI64", noSideEffect.} + ## zero extends a smaller integer type to ``int64``. This treats `x` as + ## unsigned. +proc ze64*(x: int16): int64 {.magic: "Ze16ToI64", noSideEffect.} + ## zero extends a smaller integer type to ``int64``. This treats `x` as + ## unsigned. + +proc ze64*(x: int32): int64 {.magic: "Ze32ToI64", noSideEffect.} + ## zero extends a smaller integer type to ``int64``. This treats `x` as + ## unsigned. +proc ze64*(x: int): int64 {.magic: "ZeIToI64", noDecl, noSideEffect.} + ## zero extends a smaller integer type to ``int64``. This treats `x` as + ## unsigned. Does nothing if the size of an ``int`` is the same as ``int64``. + ## (This is the case on 64 bit processors.) + +proc toU8*(x: int): int8 {.magic: "ToU8", noSideEffect.} + ## treats `x` as unsigned and converts it to a byte by taking the last 8 bits + ## from `x`. +proc toU16*(x: int): int16 {.magic: "ToU16", noSideEffect.} + ## treats `x` as unsigned and converts it to an ``int16`` by taking the last + ## 16 bits from `x`. +proc toU32*(x: int64): int32 {.magic: "ToU32", noSideEffect.} + ## treats `x` as unsigned and converts it to an ``int32`` by taking the + ## last 32 bits from `x`. + + +# integer calculations: +proc `+` *(x: int): int {.magic: "UnaryPlusI", noSideEffect.} +proc `+` *(x: int8): int8 {.magic: "UnaryPlusI", noSideEffect.} +proc `+` *(x: int16): int16 {.magic: "UnaryPlusI", noSideEffect.} +proc `+` *(x: int32): int32 {.magic: "UnaryPlusI", noSideEffect.} +proc `+` *(x: int64): int64 {.magic: "UnaryPlusI64", noSideEffect.} + ## Unary `+` operator for an integer. Has no effect. + +proc `-` *(x: int): int {.magic: "UnaryMinusI", noSideEffect.} +proc `-` *(x: int8): int8 {.magic: "UnaryMinusI", noSideEffect.} +proc `-` *(x: int16): int16 {.magic: "UnaryMinusI", noSideEffect.} +proc `-` *(x: int32): int32 {.magic: "UnaryMinusI", noSideEffect.} +proc `-` *(x: int64): int64 {.magic: "UnaryMinusI64", noSideEffect.} + ## Unary `-` operator for an integer. Negates `x`. + +proc `not` *(x: int): int {.magic: "BitnotI", noSideEffect.} +proc `not` *(x: int8): int8 {.magic: "BitnotI", noSideEffect.} +proc `not` *(x: int16): int16 {.magic: "BitnotI", noSideEffect.} +proc `not` *(x: int32): int32 {.magic: "BitnotI", noSideEffect.} +proc `not` *(x: int64): int64 {.magic: "BitnotI64", noSideEffect.} + ## computes the `bitwise complement` of the integer `x`. + +proc `+` *(x, y: int): int {.magic: "AddI", noSideEffect.} +proc `+` *(x, y: int8): int8 {.magic: "AddI", noSideEffect.} +proc `+` *(x, y: int16): int16 {.magic: "AddI", noSideEffect.} +proc `+` *(x, y: int32): int32 {.magic: "AddI", noSideEffect.} +proc `+` *(x, y: int64): int64 {.magic: "AddI64", noSideEffect.} + ## Binary `+` operator for an integer. + +proc `-` *(x, y: int): int {.magic: "SubI", noSideEffect.} +proc `-` *(x, y: int8): int8 {.magic: "SubI", noSideEffect.} +proc `-` *(x, y: int16): int16 {.magic: "SubI", noSideEffect.} +proc `-` *(x, y: int32): int32 {.magic: "SubI", noSideEffect.} +proc `-` *(x, y: int64): int64 {.magic: "SubI64", noSideEffect.} + ## Binary `-` operator for an integer. + +proc `*` *(x, y: int): int {.magic: "MulI", noSideEffect.} +proc `*` *(x, y: int8): int8 {.magic: "MulI", noSideEffect.} +proc `*` *(x, y: int16): int16 {.magic: "MulI", noSideEffect.} +proc `*` *(x, y: int32): int32 {.magic: "MulI", noSideEffect.} +proc `*` *(x, y: int64): int64 {.magic: "MulI64", noSideEffect.} + ## Binary `*` operator for an integer. + +proc `div` *(x, y: int): int {.magic: "DivI", noSideEffect.} +proc `div` *(x, y: int8): int8 {.magic: "DivI", noSideEffect.} +proc `div` *(x, y: int16): int16 {.magic: "DivI", noSideEffect.} +proc `div` *(x, y: int32): int32 {.magic: "DivI", noSideEffect.} +proc `div` *(x, y: int64): int64 {.magic: "DivI64", noSideEffect.} + ## computes the integer division. This is roughly the same as + ## ``floor(x/y)``. + +proc `mod` *(x, y: int): int {.magic: "ModI", noSideEffect.} +proc `mod` *(x, y: int8): int8 {.magic: "ModI", noSideEffect.} +proc `mod` *(x, y: int16): int16 {.magic: "ModI", noSideEffect.} +proc `mod` *(x, y: int32): int32 {.magic: "ModI", noSideEffect.} +proc `mod` *(x, y: int64): int64 {.magic: "ModI64", noSideEffect.} + ## computes the integer modulo operation. This is the same as + ## ``x - (x div y) * y``. + +proc `shr` *(x, y: int): int {.magic: "ShrI", noSideEffect.} +proc `shr` *(x, y: int8): int8 {.magic: "ShrI", noSideEffect.} +proc `shr` *(x, y: int16): int16 {.magic: "ShrI", noSideEffect.} +proc `shr` *(x, y: int32): int32 {.magic: "ShrI", noSideEffect.} +proc `shr` *(x, y: int64): int64 {.magic: "ShrI64", noSideEffect.} + ## computes the `shift right` operation of `x` and `y`. + +proc `shl` *(x, y: int): int {.magic: "ShlI", noSideEffect.} +proc `shl` *(x, y: int8): int8 {.magic: "ShlI", noSideEffect.} +proc `shl` *(x, y: int16): int16 {.magic: "ShlI", noSideEffect.} +proc `shl` *(x, y: int32): int32 {.magic: "ShlI", noSideEffect.} +proc `shl` *(x, y: int64): int64 {.magic: "ShlI64", noSideEffect.} + ## computes the `shift left` operation of `x` and `y`. + +proc `and` *(x, y: int): int {.magic: "BitandI", noSideEffect.} +proc `and` *(x, y: int8): int8 {.magic: "BitandI", noSideEffect.} +proc `and` *(x, y: int16): int16 {.magic: "BitandI", noSideEffect.} +proc `and` *(x, y: int32): int32 {.magic: "BitandI", noSideEffect.} +proc `and` *(x, y: int64): int64 {.magic: "BitandI64", noSideEffect.} + ## computes the `bitwise and` of numbers `x` and `y`. + +proc `or` *(x, y: int): int {.magic: "BitorI", noSideEffect.} +proc `or` *(x, y: int8): int8 {.magic: "BitorI", noSideEffect.} +proc `or` *(x, y: int16): int16 {.magic: "BitorI", noSideEffect.} +proc `or` *(x, y: int32): int32 {.magic: "BitorI", noSideEffect.} +proc `or` *(x, y: int64): int64 {.magic: "BitorI64", noSideEffect.} + ## computes the `bitwise or` of numbers `x` and `y`. + +proc `xor` *(x, y: int): int {.magic: "BitxorI", noSideEffect.} +proc `xor` *(x, y: int8): int8 {.magic: "BitxorI", noSideEffect.} +proc `xor` *(x, y: int16): int16 {.magic: "BitxorI", noSideEffect.} +proc `xor` *(x, y: int32): int32 {.magic: "BitxorI", noSideEffect.} +proc `xor` *(x, y: int64): int64 {.magic: "BitxorI64", noSideEffect.} + ## computes the `bitwise xor` of numbers `x` and `y`. + +proc `==` *(x, y: int): bool {.magic: "EqI", noSideEffect.} +proc `==` *(x, y: int8): bool {.magic: "EqI", noSideEffect.} +proc `==` *(x, y: int16): bool {.magic: "EqI", noSideEffect.} +proc `==` *(x, y: int32): bool {.magic: "EqI", noSideEffect.} +proc `==` *(x, y: int64): bool {.magic: "EqI64", noSideEffect.} + ## Compares two integers for equality. + +proc `<=` *(x, y: int): bool {.magic: "LeI", noSideEffect.} +proc `<=` *(x, y: int8): bool {.magic: "LeI", noSideEffect.} +proc `<=` *(x, y: int16): bool {.magic: "LeI", noSideEffect.} +proc `<=` *(x, y: int32): bool {.magic: "LeI", noSideEffect.} +proc `<=` *(x, y: int64): bool {.magic: "LeI64", noSideEffect.} + ## Returns true iff `x` is less than or equal to `y`. + +proc `<` *(x, y: int): bool {.magic: "LtI", noSideEffect.} +proc `<` *(x, y: int8): bool {.magic: "LtI", noSideEffect.} +proc `<` *(x, y: int16): bool {.magic: "LtI", noSideEffect.} +proc `<` *(x, y: int32): bool {.magic: "LtI", noSideEffect.} +proc `<` *(x, y: int64): bool {.magic: "LtI64", noSideEffect.} + ## Returns true iff `x` is less than `y`. + +proc abs*(x: int): int {.magic: "AbsI", noSideEffect.} +proc abs*(x: int8): int8 {.magic: "AbsI", noSideEffect.} +proc abs*(x: int16): int16 {.magic: "AbsI", noSideEffect.} +proc abs*(x: int32): int32 {.magic: "AbsI", noSideEffect.} +proc abs*(x: int64): int64 {.magic: "AbsI64", noSideEffect.} + ## returns the absolute value of `x`. If `x` is ``low(x)`` (that + ## is -MININT for its type), an overflow exception is thrown (if overflow + ## checking is turned on). + +proc `+%` *(x, y: int): int {.magic: "AddU", noSideEffect.} +proc `+%` *(x, y: int8): int8 {.magic: "AddU", noSideEffect.} +proc `+%` *(x, y: int16): int16 {.magic: "AddU", noSideEffect.} +proc `+%` *(x, y: int32): int32 {.magic: "AddU", noSideEffect.} +proc `+%` *(x, y: int64): int64 {.magic: "AddU64", noSideEffect.} + ## treats `x` and `y` as unsigned and adds them. The result is truncated to + ## fit into the result. This implements modulo arithmetic. No overflow + ## errors are possible. + +proc `-%` *(x, y: int): int {.magic: "SubU", noSideEffect.} +proc `-%` *(x, y: int8): int8 {.magic: "SubU", noSideEffect.} +proc `-%` *(x, y: int16): int16 {.magic: "SubU", noSideEffect.} +proc `-%` *(x, y: int32): int32 {.magic: "SubU", noSideEffect.} +proc `-%` *(x, y: int64): int64 {.magic: "SubU64", noSideEffect.} + ## treats `x` and `y` as unsigned and subtracts them. The result is + ## truncated to fit into the result. This implements modulo arithmetic. + ## No overflow errors are possible. + +proc `*%` *(x, y: int): int {.magic: "MulU", noSideEffect.} +proc `*%` *(x, y: int8): int8 {.magic: "MulU", noSideEffect.} +proc `*%` *(x, y: int16): int16 {.magic: "MulU", noSideEffect.} +proc `*%` *(x, y: int32): int32 {.magic: "MulU", noSideEffect.} +proc `*%` *(x, y: int64): int64 {.magic: "MulU64", noSideEffect.} + ## treats `x` and `y` as unsigned and multiplies them. The result is + ## truncated to fit into the result. This implements modulo arithmetic. + ## No overflow errors are possible. + +proc `/%` *(x, y: int): int {.magic: "DivU", noSideEffect.} +proc `/%` *(x, y: int8): int8 {.magic: "DivU", noSideEffect.} +proc `/%` *(x, y: int16): int16 {.magic: "DivU", noSideEffect.} +proc `/%` *(x, y: int32): int32 {.magic: "DivU", noSideEffect.} +proc `/%` *(x, y: int64): int64 {.magic: "DivU64", noSideEffect.} + ## treats `x` and `y` as unsigned and divides them. The result is + ## truncated to fit into the result. This implements modulo arithmetic. + ## No overflow errors are possible. + +proc `%%` *(x, y: int): int {.magic: "ModU", noSideEffect.} +proc `%%` *(x, y: int8): int8 {.magic: "ModU", noSideEffect.} +proc `%%` *(x, y: int16): int16 {.magic: "ModU", noSideEffect.} +proc `%%` *(x, y: int32): int32 {.magic: "ModU", noSideEffect.} +proc `%%` *(x, y: int64): int64 {.magic: "ModU64", noSideEffect.} + ## treats `x` and `y` as unsigned and compute the modulo of `x` and `y`. + ## The result is truncated to fit into the result. + ## This implements modulo arithmetic. + ## No overflow errors are possible. + +proc `<=%` *(x, y: int): bool {.magic: "LeU", noSideEffect.} +proc `<=%` *(x, y: int8): bool {.magic: "LeU", noSideEffect.} +proc `<=%` *(x, y: int16): bool {.magic: "LeU", noSideEffect.} +proc `<=%` *(x, y: int32): bool {.magic: "LeU", noSideEffect.} +proc `<=%` *(x, y: int64): bool {.magic: "LeU64", noSideEffect.} + ## treats `x` and `y` as unsigned and compares them. + ## Returns true iff ``unsigned(x) <= unsigned(y)``. + +proc `<%` *(x, y: int): bool {.magic: "LtU", noSideEffect.} +proc `<%` *(x, y: int8): bool {.magic: "LtU", noSideEffect.} +proc `<%` *(x, y: int16): bool {.magic: "LtU", noSideEffect.} +proc `<%` *(x, y: int32): bool {.magic: "LtU", noSideEffect.} +proc `<%` *(x, y: int64): bool {.magic: "LtU64", noSideEffect.} + ## treats `x` and `y` as unsigned and compares them. + ## Returns true iff ``unsigned(x) < unsigned(y)``. + + +# floating point operations: +proc `+` *(x: float): float {.magic: "UnaryPlusF64", noSideEffect.} +proc `-` *(x: float): float {.magic: "UnaryMinusF64", noSideEffect.} +proc `+` *(x, y: float): float {.magic: "AddF64", noSideEffect.} +proc `-` *(x, y: float): float {.magic: "SubF64", noSideEffect.} +proc `*` *(x, y: float): float {.magic: "MulF64", noSideEffect.} +proc `/` *(x, y: float): float {.magic: "DivF64", noSideEffect.} + ## computes the floating point division + +proc `==` *(x, y: float): bool {.magic: "EqF64", noSideEffect.} +proc `<=` *(x, y: float): bool {.magic: "LeF64", noSideEffect.} +proc `<` *(x, y: float): bool {.magic: "LtF64", noSideEffect.} +proc abs*(x: float): float {.magic: "AbsF64", noSideEffect.} +proc min*(x, y: float): float {.magic: "MinF64", noSideEffect.} +proc max*(x, y: float): float {.magic: "MaxF64", noSideEffect.} + +# set operators +proc `*` *[T](x, y: set[T]): set[T] {.magic: "MulSet", noSideEffect.} + ## This operator computes the intersection of two sets. +proc `+` *[T](x, y: set[T]): set[T] {.magic: "PlusSet", noSideEffect.} + ## This operator computes the union of two sets. +proc `-` *[T](x, y: set[T]): set[T] {.magic: "MinusSet", noSideEffect.} + ## This operator computes the difference of two sets. +proc `-+-` *[T](x, y: set[T]): set[T] {.magic: "SymDiffSet", noSideEffect.} + ## computes the symmetric set difference. This is the same as + ## ``(A - B) + (B - A)``, but more efficient. + +# comparison operators: +proc `==` *[T](x, y: ordinal[T]): bool {.magic: "EqEnum", noSideEffect.} +proc `==` *(x, y: pointer): bool {.magic: "EqRef", noSideEffect.} +proc `==` *(x, y: string): bool {.magic: "EqStr", noSideEffect.} +proc `==` *(x, y: cstring): bool {.magic: "EqCString", noSideEffect.} +proc `==` *(x, y: char): bool {.magic: "EqCh", noSideEffect.} +proc `==` *(x, y: bool): bool {.magic: "EqB", noSideEffect.} +proc `==` *[T](x, y: set[T]): bool {.magic: "EqSet", noSideEffect.} +proc `==` *[T](x, y: ref T): bool {.magic: "EqRef", noSideEffect.} +proc `==` *[T](x, y: ptr T): bool {.magic: "EqRef", noSideEffect.} + +proc `<=` *[T](x, y: ordinal[T]): bool {.magic: "LeEnum", noSideEffect.} +proc `<=` *(x, y: string): bool {.magic: "LeStr", noSideEffect.} +proc `<=` *(x, y: char): bool {.magic: "LeCh", noSideEffect.} +proc `<=` *[T](x, y: set[T]): bool {.magic: "LeSet", noSideEffect.} +proc `<=` *(x, y: bool): bool {.magic: "LeB", noSideEffect.} +proc `<=` *[T](x, y: ref T): bool {.magic: "LePtr", noSideEffect.} +proc `<=` *(x, y: pointer): bool {.magic: "LePtr", noSideEffect.} + +proc `<` *[T](x, y: ordinal[T]): bool {.magic: "LtEnum", noSideEffect.} +proc `<` *(x, y: string): bool {.magic: "LtStr", noSideEffect.} +proc `<` *(x, y: char): bool {.magic: "LtCh", noSideEffect.} +proc `<` *[T](x, y: set[T]): bool {.magic: "LtSet", noSideEffect.} +proc `<` *(x, y: bool): bool {.magic: "LtB", noSideEffect.} +proc `<` *[T](x, y: ref T): bool {.magic: "LtPtr", noSideEffect.} +proc `<` *[T](x, y: ptr T): bool {.magic: "LtPtr", noSideEffect.} +proc `<` *(x, y: pointer): bool {.magic: "LtPtr", noSideEffect.} + +template `!=` * (x, y: expr): expr = + ## unequals operator. This is a shorthand for ``not (x == y)``. + not (x == y) + +template `>=` * (x, y: expr): expr = + ## "is greater or equals" operator. This is the same as ``y <= x``. + y <= x + +template `>` * (x, y: expr): expr = + ## "is greater" operator. This is the same as ``y < x``. + y < x + +proc contains*[T](x: set[T], y: T): bool {.magic: "InSet", noSideEffect.} + ## One should overload this proc if one wants to overload the ``in`` operator. + ## The parameters are in reverse order! ``a in b`` is a template for + ## ``contains(b, a)``. + ## This is because the unification algorithm that Nimrod uses for overload + ## resolution works from left to right. + ## But for the ``in`` operator that would be the wrong direction for this + ## piece of code: + ## + ## .. code-block:: Nimrod + ## var s: set[range['a'..'z']] = {'a'..'c'} + ## writeln(stdout, 'b' in s) + ## + ## If ``in`` had been declared as ``[T](elem: T, s: set[T])`` then ``T`` would + ## have been bound to ``char``. But ``s`` is not compatible to type + ## ``set[char]``! The solution is to bind ``T`` to ``range['a'..'z']``. This + ## is achieved by reversing the parameters for ``contains``; ``in`` then + ## passes its arguments in reverse order. + +template `in` * (x, y: expr): expr = contains(y, x) +template `not_in` * (x, y: expr): expr = not contains(y, x) + +proc `is` *[T, S](x: T, y: S): bool {.magic: "Is", noSideEffect.} +template `is_not` *(x, y: expr): expr = not (x is y) + +proc cmp*[T, S: typeDesc](x: T, y: S): int = + ## Generic compare proc. Returns a value < 0 iff x < y, a value > 0 iff x > y + ## and 0 iff x == y. This is useful for writing generic algorithms without + ## performance loss. This generic implementation uses the `==` and `<` + ## operators. + if x == y: return 0 + if x < y: return -1 + return 1 + +proc cmp*(x, y: string): int {.noSideEffect.} + ## Compare proc for strings. More efficient than the generic version. + +proc `@` * [IDX, T](a: array[IDX, T]): seq[T] {. + magic: "ArrToSeq", nosideeffect.} + ## turns an array into a sequence. This most often useful for constructing + ## sequences with the array constructor: ``@[1, 2, 3]`` has the type + ## ``seq[int]``, while ``[1, 2, 3]`` has the type ``array[0..2, int]``. + +proc setLen*[T](s: var seq[T], newlen: int) {. + magic: "SetLengthSeq", noSideEffect.} + ## sets the length of `s` to `newlen`. + ## ``T`` may be any sequence type. + ## If the current length is greater than the new length, + ## ``s`` will be truncated. + +proc setLen*(s: var string, newlen: int) {. + magic: "SetLengthStr", noSideEffect.} + ## sets the length of `s` to `newlen`. + ## If the current length is greater than the new length, + ## ``s`` will be truncated. + +proc newString*(len: int): string {. + magic: "NewString", importc: "mnewString", noSideEffect.} + ## returns a new string of length ``len`` but with uninitialized + ## content. One needs to fill the string character after character + ## with the index operator ``s[i]``. This procedure exists only for + ## optimization purposes; the same effect can be achieved with the + ## ``&`` operator. + +# concat operator: +proc `&` * (x: string, y: char): string {. + magic: "ConStrStr", noSideEffect, merge.} +proc `&` * (x: char, y: char): string {. + magic: "ConStrStr", noSideEffect, merge.} +proc `&` * (x, y: string): string {. + magic: "ConStrStr", noSideEffect, merge.} +proc `&` * (x: char, y: string): string {. + magic: "ConStrStr", noSideEffect, merge.} + ## is the `concatenation operator`. It concatenates `x` and `y`. + +proc add*(x: var string, y: char) {.magic: "AppendStrCh", noSideEffect.} +proc add*(x: var string, y: string) {.magic: "AppendStrStr", noSideEffect.} + +when not defined(ECMAScript): + {.push overflow_checks:off} + proc add* (x: var string, y: cstring) = + var i = 0 + while y[i] != '\0': + add(x, y[i]) + inc(i) + {.pop.} +else: + proc add* (x: var string, y: cstring) {.pure.} = + asm """ + var len = `x`[0].length-1; + for (var i = 0; i < `y`.length; ++i) { + `x`[0][len] = `y`.charCodeAt(i); + ++len; + } + `x`[0][len] = 0 + """ + +proc add *[T](x: var seq[T], y: T) {.magic: "AppendSeqElem", noSideEffect.} +proc add *[T](x: var seq[T], y: openArray[T]) {.noSideEffect.} = + ## Generic proc for adding a data item `y` to a container `x`. + ## For containers that have an order, `add` means *append*. New generic + ## containers should also call their adding proc `add` for consistency. + ## Generic code becomes much easier to write if the Nimrod naming scheme is + ## respected. + var xl = x.len + setLen(x, xl + y.len) + for i in 0..high(y): x[xl+i] = y[i] + +proc repr*[T](x: T): string {.magic: "Repr", noSideEffect.} + ## takes any Nimrod variable and returns its string representation. It + ## works even for complex data graphs with cycles. This is a great + ## debugging tool. + +type + TAddress* = int + ## is the signed integer type that should be used for converting + ## pointers to integer addresses for readability. + +type + BiggestInt* = int64 + ## is an alias for the biggest signed integer type the Nimrod compiler + ## supports. Currently this is ``int64``, but it is platform-dependant + ## in general. + + BiggestFloat* = float64 + ## is an alias for the biggest floating point type the Nimrod + ## compiler supports. Currently this is ``float64``, but it is + ## platform-dependant in general. + +type # these work for most platforms: + cchar* {.importc: "char", nodecl.} = char + ## This is the same as the type ``char`` in *C*. + cschar* {.importc: "signed char", nodecl.} = byte + ## This is the same as the type ``signed char`` in *C*. + cshort* {.importc: "short", nodecl.} = int16 + ## This is the same as the type ``short`` in *C*. + cint* {.importc: "int", nodecl.} = int32 + ## This is the same as the type ``int`` in *C*. + clong* {.importc: "long", nodecl.} = int + ## This is the same as the type ``long`` in *C*. + clonglong* {.importc: "long long", nodecl.} = int64 + ## This is the same as the type ``long long`` in *C*. + cfloat* {.importc: "float", nodecl.} = float32 + ## This is the same as the type ``float`` in *C*. + cdouble* {.importc: "double", nodecl.} = float64 + ## This is the same as the type ``double`` in *C*. + clongdouble* {.importc: "long double", nodecl.} = BiggestFloat + ## This is the same as the type ``long double`` in *C*. + ## This C type is not supported by Nimrod's code generator + + cstringArray* {.importc: "char**", nodecl.} = ptr array [0..50_000, cstring] + ## This is binary compatible to the type ``char**`` in *C*. The array's + ## high value is large enough to disable bounds checking in practice. + + TEndian* = enum ## is a type describing the endianness of a processor. + littleEndian, bigEndian + + PFloat32* = ptr Float32 ## an alias for ``ptr float32`` + PFloat64* = ptr Float64 ## an alias for ``ptr float64`` + PInt64* = ptr Int64 ## an alias for ``ptr int64`` + PInt32* = ptr Int32 ## an alias for ``ptr int32`` + +const + isMainModule* {.magic: "IsMainModule".}: bool = false + ## is true only when accessed in the main module. This works thanks to + ## compiler magic. It is useful to embed testing code in a module. + + CompileDate* {.magic: "CompileDate"}: string = "0000-00-00" + ## is the date of compilation as a string of the form + ## ``YYYY-MM-DD``. This works thanks to compiler magic. + + CompileTime* {.magic: "CompileTime"}: string = "00:00:00" + ## is the time of compilation as a string of the form + ## ``HH:MM:SS``. This works thanks to compiler magic. + + NimrodVersion* {.magic: "NimrodVersion"}: string = "0.0.0" + ## is the version of Nimrod as a string. + ## This works thanks to compiler magic. + + NimrodMajor* {.magic: "NimrodMajor"}: int = 0 + ## is the major number of Nimrod's version. + ## This works thanks to compiler magic. + + NimrodMinor* {.magic: "NimrodMinor"}: int = 0 + ## is the minor number of Nimrod's version. + ## This works thanks to compiler magic. + + NimrodPatch* {.magic: "NimrodPatch"}: int = 0 + ## is the patch number of Nimrod's version. + ## This works thanks to compiler magic. + + cpuEndian* {.magic: "CpuEndian"}: TEndian = littleEndian + ## is the endianness of the target CPU. This is a valuable piece of + ## information for low-level code only. This works thanks to compiler magic. + + hostOS* {.magic: "HostOS"}: string = "" + ## a string that describes the host operating system. Possible values: + ## "windows", "macosx", "linux", "netbsd", "freebsd", "openbsd", "solaris", + ## "aix" + + hostCPU* {.magic: "HostCPU"}: string = "" + ## a string that describes the host CPU. Possible values: + ## "i386", "alpha", "powerpc", "sparc", "amd64", "mips", "arm" + +proc toFloat*(i: int): float {. + magic: "ToFloat", noSideEffect, importc: "toFloat".} + ## converts an integer `i` into a ``float``. If the conversion + ## fails, `EInvalidValue` is raised. However, on most platforms the + ## conversion cannot fail. + +proc toBiggestFloat*(i: biggestint): biggestfloat {. + magic: "ToBiggestFloat", noSideEffect, importc: "toBiggestFloat".} + ## converts an biggestint `i` into a ``biggestfloat``. If the conversion + ## fails, `EInvalidValue` is raised. However, on most platforms the + ## conversion cannot fail. + +proc toInt*(f: float): int {. + magic: "ToInt", noSideEffect, importc: "toInt".} + ## converts a floating point number `f` into an ``int``. Conversion + ## rounds `f` if it does not contain an integer value. If the conversion + ## fails (because `f` is infinite for example), `EInvalidValue` is raised. + +proc toBiggestInt*(f: biggestfloat): biggestint {. + magic: "ToBiggestInt", noSideEffect, importc: "toBiggestInt".} + ## converts a biggestfloat `f` into a ``biggestint``. Conversion + ## rounds `f` if it does not contain an integer value. If the conversion + ## fails (because `f` is infinite for example), `EInvalidValue` is raised. + +proc addQuitProc*(QuitProc: proc {.noconv.}) {.importc: "atexit", nodecl.} + ## adds/registers a quit procedure. Each call to ``addQuitProc`` + ## registers another quit procedure. Up to 30 procedures can be + ## registered. They are executed on a last-in, first-out basis + ## (that is, the last function registered is the first to be executed). + ## ``addQuitProc`` raises an EOutOfIndex if ``quitProc`` cannot be + ## registered. + +# Support for addQuitProc() is done by Ansi C's facilities here. +# In case of an unhandled exeption the exit handlers should +# not be called explicitly! The user may decide to do this manually though. + +proc copy*(s: string, first = 0): string {. + magic: "CopyStr", importc: "copyStr", noSideEffect.} +proc copy*(s: string, first, last: int): string {. + magic: "CopyStrLast", importc: "copyStrLast", noSideEffect.} + ## copies a slice of `s` into a new string and returns this new + ## string. The bounds `first` and `last` denote the indices of + ## the first and last characters that shall be copied. If ``last`` + ## is omitted, it is treated as ``high(s)``. + +proc zeroMem*(p: Pointer, size: int) {.importc, noDecl.} + ## overwrites the contents of the memory at ``p`` with the value 0. + ## Exactly ``size`` bytes will be overwritten. Like any procedure + ## dealing with raw memory this is *unsafe*. + +proc copyMem*(dest, source: Pointer, size: int) {.importc: "memcpy", noDecl.} + ## copies the contents from the memory at ``source`` to the memory + ## at ``dest``. Exactly ``size`` bytes will be copied. The memory + ## regions may not overlap. Like any procedure dealing with raw + ## memory this is *unsafe*. + +proc moveMem*(dest, source: Pointer, size: int) {.importc: "memmove", noDecl.} + ## copies the contents from the memory at ``source`` to the memory + ## at ``dest``. Exactly ``size`` bytes will be copied. The memory + ## regions may overlap, ``moveMem`` handles this case appropriately + ## and is thus somewhat more safe than ``copyMem``. Like any procedure + ## dealing with raw memory this is still *unsafe*, though. + +proc equalMem*(a, b: Pointer, size: int): bool {. + importc: "equalMem", noDecl, noSideEffect.} + ## compares the memory blocks ``a`` and ``b``. ``size`` bytes will + ## be compared. If the blocks are equal, true is returned, false + ## otherwise. Like any procedure dealing with raw memory this is + ## *unsafe*. + +proc alloc*(size: int): pointer {.noconv.} + ## allocates a new memory block with at least ``size`` bytes. The + ## block has to be freed with ``realloc(block, 0)`` or + ## ``dealloc(block)``. The block is not initialized, so reading + ## from it before writing to it is undefined behaviour! +proc alloc0*(size: int): pointer {.noconv.} + ## allocates a new memory block with at least ``size`` bytes. The + ## block has to be freed with ``realloc(block, 0)`` or + ## ``dealloc(block)``. The block is initialized with all bytes + ## containing zero, so it is somewhat safer than ``alloc``. +proc realloc*(p: Pointer, newsize: int): pointer {.noconv.} + ## grows or shrinks a given memory block. If p is **nil** then a new + ## memory block is returned. In either way the block has at least + ## ``newsize`` bytes. If ``newsize == 0`` and p is not **nil** + ## ``realloc`` calls ``dealloc(p)``. In other cases the block has to + ## be freed with ``dealloc``. +proc dealloc*(p: Pointer) {.noconv.} + ## frees the memory allocated with ``alloc``, ``alloc0`` or + ## ``realloc``. This procedure is dangerous! If one forgets to + ## free the memory a leak occurs; if one tries to access freed + ## memory (or just freeing it twice!) a core dump may happen + ## or other memory may be corrupted. + +proc assert*(cond: bool) {.magic: "Assert", noSideEffect.} + ## provides a means to implement `programming by contracts`:idx: in Nimrod. + ## ``assert`` evaluates expression ``cond`` and if ``cond`` is false, it + ## raises an ``EAssertionFailure`` exception. However, the compiler may + ## not generate any code at all for ``assert`` if it is advised to do so. + ## Use ``assert`` for debugging purposes only. + +proc swap*[T](a, b: var T) {.magic: "Swap", noSideEffect.} + ## swaps the values `a` and `b`. This is often more efficient than + ## ``tmp = a; a = b; b = tmp``. Particularly useful for sorting algorithms. + +template `>=%` *(x, y: expr): expr = y <=% x + ## treats `x` and `y` as unsigned and compares them. + ## Returns true iff ``unsigned(x) >= unsigned(y)``. + +template `>%` *(x, y: expr): expr = y <% x + ## treats `x` and `y` as unsigned and compares them. + ## Returns true iff ``unsigned(x) > unsigned(y)``. + +proc `$` *(x: int): string {.magic: "IntToStr", noSideEffect.} + ## The stingify operator for an integer argument. Returns `x` + ## converted to a decimal string. + +proc `$` *(x: int64): string {.magic: "Int64ToStr", noSideEffect.} + ## The stingify operator for an integer argument. Returns `x` + ## converted to a decimal string. + +proc `$` *(x: float): string {.magic: "FloatToStr", noSideEffect.} + ## The stingify operator for a float argument. Returns `x` + ## converted to a decimal string. + +proc `$` *(x: bool): string {.magic: "BoolToStr", noSideEffect.} + ## The stingify operator for a boolean argument. Returns `x` + ## converted to the string "false" or "true". + +proc `$` *(x: char): string {.magic: "CharToStr", noSideEffect.} + ## The stingify operator for a character argument. Returns `x` + ## converted to a string. + +proc `$` *(x: Cstring): string {.magic: "CStrToStr", noSideEffect.} + ## The stingify operator for a CString argument. Returns `x` + ## converted to a string. + +proc `$` *(x: string): string {.magic: "StrToStr", noSideEffect.} + ## The stingify operator for a string argument. Returns `x` + ## as it is. This operator is useful for generic code, so + ## that ``$expr`` also works if ``expr`` is already a string. + +proc `$` *[T](x: ordinal[T]): string {.magic: "EnumToStr", noSideEffect.} + ## The stingify operator for an enumeration argument. This works for + ## any enumeration type thanks to compiler magic. If a + ## a ``$`` operator for a concrete enumeration is provided, this is + ## used instead. (In other words: *Overwriting* is possible.) + +# undocumented: +proc getRefcount*[T](x: ref T): int {.importc: "getRefcount", noSideEffect.} + ## retrieves the reference count of an heap-allocated object. The + ## value is implementation-dependant. + +#proc writeStackTrace() {.export: "writeStackTrace".} + +when not defined(NimrodVM): + proc getCurrentExceptionMsg*(): string {.exportc.} + ## retrieves the error message that was attached to the current + ## exception; if there is none, "" is returned. + +# new constants: +const + inf* {.magic: "Inf".} = 1.0 / 0.0 + ## contains the IEEE floating point value of positive infinity. + neginf* {.magic: "NegInf".} = -inf + ## contains the IEEE floating point value of negative infinity. + nan* {.magic: "NaN".} = 0.0 / 0.0 + ## contains an IEEE floating point value of *Not A Number*. Note + ## that you cannot compare a floating point value to this value + ## and expect a reasonable result - use the `classify` procedure + ## in the module ``math`` for checking for NaN. + +var + dbgLineHook*: proc = nil + ## set this variable to provide a procedure that should be called before + ## each executed instruction. This should only be used by debuggers! + ## Only code compiled with the ``debugger:on`` switch calls this hook. + +# GC interface: + +proc getOccupiedMem*(): int + ## returns the number of bytes that are owned by the process and hold data. + +proc getFreeMem*(): int + ## returns the number of bytes that are owned by the process, but do not + ## hold any meaningful data. + +proc getTotalMem*(): int + ## returns the number of bytes that are owned by the process. + + +iterator countdown*[T](a, b: T, step = 1): T {.inline.} = + ## Counts from ordinal value `a` down to `b` with the given + ## step count. `T` may be any ordinal type, `step` may only + ## be positive. + var res = a + while res >= b: + yield res + dec(res, step) + +iterator countup*[T](a, b: T, step = 1): T {.inline.} = + ## Counts from ordinal value `a` up to `b` with the given + ## step count. `T` may be any ordinal type, `step` may only + ## be positive. + var res = a + while res <= b: + yield res + inc(res, step) + # we cannot use ``for x in a..b: `` here, because that is not + # known in the System module + + +proc min*(x, y: int): int {.magic: "MinI", noSideEffect.} +proc min*(x, y: int8): int8 {.magic: "MinI", noSideEffect.} +proc min*(x, y: int16): int16 {.magic: "MinI", noSideEffect.} +proc min*(x, y: int32): int32 {.magic: "MinI", noSideEffect.} +proc min*(x, y: int64): int64 {.magic: "MinI64", noSideEffect.} + ## The minimum value of two integers. + +proc min*[T](x: openarray[T]): T = + ## The minimum value of an openarray. + result = x[0] + for i in 1..high(x): result = min(result, x[i]) + +proc max*(x, y: int): int {.magic: "MaxI", noSideEffect.} +proc max*(x, y: int8): int8 {.magic: "MaxI", noSideEffect.} +proc max*(x, y: int16): int16 {.magic: "MaxI", noSideEffect.} +proc max*(x, y: int32): int32 {.magic: "MaxI", noSideEffect.} +proc max*(x, y: int64): int64 {.magic: "MaxI64", noSideEffect.} + ## The maximum value of two integers. + +proc max*[T](x: openarray[T]): T = + ## The maximum value of an openarray. + result = x[0] + for i in 1..high(x): result = max(result, x[i]) + + +iterator items*[T](a: openarray[T]): T {.inline.} = + ## iterates over each item of `a`. + var i = 0 + while i < len(a): + yield a[i] + inc(i) + +iterator items*[IX, T](a: array[IX, T]): T {.inline.} = + ## iterates over each item of `a`. + var i = low(IX) + if i <= high(IX): + while true: + yield a[i] + if i >= high(IX): break + inc(i) + +iterator items*[T](a: seq[T]): T {.inline.} = + ## iterates over each item of `a`. + var i = 0 + while i < len(a): + yield a[i] + inc(i) + +iterator items*(a: string): char {.inline.} = + ## iterates over each item of `a`. + var i = 0 + while i < len(a): + yield a[i] + inc(i) + +iterator items*[T](a: set[T]): T {.inline.} = + ## iterates over each element of `a`. `items` iterates only over the + ## elements that are really in the set (and not over the ones the set is + ## able to hold). + var i = low(T) + if i <= high(T): + while true: + if i in a: yield i + if i >= high(T): break + inc(i) + +iterator items*(a: cstring): char {.inline.} = + ## iterates over each item of `a`. + var i = 0 + while a[i] != '\0': + yield a[i] + inc(i) + +proc isNil*[T](x: seq[T]): bool {.noSideEffect, magic: "IsNil".} +proc isNil*[T](x: ref T): bool {.noSideEffect, magic: "IsNil".} +proc isNil*(x: string): bool {.noSideEffect, magic: "IsNil".} +proc isNil*[T](x: ptr T): bool {.noSideEffect, magic: "IsNil".} +proc isNil*(x: pointer): bool {.noSideEffect, magic: "IsNil".} +proc isNil*(x: cstring): bool {.noSideEffect, magic: "IsNil".} + ## Fast check whether `x` is nil. This is sometimes more efficient than + ## ``== nil``. + + +# Fixup some magic symbols here: +#{.fixup_system.} +# This is an undocumented pragma that can only be used +# once in the system module. + +proc `&` *[T](x, y: openArray[T]): seq[T] {.noSideEffect.} = + newSeq(result, x.len + y.len) + for i in 0..x.len-1: + result[i] = x[i] + for i in 0..y.len-1: + result[i+x.len] = y[i] + +proc `&` *[T](x: openArray[T], y: T): seq[T] {.noSideEffect.} = + newSeq(result, x.len + 1) + for i in 0..x.len-1: + result[i] = x[i] + result[x.len] = y + +proc `&` *[T](x: T, y: openArray[T]): seq[T] {.noSideEffect.} = + newSeq(result, y.len + 1) + for i in 0..y.len-1: + result[i] = y[i] + result[y.len] = x + +when not defined(NimrodVM): + when not defined(ECMAScript): + proc seqToPtr[T](x: seq[T]): pointer {.inline, nosideeffect.} = + result = cast[pointer](x) + else: + proc seqToPtr[T](x: seq[T]): pointer {.pure, nosideeffect.} = + asm """return `x`""" + + proc `==` *[T: typeDesc](x, y: seq[T]): bool {.noSideEffect.} = + ## Generic equals operator for sequences: relies on a equals operator for + ## the element type `T`. + if seqToPtr(x) == seqToPtr(y): + result = true + elif seqToPtr(x) == nil or seqToPtr(y) == nil: + result = false + elif x.len == y.len: + for i in 0..x.len-1: + if x[i] != y[i]: return false + result = true + +proc find*[T, S: typeDesc](a: T, item: S): int {.inline.}= + ## Returns the first index of `item` in `a` or -1 if not found. This requires + ## appropriate `items` and `==` procs to work. + for i in items(a): + if i == item: return + inc(result) + result = -1 + +proc contains*[T](a: openArray[T], item: T): bool {.inline.}= + ## Returns true if `item` is in `a` or false if not found. This is a shortcut + ## for ``find(a, item) >= 0``. + return find(a, item) >= 0 + +proc pop*[T](s: var seq[T]): T {.inline, noSideEffect.} = + ## returns the last item of `s` and decreases ``s.len`` by one. This treats + ## `s` as a stack and implements the common *pop* operation. + var L = s.len-1 + result = s[L] + setLen(s, L) + +proc each*[T, S](data: openArray[T], op: proc (x: T): S): seq[S] {. + noSideEffect.} = + ## The well-known ``map`` operation from functional programming. Applies + ## `op` to every item in `data` and returns the result as a sequence. + newSeq(result, data.len) + for i in 0..data.len-1: result[i] = op(data[i]) + + +# ----------------- FPU ------------------------------------------------------ + +#proc disableFPUExceptions*() +# disables all floating point unit exceptions + +#proc enableFPUExceptions*() +# enables all floating point unit exceptions + +# ----------------- GC interface --------------------------------------------- + +proc GC_disable*() + ## disables the GC. If called n-times, n calls to `GC_enable` are needed to + ## reactivate the GC. Note that in most circumstances one should only disable + ## the mark and sweep phase with `GC_disableMarkAndSweep`. + +proc GC_enable*() + ## enables the GC again. + +proc GC_fullCollect*() + ## forces a full garbage collection pass. + ## Ordinary code does not need to call this (and should not). + +type + TGC_Strategy* = enum ## the strategy the GC should use for the application + gcThroughput, ## optimize for throughput + gcResponsiveness, ## optimize for responsiveness (default) + gcOptimizeTime, ## optimize for speed + gcOptimizeSpace ## optimize for memory footprint + +proc GC_setStrategy*(strategy: TGC_Strategy) + ## tells the GC the desired strategy for the application. + +proc GC_enableMarkAndSweep*() +proc GC_disableMarkAndSweep*() + ## the current implementation uses a reference counting garbage collector + ## with a seldomly run mark and sweep phase to free cycles. The mark and + ## sweep phase may take a long time and is not needed if the application + ## does not create cycles. Thus the mark and sweep phase can be deactivated + ## and activated separately from the rest of the GC. + +proc GC_getStatistics*(): string + ## returns an informative string about the GC's activity. This may be useful + ## for tweaking. + +proc GC_ref*[T](x: ref T) {.magic: "GCref".} +proc GC_ref*[T](x: seq[T]) {.magic: "GCref".} +proc GC_ref*(x: string) {.magic: "GCref".} + ## marks the object `x` as referenced, so that it will not be freed until + ## it is unmarked via `GC_unref`. If called n-times for the same object `x`, + ## n calls to `GC_unref` are needed to unmark `x`. + +proc GC_unref*[T](x: ref T) {.magic: "GCunref".} +proc GC_unref*[T](x: seq[T]) {.magic: "GCunref".} +proc GC_unref*(x: string) {.magic: "GCunref".} + ## see the documentation of `GC_ref`. + +template accumulateResult*(iter: expr) = + ## helps to convert an iterator to a proc. + result = @[] + for x in iter: add(result, x) + +{.push checks: off, line_dir: off, debugger: off.} +# obviously we cannot generate checking operations here :-) +# because it would yield into an endless recursion +# however, stack-traces are available for most parts +# of the code + +proc echo*[Ty](x: openarray[Ty]) {.magic: "Echo".} + ## equivalent to ``writeln(stdout, x); flush(stdout)``. BUT: This is + ## available for the ECMAScript target too! + +template newException(exceptn, message: expr): expr = + block: # open a new scope + var + e: ref exceptn + new(e) + e.msg = message + e + +const + QuitSuccess* = 0 + ## is the value that should be passed to ``quit`` to indicate + ## success. + + QuitFailure* = 1 + ## is the value that should be passed to ``quit`` to indicate + ## failure. + +proc quit*(errorcode: int = QuitSuccess) {. + magic: "Exit", importc: "exit", noDecl, noReturn.} + ## stops the program immediately; before stopping the program the + ## "quit procedures" are called in the opposite order they were added + ## with ``addQuitProc``. ``quit`` never returns and ignores any + ## exception that may have been raised by the quit procedures. + ## It does *not* call the garbage collector to free all the memory, + ## unless a quit procedure calls ``GC_collect``. + +when not defined(EcmaScript) and not defined(NimrodVM): + proc quit*(errormsg: string) {.noReturn.} + ## a shorthand for ``echo(errormsg); quit(quitFailure)``. + +when not defined(EcmaScript) and not defined(NimrodVM): + + proc initGC() + + var + strDesc: TNimType + + strDesc.size = sizeof(string) + strDesc.kind = tyString + strDesc.flags = {ntfAcyclic} + initGC() # BUGFIX: need to be called here! + + {.push stack_trace: off.} + + include "system/ansi_c" + + proc cmp(x, y: string): int = + return int(c_strcmp(x, y)) + + const pccHack = if defined(pcc): "_" else: "" # Hack for PCC + when defined(windows): + # work-around C's sucking abstraction: + # BUGFIX: stdin and stdout should be binary files! + proc setmode(handle, mode: int) {.importc: pccHack & "setmode", + header: "<io.h>".} + proc fileno(f: C_TextFileStar): int {.importc: pccHack & "fileno", + header: "<fcntl.h>".} + var + O_BINARY {.importc: pccHack & "O_BINARY", nodecl.}: int + + # we use binary mode in Windows: + setmode(fileno(c_stdin), O_BINARY) + setmode(fileno(c_stdout), O_BINARY) + + when defined(endb): + proc endbStep() + + # ----------------- IO Part -------------------------------------------------- + + type + CFile {.importc: "FILE", nodecl, final.} = object # empty record for + # data hiding + TFile* = ptr CFile ## The type representing a file handle. + + TFileMode* = enum ## The file mode when opening a file. + fmRead, ## Open the file for read access only. + fmWrite, ## Open the file for write access only. + fmReadWrite, ## Open the file for read and write access. + ## If the file does not exist, it will be + ## created. + fmReadWriteExisting, ## Open the file for read and write access. + ## If the file does not exist, it will not be + ## created. + fmAppend ## Open the file for writing only; append data + ## at the end. + + TFileHandle* = cint ## type that represents an OS file handle; this is + ## useful for low-level file access + + # text file handling: + var + stdin* {.importc: "stdin", noDecl.}: TFile ## The standard input stream. + stdout* {.importc: "stdout", noDecl.}: TFile ## The standard output stream. + stderr* {.importc: "stderr", noDecl.}: TFile + ## The standard error stream. + ## + ## Note: In my opinion, this should not be used -- the concept of a + ## separate error stream is a design flaw of UNIX. A seperate *message + ## stream* is a good idea, but since it is named ``stderr`` there are few + ## programs out there that distinguish properly between ``stdout`` and + ## ``stderr``. So, that's what you get if you don't name your variables + ## appropriately. It also annoys people if redirection via ``>output.txt`` + ## does not work because the program writes to ``stderr``. + + proc OpenFile*(f: var TFile, filename: string, + mode: TFileMode = fmRead, + bufSize: int = -1): Bool {.deprecated.} + ## **Deprecated since version 0.8.0**: Use `open` instead. + + proc OpenFile*(f: var TFile, filehandle: TFileHandle, + mode: TFileMode = fmRead): Bool {.deprecated.} + ## **Deprecated since version 0.8.0**: Use `open` instead. + + proc Open*(f: var TFile, filename: string, + mode: TFileMode = fmRead, bufSize: int = -1): Bool + ## Opens a file named `filename` with given `mode`. + ## + ## Default mode is readonly. Returns true iff the file could be opened. + ## This throws no exception if the file could not be opened. The reason is + ## that the programmer needs to provide an appropriate error message anyway + ## (yes, even in scripts). + + proc Open*(f: var TFile, filehandle: TFileHandle, + mode: TFileMode = fmRead): Bool + ## Creates a ``TFile`` from a `filehandle` with given `mode`. + ## + ## Default mode is readonly. Returns true iff the file could be opened. + + proc CloseFile*(f: TFile) {.importc: "fclose", nodecl, deprecated.} + ## Closes the file. + ## **Deprecated since version 0.8.0**: Use `close` instead. + + proc Close*(f: TFile) {.importc: "fclose", nodecl.} + ## Closes the file. + + proc EndOfFile*(f: TFile): Bool + ## Returns true iff `f` is at the end. + proc readChar*(f: TFile): char {.importc: "fgetc", nodecl.} + ## Reads a single character from the stream `f`. If the stream + ## has no more characters, `EEndOfFile` is raised. + proc FlushFile*(f: TFile) {.importc: "fflush", noDecl.} + ## Flushes `f`'s buffer. + + proc readFile*(filename: string): string + ## Opens a file name `filename` for reading. Then reads the + ## file's content completely into a string and + ## closes the file afterwards. Returns the string. Returns nil if there was + ## an error. Does not throw an IO exception. + + proc write*(f: TFile, r: float) + proc write*(f: TFile, i: int) + proc write*(f: TFile, s: string) + proc write*(f: TFile, b: Bool) + proc write*(f: TFile, c: char) + proc write*(f: TFile, c: cstring) + proc write*(f: TFile, a: openArray[string]) + ## Writes a value to the file `f`. May throw an IO exception. + + proc readLine*(f: TFile): string + ## reads a line of text from the file `f`. May throw an IO exception. + ## Reading from an empty file buffer, does not throw an exception, but + ## returns nil. A line of text may be delimited by ``CR``, ``LF`` or + ## ``CRLF``. The newline character(s) are not part of the returned string. + + proc writeln*[Ty](f: TFile, x: Ty) {.inline.} + ## writes a value `x` to `f` and then writes "\n". + ## May throw an IO exception. + + proc writeln*[Ty](f: TFile, x: openArray[Ty]) {.inline.} + ## writes a value `x` to `f` and then writes "\n". + ## May throw an IO exception. + + proc getFileSize*(f: TFile): int64 + ## retrieves the file size (in bytes) of `f`. + + proc ReadBytes*(f: TFile, a: var openarray[byte], start, len: int): int + ## reads `len` bytes into the buffer `a` starting at ``a[start]``. Returns + ## the actual number of bytes that have been read which may be less than + ## `len` (if not as many bytes are remaining), but not greater. + + proc ReadChars*(f: TFile, a: var openarray[char], start, len: int): int + ## reads `len` bytes into the buffer `a` starting at ``a[start]``. Returns + ## the actual number of bytes that have been read which may be less than + ## `len` (if not as many bytes are remaining), but not greater. + + proc readBuffer*(f: TFile, buffer: pointer, len: int): int + ## reads `len` bytes into the buffer pointed to by `buffer`. Returns + ## the actual number of bytes that have been read which may be less than + ## `len` (if not as many bytes are remaining), but not greater. + + proc writeBytes*(f: TFile, a: openarray[byte], start, len: int): int + ## writes the bytes of ``a[start..start+len-1]`` to the file `f`. Returns + ## the number of actual written bytes, which may be less than `len` in case + ## of an error. + + proc writeChars*(f: tFile, a: openarray[char], start, len: int): int + ## writes the bytes of ``a[start..start+len-1]`` to the file `f`. Returns + ## the number of actual written bytes, which may be less than `len` in case + ## of an error. + + proc writeBuffer*(f: TFile, buffer: pointer, len: int): int + ## writes the bytes of buffer pointed to by the parameter `buffer` to the + ## file `f`. Returns the number of actual written bytes, which may be less + ## than `len` in case of an error. + + proc setFilePos*(f: TFile, pos: int64) + ## sets the position of the file pointer that is used for read/write + ## operations. The file's first byte has the index zero. + + proc getFilePos*(f: TFile): int64 + ## retrieves the current position of the file pointer that is used to + ## read from the file `f`. The file's first byte has the index zero. + + include "system/sysio" + + iterator lines*(filename: string): string = + ## Iterate over any line in the file named `filename`. + ## If the file does not exist `EIO` is raised. + var + f: TFile + if not open(f, filename): + raise newException(EIO, "cannot open: " & filename) + var res = "" + while not endOfFile(f): + rawReadLine(f, res) + yield res + Close(f) + + proc fileHandle*(f: TFile): TFileHandle {.importc: "fileno", + header: "<stdio.h>"} + ## returns the OS file handle of the file ``f``. This is only useful for + ## platform specific programming. + + proc quit(errormsg: string) = + echo(errormsg) + quit(quitFailure) + + # ---------------------------------------------------------------------------- + + include "system/excpt" + # we cannot compile this with stack tracing on + # as it would recurse endlessly! + include "system/arithm" + {.pop.} # stack trace + include "system/dyncalls" + include "system/sets" + + const + GenericSeqSize = (2 * sizeof(int)) + + proc reprAny(p: pointer, typ: PNimType): string {.compilerproc.} + + proc getDiscriminant(aa: Pointer, n: ptr TNimNode): int = + assert(n.kind == nkCase) + var d: int + var a = cast[TAddress](aa) + case n.typ.size + of 1: d = ze(cast[ptr int8](a +% n.offset)^) + of 2: d = ze(cast[ptr int16](a +% n.offset)^) + of 4: d = int(cast[ptr int32](a +% n.offset)^) + else: assert(false) + return d + + proc selectBranch(aa: Pointer, n: ptr TNimNode): ptr TNimNode = + var discr = getDiscriminant(aa, n) + if discr <% n.len: + result = n.sons[discr] + if result == nil: result = n.sons[n.len] + # n.sons[n.len] contains the ``else`` part (but may be nil) + else: + result = n.sons[n.len] + + include "system/mm" + include "system/sysstr" + include "system/assign" + include "system/repr" + + # we have to implement it here after gentostr for the cstrToNimStrDummy proc + proc getCurrentExceptionMsg(): string = + if excHandler == nil: return "" + return $excHandler.exc.msg + + {.push stack_trace: off.} + when defined(endb): + include "system/debugger" + + when defined(profiler): + include "system/profiler" + {.pop.} # stacktrace + +elif defined(ecmaScript): + include "system/ecmasys" +elif defined(NimrodVM): + # Stubs for the GC interface: + proc GC_disable() = nil + proc GC_enable() = nil + proc GC_fullCollect() = nil + proc GC_setStrategy(strategy: TGC_Strategy) = nil + proc GC_enableMarkAndSweep() = nil + proc GC_disableMarkAndSweep() = nil + proc GC_getStatistics(): string = return "" + + proc getOccupiedMem(): int = return -1 + proc getFreeMem(): int = return -1 + proc getTotalMem(): int = return -1 + + proc cmp(x, y: string): int = + if x == y: return 0 + if x < y: return -1 + return 1 + + proc dealloc(p: pointer) = nil + proc alloc(size: int): pointer = nil + proc alloc0(size: int): pointer = nil + proc realloc(p: Pointer, newsize: int): pointer = nil + +{.pop.} # checks +{.pop.} # hints diff --git a/nimlib/system/alloc.nim b/nimlib/system/alloc.nim new file mode 100755 index 000000000..95feff854 --- /dev/null +++ b/nimlib/system/alloc.nim @@ -0,0 +1,596 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2009 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +# Low level allocator for Nimrod. Has been designed to support the GC. +# TODO: +# - eliminate "used" field +# - make searching for block O(1) + +# ------------ platform specific chunk allocation code ----------------------- + +when defined(posix): + const + PROT_READ = 1 # page can be read + PROT_WRITE = 2 # page can be written + MAP_PRIVATE = 2 # Changes are private + + when defined(linux) or defined(aix): + const MAP_ANONYMOUS = 0x20 # don't use a file + elif defined(macosx) or defined(bsd): + const MAP_ANONYMOUS = 0x1000 + elif defined(solaris): + const MAP_ANONYMOUS = 0x100 + else: + {.error: "Port memory manager to your platform".} + + proc mmap(adr: pointer, len: int, prot, flags, fildes: cint, + off: int): pointer {.header: "<sys/mman.h>".} + + proc munmap(adr: pointer, len: int) {.header: "<sys/mman.h>".} + + proc osAllocPages(size: int): pointer {.inline.} = + result = mmap(nil, size, PROT_READ or PROT_WRITE, + MAP_PRIVATE or MAP_ANONYMOUS, -1, 0) + if result == nil or result == cast[pointer](-1): + raiseOutOfMem() + + proc osDeallocPages(p: pointer, size: int) {.inline} = + when reallyOsDealloc: munmap(p, size) + +elif defined(windows): + const + MEM_RESERVE = 0x2000 + MEM_COMMIT = 0x1000 + MEM_TOP_DOWN = 0x100000 + PAGE_READWRITE = 0x04 + + MEM_DECOMMIT = 0x4000 + MEM_RELEASE = 0x8000 + + proc VirtualAlloc(lpAddress: pointer, dwSize: int, flAllocationType, + flProtect: int32): pointer {. + header: "<windows.h>", stdcall.} + + proc VirtualFree(lpAddress: pointer, dwSize: int, + dwFreeType: int32) {.header: "<windows.h>", stdcall.} + + proc osAllocPages(size: int): pointer {.inline.} = + result = VirtualAlloc(nil, size, MEM_RESERVE or MEM_COMMIT, + PAGE_READWRITE) + if result == nil: raiseOutOfMem() + + proc osDeallocPages(p: pointer, size: int) {.inline.} = + # according to Microsoft, 0 is the only correct value here: + when reallyOsDealloc: VirtualFree(p, 0, MEM_RELEASE) + +else: + {.error: "Port memory manager to your platform".} + +# --------------------- end of non-portable code ----------------------------- + +# We manage *chunks* of memory. Each chunk is a multiple of the page size. +# Each chunk starts at an address that is divisible by the page size. Chunks +# that are bigger than ``ChunkOsReturn`` are returned back to the operating +# system immediately. + +const + ChunkOsReturn = 256 * PageSize + InitialMemoryRequest = ChunkOsReturn div 2 # < ChunkOsReturn! + SmallChunkSize = PageSize + +type + PTrunk = ptr TTrunk + TTrunk {.final.} = object + next: PTrunk # all nodes are connected with this pointer + key: int # start address at bit 0 + bits: array[0..IntsPerTrunk-1, int] # a bit vector + + TTrunkBuckets = array[0..1023, PTrunk] + TIntSet {.final.} = object + data: TTrunkBuckets + +type + TAlignType = biggestFloat + TFreeCell {.final, pure.} = object + next: ptr TFreeCell # next free cell in chunk (overlaid with refcount) + zeroField: int # 0 means cell is not used (overlaid with typ field) + # 1 means cell is manually managed pointer + + PChunk = ptr TBaseChunk + PBigChunk = ptr TBigChunk + PSmallChunk = ptr TSmallChunk + TBaseChunk {.pure.} = object + prevSize: int # size of previous chunk; for coalescing + size: int # if < PageSize it is a small chunk + used: bool # later will be optimized into prevSize... + + TSmallChunk = object of TBaseChunk + next, prev: PSmallChunk # chunks of the same size + freeList: ptr TFreeCell + free: int # how many bytes remain + acc: int # accumulator for small object allocation + data: TAlignType # start of usable memory + + TBigChunk = object of TBaseChunk # not necessarily > PageSize! + next: PBigChunk # chunks of the same (or bigger) size + prev: PBigChunk + align: int + data: TAlignType # start of usable memory + +template smallChunkOverhead(): expr = sizeof(TSmallChunk)-sizeof(TAlignType) +template bigChunkOverhead(): expr = sizeof(TBigChunk)-sizeof(TAlignType) + +proc roundup(x, v: int): int {.inline.} = + result = (x + (v-1)) and not (v-1) + assert(result >= x) + #return ((-x) and (v-1)) +% x + +assert(roundup(14, PageSize) == PageSize) +assert(roundup(15, 8) == 16) +assert(roundup(65, 8) == 72) + +# ------------- chunk table --------------------------------------------------- +# We use a PtrSet of chunk starts and a table[Page, chunksize] for chunk +# endings of big chunks. This is needed by the merging operation. The only +# remaining operation is best-fit for big chunks. Since there is a size-limit +# for big chunks (because greater than the limit means they are returned back +# to the OS), a fixed size array can be used. + +type + PLLChunk = ptr TLLChunk + TLLChunk {.pure.} = object ## *low-level* chunk + size: int # remaining size + acc: int # accumulator + + TAllocator {.final, pure.} = object + llmem: PLLChunk + currMem, maxMem, freeMem: int # memory sizes (allocated from OS) + freeSmallChunks: array[0..SmallChunkSize div MemAlign-1, PSmallChunk] + freeChunksList: PBigChunk # XXX make this a datastructure with O(1) access + chunkStarts: TIntSet + +proc incCurrMem(a: var TAllocator, bytes: int) {.inline.} = + inc(a.currMem, bytes) + +proc decCurrMem(a: var TAllocator, bytes: int) {.inline.} = + a.maxMem = max(a.maxMem, a.currMem) + dec(a.currMem, bytes) + +proc getMaxMem(a: var TAllocator): int = + # Since we update maxPagesCount only when freeing pages, + # maxPagesCount may not be up to date. Thus we use the + # maximum of these both values here: + return max(a.currMem, a.maxMem) + +var + allocator: TAllocator + +proc llAlloc(a: var TAllocator, size: int): pointer = + # *low-level* alloc for the memory managers data structures. Deallocation + # is never done. + if a.llmem == nil or size > a.llmem.size: + var request = roundup(size+sizeof(TLLChunk), PageSize) + a.llmem = cast[PLLChunk](osAllocPages(request)) + incCurrMem(a, request) + a.llmem.size = request - sizeof(TLLChunk) + a.llmem.acc = sizeof(TLLChunk) + result = cast[pointer](cast[TAddress](a.llmem) + a.llmem.acc) + dec(a.llmem.size, size) + inc(a.llmem.acc, size) + zeroMem(result, size) + +proc IntSetGet(t: TIntSet, key: int): PTrunk = + var it = t.data[key and high(t.data)] + while it != nil: + if it.key == key: return it + it = it.next + result = nil + +proc IntSetPut(t: var TIntSet, key: int): PTrunk = + result = IntSetGet(t, key) + if result == nil: + result = cast[PTrunk](llAlloc(allocator, sizeof(result^))) + result.next = t.data[key and high(t.data)] + t.data[key and high(t.data)] = result + result.key = key + +proc Contains(s: TIntSet, key: int): bool = + var t = IntSetGet(s, key shr TrunkShift) + if t != nil: + var u = key and TrunkMask + result = (t.bits[u shr IntShift] and (1 shl (u and IntMask))) != 0 + else: + result = false + +proc Incl(s: var TIntSet, key: int) = + var t = IntSetPut(s, key shr TrunkShift) + var u = key and TrunkMask + t.bits[u shr IntShift] = t.bits[u shr IntShift] or (1 shl (u and IntMask)) + +proc Excl(s: var TIntSet, key: int) = + var t = IntSetGet(s, key shr TrunkShift) + if t != nil: + var u = key and TrunkMask + t.bits[u shr IntShift] = t.bits[u shr IntShift] and not + (1 shl (u and IntMask)) + +proc ContainsOrIncl(s: var TIntSet, key: int): bool = + var t = IntSetGet(s, key shr TrunkShift) + if t != nil: + var u = key and TrunkMask + result = (t.bits[u shr IntShift] and (1 shl (u and IntMask))) != 0 + if not result: + t.bits[u shr IntShift] = t.bits[u shr IntShift] or + (1 shl (u and IntMask)) + else: + Incl(s, key) + result = false + +# ------------- chunk management ---------------------------------------------- +proc pageIndex(c: PChunk): int {.inline.} = + result = cast[TAddress](c) shr PageShift + +proc pageIndex(p: pointer): int {.inline.} = + result = cast[TAddress](p) shr PageShift + +proc pageAddr(p: pointer): PChunk {.inline.} = + result = cast[PChunk](cast[TAddress](p) and not PageMask) + assert(Contains(allocator.chunkStarts, pageIndex(result))) + +var lastSize = PageSize + +proc requestOsChunks(a: var TAllocator, size: int): PBigChunk = + incCurrMem(a, size) + inc(a.freeMem, size) + result = cast[PBigChunk](osAllocPages(size)) + assert((cast[TAddress](result) and PageMask) == 0) + #zeroMem(result, size) + result.next = nil + result.prev = nil + result.used = false + result.size = size + # update next.prevSize: + var nxt = cast[TAddress](result) +% size + assert((nxt and PageMask) == 0) + var next = cast[PChunk](nxt) + if pageIndex(next) in a.chunkStarts: + #echo("Next already allocated!") + next.prevSize = size + # set result.prevSize: + var prv = cast[TAddress](result) -% lastSize + assert((nxt and PageMask) == 0) + var prev = cast[PChunk](prv) + if pageIndex(prev) in a.chunkStarts and prev.size == lastSize: + #echo("Prev already allocated!") + result.prevSize = lastSize + else: + result.prevSize = 0 # unknown + lastSize = size # for next request + +proc freeOsChunks(a: var TAllocator, p: pointer, size: int) = + # update next.prevSize: + var c = cast[PChunk](p) + var nxt = cast[TAddress](p) +% c.size + assert((nxt and PageMask) == 0) + var next = cast[PChunk](nxt) + if pageIndex(next) in a.chunkStarts: + next.prevSize = 0 # XXX used + excl(a.chunkStarts, pageIndex(p)) + osDeallocPages(p, size) + decCurrMem(a, size) + dec(a.freeMem, size) + #c_fprintf(c_stdout, "[Alloc] back to OS: %ld\n", size) + +proc isAccessible(p: pointer): bool {.inline.} = + result = Contains(allocator.chunkStarts, pageIndex(p)) + +proc contains[T](list, x: T): bool = + var it = list + while it != nil: + if it == x: return true + it = it.next + +proc writeFreeList(a: TAllocator) = + var it = a.freeChunksList + c_fprintf(c_stdout, "freeChunksList: %p\n", it) + while it != nil: + c_fprintf(c_stdout, "it: %p, next: %p, prev: %p\n", + it, it.next, it.prev) + it = it.next + +proc ListAdd[T](head: var T, c: T) {.inline.} = + assert(c notin head) + assert c.prev == nil + assert c.next == nil + c.next = head + if head != nil: + assert head.prev == nil + head.prev = c + head = c + +proc ListRemove[T](head: var T, c: T) {.inline.} = + assert(c in head) + if c == head: + head = c.next + assert c.prev == nil + if head != nil: head.prev = nil + else: + assert c.prev != nil + c.prev.next = c.next + if c.next != nil: c.next.prev = c.prev + c.next = nil + c.prev = nil + +proc isSmallChunk(c: PChunk): bool {.inline.} = + return c.size <= SmallChunkSize-smallChunkOverhead() + #return c.size < SmallChunkSize + +proc chunkUnused(c: PChunk): bool {.inline.} = + result = not c.used + +proc updatePrevSize(a: var TAllocator, c: PBigChunk, + prevSize: int) {.inline.} = + var ri = cast[PChunk](cast[TAddress](c) +% c.size) + assert((cast[TAddress](ri) and PageMask) == 0) + if isAccessible(ri): + ri.prevSize = prevSize + +proc freeBigChunk(a: var TAllocator, c: PBigChunk) = + var c = c + assert(c.size >= PageSize) + inc(a.freeMem, c.size) + when coalescRight: + var ri = cast[PChunk](cast[TAddress](c) +% c.size) + assert((cast[TAddress](ri) and PageMask) == 0) + if isAccessible(ri) and chunkUnused(ri): + assert(not isSmallChunk(ri)) + if not isSmallChunk(ri): + ListRemove(a.freeChunksList, cast[PBigChunk](ri)) + inc(c.size, ri.size) + excl(a.chunkStarts, pageIndex(ri)) + when coalescLeft: + if c.prevSize != 0: + var le = cast[PChunk](cast[TAddress](c) -% c.prevSize) + assert((cast[TAddress](le) and PageMask) == 0) + if isAccessible(le) and chunkUnused(le): + assert(not isSmallChunk(le)) + if not isSmallChunk(le): + ListRemove(a.freeChunksList, cast[PBigChunk](le)) + inc(le.size, c.size) + excl(a.chunkStarts, pageIndex(c)) + c = cast[PBigChunk](le) + + if c.size < ChunkOsReturn: + incl(a.chunkStarts, pageIndex(c)) + updatePrevSize(a, c, c.size) + ListAdd(a.freeChunksList, c) + c.used = false + else: + freeOsChunks(a, c, c.size) + +proc splitChunk(a: var TAllocator, c: PBigChunk, size: int) = + var rest = cast[PBigChunk](cast[TAddress](c) +% size) + if rest in a.freeChunksList: + c_fprintf(c_stdout, "to add: %p\n", rest) + writeFreeList(allocator) + assert false + rest.size = c.size - size + rest.used = false + rest.next = nil + rest.prev = nil + rest.prevSize = size + updatePrevSize(a, c, rest.size) + c.size = size + incl(a.chunkStarts, pageIndex(rest)) + ListAdd(a.freeChunksList, rest) + +proc getBigChunk(a: var TAllocator, size: int): PBigChunk = + # use first fit for now: + assert((size and PageMask) == 0) + assert(size > 0) + result = a.freeChunksList + block search: + while result != nil: + #if not chunkUnused(result): + # c_fprintf(c_stdout, "%lld\n", int(result.used)) + assert chunkUnused(result) + if result.size == size: + ListRemove(a.freeChunksList, result) + break search + elif result.size > size: + #c_fprintf(c_stdout, "res size: %lld; size: %lld\n", result.size, size) + ListRemove(a.freeChunksList, result) + splitChunk(a, result, size) + break search + result = result.next + assert result != a.freeChunksList + if size < InitialMemoryRequest: + result = requestOsChunks(a, InitialMemoryRequest) + splitChunk(a, result, size) + else: + result = requestOsChunks(a, size) + result.prevSize = 0 # XXX why is this needed? + result.used = true + incl(a.chunkStarts, pageIndex(result)) + dec(a.freeMem, size) + +proc getSmallChunk(a: var TAllocator): PSmallChunk = + var res = getBigChunk(a, PageSize) + assert res.prev == nil + assert res.next == nil + result = cast[PSmallChunk](res) + +# ----------------------------------------------------------------------------- + +proc getCellSize(p: pointer): int {.inline.} = + var c = pageAddr(p) + result = c.size + +proc rawAlloc(a: var TAllocator, requestedSize: int): pointer = + assert(roundup(65, 8) == 72) + assert requestedSize >= sizeof(TFreeCell) + var size = roundup(requestedSize, MemAlign) + #c_fprintf(c_stdout, "alloc; size: %ld; %ld\n", requestedSize, size) + if size <= SmallChunkSize-smallChunkOverhead(): + # allocate a small block: for small chunks, we use only its next pointer + var s = size div MemAlign + var c = a.freeSmallChunks[s] + if c == nil: + c = getSmallChunk(a) + c.freeList = nil + assert c.size == PageSize + c.size = size + c.acc = size + c.free = SmallChunkSize - smallChunkOverhead() - size + c.next = nil + c.prev = nil + ListAdd(a.freeSmallChunks[s], c) + result = addr(c.data) + assert((cast[TAddress](result) and (MemAlign-1)) == 0) + else: + assert c.next != c + #if c.size != size: + # c_fprintf(c_stdout, "csize: %lld; size %lld\n", c.size, size) + assert c.size == size + if c.freeList == nil: + assert(c.acc + smallChunkOverhead() + size <= SmallChunkSize) + result = cast[pointer](cast[TAddress](addr(c.data)) +% c.acc) + inc(c.acc, size) + else: + result = c.freeList + assert(c.freeList.zeroField == 0) + c.freeList = c.freeList.next + dec(c.free, size) + assert((cast[TAddress](result) and (MemAlign-1)) == 0) + if c.free < size: + ListRemove(a.freeSmallChunks[s], c) + else: + size = roundup(requestedSize+bigChunkOverhead(), PageSize) + # allocate a large block + var c = getBigChunk(a, size) + assert c.prev == nil + assert c.next == nil + assert c.size == size + result = addr(c.data) + assert((cast[TAddress](result) and (MemAlign-1)) == 0) + assert(isAccessible(result)) + +proc rawDealloc(a: var TAllocator, p: pointer) = + var c = pageAddr(p) + if isSmallChunk(c): + # `p` is within a small chunk: + var c = cast[PSmallChunk](c) + var s = c.size + var f = cast[ptr TFreeCell](p) + #echo("setting to nil: ", $cast[TAddress](addr(f.zeroField))) + assert(f.zeroField != 0) + f.zeroField = 0 + f.next = c.freeList + c.freeList = f + when overwriteFree: + # set to 0xff to check for usage after free bugs: + c_memset(cast[pointer](cast[int](p) +% sizeof(TFreeCell)), -1'i32, + s -% sizeof(TFreeCell)) + # check if it is not in the freeSmallChunks[s] list: + if c.free < s: + assert c notin a.freeSmallChunks[s div memAlign] + # add it to the freeSmallChunks[s] array: + ListAdd(a.freeSmallChunks[s div memAlign], c) + inc(c.free, s) + else: + inc(c.free, s) + if c.free == SmallChunkSize-smallChunkOverhead(): + ListRemove(a.freeSmallChunks[s div memAlign], c) + c.size = SmallChunkSize + freeBigChunk(a, cast[PBigChunk](c)) + else: + # set to 0xff to check for usage after free bugs: + when overwriteFree: c_memset(p, -1'i32, c.size -% bigChunkOverhead()) + # free big chunk + freeBigChunk(a, cast[PBigChunk](c)) + +proc isAllocatedPtr(a: TAllocator, p: pointer): bool = + if isAccessible(p): + var c = pageAddr(p) + if not chunkUnused(c): + if isSmallChunk(c): + var c = cast[PSmallChunk](c) + var offset = (cast[TAddress](p) and (PageSize-1)) -% + smallChunkOverhead() + result = (c.acc >% offset) and (offset %% c.size == 0) and + (cast[ptr TFreeCell](p).zeroField >% 1) + else: + var c = cast[PBigChunk](c) + result = p == addr(c.data) and cast[ptr TFreeCell](p).zeroField >% 1 + +# ---------------------- interface to programs ------------------------------- + +proc alloc(size: int): pointer = + result = rawAlloc(allocator, size+sizeof(TFreeCell)) + cast[ptr TFreeCell](result).zeroField = 1 # mark it as used + assert(not isAllocatedPtr(allocator, result)) + result = cast[pointer](cast[TAddress](result) +% sizeof(TFreeCell)) + +proc alloc0(size: int): pointer = + result = alloc(size) + zeroMem(result, size) + +proc dealloc(p: pointer) = + var x = cast[pointer](cast[TAddress](p) -% sizeof(TFreeCell)) + assert(cast[ptr TFreeCell](x).zeroField == 1) + rawDealloc(allocator, x) + assert(not isAllocatedPtr(allocator, x)) + +proc ptrSize(p: pointer): int = + var x = cast[pointer](cast[TAddress](p) -% sizeof(TFreeCell)) + result = pageAddr(x).size - sizeof(TFreeCell) + +proc realloc(p: pointer, newsize: int): pointer = + if newsize > 0: + result = alloc(newsize) + if p != nil: + copyMem(result, p, ptrSize(p)) + dealloc(p) + elif p != nil: + dealloc(p) + +proc countFreeMem(): int = + # only used for assertions + var it = allocator.freeChunksList + while it != nil: + inc(result, it.size) + it = it.next + +proc getFreeMem(): int = + result = allocator.freeMem + #assert(result == countFreeMem()) + +proc getTotalMem(): int = return allocator.currMem +proc getOccupiedMem(): int = return getTotalMem() - getFreeMem() + +when isMainModule: + const iterations = 4000_000 + incl(allocator.chunkStarts, 11) + assert 11 in allocator.chunkStarts + excl(allocator.chunkStarts, 11) + assert 11 notin allocator.chunkStarts + var p: array [1..iterations, pointer] + for i in 7..7: + var x = i * 8 + for j in 1.. iterations: + p[j] = alloc(allocator, x) + for j in 1..iterations: + assert isAllocatedPtr(allocator, p[j]) + echo($i, " used memory: ", $(allocator.currMem)) + for j in countdown(iterations, 1): + #echo("j: ", $j) + dealloc(allocator, p[j]) + assert(not isAllocatedPtr(allocator, p[j])) + echo($i, " after freeing: ", $(allocator.currMem)) + diff --git a/nimlib/system/ansi_c.nim b/nimlib/system/ansi_c.nim new file mode 100755 index 000000000..e9300949b --- /dev/null +++ b/nimlib/system/ansi_c.nim @@ -0,0 +1,105 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2009 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +# This include file contains headers of Ansi C procs +# and definitions of Ansi C types in Nimrod syntax +# All symbols are prefixed with 'c_' to avoid ambiguities + +{.push hints:off} + +proc c_strcmp(a, b: CString): cint {.nodecl, noSideEffect, importc: "strcmp".} +proc c_memcmp(a, b: CString, size: cint): cint {. + nodecl, noSideEffect, importc: "memcmp".} +proc c_memcpy(a, b: CString, size: cint) {.nodecl, importc: "memcpy".} +proc c_strlen(a: CString): int {.nodecl, noSideEffect, importc: "strlen".} +proc c_memset(p: pointer, value: cint, size: int) {.nodecl, importc: "memset".} + +type + C_TextFile {.importc: "FILE", nodecl, final.} = object # empty record for + # data hiding + C_BinaryFile {.importc: "FILE", nodecl, final.} = object + C_TextFileStar = ptr CTextFile + C_BinaryFileStar = ptr CBinaryFile + + C_JmpBuf {.importc: "jmp_buf".} = array[0..31, int] + +var + c_stdin {.importc: "stdin", noDecl.}: C_TextFileStar + c_stdout {.importc: "stdout", noDecl.}: C_TextFileStar + c_stderr {.importc: "stderr", noDecl.}: C_TextFileStar + +var # constants faked as variables: + SIGINT {.importc: "SIGINT", nodecl.}: cint + SIGSEGV {.importc: "SIGSEGV", nodecl.}: cint + SIGABRT {.importc: "SIGABRT", nodecl.}: cint + SIGFPE {.importc: "SIGFPE", nodecl.}: cint + SIGILL {.importc: "SIGILL", nodecl.}: cint + +when defined(macosx): + var + SIGBUS {.importc: "SIGBUS", nodecl.}: cint + # hopefully this does not lead to new bugs +else: + var + SIGBUS {.importc: "SIGSEGV", nodecl.}: cint + # only Mac OS X has this shit + +proc c_longjmp(jmpb: C_JmpBuf, retval: cint) {.nodecl, importc: "longjmp".} +proc c_setjmp(jmpb: var C_JmpBuf): cint {.nodecl, importc: "setjmp".} + +proc c_signal(sig: cint, handler: proc (a: cint) {.noconv.}) {. + importc: "signal", header: "<signal.h>".} +proc c_raise(sig: cint) {.importc: "raise", header: "<signal.h>".} + +proc c_fputs(c: cstring, f: C_TextFileStar) {.importc: "fputs", noDecl.} +proc c_fgets(c: cstring, n: int, f: C_TextFileStar): cstring {. + importc: "fgets", noDecl.} +proc c_fgetc(stream: C_TextFileStar): int {.importc: "fgetc", nodecl.} +proc c_ungetc(c: int, f: C_TextFileStar) {.importc: "ungetc", nodecl.} +proc c_putc(c: Char, stream: C_TextFileStar) {.importc: "putc", nodecl.} +proc c_fprintf(f: C_TextFileStar, frmt: CString) {. + importc: "fprintf", nodecl, varargs.} + +proc c_fopen(filename, mode: cstring): C_TextFileStar {. + importc: "fopen", nodecl.} +proc c_fclose(f: C_TextFileStar) {.importc: "fclose", nodecl.} + +proc c_sprintf(buf, frmt: CString) {.nodecl, importc: "sprintf", varargs.} + # we use it only in a way that cannot lead to security issues + +proc c_fread(buf: Pointer, size, n: int, f: C_BinaryFileStar): int {. + importc: "fread", noDecl.} +proc c_fseek(f: C_BinaryFileStar, offset: clong, whence: int): int {. + importc: "fseek", noDecl.} + +proc c_fwrite(buf: Pointer, size, n: int, f: C_BinaryFileStar): int {. + importc: "fwrite", noDecl.} + +proc c_exit(errorcode: cint) {.importc: "exit", nodecl.} +proc c_ferror(stream: C_TextFileStar): bool {.importc: "ferror", nodecl.} +proc c_fflush(stream: C_TextFileStar) {.importc: "fflush", nodecl.} +proc c_abort() {.importc: "abort", nodecl.} +proc c_feof(stream: C_TextFileStar): bool {.importc: "feof", nodecl.} + +proc c_malloc(size: int): pointer {.importc: "malloc", nodecl.} +proc c_free(p: pointer) {.importc: "free", nodecl.} +proc c_realloc(p: pointer, newsize: int): pointer {.importc: "realloc", nodecl.} + +var errno {.importc, header: "<errno.h>".}: cint ## error variable +proc strerror(errnum: cint): cstring {.importc, header: "<string.h>".} + +proc c_remove(filename: CString): cint {.importc: "remove", noDecl.} +proc c_rename(oldname, newname: CString): cint {.importc: "rename", noDecl.} + +proc c_system(cmd: CString): cint {.importc: "system", header: "<stdlib.h>".} +proc c_getenv(env: CString): CString {.importc: "getenv", noDecl.} +proc c_putenv(env: CString): cint {.importc: "putenv", noDecl.} + +{.pop} + diff --git a/nimlib/system/arithm.nim b/nimlib/system/arithm.nim new file mode 100755 index 000000000..f097ee794 --- /dev/null +++ b/nimlib/system/arithm.nim @@ -0,0 +1,316 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2009 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + + +# simple integer arithmetic with overflow checking + +proc raiseOverflow {.compilerproc, noinline, noreturn.} = + # a single proc to reduce code size to a minimum + raise newException(EOverflow, "over- or underflow") + +proc raiseDivByZero {.compilerproc, noinline, noreturn.} = + raise newException(EDivByZero, "divison by zero") + +proc addInt64(a, b: int64): int64 {.compilerProc, inline.} = + result = a +% b + if (result xor a) >= int64(0) or (result xor b) >= int64(0): + return result + raiseOverflow() + +proc subInt64(a, b: int64): int64 {.compilerProc, inline.} = + result = a -% b + if (result xor a) >= int64(0) or (result xor not b) >= int64(0): + return result + raiseOverflow() + +proc negInt64(a: int64): int64 {.compilerProc, inline.} = + if a != low(int64): return -a + raiseOverflow() + +proc absInt64(a: int64): int64 {.compilerProc, inline.} = + if a != low(int64): + if a >= 0: return a + else: return -a + raiseOverflow() + +proc divInt64(a, b: int64): int64 {.compilerProc, inline.} = + if b == int64(0): + raiseDivByZero() + if a == low(int64) and b == int64(-1): + raiseOverflow() + return a div b + +proc modInt64(a, b: int64): int64 {.compilerProc, inline.} = + if b == int64(0): + raiseDivByZero() + return a mod b + +# +# This code has been inspired by Python's source code. +# The native int product x*y is either exactly right or *way* off, being +# just the last n bits of the true product, where n is the number of bits +# in an int (the delivered product is the true product plus i*2**n for +# some integer i). +# +# The native float64 product x*y is subject to three +# rounding errors: on a sizeof(int)==8 box, each cast to double can lose +# info, and even on a sizeof(int)==4 box, the multiplication can lose info. +# But, unlike the native int product, it's not in *range* trouble: even +# if sizeof(int)==32 (256-bit ints), the product easily fits in the +# dynamic range of a float64. So the leading 50 (or so) bits of the float64 +# product are correct. +# +# We check these two ways against each other, and declare victory if they're +# approximately the same. Else, because the native int product is the only +# one that can lose catastrophic amounts of information, it's the native int +# product that must have overflowed. +# +proc mulInt64(a, b: int64): int64 {.compilerproc.} = + var + resAsFloat, floatProd: float64 + result = a *% b + floatProd = toBiggestFloat(a) # conversion + floatProd = floatProd * toBiggestFloat(b) + resAsFloat = toBiggestFloat(result) + + # Fast path for normal case: small multiplicands, and no info + # is lost in either method. + if resAsFloat == floatProd: return result + + # Somebody somewhere lost info. Close enough, or way off? Note + # that a != 0 and b != 0 (else resAsFloat == floatProd == 0). + # The difference either is or isn't significant compared to the + # true value (of which floatProd is a good approximation). + + # abs(diff)/abs(prod) <= 1/32 iff + # 32 * abs(diff) <= abs(prod) -- 5 good bits is "close enough" + if 32.0 * abs(resAsFloat - floatProd) <= abs(floatProd): + return result + raiseOverflow() + + +proc absInt(a: int): int {.compilerProc, inline.} = + if a != low(int): + if a >= 0: return a + else: return -a + raiseOverflow() + +const + asmVersion = defined(I386) and (defined(vcc) or defined(wcc) or + defined(dmc) or defined(gcc) or defined(llvm_gcc)) + # my Version of Borland C++Builder does not have + # tasm32, which is needed for assembler blocks + # this is why Borland is not included in the 'when' + +when asmVersion and not defined(gcc) and not defined(llvm_gcc): + # assembler optimized versions for compilers that + # have an intel syntax assembler: + proc addInt(a, b: int): int {.compilerProc, pure.} = + # a in eax, and b in edx + asm """ + mov eax, `a` + add eax, `b` + jno theEnd + call `raiseOverflow` + theEnd: + """ + + proc subInt(a, b: int): int {.compilerProc, pure.} = + asm """ + mov eax, `a` + sub eax, `b` + jno theEnd + call `raiseOverflow` + theEnd: + """ + + proc negInt(a: int): int {.compilerProc, pure.} = + asm """ + mov eax, `a` + neg eax + jno theEnd + call `raiseOverflow` + theEnd: + """ + + proc divInt(a, b: int): int {.compilerProc, pure.} = + asm """ + mov eax, `a` + mov ecx, `b` + xor edx, edx + idiv ecx + jno theEnd + call `raiseOverflow` + theEnd: + """ + + proc modInt(a, b: int): int {.compilerProc, pure.} = + asm """ + mov eax, `a` + mov ecx, `b` + xor edx, edx + idiv ecx + jno theEnd + call `raiseOverflow` + theEnd: + mov eax, edx + """ + + proc mulInt(a, b: int): int {.compilerProc, pure.} = + asm """ + mov eax, `a` + mov ecx, `b` + xor edx, edx + imul ecx + jno theEnd + call `raiseOverflow` + theEnd: + """ + +elif false: # asmVersion and (defined(gcc) or defined(llvm_gcc)): + proc addInt(a, b: int): int {.compilerProc, inline.} = + # don't use a pure proc here! + asm """ + "addl %%ecx, %%eax\n" + "jno 1\n" + "call _raiseOverflow\n" + "1: \n" + :"=a"(`result`) + :"a"(`a`), "c"(`b`) + """ + + proc subInt(a, b: int): int {.compilerProc, inline.} = + asm """ "subl %%ecx,%%eax\n" + "jno 1\n" + "call _raiseOverflow\n" + "1: \n" + :"=a"(`result`) + :"a"(`a`), "c"(`b`) + """ + + proc mulInt(a, b: int): int {.compilerProc, inline.} = + asm """ "xorl %%edx, %%edx\n" + "imull %%ecx\n" + "jno 1\n" + "call _raiseOverflow\n" + "1: \n" + :"=a"(`result`) + :"a"(`a`), "c"(`b`) + :"%edx" + """ + + proc negInt(a: int): int {.compilerProc, inline.} = + asm """ "negl %%eax\n" + "jno 1\n" + "call _raiseOverflow\n" + "1: \n" + :"=a"(`result`) + :"a"(`a`) + """ + + proc divInt(a, b: int): int {.compilerProc, inline.} = + asm """ "xorl %%edx, %%edx\n" + "idivl %%ecx\n" + "jno 1\n" + "call _raiseOverflow\n" + "1: \n" + :"=a"(`result`) + :"a"(`a`), "c"(`b`) + :"%edx" + """ + + proc modInt(a, b: int): int {.compilerProc, inline.} = + asm """ "xorl %%edx, %%edx\n" + "idivl %%ecx\n" + "jno 1\n" + "call _raiseOverflow\n" + "1: \n" + "movl %%edx, %%eax" + :"=a"(`result`) + :"a"(`a`), "c"(`b`) + :"%edx" + """ + +# Platform independant versions of the above (slower!) +when not defined(addInt): + proc addInt(a, b: int): int {.compilerProc, inline.} = + result = a +% b + if (result xor a) >= 0 or (result xor b) >= 0: + return result + raiseOverflow() + +when not defined(subInt): + proc subInt(a, b: int): int {.compilerProc, inline.} = + result = a -% b + if (result xor a) >= 0 or (result xor not b) >= 0: + return result + raiseOverflow() + +when not defined(negInt): + proc negInt(a: int): int {.compilerProc, inline.} = + if a != low(int): return -a + raiseOverflow() + +when not defined(divInt): + proc divInt(a, b: int): int {.compilerProc, inline.} = + if b == 0: + raiseDivByZero() + if a == low(int) and b == -1: + raiseOverflow() + return a div b + +when not defined(modInt): + proc modInt(a, b: int): int {.compilerProc, inline.} = + if b == 0: + raiseDivByZero() + return a mod b + +when not defined(mulInt): + # + # This code has been inspired by Python's source code. + # The native int product x*y is either exactly right or *way* off, being + # just the last n bits of the true product, where n is the number of bits + # in an int (the delivered product is the true product plus i*2**n for + # some integer i). + # + # The native float64 product x*y is subject to three + # rounding errors: on a sizeof(int)==8 box, each cast to double can lose + # info, and even on a sizeof(int)==4 box, the multiplication can lose info. + # But, unlike the native int product, it's not in *range* trouble: even + # if sizeof(int)==32 (256-bit ints), the product easily fits in the + # dynamic range of a float64. So the leading 50 (or so) bits of the float64 + # product are correct. + # + # We check these two ways against each other, and declare victory if + # they're approximately the same. Else, because the native int product is + # the only one that can lose catastrophic amounts of information, it's the + # native int product that must have overflowed. + # + proc mulInt(a, b: int): int {.compilerProc.} = + var + resAsFloat, floatProd: float + + result = a *% b + floatProd = toFloat(a) * toFloat(b) + resAsFloat = toFloat(result) + + # Fast path for normal case: small multiplicands, and no info + # is lost in either method. + if resAsFloat == floatProd: return result + + # Somebody somewhere lost info. Close enough, or way off? Note + # that a != 0 and b != 0 (else resAsFloat == floatProd == 0). + # The difference either is or isn't significant compared to the + # true value (of which floatProd is a good approximation). + + # abs(diff)/abs(prod) <= 1/32 iff + # 32 * abs(diff) <= abs(prod) -- 5 good bits is "close enough" + if 32.0 * abs(resAsFloat - floatProd) <= abs(floatProd): + return result + raiseOverflow() diff --git a/nimlib/system/assign.nim b/nimlib/system/assign.nim new file mode 100755 index 000000000..44d2e5c64 --- /dev/null +++ b/nimlib/system/assign.nim @@ -0,0 +1,120 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2009 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +#when defined(debugGC): +# {.define: logAssign.} +proc genericAssign(dest, src: Pointer, mt: PNimType) {.compilerProc.} +proc genericAssignAux(dest, src: Pointer, n: ptr TNimNode) = + var + d = cast[TAddress](dest) + s = cast[TAddress](src) + case n.kind + of nkNone: assert(false) + of nkSlot: + genericAssign(cast[pointer](d +% n.offset), cast[pointer](s +% n.offset), + n.typ) + of nkList: + for i in 0..n.len-1: + genericAssignAux(dest, src, n.sons[i]) + of nkCase: + copyMem(cast[pointer](d +% n.offset), cast[pointer](s +% n.offset), + n.typ.size) + var m = selectBranch(src, n) + if m != nil: genericAssignAux(dest, src, m) + +proc genericAssign(dest, src: Pointer, mt: PNimType) = + var + d = cast[TAddress](dest) + s = cast[TAddress](src) + + assert(mt != nil) + case mt.Kind + of tySequence: + var s2 = cast[ppointer](src)^ + var seq = cast[PGenericSeq](s2) + if s2 == nil: # this can happen! nil sequences are allowed + var x = cast[ppointer](dest) + x^ = nil + return + assert(dest != nil) + unsureAsgnRef(cast[ppointer](dest), + newObj(mt, seq.len * mt.base.size + GenericSeqSize)) + var dst = cast[taddress](cast[ppointer](dest)^) + for i in 0..seq.len-1: + genericAssign( + cast[pointer](dst +% i*% mt.base.size +% GenericSeqSize), + cast[pointer](cast[taddress](s2) +% i *% mt.base.size +% + GenericSeqSize), + mt.Base) + var dstseq = cast[PGenericSeq](dst) + dstseq.len = seq.len + dstseq.space = seq.len + of tyObject, tyTuple, tyPureObject: + # we don't need to copy m_type field for tyObject, as they are equal anyway + genericAssignAux(dest, src, mt.node) + of tyArray, tyArrayConstr: + for i in 0..(mt.size div mt.base.size)-1: + genericAssign(cast[pointer](d +% i*% mt.base.size), + cast[pointer](s +% i*% mt.base.size), mt.base) + of tyString: # a leaf + var s2 = cast[ppointer](s)^ + if s2 != nil: # nil strings are possible! + unsureAsgnRef(cast[ppointer](dest), copyString(cast[NimString](s2))) + else: + var x = cast[ppointer](dest) + x^ = nil + return + of tyRef: # BUGFIX: a long time this has been forgotten! + unsureAsgnRef(cast[ppointer](dest), cast[ppointer](s)^) + else: + copyMem(dest, src, mt.size) # copy raw bits + +proc genericSeqAssign(dest, src: Pointer, mt: PNimType) {.compilerProc.} = + var src = src # ugly, but I like to stress the parser sometimes :-) + genericAssign(dest, addr(src), mt) + +proc genericAssignOpenArray(dest, src: pointer, len: int, + mt: PNimType) {.compilerproc.} = + var + d = cast[TAddress](dest) + s = cast[TAddress](src) + for i in 0..len-1: + genericAssign(cast[pointer](d +% i*% mt.base.size), + cast[pointer](s +% i*% mt.base.size), mt.base) + +proc objectInit(dest: Pointer, typ: PNimType) {.compilerProc.} +proc objectInitAux(dest: Pointer, n: ptr TNimNode) = + var d = cast[TAddress](dest) + case n.kind + of nkNone: assert(false) + of nkSLot: objectInit(cast[pointer](d +% n.offset), n.typ) + of nkList: + for i in 0..n.len-1: + objectInitAux(dest, n.sons[i]) + of nkCase: + var m = selectBranch(dest, n) + if m != nil: objectInitAux(dest, m) + +proc objectInit(dest: Pointer, typ: PNimType) = + # the generic init proc that takes care of initialization of complex + # objects on the stack or heap + var d = cast[TAddress](dest) + case typ.kind + of tyObject: + # iterate over any structural type + # here we have to init the type field: + var pint = cast[ptr PNimType](dest) + pint^ = typ + objectInitAux(dest, typ.node) + of tyTuple, tyPureObject: + objectInitAux(dest, typ.node) + of tyArray, tyArrayConstr: + for i in 0..(typ.size div typ.base.size)-1: + objectInit(cast[pointer](d +% i * typ.base.size), typ.base) + else: nil # nothing to do diff --git a/nimlib/system/cellsets.nim b/nimlib/system/cellsets.nim new file mode 100755 index 000000000..0ce83864c --- /dev/null +++ b/nimlib/system/cellsets.nim @@ -0,0 +1,196 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2009 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +# Efficient set of pointers for the GC (and repr) + +type + TCell {.pure.} = object + refcount: int # the refcount and some flags + typ: PNimType + when debugGC: + filename: cstring + line: int + + PCell = ptr TCell + + PPageDesc = ptr TPageDesc + TBitIndex = range[0..UnitsPerPage-1] + TPageDesc {.final, pure.} = object + next: PPageDesc # all nodes are connected with this pointer + key: TAddress # start address at bit 0 + bits: array[TBitIndex, int] # a bit vector + + PPageDescArray = ptr array[0..1000_000, PPageDesc] + TCellSet {.final, pure.} = object + counter, max: int + head: PPageDesc + data: PPageDescArray + + PCellArray = ptr array[0..100_000_000, PCell] + TCellSeq {.final, pure.} = object + len, cap: int + d: PCellArray + +# ------------------- cell set handling --------------------------------------- + +proc contains(s: TCellSeq, c: PCell): bool {.inline.} = + for i in 0 .. s.len-1: + if s.d[i] == c: return True + return False + +proc add(s: var TCellSeq, c: PCell) {.inline.} = + if s.len >= s.cap: + s.cap = s.cap * 3 div 2 + var d = cast[PCellArray](alloc(s.cap * sizeof(PCell))) + copyMem(d, s.d, s.len * sizeof(PCell)) + dealloc(s.d) + s.d = d + # XXX: realloc? + s.d[s.len] = c + inc(s.len) + +proc init(s: var TCellSeq, cap: int = 1024) = + s.len = 0 + s.cap = cap + s.d = cast[PCellArray](alloc0(cap * sizeof(PCell))) + +proc deinit(s: var TCellSeq) = + dealloc(s.d) + s.d = nil + s.len = 0 + s.cap = 0 + +const + InitCellSetSize = 1024 # must be a power of two! + +proc Init(s: var TCellSet) = + s.data = cast[PPageDescArray](alloc0(InitCellSetSize * sizeof(PPageDesc))) + s.max = InitCellSetSize-1 + s.counter = 0 + s.head = nil + +proc Deinit(s: var TCellSet) = + var it = s.head + while it != nil: + var n = it.next + dealloc(it) + it = n + s.head = nil # play it safe here + dealloc(s.data) + s.data = nil + s.counter = 0 + +proc nextTry(h, maxHash: int): int {.inline.} = + result = ((5*h) + 1) and maxHash + # For any initial h in range(maxHash), repeating that maxHash times + # generates each int in range(maxHash) exactly once (see any text on + # random-number generation for proof). + +proc CellSetGet(t: TCellSet, key: TAddress): PPageDesc = + var h = cast[int](key) and t.max + while t.data[h] != nil: + if t.data[h].key == key: return t.data[h] + h = nextTry(h, t.max) + return nil + +proc CellSetRawInsert(t: TCellSet, data: PPageDescArray, desc: PPageDesc) = + var h = cast[int](desc.key) and t.max + while data[h] != nil: + assert(data[h] != desc) + h = nextTry(h, t.max) + assert(data[h] == nil) + data[h] = desc + +proc CellSetEnlarge(t: var TCellSet) = + var oldMax = t.max + t.max = ((t.max+1)*2)-1 + var n = cast[PPageDescArray](alloc0((t.max + 1) * sizeof(PPageDesc))) + for i in 0 .. oldmax: + if t.data[i] != nil: + CellSetRawInsert(t, n, t.data[i]) + dealloc(t.data) + t.data = n + +proc CellSetPut(t: var TCellSet, key: TAddress): PPageDesc = + var h = cast[int](key) and t.max + while true: + var x = t.data[h] + if x == nil: break + if x.key == key: return x + h = nextTry(h, t.max) + + if ((t.max+1)*2 < t.counter*3) or ((t.max+1)-t.counter < 4): + CellSetEnlarge(t) + inc(t.counter) + h = cast[int](key) and t.max + while t.data[h] != nil: h = nextTry(h, t.max) + assert(t.data[h] == nil) + # the new page descriptor goes into result + result = cast[PPageDesc](alloc0(sizeof(TPageDesc))) + result.next = t.head + result.key = key + t.head = result + t.data[h] = result + +# ---------- slightly higher level procs -------------------------------------- + +proc contains(s: TCellSet, cell: PCell): bool = + var u = cast[TAddress](cell) + var t = CellSetGet(s, u shr PageShift) + if t != nil: + u = (u %% PageSize) /% MemAlign + result = (t.bits[u shr IntShift] and (1 shl (u and IntMask))) != 0 + else: + result = false + +proc incl(s: var TCellSet, cell: PCell) {.noinline.} = + var u = cast[TAddress](cell) + var t = CellSetPut(s, u shr PageShift) + u = (u %% PageSize) /% MemAlign + t.bits[u shr IntShift] = t.bits[u shr IntShift] or (1 shl (u and IntMask)) + +proc excl(s: var TCellSet, cell: PCell) = + var u = cast[TAddress](cell) + var t = CellSetGet(s, u shr PageShift) + if t != nil: + u = (u %% PageSize) /% MemAlign + t.bits[u shr IntShift] = (t.bits[u shr IntShift] and + not (1 shl (u and IntMask))) + +proc containsOrIncl(s: var TCellSet, cell: PCell): bool = + var u = cast[TAddress](cell) + var t = CellSetGet(s, u shr PageShift) + if t != nil: + u = (u %% PageSize) /% MemAlign + result = (t.bits[u shr IntShift] and (1 shl (u and IntMask))) != 0 + if not result: + t.bits[u shr IntShift] = t.bits[u shr IntShift] or + (1 shl (u and IntMask)) + else: + Incl(s, cell) + result = false + +iterator elements(t: TCellSet): PCell {.inline.} = + # while traversing it is forbidden to add pointers to the tree! + var r = t.head + while r != nil: + var i = 0 + while i <= high(r.bits): + var w = r.bits[i] # taking a copy of r.bits[i] here is correct, because + # modifying operations are not allowed during traversation + var j = 0 + while w != 0: # test all remaining bits for zero + if (w and 1) != 0: # the bit is set! + yield cast[PCell]((r.key shl PageShift) or + (i shl IntShift +% j) *% MemAlign) + inc(j) + w = w shr 1 + inc(i) + r = r.next + diff --git a/nimlib/system/cntbits.nim b/nimlib/system/cntbits.nim new file mode 100755 index 000000000..281b96dd0 --- /dev/null +++ b/nimlib/system/cntbits.nim @@ -0,0 +1,12 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2006 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + + + + diff --git a/nimlib/system/debugger.nim b/nimlib/system/debugger.nim new file mode 100755 index 000000000..01d8bd8a2 --- /dev/null +++ b/nimlib/system/debugger.nim @@ -0,0 +1,500 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2009 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +# This file implements the embedded debugger that can be linked +# with the application. We should not use dynamic memory here as that +# would interfere with the GC and trigger ON/OFF errors if the +# user program corrupts memory. Unfortunately, for dispaying +# variables we use the ``system.repr()`` proc which uses Nimrod +# strings and thus allocates memory from the heap. Pity, but +# I do not want to implement ``repr()`` twice. We also cannot deactivate +# the GC here as that might run out of memory too quickly... + +type + TDbgState = enum + dbOff, # debugger is turned off + dbStepInto, # debugger is in tracing mode + dbStepOver, + dbSkipCurrent, + dbQuiting, # debugger wants to quit + dbBreakpoints # debugger is only interested in breakpoints + + TDbgBreakpoint {.final.} = object + low, high: int # range from low to high; if disabled + # both low and high are set to their negative values + # this makes the check faster and safes memory + filename: string + name: string # name of breakpoint + + TVarSlot {.compilerproc, final.} = object # variable slots used for debugger: + address: pointer + typ: PNimType + name: cstring # for globals this is "module.name" + + PExtendedFrame = ptr TExtendedFrame + TExtendedFrame {.final.} = object # If the debugger is enabled the compiler + # provides an extended frame. Of course + # only slots that are + # needed are allocated and not 10_000, + # except for the global data description. + f: TFrame + slots: array[0..10_000, TVarSlot] + +var + dbgInSignal: bool # wether the debugger is in the signal handler + dbgIn: TFile # debugger input stream + dbgUser: string = "s" # buffer for user input; first command is ``step_into`` + # needs to be global cause we store the last command + # in it + dbgState: TDbgState = dbStepInto # state of debugger + dbgBP: array[0..127, TDbgBreakpoint] # breakpoints + dbgBPlen: int = 0 + + dbgSkipToFrame: PFrame # frame to be skipped to + + dbgGlobalData: TExtendedFrame # this reserves much space, but + # for now it is the most practical way + + maxDisplayRecDepth: int = 5 # do not display too much data! + +proc findBreakpoint(name: string): int = + # returns -1 if not found + for i in countdown(dbgBPlen-1, 0): + if name == dbgBP[i].name: return i + return -1 + +proc ListBreakPoints() = + write(stdout, "*** endb| Breakpoints:\n") + for i in 0 .. dbgBPlen-1: + write(stdout, dbgBP[i].name & ": " & $abs(dbgBP[i].low) & ".." & + $abs(dbgBP[i].high) & dbgBP[i].filename) + if dbgBP[i].low < 0: + write(stdout, " [disabled]\n") + else: + write(stdout, "\n") + write(stdout, "***\n") + +proc openAppend(filename: string): TFile = + if open(result, filename, fmAppend): + write(result, "----------------------------------------\n") + +proc dbgRepr(p: pointer, typ: PNimType): string = + var + cl: TReprClosure + initReprClosure(cl) + cl.recDepth = maxDisplayRecDepth + # locks for the GC turned out to be a bad idea... + # inc(recGcLock) + result = "" + reprAux(result, p, typ, cl) + # dec(recGcLock) + deinitReprClosure(cl) + +proc writeVariable(stream: TFile, slot: TVarSlot) = + write(stream, slot.name) + write(stream, " = ") + writeln(stream, dbgRepr(slot.address, slot.typ)) + +proc ListFrame(stream: TFile, f: PExtendedFrame) = + write(stream, "*** endb| Frame (" & $f.f.len & " slots):\n") + for i in 0 .. f.f.len-1: + writeVariable(stream, f.slots[i]) + write(stream, "***\n") + +proc ListVariables(stream: TFile, f: PExtendedFrame) = + write(stream, "*** endb| Frame (" & $f.f.len & " slots):\n") + for i in 0 .. f.f.len-1: + writeln(stream, f.slots[i].name) + write(stream, "***\n") + +proc debugOut(msg: cstring) = + # the *** *** markers are for easy recognition of debugger + # output for external frontends. + write(stdout, "*** endb| ") + write(stdout, msg) + write(stdout, "***\n") + +proc dbgFatal(msg: cstring) = + debugOut(msg) + dbgAborting = True # the debugger wants to abort + quit(1) + +proc findVariable(frame: PExtendedFrame, varname: cstring): int = + for i in 0 .. frame.f.len - 1: + if c_strcmp(frame.slots[i].name, varname) == 0: return i + return -1 + +proc dbgShowCurrentProc(dbgFramePointer: PFrame) = + if dbgFramePointer != nil: + write(stdout, "*** endb| now in proc: ") + write(stdout, dbgFramePointer.procname) + write(stdout, " ***\n") + else: + write(stdout, "*** endb| (procedure name not available) ***\n") + +proc dbgShowExecutionPoint() = + write(stdout, "*** endb| " & $framePtr.filename & "(" & $framePtr.line & + ") " & $framePtr.procname & " ***\n") + +when defined(windows) or defined(dos) or defined(os2): + {.define: FileSystemCaseInsensitive.} + +proc fileMatches(c, bp: cstring): bool = + # bp = breakpoint filename + # c = current filename + # we consider it a match if bp is a suffix of c + # and the character for the suffix does not exist or + # is one of: \ / : + # depending on the OS case does not matter! + var blen: int = c_strlen(bp) + var clen: int = c_strlen(c) + if blen > clen: return false + # check for \ / : + if clen-blen-1 >= 0 and c[clen-blen-1] notin {'\\', '/', ':'}: + return false + var i = 0 + while i < blen: + var x, y: char + x = bp[i] + y = c[i+clen-blen] + when defined(FileSystemCaseInsensitive): + if x >= 'A' and x <= 'Z': x = chr(ord(x) - ord('A') + ord('a')) + if y >= 'A' and y <= 'Z': y = chr(ord(y) - ord('A') + ord('a')) + if x != y: return false + inc(i) + return true + +proc dbgBreakpointReached(line: int): int = + for i in 0..dbgBPlen-1: + if line >= dbgBP[i].low and line <= dbgBP[i].high and + fileMatches(framePtr.filename, dbgBP[i].filename): return i + return -1 + +proc scanAndAppendWord(src: string, a: var string, start: int): int = + result = start + # skip whitespace: + while src[result] in {'\t', ' '}: inc(result) + while True: + case src[result] + of 'a'..'z', '0'..'9': add(a, src[result]) + of '_': nil # just skip it + of 'A'..'Z': add(a, chr(ord(src[result]) - ord('A') + ord('a'))) + else: break + inc(result) + +proc scanWord(src: string, a: var string, start: int): int = + a = "" + result = scanAndAppendWord(src, a, start) + +proc scanFilename(src: string, a: var string, start: int): int = + result = start + a = "" + # skip whitespace: + while src[result] in {'\t', ' '}: inc(result) + while src[result] notin {'\t', ' ', '\0'}: + add(a, src[result]) + inc(result) + +proc scanNumber(src: string, a: var int, start: int): int = + result = start + a = 0 + while src[result] in {'\t', ' '}: inc(result) + while true: + case src[result] + of '0'..'9': a = a * 10 + ord(src[result]) - ord('0') + of '_': nil # skip underscores (nice for long line numbers) + else: break + inc(result) + +proc dbgHelp() = + debugOut(""" +list of commands (see the manual for further help): + GENERAL +h, help display this help message +q, quit quit the debugger and the program +<ENTER> repeat the previous debugger command + EXECUTING +s, step single step, stepping into routine calls +n, next single step, without stepping into routine calls +f, skipcurrent continue execution until the current routine finishes +c, continue continue execution until the next breakpoint +i, ignore continue execution, ignore all breakpoints + BREAKPOINTS +b, break <name> [fromline [toline]] [file] + set a new breakpoint named 'name' for line and file + if line or file are omitted the current one is used +breakpoints display the entire breakpoint list +disable <name> disable a breakpoint +enable <name> enable a breakpoint + DATA DISPLAY +e, eval <expr> evaluate the expression <expr> +o, out <file> <expr> evaluate <expr> and write it to <file> +w, where display the current execution point +stackframe [file] display current stack frame [and write it to file] +u, up go up in the call stack +d, down go down in the call stack +bt, backtrace display the entire call stack +l, locals display available local variables +g, globals display available global variables +maxdisplay <integer> set the display's recursion maximum +""") + +proc InvalidCommand() = + debugOut("[Warning] invalid command ignored (type 'h' for help) ") + +proc hasExt(s: string): bool = + # returns true if s has a filename extension + for i in countdown(len(s)-1, 0): + if s[i] == '.': return true + return false + +proc setBreakPoint(s: string, start: int) = + var dbgTemp: string + var i = scanWord(s, dbgTemp, start) + if i <= start: + InvalidCommand() + return + if dbgBPlen >= high(dbgBP): + debugOut("[Warning] no breakpoint could be set; out of breakpoint space ") + return + var x = dbgBPlen + inc(dbgBPlen) + dbgBP[x].name = dbgTemp + i = scanNumber(s, dbgBP[x].low, i) + if dbgBP[x].low == 0: + # set to current line: + dbgBP[x].low = framePtr.line + i = scanNumber(s, dbgBP[x].high, i) + if dbgBP[x].high == 0: # set to low: + dbgBP[x].high = dbgBP[x].low + i = scanFilename(s, dbgTemp, i) + if not (dbgTemp.len == 0): + if not hasExt(dbgTemp): add(dbgTemp, ".nim") + dbgBP[x].filename = dbgTemp + else: # use current filename + dbgBP[x].filename = $framePtr.filename + # skip whitespace: + while s[i] in {' ', '\t'}: inc(i) + if s[i] != '\0': + dec(dbgBPLen) # remove buggy breakpoint + InvalidCommand() + +proc BreakpointSetEnabled(s: string, start, enabled: int) = + var dbgTemp: string + var i = scanWord(s, dbgTemp, start) + if i <= start: + InvalidCommand() + return + var x = findBreakpoint(dbgTemp) + if x < 0: debugOut("[Warning] breakpoint does not exist ") + elif enabled * dbgBP[x].low < 0: # signs are different? + dbgBP[x].low = -dbgBP[x].low + dbgBP[x].high = -dbgBP[x].high + +proc dbgEvaluate(stream: TFile, s: string, start: int, + currFrame: PExtendedFrame) = + var dbgTemp: string + var i = scanWord(s, dbgTemp, start) + while s[i] in {' ', '\t'}: inc(i) + var f = currFrame + if s[i] == '.': + inc(i) # skip '.' + add(dbgTemp, '.') + i = scanAndAppendWord(s, dbgTemp, i) + # search for global var: + f = addr(dbgGlobalData) + if s[i] != '\0': + debugOut("[Warning] could not parse expr ") + return + var j = findVariable(f, dbgTemp) + if j < 0: + debugOut("[Warning] could not find variable ") + return + writeVariable(stream, f.slots[j]) + +proc dbgOut(s: string, start: int, currFrame: PExtendedFrame) = + var dbgTemp: string + var i = scanFilename(s, dbgTemp, start) + if dbgTemp.len == 0: + InvalidCommand() + return + var stream = openAppend(dbgTemp) + if stream == nil: + debugOut("[Warning] could not open or create file ") + return + dbgEvaluate(stream, s, i, currFrame) + close(stream) + +proc dbgStackFrame(s: string, start: int, currFrame: PExtendedFrame) = + var dbgTemp: string + var i = scanFilename(s, dbgTemp, start) + if dbgTemp.len == 0: + # just write it to stdout: + ListFrame(stdout, currFrame) + else: + var stream = openAppend(dbgTemp) + if stream == nil: + debugOut("[Warning] could not open or create file ") + return + ListFrame(stream, currFrame) + close(stream) + +proc CommandPrompt() = + # if we return from this routine, user code executes again + var + again = True + dbgFramePtr = framePtr # for going down and up the stack + dbgDown = 0 # how often we did go down + + while again: + write(stdout, "*** endb| >>") + var tmp = readLine(stdin) + if tmp.len > 0: dbgUser = tmp + # now look what we have to do: + var dbgTemp: string + var i = scanWord(dbgUser, dbgTemp, 0) + case dbgTemp + of "": InvalidCommand() + of "s", "step": + dbgState = dbStepInto + again = false + of "n", "next": + dbgState = dbStepOver + dbgSkipToFrame = framePtr + again = false + of "f", "skipcurrent": + dbgState = dbSkipCurrent + dbgSkipToFrame = framePtr.prev + again = false + of "c", "continue": + dbgState = dbBreakpoints + again = false + of "i", "ignore": + dbgState = dbOff + again = false + of "h", "help": + dbgHelp() + of "q", "quit": + dbgState = dbQuiting + dbgAborting = True + again = false + quit(1) # BUGFIX: quit with error code > 0 + of "e", "eval": + dbgEvaluate(stdout, dbgUser, i, cast[PExtendedFrame](dbgFramePtr)) + of "o", "out": + dbgOut(dbgUser, i, cast[PExtendedFrame](dbgFramePtr)) + of "stackframe": + dbgStackFrame(dbgUser, i, cast[PExtendedFrame](dbgFramePtr)) + of "w", "where": + dbgShowExecutionPoint() + of "l", "locals": + ListVariables(stdout, cast[PExtendedFrame](dbgFramePtr)) + of "g", "globals": + ListVariables(stdout, addr(dbgGlobalData)) + of "u", "up": + if dbgDown <= 0: + debugOut("[Warning] cannot go up any further ") + else: + dbgFramePtr = framePtr + for j in 0 .. dbgDown-2: # BUGFIX + dbgFramePtr = dbgFramePtr.prev + dec(dbgDown) + dbgShowCurrentProc(dbgFramePtr) + of "d", "down": + if dbgFramePtr != nil: + inc(dbgDown) + dbgFramePtr = dbgFramePtr.prev + dbgShowCurrentProc(dbgFramePtr) + else: + debugOut("[Warning] cannot go down any further ") + of "bt", "backtrace": + WriteStackTrace() + of "b", "break": + setBreakPoint(dbgUser, i) + of "breakpoints": + ListBreakPoints() + of "disable": + BreakpointSetEnabled(dbgUser, i, -1) + of "enable": + BreakpointSetEnabled(dbgUser, i, +1) + of "maxdisplay": + var parsed: int + i = scanNumber(dbgUser, parsed, i) + if dbgUser[i-1] in {'0'..'9'}: + if parsed == 0: maxDisplayRecDepth = -1 + else: maxDisplayRecDepth = parsed + else: + InvalidCommand() + else: + InvalidCommand() + +proc endbStep() = + # we get into here if an unhandled exception has been raised + # XXX: do not allow the user to run the program any further? + # XXX: BUG: the frame is lost here! + dbgShowExecutionPoint() + CommandPrompt() + +proc checkForBreakpoint() = + var i = dbgBreakpointReached(framePtr.line) + if i >= 0: + write(stdout, "*** endb| reached ") + write(stdout, dbgBP[i].name) + write(stdout, " in ") + write(stdout, framePtr.filename) + write(stdout, "(") + write(stdout, framePtr.line) + write(stdout, ") ") + write(stdout, framePtr.procname) + write(stdout, " ***\n") + CommandPrompt() + +# interface to the user program: + +proc dbgRegisterBreakpoint(line: int, + filename, name: cstring) {.compilerproc.} = + var x = dbgBPlen + inc(dbgBPlen) + dbgBP[x].name = $name + dbgBP[x].filename = $filename + dbgBP[x].low = line + dbgBP[x].high = line + +proc dbgRegisterGlobal(name: cstring, address: pointer, + typ: PNimType) {.compilerproc.} = + var i = dbgGlobalData.f.len + if i >= high(dbgGlobalData.slots): + debugOut("[Warning] cannot register global ") + return + dbgGlobalData.slots[i].name = name + dbgGlobalData.slots[i].typ = typ + dbgGlobalData.slots[i].address = address + inc(dbgGlobalData.f.len) + +proc endb(line: int) {.compilerproc.} = + # This proc is called before every Nimrod code line! + # Thus, it must have as few parameters as possible to keep the + # code size small! + # Check if we are at an enabled breakpoint or "in the mood" + framePtr.line = line # this is done here for smaller code size! + if dbgLineHook != nil: dbgLineHook() + case dbgState + of dbStepInto: + # we really want the command prompt here: + dbgShowExecutionPoint() + CommandPrompt() + of dbSkipCurrent, dbStepOver: # skip current routine + if framePtr == dbgSkipToFrame: + dbgShowExecutionPoint() + CommandPrompt() + else: # breakpoints are wanted though (I guess) + checkForBreakpoint() + of dbBreakpoints: # debugger is only interested in breakpoints + checkForBreakpoint() + else: nil diff --git a/nimlib/system/dyncalls.nim b/nimlib/system/dyncalls.nim new file mode 100755 index 000000000..0946ee355 --- /dev/null +++ b/nimlib/system/dyncalls.nim @@ -0,0 +1,127 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2009 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +# This file implements the ability to call native procs from libraries. +# It is not possible to do this in a platform independant way, unfortunately. +# However, the interface has been designed to take platform differences into +# account and been ported to all major platforms. + +type + TLibHandle = pointer # private type + TProcAddr = pointer # libary loading and loading of procs: + +const + NilLibHandle: TLibHandle = nil + +proc nimLoadLibrary(path: string): TLibHandle {.compilerproc.} +proc nimUnloadLibrary(lib: TLibHandle) {.compilerproc.} +proc nimGetProcAddr(lib: TLibHandle, name: cstring): TProcAddr {.compilerproc.} + +proc nimLoadLibraryError(path: string) {.compilerproc, noinline.} = + raise newException(EInvalidLibrary, "could not load: " & path) + +# this code was inspired from Lua's source code: +# Lua - An Extensible Extension Language +# Tecgraf: Computer Graphics Technology Group, PUC-Rio, Brazil +# http://www.lua.org +# mailto:info@lua.org + +when defined(posix): + # + # ========================================================================= + # This is an implementation based on the dlfcn interface. + # The dlfcn interface is available in Linux, SunOS, Solaris, IRIX, FreeBSD, + # NetBSD, AIX 4.2, HPUX 11, and probably most other Unix flavors, at least + # as an emulation layer on top of native functions. + # ========================================================================= + # + + # c stuff: + var + RTLD_NOW {.importc: "RTLD_NOW", header: "<dlfcn.h>".}: int + + proc dlclose(lib: TLibHandle) {.importc, header: "<dlfcn.h>".} + proc dlopen(path: CString, mode: int): TLibHandle {. + importc, header: "<dlfcn.h>".} + proc dlsym(lib: TLibHandle, name: cstring): TProcAddr {. + importc, header: "<dlfcn.h>".} + + proc nimUnloadLibrary(lib: TLibHandle) = + dlclose(lib) + + proc nimLoadLibrary(path: string): TLibHandle = + result = dlopen(path, RTLD_NOW) + + proc nimGetProcAddr(lib: TLibHandle, name: cstring): TProcAddr = + result = dlsym(lib, name) + if result == nil: nimLoadLibraryError($name) + +elif defined(windows) or defined(dos): + # + # ======================================================================= + # Native Windows Implementation + # ======================================================================= + # + type + THINSTANCE {.importc: "HINSTANCE".} = pointer + + proc FreeLibrary(lib: THINSTANCE) {.importc, header: "<windows.h>", stdcall.} + proc winLoadLibrary(path: cstring): THINSTANCE {. + importc: "LoadLibraryA", header: "<windows.h>", stdcall.} + proc GetProcAddress(lib: THINSTANCE, name: cstring): TProcAddr {. + importc: "GetProcAddress", header: "<windows.h>", stdcall.} + + proc nimUnloadLibrary(lib: TLibHandle) = + FreeLibrary(cast[THINSTANCE](lib)) + + proc nimLoadLibrary(path: string): TLibHandle = + result = cast[TLibHandle](winLoadLibrary(path)) + + proc nimGetProcAddr(lib: TLibHandle, name: cstring): TProcAddr = + result = GetProcAddress(cast[THINSTANCE](lib), name) + if result == nil: nimLoadLibraryError($name) + +elif defined(mac): + # + # ======================================================================= + # Native Mac OS X / Darwin Implementation + # ======================================================================= + # + {.error: "no implementation for dyncalls yet".} + + proc nimUnloadLibrary(lib: TLibHandle) = + NSUnLinkModule(NSModule(lib), NSUNLINKMODULE_OPTION_RESET_LAZY_REFERENCES) + + var + dyld_present {.importc: "_dyld_present", header: "<dyld.h>".}: int + + proc nimLoadLibrary(path: string): TLibHandle = + var + img: NSObjectFileImage + ret: NSObjectFileImageReturnCode + modul: NSModule + # this would be a rare case, but prevents crashing if it happens + result = nil + if dyld_present != 0: + ret = NSCreateObjectFileImageFromFile(path, addr(img)) + if ret == NSObjectFileImageSuccess: + modul = NSLinkModule(img, path, NSLINKMODULE_OPTION_PRIVATE or + NSLINKMODULE_OPTION_RETURN_ON_ERROR) + NSDestroyObjectFileImage(img) + result = TLibHandle(modul) + + proc nimGetProcAddr(lib: TLibHandle, name: cstring): TProcAddr = + var + nss: NSSymbol + nss = NSLookupSymbolInModule(NSModule(lib), name) + result = TProcAddr(NSAddressOfSymbol(nss)) + if result == nil: nimLoadLibraryError($name) + +else: + {.error: "no implementation for dyncalls".} diff --git a/nimlib/system/ecmasys.nim b/nimlib/system/ecmasys.nim new file mode 100755 index 000000000..c0d0a5fd6 --- /dev/null +++ b/nimlib/system/ecmasys.nim @@ -0,0 +1,531 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2008 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## Stubs for the GC interface: + +proc GC_disable() = nil +proc GC_enable() = nil +proc GC_fullCollect() = nil +proc GC_setStrategy(strategy: TGC_Strategy) = nil +proc GC_enableMarkAndSweep() = nil +proc GC_disableMarkAndSweep() = nil +proc GC_getStatistics(): string = return "" + +proc getOccupiedMem(): int = return -1 +proc getFreeMem(): int = return -1 +proc getTotalMem(): int = return -1 + +proc alert(s: cstring) {.importc, nodecl.} + +type + PSafePoint = ptr TSafePoint + TSafePoint {.compilerproc, final.} = object + prev: PSafePoint # points to next safe point + exc: ref E_Base + + PCallFrame = ptr TCallFrame + TCallFrame {.importc, nodecl, final.} = object + prev: PCallFrame + procname: CString + line: int # current line number + filename: CString + +var + framePtr {.importc, nodecl, volatile.}: PCallFrame + excHandler {.importc, nodecl, volatile.}: PSafePoint = nil + # list of exception handlers + # a global variable for the root of all try blocks + +{.push stacktrace: off.} +proc nimBoolToStr(x: bool): string {.compilerproc.} = + if x: result = "true" + else: result = "false" + +proc nimCharToStr(x: char): string {.compilerproc.} = + result = newString(1) + result[0] = x + +proc getCurrentExceptionMsg(): string = + if excHandler != nil: return $excHandler.exc.msg + return "" + +proc auxWriteStackTrace(f: PCallFrame): string = + type + TTempFrame = tuple[procname: CString, line: int] + var + it = f + i = 0 + total = 0 + tempFrames: array [0..63, TTempFrame] + while it != nil and i <= high(tempFrames): + tempFrames[i].procname = it.procname + tempFrames[i].line = it.line + inc(i) + inc(total) + it = it.prev + while it != nil: + inc(total) + it = it.prev + result = "" + # if the buffer overflowed print '...': + if total != i: + add(result, "(") + add(result, $(total-i)) + add(result, " calls omitted) ...\n") + for j in countdown(i-1, 0): + add(result, tempFrames[j].procname) + if tempFrames[j].line > 0: + add(result, ", line: ") + add(result, $tempFrames[j].line) + add(result, "\n") + +proc rawWriteStackTrace(): string = + if framePtr == nil: + result = "No stack traceback available\n" + else: + result = "Traceback (most recent call last)\n"& auxWriteStackTrace(framePtr) + framePtr = nil + +proc raiseException(e: ref E_Base, ename: cstring) {.compilerproc, pure.} = + e.name = ename + if excHandler != nil: + excHandler.exc = e + else: + var buf = rawWriteStackTrace() + if e.msg != nil and e.msg[0] != '\0': + add(buf, "Error: unhandled exception: ") + add(buf, e.msg) + else: + add(buf, "Error: unhandled exception") + add(buf, " [") + add(buf, ename) + add(buf, "]\n") + alert(buf) + asm """throw `e`;""" + +proc reraiseException() = + if excHandler == nil: + raise newException(ENoExceptionToReraise, "no exception to reraise") + else: + asm """throw excHandler.exc;""" + +proc raiseOverflow {.exportc: "raiseOverflow", noreturn.} = + raise newException(EOverflow, "over- or underflow") + +proc raiseDivByZero {.exportc: "raiseDivByZero", noreturn.} = + raise newException(EDivByZero, "divison by zero") + +proc raiseRangeError() {.compilerproc, noreturn.} = + raise newException(EOutOfRange, "value out of range") + +proc raiseIndexError() {.compilerproc, noreturn.} = + raise newException(EInvalidIndex, "index out of bounds") + +proc raiseFieldError(f: string) {.compilerproc, noreturn.} = + raise newException(EInvalidField, f & " is not accessible") + + + +proc SetConstr() {.varargs, pure, compilerproc.} = + asm """ + var result = {}; + for (var i = 0; i < arguments.length; ++i) { + var x = arguments[i]; + if (typeof(x) == "object") { + for (var j = x[0]; j <= x[1]; ++j) { + result[j] = true; + } + } else { + result[x] = true; + } + } + return result; + """ + +proc cstrToNimstr(c: cstring): string {.pure, compilerproc.} = + asm """ + var result = []; + for (var i = 0; i < `c`.length; ++i) { + result[i] = `c`.charCodeAt(i); + } + result[result.length] = 0; // terminating zero + return result; + """ + +proc toEcmaStr(s: string): cstring {.pure, compilerproc.} = + asm """ + var len = `s`.length-1; + var result = new Array(len); + var fcc = String.fromCharCode; + for (var i = 0; i < len; ++i) { + result[i] = fcc(`s`[i]); + } + return result.join(""); + """ + +proc mnewString(len: int): string {.pure, compilerproc.} = + asm """ + var result = new Array(`len`+1); + result[0] = 0; + result[`len`] = 0; + return result; + """ + +proc SetCard(a: int): int {.compilerproc, pure.} = + # argument type is a fake + asm """ + var result = 0; + for (var elem in `a`) { ++result; } + return result; + """ + +proc SetEq(a, b: int): bool {.compilerproc, pure.} = + asm """ + for (var elem in `a`) { if (!`b`[elem]) return false; } + for (var elem in `b`) { if (!`a`[elem]) return false; } + return true; + """ + +proc SetLe(a, b: int): bool {.compilerproc, pure.} = + asm """ + for (var elem in `a`) { if (!`b`[elem]) return false; } + return true; + """ + +proc SetLt(a, b: int): bool {.compilerproc.} = + result = SetLe(a, b) and not SetEq(a, b) + +proc SetMul(a, b: int): int {.compilerproc, pure.} = + asm """ + var result = {}; + for (var elem in `a`) { + if (`b`[elem]) { result[elem] = true; } + } + return result; + """ + +proc SetPlus(a, b: int): int {.compilerproc, pure.} = + asm """ + var result = {}; + for (var elem in `a`) { result[elem] = true; } + for (var elem in `b`) { result[elem] = true; } + return result; + """ + +proc SetMinus(a, b: int): int {.compilerproc, pure.} = + asm """ + var result = {}; + for (var elem in `a`) { + if (!`b`[elem]) { result[elem] = true; } + } + return result; + """ + +proc cmpStrings(a, b: string): int {.pure, compilerProc.} = + asm """ + if (`a` == `b`) return 0; + if (!`a`) return -1; + if (!`b`) return 1; + for (var i = 0; i < `a`.length-1; ++i) { + var result = `a`[i] - `b`[i]; + if (result != 0) return result; + } + return 0; + """ + +proc cmp(x, y: string): int = return cmpStrings(x, y) + +proc eqStrings(a, b: string): bool {.pure, compilerProc.} = + asm """ + if (`a == `b`) return true; + if ((!`a`) || (!`b`)) return false; + var alen = `a`.length; + if (alen != `b`.length) return false; + for (var i = 0; i < alen; ++i) + if (`a`[i] != `b`[i]) return false; + return true; + """ + +type + TDocument {.importc.} = object of TObject + write: proc (text: cstring) + writeln: proc (text: cstring) + createAttribute: proc (identifier: cstring): ref TNode + createElement: proc (identifier: cstring): ref TNode + createTextNode: proc (identifier: cstring): ref TNode + getElementById: proc (id: cstring): ref TNode + getElementsByName: proc (name: cstring): seq[ref TNode] + getElementsByTagName: proc (name: cstring): seq[ref TNode] + + TNodeType* = enum + ElementNode = 1, + AttributeNode, + TextNode, + CDATANode, + EntityRefNode, + EntityNode, + ProcessingInstructionNode, + CommentNode, + DocumentNode, + DocumentTypeNode, + DocumentFragmentNode, + NotationNode + TNode* {.importc.} = object of TObject + attributes*: seq[ref TNode] + childNodes*: seq[ref TNode] + data*: cstring + firstChild*: ref TNode + lastChild*: ref TNode + nextSibling*: ref TNode + nodeName*: cstring + nodeType*: TNodeType + nodeValue*: cstring + parentNode*: ref TNode + previousSibling*: ref TNode + appendChild*: proc (child: ref TNode) + appendData*: proc (data: cstring) + cloneNode*: proc (copyContent: bool) + deleteData*: proc (start, len: int) + getAttribute*: proc (attr: cstring): cstring + getAttributeNode*: proc (attr: cstring): ref TNode + getElementsByTagName*: proc (): seq[ref TNode] + hasChildNodes*: proc (): bool + insertBefore*: proc (newNode, before: ref TNode) + insertData*: proc (position: int, data: cstring) + removeAttribute*: proc (attr: cstring) + removeAttributeNode*: proc (attr: ref TNode) + removeChild*: proc (child: ref TNode) + replaceChild*: proc (newNode, oldNode: ref TNode) + replaceData*: proc (start, len: int, text: cstring) + setAttribute*: proc (name, value: cstring) + setAttributeNode*: proc (attr: ref TNode) + +var + document {.importc, nodecl.}: ref TDocument + +proc ewriteln(x: cstring) = + var node = document.getElementsByTagName("body")[0] + if node != nil: + node.appendChild(document.createTextNode(x)) + node.appendChild(document.createElement("br")) + else: + raise newException(EInvalidValue, "<body> element does not exist yet!") + +proc echo*(x: int) = ewriteln($x) +proc echo*(x: float) = ewriteln($x) +proc echo*(x: bool) = ewriteln(if x: cstring("true") else: cstring("false")) +proc echo*(x: string) = ewriteln(x) +proc echo*(x: cstring) = ewriteln(x) + +proc echo[Ty](x: Ty) = + echo(x) + +proc echo[Ty](x: openArray[Ty]) = + for a in items(x): echo(a) + +# Arithmetic: +proc addInt(a, b: int): int {.pure, compilerproc.} = + asm """ + var result = `a` + `b`; + if (result > 2147483647 || result < -2147483648) raiseOverflow(); + return result; + """ + +proc subInt(a, b: int): int {.pure, compilerproc.} = + asm """ + var result = `a` - `b`; + if (result > 2147483647 || result < -2147483648) raiseOverflow(); + return result; + """ + +proc mulInt(a, b: int): int {.pure, compilerproc.} = + asm """ + var result = `a` * `b`; + if (result > 2147483647 || result < -2147483648) raiseOverflow(); + return result; + """ + +proc divInt(a, b: int): int {.pure, compilerproc.} = + asm """ + if (`b` == 0) raiseDivByZero(); + if (`b` == -1 && `a` == 2147483647) raiseOverflow(); + return Math.floor(`a` / `b`); + """ + +proc modInt(a, b: int): int {.pure, compilerproc.} = + asm """ + if (`b` == 0) raiseDivByZero(); + if (`b` == -1 && `a` == 2147483647) raiseOverflow(); + return Math.floor(`a` % `b`); + """ + + + +proc addInt64(a, b: int): int {.pure, compilerproc.} = + asm """ + var result = `a` + `b`; + if (result > 9223372036854775807 + || result < -9223372036854775808) raiseOverflow(); + return result; + """ + +proc subInt64(a, b: int): int {.pure, compilerproc.} = + asm """ + var result = `a` - `b`; + if (result > 9223372036854775807 + || result < -9223372036854775808) raiseOverflow(); + return result; + """ + +proc mulInt64(a, b: int): int {.pure, compilerproc.} = + asm """ + var result = `a` * `b`; + if (result > 9223372036854775807 + || result < -9223372036854775808) raiseOverflow(); + return result; + """ + +proc divInt64(a, b: int): int {.pure, compilerproc.} = + asm """ + if (`b` == 0) raiseDivByZero(); + if (`b` == -1 && `a` == 9223372036854775807) raiseOverflow(); + return Math.floor(`a` / `b`); + """ + +proc modInt64(a, b: int): int {.pure, compilerproc.} = + asm """ + if (`b` == 0) raiseDivByZero(); + if (`b` == -1 && `a` == 9223372036854775807) raiseOverflow(); + return Math.floor(`a` % `b`); + """ + +proc nimMin(a, b: int): int {.compilerproc.} = return if a <= b: a else: b +proc nimMax(a, b: int): int {.compilerproc.} = return if a >= b: a else: b + +proc internalAssert(file: cstring, line: int) {.pure, compilerproc.} = + var + e: ref EAssertionFailed + new(e) + asm """`e`.message = "[Assertion failure] file: "+`file`+", line: "+`line`""" + raise e + +include hti + +proc isFatPointer(ti: PNimType): bool = + # This has to be consistent with the code generator! + return ti.base.kind notin {tyRecord, tyRecordConstr, tyObject, + tyArray, tyArrayConstr, tyPureObject, tyTuple, + tyEmptySet, tyOpenArray, tySet, tyVar, tyRef, tyPtr} + +proc NimCopy(x: pointer, ti: PNimType): pointer {.compilerproc.} + +proc NimCopyAux(dest, src: Pointer, n: ptr TNimNode) {.exportc.} = + case n.kind + of nkNone: assert(false) + of nkSlot: + asm "`dest`[`n`.offset] = NimCopy(`src`[`n`.offset], `n`.typ);" + of nkList: + for i in 0..n.len-1: + NimCopyAux(dest, src, n.sons[i]) + of nkCase: + asm """ + `dest`[`n`.offset] = NimCopy(`src`[`n`.offset], `n`.typ); + for (var i = 0; i < `n`.sons.length; ++i) { + NimCopyAux(`dest`, `src`, `n`.sons[i][1]); + } + """ + +proc NimCopy(x: pointer, ti: PNimType): pointer = + case ti.kind + of tyPtr, tyRef, tyVar, tyNil: + if not isFatPointer(ti): + result = x + else: + asm """ + `result` = [null, 0]; + `result`[0] = `x`[0]; + `result`[1] = `x`[1]; + """ + of tyEmptySet, tySet: + asm """ + `result` = {}; + for (var key in `x`) { `result`[key] = `x`[key]; } + """ + of tyPureObject, tyTuple, tyObject: + if ti.base != nil: result = NimCopy(x, ti.base) + elif ti.kind == tyObject: + asm "`result` = {m_type: `ti`};" + else: + asm "`result` = {};" + NimCopyAux(result, x, ti.node) + of tySequence, tyArrayConstr, tyOpenArray, tyArray: + asm """ + `result` = new Array(`x`.length); + for (var i = 0; i < `x`.length; ++i) { + `result`[i] = NimCopy(`x`[i], `ti`.base); + } + """ + of tyString: + asm "`result` = `x`.slice(0);" + else: + result = x + + +proc ArrayConstr(len: int, value: pointer, typ: PNimType): pointer {. + pure, compilerproc.} = + # types are fake + asm """ + var result = new Array(`len`); + for (var i = 0; i < `len`; ++i) result[i] = NimCopy(`value`, `typ`); + return result; + """ + +proc chckIndx(i, a, b: int): int {.compilerproc.} = + if i >= a and i <= b: return i + else: raiseIndexError() + +proc chckRange(i, a, b: int): int {.compilerproc.} = + if i >= a and i <= b: return i + else: raiseRangeError() + +proc chckObj(obj, subclass: PNimType) {.compilerproc.} = + # checks if obj is of type subclass: + var x = obj + if x == subclass: return # optimized fast path + while x != subclass: + if x == nil: + raise newException(EInvalidObjectConversion, "invalid object conversion") + x = x.base + +{.pop.} + +#proc AddU($1, $2) +#SubU($1, $2) +#MulU($1, $2) +#DivU($1, $2) +#ModU($1, $2) +#AddU64($1, $2) +#SubU64($1, $2) +#MulU64($1, $2) +#DivU64($1, $2) +#ModU64($1, $2) +#LeU($1, $2) +#LtU($1, $2) +#LeU64($1, $2) +#LtU64($1, $2) +#Ze($1) +#Ze64($1) +#ToU8($1) +#ToU16($1) +#ToU32($1) + +#NegInt($1) +#NegInt64($1) +#AbsInt($1) +#AbsInt64($1) diff --git a/nimlib/system/excpt.nim b/nimlib/system/excpt.nim new file mode 100755 index 000000000..293491fe9 --- /dev/null +++ b/nimlib/system/excpt.nim @@ -0,0 +1,285 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2009 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + + +# Exception handling code. This is difficult because it has +# to work if there is no more memory. Thus we have to use +# a static string. Do not use ``sprintf``, etc. as they are +# unsafe! + +when not defined(windows) or not defined(guiapp): + proc writeToStdErr(msg: CString) = write(stdout, msg) + +else: + proc MessageBoxA(hWnd: cint, lpText, lpCaption: cstring, uType: int): int32 {. + header: "<windows.h>", nodecl.} + + proc writeToStdErr(msg: CString) = + discard MessageBoxA(0, msg, nil, 0) + +proc raiseException(e: ref E_Base, ename: CString) {.compilerproc.} +proc reraiseException() {.compilerproc.} + +proc registerSignalHandler() {.compilerproc.} + +proc chckIndx(i, a, b: int): int {.inline, compilerproc.} +proc chckRange(i, a, b: int): int {.inline, compilerproc.} +proc chckRangeF(x, a, b: float): float {.inline, compilerproc.} +proc chckNil(p: pointer) {.inline, compilerproc.} + +type + PSafePoint = ptr TSafePoint + TSafePoint {.compilerproc, final.} = object + prev: PSafePoint # points to next safe point ON THE STACK + exc: ref E_Base + status: int + context: C_JmpBuf + +var + excHandler {.compilerproc.}: PSafePoint = nil + # list of exception handlers + # a global variable for the root of all try blocks + +proc reraiseException() = + if excHandler == nil: + raise newException(ENoExceptionToReraise, "no exception to reraise") + else: + c_longjmp(excHandler.context, 1) + +type + PFrame = ptr TFrame + TFrame {.importc, nodecl, final.} = object + prev: PFrame + procname: CString + line: int # current line number + filename: CString + len: int # length of slots (when not debugging always zero) + +var + buf: string # cannot be allocated on the stack! + assertBuf: string # we need a different buffer for + # assert, as it raises an exception and + # exception handler needs the buffer too + + framePtr {.exportc.}: PFrame + + tempFrames: array [0..127, PFrame] # cannot be allocated on the stack! + + stackTraceNewLine* = "\n" ## undocumented feature + +proc auxWriteStackTrace(f: PFrame, s: var string) = + const + firstCalls = 32 + var + it = f + i = 0 + total = 0 + while it != nil and i <= high(tempFrames)-(firstCalls-1): + # the (-1) is for a nil entry that marks where the '...' should occur + tempFrames[i] = it + inc(i) + inc(total) + it = it.prev + var b = it + while it != nil: + inc(total) + it = it.prev + for j in 1..total-i-(firstCalls-1): + if b != nil: b = b.prev + if total != i: + tempFrames[i] = nil + inc(i) + while b != nil and i <= high(tempFrames): + tempFrames[i] = b + inc(i) + b = b.prev + for j in countdown(i-1, 0): + if tempFrames[j] == nil: + add(s, "(") + add(s, $(total-i-1)) + add(s, " calls omitted) ...") + else: + add(s, $tempFrames[j].procname) + if tempFrames[j].line > 0: + add(s, ", line: ") + add(s, $tempFrames[j].line) + add(s, stackTraceNewLine) + +proc rawWriteStackTrace(s: var string) = + if framePtr == nil: + add(s, "No stack traceback available") + add(s, stackTraceNewLine) + else: + add(s, "Traceback (most recent call last)") + add(s, stackTraceNewLine) + auxWriteStackTrace(framePtr, s) + +proc quitOrDebug() {.inline.} = + when not defined(endb): + quit(1) + else: + endbStep() # call the debugger + +proc raiseException(e: ref E_Base, ename: CString) = + GC_disable() # a bad thing is an error in the GC while raising an exception + e.name = ename + if excHandler != nil: + excHandler.exc = e + c_longjmp(excHandler.context, 1) + else: + if not isNil(buf): + setLen(buf, 0) + rawWriteStackTrace(buf) + if e.msg != nil and e.msg[0] != '\0': + add(buf, "Error: unhandled exception: ") + add(buf, $e.msg) + else: + add(buf, "Error: unhandled exception") + add(buf, " [") + add(buf, $ename) + add(buf, "]\n") + writeToStdErr(buf) + else: + writeToStdErr(ename) + quitOrDebug() + GC_enable() + +var + gAssertionFailed: ref EAssertionFailed + +proc internalAssert(file: cstring, line: int, cond: bool) {.compilerproc.} = + if not cond: + #c_fprintf(c_stdout, "Assertion failure: file %s line %ld\n", file, line) + #quit(1) + GC_disable() # BUGFIX: `$` allocates a new string object! + if not isNil(assertBuf): + # BUGFIX: when debugging the GC, assertBuf may be nil + setLen(assertBuf, 0) + add(assertBuf, "[Assertion failure] file: ") + add(assertBuf, file) + add(assertBuf, " line: ") + add(assertBuf, $line) + add(assertBuf, "\n") + gAssertionFailed.msg = assertBuf + GC_enable() + if gAssertionFailed != nil: + raise gAssertionFailed + else: + c_fprintf(c_stdout, "Assertion failure: file %s line %ld\n", file, line) + quit(1) + +proc WriteStackTrace() = + var s = "" + rawWriteStackTrace(s) + writeToStdErr(s) + +#proc stackTraceWrapper {.noconv.} = +# writeStackTrace() + +#addQuitProc(stackTraceWrapper) + +var + dbgAborting: bool # whether the debugger wants to abort + +proc signalHandler(sig: cint) {.exportc: "signalHandler", noconv.} = + # print stack trace and quit + var s = sig + GC_disable() + setLen(buf, 0) + rawWriteStackTrace(buf) + + if s == SIGINT: add(buf, "SIGINT: Interrupted by Ctrl-C.\n") + elif s == SIGSEGV: add(buf, "SIGSEGV: Illegal storage access.\n") + elif s == SIGABRT: + if dbgAborting: return # the debugger wants to abort + add(buf, "SIGABRT: Abnormal termination.\n") + elif s == SIGFPE: add(buf, "SIGFPE: Arithmetic error.\n") + elif s == SIGILL: add(buf, "SIGILL: Illegal operation.\n") + elif s == SIGBUS: add(buf, "SIGBUS: Illegal storage access.\n") + else: add(buf, "unknown signal\n") + writeToStdErr(buf) + dbgAborting = True # play safe here... + GC_enable() + quit(1) # always quit when SIGABRT + +proc registerSignalHandler() = + c_signal(SIGINT, signalHandler) + c_signal(SIGSEGV, signalHandler) + c_signal(SIGABRT, signalHandler) + c_signal(SIGFPE, signalHandler) + c_signal(SIGILL, signalHandler) + c_signal(SIGBUS, signalHandler) + +when not defined(noSignalHandler): + registerSignalHandler() # call it in initialization section +# for easier debugging of the GC, this memory is only allocated after the +# signal handlers have been registered +new(gAssertionFailed) +buf = newString(2048) +assertBuf = newString(2048) +setLen(buf, 0) +setLen(assertBuf, 0) + +proc raiseRangeError(val: biggestInt) {.compilerproc, noreturn, noinline.} = + raise newException(EOutOfRange, "value " & $val & " out of range") + +proc raiseIndexError() {.compilerproc, noreturn, noinline.} = + raise newException(EInvalidIndex, "index out of bounds") + +proc raiseFieldError(f: string) {.compilerproc, noreturn, noinline.} = + raise newException(EInvalidField, f & " is not accessible") + +proc chckIndx(i, a, b: int): int = + if i >= a and i <= b: + return i + else: + raiseIndexError() + +proc chckRange(i, a, b: int): int = + if i >= a and i <= b: + return i + else: + raiseRangeError(i) + +proc chckRange64(i, a, b: int64): int64 {.compilerproc.} = + if i >= a and i <= b: + return i + else: + raiseRangeError(i) + +proc chckRangeF(x, a, b: float): float = + if x >= a and x <= b: + return x + else: + raise newException(EOutOfRange, "value " & $x & " out of range") + +proc chckNil(p: pointer) = + if p == nil: c_raise(SIGSEGV) + +proc chckObj(obj, subclass: PNimType) {.compilerproc.} = + # checks if obj is of type subclass: + var x = obj + if x == subclass: return # optimized fast path + while x != subclass: + if x == nil: + raise newException(EInvalidObjectConversion, "invalid object conversion") + x = x.base + +proc chckObjAsgn(a, b: PNimType) {.compilerproc, inline.} = + if a != b: + raise newException(EInvalidObjectAssignment, "invalid object assignment") + +proc isObj(obj, subclass: PNimType): bool {.compilerproc.} = + # checks if obj is of type subclass: + var x = obj + if x == subclass: return true # optimized fast path + while x != subclass: + if x == nil: return false + x = x.base + return true diff --git a/nimlib/system/gc.nim b/nimlib/system/gc.nim new file mode 100755 index 000000000..da8f75768 --- /dev/null +++ b/nimlib/system/gc.nim @@ -0,0 +1,647 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2009 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + + +# Garbage Collector +# +# The basic algorithm is *Deferrent Reference Counting* with cycle detection. +# Special care has been taken to avoid recursion as far as possible to avoid +# stack overflows when traversing deep datastructures. This is comparable to +# an incremental and generational GC. It should be well-suited for soft real +# time applications (like games). +# +# Future Improvements: +# * Support for multi-threading. However, locks for the reference counting +# might turn out to be too slow. + +const + CycleIncrease = 2 # is a multiplicative increase + InitialCycleThreshold = 4*1024*1024 # X MB because cycle checking is slow + ZctThreshold = 256 # we collect garbage if the ZCT's size + # reaches this threshold + # this seems to be a good value + +const + rcIncrement = 0b1000 # so that lowest 3 bits are not touched + # NOTE: Most colors are currently unused + rcBlack = 0b000 # cell is colored black; in use or free + rcGray = 0b001 # possible member of a cycle + rcWhite = 0b010 # member of a garbage cycle + rcPurple = 0b011 # possible root of a cycle + rcZct = 0b100 # in ZCT + rcRed = 0b101 # Candidate cycle undergoing sigma-computation + rcOrange = 0b110 # Candidate cycle awaiting epoch boundary + rcShift = 3 # shift by rcShift to get the reference counter + colorMask = 0b111 +type + TWalkOp = enum + waZctDecRef, waPush, waCycleDecRef + + TFinalizer {.compilerproc.} = proc (self: pointer) + # A ref type can have a finalizer that is called before the object's + # storage is freed. + + TGcStat {.final, pure.} = object + stackScans: int # number of performed stack scans (for statistics) + cycleCollections: int # number of performed full collections + maxThreshold: int # max threshold that has been set + maxStackSize: int # max stack size + maxStackCells: int # max stack cells in ``decStack`` + cycleTableSize: int # max entries in cycle table + + TGcHeap {.final, pure.} = object # this contains the zero count and + # non-zero count table + zct: TCellSeq # the zero count table + decStack: TCellSeq # cells in the stack that are to decref again + cycleRoots: TCellSet + tempStack: TCellSeq # temporary stack for recursion elimination + stat: TGcStat + +var + stackBottom: pointer + gch: TGcHeap + cycleThreshold: int = InitialCycleThreshold + recGcLock: int = 0 + # we use a lock to prevend the garbage collector to be triggered in a + # finalizer; the collector should not call itself this way! Thus every + # object allocated by a finalizer will not trigger a garbage collection. + # This is wasteful but safe. This is a lock against recursive garbage + # collection, not a lock for threads! + +proc unsureAsgnRef(dest: ppointer, src: pointer) {.compilerproc.} + # unsureAsgnRef updates the reference counters only if dest is not on the + # stack. It is used by the code generator if it cannot decide wether a + # reference is in the stack or not (this can happen for var parameters). +#proc growObj(old: pointer, newsize: int): pointer {.compilerproc.} +proc newObj(typ: PNimType, size: int): pointer {.compilerproc.} +proc newSeq(typ: PNimType, len: int): pointer {.compilerproc.} + +proc addZCT(s: var TCellSeq, c: PCell) {.noinline.} = + if (c.refcount and rcZct) == 0: + c.refcount = c.refcount and not colorMask or rcZct + add(s, c) + +proc cellToUsr(cell: PCell): pointer {.inline.} = + # convert object (=pointer to refcount) to pointer to userdata + result = cast[pointer](cast[TAddress](cell)+%TAddress(sizeof(TCell))) + +proc usrToCell(usr: pointer): PCell {.inline.} = + # convert pointer to userdata to object (=pointer to refcount) + result = cast[PCell](cast[TAddress](usr)-%TAddress(sizeof(TCell))) + +proc canbeCycleRoot(c: PCell): bool {.inline.} = + result = ntfAcyclic notin c.typ.flags + +proc extGetCellType(c: pointer): PNimType {.compilerproc.} = + # used for code generation concerning debugging + result = usrToCell(c).typ + +proc internRefcount(p: pointer): int {.exportc: "getRefcount".} = + result = int(usrToCell(p).refcount) shr rcShift + +proc GC_disable() = inc(recGcLock) +proc GC_enable() = + if recGcLock > 0: dec(recGcLock) + +proc GC_setStrategy(strategy: TGC_Strategy) = + case strategy + of gcThroughput: nil + of gcResponsiveness: nil + of gcOptimizeSpace: nil + of gcOptimizeTime: nil + +proc GC_enableMarkAndSweep() = + cycleThreshold = InitialCycleThreshold + +proc GC_disableMarkAndSweep() = + cycleThreshold = high(cycleThreshold)-1 + # set to the max value to suppress the cycle detector + +# this that has to equals zero, otherwise we have to round up UnitsPerPage: +when BitsPerPage mod (sizeof(int)*8) != 0: + {.error: "(BitsPerPage mod BitsPerUnit) should be zero!".} + +when debugGC: + proc writeCell(msg: CString, c: PCell) = + var kind = -1 + if c.typ != nil: kind = ord(c.typ.kind) + when debugGC: + c_fprintf(c_stdout, "[GC] %s: %p %d rc=%ld from %s(%ld)\n", + msg, c, kind, c.refcount shr rcShift, c.filename, c.line) + else: + c_fprintf(c_stdout, "[GC] %s: %p %d rc=%ld\n", + msg, c, kind, c.refcount shr rcShift) + +when traceGC: + # traceGC is a special switch to enable extensive debugging + type + TCellState = enum + csAllocated, csZctFreed, csCycFreed + var + states: array[TCellState, TCellSet] + + proc traceCell(c: PCell, state: TCellState) = + case state + of csAllocated: + if c in states[csAllocated]: + writeCell("attempt to alloc an already allocated cell", c) + assert(false) + excl(states[csCycFreed], c) + excl(states[csZctFreed], c) + of csZctFreed: + if c in states[csZctFreed]: + writeCell("attempt to free zct cell twice", c) + assert(false) + if c in states[csCycFreed]: + writeCell("attempt to free with zct, but already freed with cyc", c) + assert(false) + if c notin states[csAllocated]: + writeCell("attempt to free not an allocated cell", c) + assert(false) + excl(states[csAllocated], c) + of csCycFreed: + if c notin states[csAllocated]: + writeCell("attempt to free a not allocated cell", c) + assert(false) + if c in states[csCycFreed]: + writeCell("attempt to free cyc cell twice", c) + assert(false) + if c in states[csZctFreed]: + writeCell("attempt to free with cyc, but already freed with zct", c) + assert(false) + excl(states[csAllocated], c) + incl(states[state], c) + + proc writeLeakage() = + var z = 0 + var y = 0 + var e = 0 + for c in elements(states[csAllocated]): + inc(e) + if c in states[csZctFreed]: inc(z) + elif c in states[csCycFreed]: inc(z) + else: writeCell("leak", c) + cfprintf(cstdout, "Allocations: %ld; ZCT freed: %ld; CYC freed: %ld\n", + e, z, y) + +template gcTrace(cell, state: expr): stmt = + when traceGC: traceCell(cell, state) + +# ----------------------------------------------------------------------------- + +# forward declarations: +proc collectCT(gch: var TGcHeap) +proc IsOnStack(p: pointer): bool {.noinline.} +proc forAllChildren(cell: PCell, op: TWalkOp) +proc doOperation(p: pointer, op: TWalkOp) +proc forAllChildrenAux(dest: Pointer, mt: PNimType, op: TWalkOp) +# we need the prototype here for debugging purposes + +proc prepareDealloc(cell: PCell) = + if cell.typ.finalizer != nil: + # the finalizer could invoke something that + # allocates memory; this could trigger a garbage + # collection. Since we are already collecting we + # prevend recursive entering here by a lock. + # XXX: we should set the cell's children to nil! + inc(recGcLock) + (cast[TFinalizer](cell.typ.finalizer))(cellToUsr(cell)) + dec(recGcLock) + +proc setStackBottom(theStackBottom: pointer) {.compilerproc.} = + stackBottom = theStackBottom + +proc PossibleRoot(gch: var TGcHeap, c: PCell) {.inline.} = + if canbeCycleRoot(c): incl(gch.cycleRoots, c) + +proc decRef(c: PCell) {.inline.} = + when stressGC: + if c.refcount <% rcIncrement: + writeCell("broken cell", c) + assert(c.refcount >=% rcIncrement) + c.refcount = c.refcount -% rcIncrement + if c.refcount <% rcIncrement: + addZCT(gch.zct, c) + elif canBeCycleRoot(c): + incl(gch.cycleRoots, c) + +proc incRef(c: PCell) {.inline.} = + c.refcount = c.refcount +% rcIncrement + if canBeCycleRoot(c): + incl(gch.cycleRoots, c) + +proc nimGCref(p: pointer) {.compilerproc, inline.} = incRef(usrToCell(p)) +proc nimGCunref(p: pointer) {.compilerproc, inline.} = decRef(usrToCell(p)) + +proc asgnRef(dest: ppointer, src: pointer) {.compilerproc, inline.} = + # the code generator calls this proc! + assert(not isOnStack(dest)) + # BUGFIX: first incRef then decRef! + if src != nil: incRef(usrToCell(src)) + if dest^ != nil: decRef(usrToCell(dest^)) + dest^ = src + +proc asgnRefNoCycle(dest: ppointer, src: pointer) {.compilerproc, inline.} = + # the code generator calls this proc if it is known at compile time that no + # cycle is possible. + if src != nil: + var c = usrToCell(src) + c.refcount = c.refcount +% rcIncrement + if dest^ != nil: + var c = usrToCell(dest^) + c.refcount = c.refcount -% rcIncrement + if c.refcount <% rcIncrement: + addZCT(gch.zct, c) + dest^ = src + +proc unsureAsgnRef(dest: ppointer, src: pointer) = + if not IsOnStack(dest): + if src != nil: incRef(usrToCell(src)) + if dest^ != nil: decRef(usrToCell(dest^)) + dest^ = src + +proc initGC() = + when traceGC: + for i in low(TCellState)..high(TCellState): Init(states[i]) + gch.stat.stackScans = 0 + gch.stat.cycleCollections = 0 + gch.stat.maxThreshold = 0 + gch.stat.maxStackSize = 0 + gch.stat.maxStackCells = 0 + gch.stat.cycleTableSize = 0 + # init the rt + init(gch.zct) + init(gch.tempStack) + Init(gch.cycleRoots) + Init(gch.decStack) + new(gOutOfMem) # reserve space for the EOutOfMemory exception here! + +proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: TWalkOp) = + var d = cast[TAddress](dest) + case n.kind + of nkNone: assert(false) + of nkSlot: forAllChildrenAux(cast[pointer](d +% n.offset), n.typ, op) + of nkList: + for i in 0..n.len-1: forAllSlotsAux(dest, n.sons[i], op) + of nkCase: + var m = selectBranch(dest, n) + if m != nil: forAllSlotsAux(dest, m, op) + +proc forAllChildrenAux(dest: Pointer, mt: PNimType, op: TWalkOp) = + var d = cast[TAddress](dest) + if dest == nil: return # nothing to do + if ntfNoRefs notin mt.flags: + case mt.Kind + of tyArray, tyArrayConstr, tyOpenArray: + for i in 0..(mt.size div mt.base.size)-1: + forAllChildrenAux(cast[pointer](d +% i *% mt.base.size), mt.base, op) + of tyRef, tyString, tySequence: # leaf: + doOperation(cast[ppointer](d)^, op) + of tyObject, tyTuple, tyPureObject: + forAllSlotsAux(dest, mt.node, op) + else: nil + +proc forAllChildren(cell: PCell, op: TWalkOp) = + assert(cell != nil) + assert(cell.typ != nil) + case cell.typ.Kind + of tyRef: # common case + forAllChildrenAux(cellToUsr(cell), cell.typ.base, op) + of tySequence: + var d = cast[TAddress](cellToUsr(cell)) + var s = cast[PGenericSeq](d) + if s != nil: + for i in 0..s.len-1: + forAllChildrenAux(cast[pointer](d +% i *% cell.typ.base.size +% + GenericSeqSize), cell.typ.base, op) + of tyString: nil + else: assert(false) + +proc checkCollection {.inline.} = + # checks if a collection should be done + if recGcLock == 0: + collectCT(gch) + +proc newObj(typ: PNimType, size: int): pointer = + # generates a new object and sets its reference counter to 0 + assert(typ.kind in {tyRef, tyString, tySequence}) + checkCollection() + var res = cast[PCell](rawAlloc(allocator, size + sizeof(TCell))) + zeroMem(res, size+sizeof(TCell)) + assert((cast[TAddress](res) and (MemAlign-1)) == 0) + # now it is buffered in the ZCT + res.typ = typ + when debugGC: + if framePtr != nil and framePtr.prev != nil: + res.filename = framePtr.prev.filename + res.line = framePtr.prev.line + res.refcount = rcZct # refcount is zero, but mark it to be in the ZCT + assert(isAllocatedPtr(allocator, res)) + # its refcount is zero, so add it to the ZCT: + block addToZCT: + # we check the last 8 entries (cache line) for a slot + # that could be reused + var L = gch.zct.len + var d = gch.zct.d + for i in countdown(L-1, max(0, L-8)): + var c = d[i] + if c.refcount >=% rcIncrement: + c.refcount = c.refcount and not colorMask + d[i] = res + break addToZCT + add(gch.zct, res) + when logGC: writeCell("new cell", res) + gcTrace(res, csAllocated) + result = cellToUsr(res) + +proc newSeq(typ: PNimType, len: int): pointer = + result = newObj(typ, addInt(mulInt(len, typ.base.size), GenericSeqSize)) + cast[PGenericSeq](result).len = len + cast[PGenericSeq](result).space = len + +proc growObj(old: pointer, newsize: int): pointer = + checkCollection() + var ol = usrToCell(old) + assert(ol.typ != nil) + assert(ol.typ.kind in {tyString, tySequence}) + var res = cast[PCell](rawAlloc(allocator, newsize + sizeof(TCell))) + var elemSize = 1 + if ol.typ.kind != tyString: + elemSize = ol.typ.base.size + + var oldsize = cast[PGenericSeq](old).len*elemSize + GenericSeqSize + copyMem(res, ol, oldsize + sizeof(TCell)) + zeroMem(cast[pointer](cast[TAddress](res)+% oldsize +% sizeof(TCell)), + newsize-oldsize) + assert((cast[TAddress](res) and (MemAlign-1)) == 0) + assert(res.refcount shr rcShift <=% 1) + #if res.refcount <% rcIncrement: + # add(gch.zct, res) + #else: # XXX: what to do here? + # decRef(ol) + if (ol.refcount and colorMask) == rcZct: + var j = gch.zct.len-1 + var d = gch.zct.d + while j >= 0: + if d[j] == ol: + d[j] = res + break + dec(j) + if canBeCycleRoot(ol): excl(gch.cycleRoots, ol) + when logGC: + writeCell("growObj old cell", ol) + writeCell("growObj new cell", res) + gcTrace(ol, csZctFreed) + gcTrace(res, csAllocated) + when reallyDealloc: rawDealloc(allocator, ol) + else: + assert(ol.typ != nil) + zeroMem(ol, sizeof(TCell)) + result = cellToUsr(res) + +# ---------------- cycle collector ------------------------------------------- + +proc doOperation(p: pointer, op: TWalkOp) = + if p == nil: return + var c: PCell = usrToCell(p) + assert(c != nil) + case op # faster than function pointers because of easy prediction + of waZctDecRef: + assert(c.refcount >=% rcIncrement) + c.refcount = c.refcount -% rcIncrement + when logGC: writeCell("decref (from doOperation)", c) + if c.refcount <% rcIncrement: addZCT(gch.zct, c) + of waPush: + add(gch.tempStack, c) + of waCycleDecRef: + assert(c.refcount >=% rcIncrement) + c.refcount = c.refcount -% rcIncrement + +# we now use a much simpler and non-recursive algorithm for cycle removal +proc collectCycles(gch: var TGcHeap) = + var tabSize = 0 + for c in elements(gch.cycleRoots): + inc(tabSize) + forallChildren(c, waCycleDecRef) + gch.stat.cycleTableSize = max(gch.stat.cycleTableSize, tabSize) + + # restore reference counts (a depth-first traversal is needed): + var marker: TCellSet + Init(marker) + for c in elements(gch.cycleRoots): + if c.refcount >=% rcIncrement: + if not containsOrIncl(marker, c): + gch.tempStack.len = 0 + forAllChildren(c, waPush) + while gch.tempStack.len > 0: + dec(gch.tempStack.len) + var d = gch.tempStack.d[gch.tempStack.len] + d.refcount = d.refcount +% rcIncrement + if d in gch.cycleRoots and not containsOrIncl(marker, d): + forAllChildren(d, waPush) + # remove cycles: + for c in elements(gch.cycleRoots): + if c.refcount <% rcIncrement: + gch.tempStack.len = 0 + forAllChildren(c, waPush) + while gch.tempStack.len > 0: + dec(gch.tempStack.len) + var d = gch.tempStack.d[gch.tempStack.len] + if d.refcount <% rcIncrement: + if d notin gch.cycleRoots: # d is leaf of c and not part of cycle + addZCT(gch.zct, d) + when logGC: writeCell("add to ZCT (from cycle collector)", d) + prepareDealloc(c) + gcTrace(c, csCycFreed) + when logGC: writeCell("cycle collector dealloc cell", c) + when reallyDealloc: rawDealloc(allocator, c) + else: + assert(c.typ != nil) + zeroMem(c, sizeof(TCell)) + Deinit(gch.cycleRoots) + Init(gch.cycleRoots) + +proc gcMark(p: pointer) {.inline.} = + # the addresses are not as cells on the stack, so turn them to cells: + var cell = usrToCell(p) + var c = cast[TAddress](cell) + if c >% PageSize and (c and (MemAlign-1)) == 0: + # fast check: does it look like a cell? + if isAllocatedPtr(allocator, cell): + # mark the cell: + cell.refcount = cell.refcount +% rcIncrement + add(gch.decStack, cell) + +# ----------------- stack management -------------------------------------- +# inspired from Smart Eiffel + +proc stackSize(): int {.noinline.} = + var stackTop: array[0..1, pointer] + result = abs(cast[int](addr(stackTop[0])) - cast[int](stackBottom)) + +when defined(sparc): # For SPARC architecture. + proc isOnStack(p: pointer): bool = + var stackTop: array [0..1, pointer] + var b = cast[TAddress](stackBottom) + var a = cast[TAddress](addr(stackTop[0])) + var x = cast[TAddress](p) + result = x >=% a and x <=% b + + proc markStackAndRegisters(gch: var TGcHeap) {.noinline, cdecl.} = + when defined(sparcv9): + asm """"flushw \n" """ + else: + asm """"ta 0x3 ! ST_FLUSH_WINDOWS\n" """ + + var + max = stackBottom + sp: PPointer + stackTop: array[0..1, pointer] + sp = addr(stackTop[0]) + # Addresses decrease as the stack grows. + while sp <= max: + gcMark(sp^) + sp = cast[ppointer](cast[TAddress](sp) +% sizeof(pointer)) + +elif defined(ELATE): + {.error: "stack marking code is to be written for this architecture".} + +elif defined(hppa) or defined(hp9000) or defined(hp9000s300) or + defined(hp9000s700) or defined(hp9000s800) or defined(hp9000s820): + # --------------------------------------------------------------------------- + # Generic code for architectures where addresses increase as the stack grows. + # --------------------------------------------------------------------------- + proc isOnStack(p: pointer): bool = + var stackTop: array [0..1, pointer] + var a = cast[TAddress](stackBottom) + var b = cast[TAddress](addr(stackTop[0])) + var x = cast[TAddress](p) + result = x >=% a and x <=% b + + var + jmpbufSize {.importc: "sizeof(jmp_buf)", nodecl.}: int + # a little hack to get the size of a TJmpBuf in the generated C code + # in a platform independant way + + proc markStackAndRegisters(gch: var TGcHeap) {.noinline, cdecl.} = + var registers: C_JmpBuf + if c_setjmp(registers) == 0'i32: # To fill the C stack with registers. + var max = cast[TAddress](stackBottom) + var sp = cast[TAddress](addr(registers)) +% jmpbufSize -% sizeof(pointer) + # sp will traverse the JMP_BUF as well (jmp_buf size is added, + # otherwise sp would be below the registers structure). + while sp >=% max: + gcMark(cast[ppointer](sp)^) + sp = sp -% sizeof(pointer) + +else: + # --------------------------------------------------------------------------- + # Generic code for architectures where addresses decrease as the stack grows. + # --------------------------------------------------------------------------- + proc isOnStack(p: pointer): bool = + var stackTop: array [0..1, pointer] + var b = cast[TAddress](stackBottom) + var a = cast[TAddress](addr(stackTop[0])) + var x = cast[TAddress](p) + result = x >=% a and x <=% b + + proc markStackAndRegisters(gch: var TGcHeap) {.noinline, cdecl.} = + # We use a jmp_buf buffer that is in the C stack. + # Used to traverse the stack and registers assuming + # that 'setjmp' will save registers in the C stack. + var registers: C_JmpBuf + if c_setjmp(registers) == 0'i32: # To fill the C stack with registers. + var max = cast[TAddress](stackBottom) + var sp = cast[TAddress](addr(registers)) + while sp <=% max: + gcMark(cast[ppointer](sp)^) + sp = sp +% sizeof(pointer) + +# ---------------------------------------------------------------------------- +# end of non-portable code +# ---------------------------------------------------------------------------- + +proc CollectZCT(gch: var TGcHeap) = + # Note: Freeing may add child objects to the ZCT! So essentially we do + # deep freeing, which is bad for incremental operation. In order to + # avoid a deep stack, we move objects to keep the ZCT small. + # This is performance critical! + var L = addr(gch.zct.len) + while L^ > 0: + var c = gch.zct.d[0] + # remove from ZCT: + assert((c.refcount and colorMask) == rcZct) + c.refcount = c.refcount and not colorMask + gch.zct.d[0] = gch.zct.d[L^ - 1] + dec(L^) + if c.refcount <% rcIncrement: + # It may have a RC > 0, if it is in the hardware stack or + # it has not been removed yet from the ZCT. This is because + # ``incref`` does not bother to remove the cell from the ZCT + # as this might be too slow. + # In any case, it should be removed from the ZCT. But not + # freed. **KEEP THIS IN MIND WHEN MAKING THIS INCREMENTAL!** + if canBeCycleRoot(c): excl(gch.cycleRoots, c) + when logGC: writeCell("zct dealloc cell", c) + gcTrace(c, csZctFreed) + # We are about to free the object, call the finalizer BEFORE its + # children are deleted as well, because otherwise the finalizer may + # access invalid memory. This is done by prepareDealloc(): + prepareDealloc(c) + forAllChildren(c, waZctDecRef) + when reallyDealloc: rawDealloc(allocator, c) + else: + assert(c.typ != nil) + zeroMem(c, sizeof(TCell)) + +proc unmarkStackAndRegisters(gch: var TGcHeap) = + var d = gch.decStack.d + for i in 0..gch.decStack.len-1: + assert isAllocatedPtr(allocator, d[i]) + decRef(d[i]) # OPT: cannot create a cycle! + gch.decStack.len = 0 + +proc collectCT(gch: var TGcHeap) = + if gch.zct.len >= ZctThreshold or (cycleGC and + getOccupiedMem() >= cycleThreshold) or stressGC: + gch.stat.maxStackSize = max(gch.stat.maxStackSize, stackSize()) + assert(gch.decStack.len == 0) + markStackAndRegisters(gch) + gch.stat.maxStackCells = max(gch.stat.maxStackCells, gch.decStack.len) + inc(gch.stat.stackScans) + collectZCT(gch) + when cycleGC: + if getOccupiedMem() >= cycleThreshold or stressGC: + collectCycles(gch) + collectZCT(gch) + inc(gch.stat.cycleCollections) + cycleThreshold = max(InitialCycleThreshold, getOccupiedMem() * + cycleIncrease) + gch.stat.maxThreshold = max(gch.stat.maxThreshold, cycleThreshold) + unmarkStackAndRegisters(gch) + +proc GC_fullCollect() = + var oldThreshold = cycleThreshold + cycleThreshold = 0 # forces cycle collection + collectCT(gch) + cycleThreshold = oldThreshold + +proc GC_getStatistics(): string = + GC_disable() + result = "[GC] total memory: " & $(getTotalMem()) & "\n" & + "[GC] occupied memory: " & $(getOccupiedMem()) & "\n" & + "[GC] stack scans: " & $gch.stat.stackScans & "\n" & + "[GC] stack cells: " & $gch.stat.maxStackCells & "\n" & + "[GC] cycle collections: " & $gch.stat.cycleCollections & "\n" & + "[GC] max threshold: " & $gch.stat.maxThreshold & "\n" & + "[GC] zct capacity: " & $gch.zct.cap & "\n" & + "[GC] max cycle table size: " & $gch.stat.cycleTableSize & "\n" & + "[GC] max stack size: " & $gch.stat.maxStackSize + when traceGC: writeLeakage() + GC_enable() diff --git a/nimlib/system/hti.nim b/nimlib/system/hti.nim new file mode 100755 index 000000000..3343000ae --- /dev/null +++ b/nimlib/system/hti.nim @@ -0,0 +1,58 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2009 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +type # This should be he same as ast.TTypeKind + # many enum fields are not used at runtime + TNimKind = enum + tyNone, tyBool, tyChar, + tyEmpty, tyArrayConstr, tyNil, tyExpr, tyStmt, tyTypeDesc, + tyGenericInvokation, # ``T[a, b]`` for types to invoke + tyGenericBody, # ``T[a, b, body]`` last parameter is the body + tyGenericInst, # ``T[a, b, realInstance]`` instantiated generic type + tyGenericParam, # ``a`` in the example + tyDistinct, # distinct type + tyEnum, + tyOrdinal, + tyArray, + tyObject, + tyTuple, + tySet, + tyRange, + tyPtr, tyRef, + tyVar, + tySequence, + tyProc, + tyPointer, tyOpenArray, + tyString, tyCString, tyForward, + tyInt, tyInt8, tyInt16, tyInt32, tyInt64, + tyFloat, tyFloat32, tyFloat64, tyFloat128, + tyPureObject # signals that object has no `n_type` field + + TNimNodeKind = enum nkNone, nkSlot, nkList, nkCase + TNimNode {.compilerproc, final.} = object + kind: TNimNodeKind + offset: int + typ: ptr TNimType + name: Cstring + len: int + sons: ptr array [0..0x7fff, ptr TNimNode] + + TNimTypeFlag = enum + ntfNoRefs = 0, # type contains no tyRef, tySequence, tyString + ntfAcyclic = 1 # type cannot form a cycle + TNimType {.compilerproc, final.} = object + size: int + kind: TNimKind + flags: set[TNimTypeFlag] + base: ptr TNimType + node: ptr TNimNode # valid for tyRecord, tyObject, tyTuple, tyEnum + finalizer: pointer # the finalizer for the type + PNimType = ptr TNimType + +# node.len may be the ``first`` element of a set diff --git a/nimlib/system/mm.nim b/nimlib/system/mm.nim new file mode 100755 index 000000000..76b5d83bd --- /dev/null +++ b/nimlib/system/mm.nim @@ -0,0 +1,189 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2009 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +# Nimrod high-level memory manager: It supports Boehm's GC, no GC and the +# native Nimrod GC. The native Nimrod GC is the default. + +#{.push checks:on, assertions:on.} +{.push checks:off.} + +const + debugGC = false # we wish to debug the GC... + logGC = false + traceGC = false # extensive debugging + reallyDealloc = true # for debugging purposes this can be set to false + cycleGC = true # (de)activate the cycle GC + stressGC = false + reallyOsDealloc = true + coalescRight = true + coalescLeft = true + overwriteFree = false + +type + PPointer = ptr pointer + TByteArray = array[0..1000_0000, byte] + PByte = ptr TByteArray + PString = ptr string + +# Page size of the system; in most cases 4096 bytes. For exotic OS or +# CPU this needs to be changed: +const + PageShift = 12 + PageSize = 1 shl PageShift + PageMask = PageSize-1 + + MemAlign = 8 # also minimal allocatable memory block + + BitsPerPage = PageSize div MemAlign + UnitsPerPage = BitsPerPage div (sizeof(int)*8) + # how many ints do we need to describe a page: + # on 32 bit systems this is only 16 (!) + + TrunkShift = 9 + BitsPerTrunk = 1 shl TrunkShift # needs to be power of 2 and divisible by 64 + TrunkMask = BitsPerTrunk - 1 + IntsPerTrunk = BitsPerTrunk div (sizeof(int)*8) + IntShift = 5 + ord(sizeof(int) == 8) # 5 or 6, depending on int width + IntMask = 1 shl IntShift - 1 + +var + gOutOfMem: ref EOutOfMemory + +proc raiseOutOfMem() {.noreturn.} = + if gOutOfMem == nil: quit("out of memory; cannot even throw an exception") + gOutOfMem.msg = "out of memory" + raise gOutOfMem + +when defined(boehmgc): + when defined(windows): + const boehmLib = "boehmgc.dll" + else: + const boehmLib = "/usr/lib/libgc.so.1" + + proc boehmGC_disable {.importc: "GC_disable", dynlib: boehmLib.} + proc boehmGC_enable {.importc: "GC_enable", dynlib: boehmLib.} + proc boehmGCincremental {. + importc: "GC_enable_incremental", dynlib: boehmLib.} + proc boehmGCfullCollect {.importc: "GC_gcollect", dynlib: boehmLib.} + proc boehmAlloc(size: int): pointer {. + importc: "GC_malloc", dynlib: boehmLib.} + proc boehmAllocAtomic(size: int): pointer {. + importc: "GC_malloc_atomic", dynlib: boehmLib.} + proc boehmRealloc(p: pointer, size: int): pointer {. + importc: "GC_realloc", dynlib: boehmLib.} + proc boehmDealloc(p: pointer) {.importc: "GC_free", dynlib: boehmLib.} + + proc alloc(size: int): pointer = + result = boehmAlloc(size) + if result == nil: raiseOutOfMem() + proc alloc0(size: int): pointer = + result = alloc(size) + zeroMem(result, size) + proc realloc(p: Pointer, newsize: int): pointer = + result = boehmRealloc(p, newsize) + if result == nil: raiseOutOfMem() + proc dealloc(p: Pointer) = + boehmDealloc(p) + + proc initGC() = nil + + #boehmGCincremental() + + proc GC_disable() = boehmGC_disable() + proc GC_enable() = boehmGC_enable() + proc GC_fullCollect() = boehmGCfullCollect() + proc GC_setStrategy(strategy: TGC_Strategy) = nil + proc GC_enableMarkAndSweep() = nil + proc GC_disableMarkAndSweep() = nil + proc GC_getStatistics(): string = return "" + + proc getOccupiedMem(): int = return -1 + proc getFreeMem(): int = return -1 + proc getTotalMem(): int = return -1 + + proc newObj(typ: PNimType, size: int): pointer {.compilerproc.} = + result = alloc(size) + proc newSeq(typ: PNimType, len: int): pointer {.compilerproc.} = + result = newObj(typ, addInt(mulInt(len, typ.base.size), GenericSeqSize)) + cast[PGenericSeq](result).len = len + cast[PGenericSeq](result).space = len + + proc growObj(old: pointer, newsize: int): pointer = + result = realloc(old, newsize) + + proc setStackBottom(theStackBottom: pointer) {.compilerproc.} = nil + proc nimGCref(p: pointer) {.compilerproc, inline.} = nil + proc nimGCunref(p: pointer) {.compilerproc, inline.} = nil + + proc unsureAsgnRef(dest: ppointer, src: pointer) {.compilerproc, inline.} = + dest^ = src + proc asgnRef(dest: ppointer, src: pointer) {.compilerproc, inline.} = + dest^ = src + proc asgnRefNoCycle(dest: ppointer, src: pointer) {.compilerproc, inline.} = + dest^ = src + + include "system/cellsets" +elif defined(nogc): + include "system/alloc" + + when false: + proc alloc(size: int): pointer = + result = c_malloc(size) + if result == nil: raiseOutOfMem() + proc alloc0(size: int): pointer = + result = alloc(size) + zeroMem(result, size) + proc realloc(p: Pointer, newsize: int): pointer = + result = c_realloc(p, newsize) + if result == nil: raiseOutOfMem() + proc dealloc(p: Pointer) = c_free(p) + proc getOccupiedMem(): int = return -1 + proc getFreeMem(): int = return -1 + proc getTotalMem(): int = return -1 + + proc initGC() = nil + proc GC_disable() = nil + proc GC_enable() = nil + proc GC_fullCollect() = nil + proc GC_setStrategy(strategy: TGC_Strategy) = nil + proc GC_enableMarkAndSweep() = nil + proc GC_disableMarkAndSweep() = nil + proc GC_getStatistics(): string = return "" + + + proc newObj(typ: PNimType, size: int): pointer {.compilerproc.} = + result = alloc0(size) + proc newSeq(typ: PNimType, len: int): pointer {.compilerproc.} = + result = newObj(typ, addInt(mulInt(len, typ.base.size), GenericSeqSize)) + cast[PGenericSeq](result).len = len + cast[PGenericSeq](result).space = len + proc growObj(old: pointer, newsize: int): pointer = + result = realloc(old, newsize) + + proc setStackBottom(theStackBottom: pointer) {.compilerproc.} = nil + proc nimGCref(p: pointer) {.compilerproc, inline.} = nil + proc nimGCunref(p: pointer) {.compilerproc, inline.} = nil + + proc unsureAsgnRef(dest: ppointer, src: pointer) {.compilerproc, inline.} = + dest^ = src + proc asgnRef(dest: ppointer, src: pointer) {.compilerproc, inline.} = + dest^ = src + proc asgnRefNoCycle(dest: ppointer, src: pointer) {.compilerproc, inline.} = + dest^ = src + + include "system/cellsets" +else: + include "system/alloc" + include "system/cellsets" + assert(sizeof(TCell) == sizeof(TFreeCell)) + include "system/gc" + +{.pop.} + + diff --git a/nimlib/system/profiler.nim b/nimlib/system/profiler.nim new file mode 100755 index 000000000..b87b30d4a --- /dev/null +++ b/nimlib/system/profiler.nim @@ -0,0 +1,61 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2008 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +# This file implements the Nimrod profiler. The profiler needs support by the +# code generator. + +type + TProfileData {.compilerproc, final.} = object + procname: cstring + total: float + +var + profileData {.compilerproc.}: array [0..64*1024-1, TProfileData] + +proc sortProfile(a: var array[0..64*1024-1, TProfileData], N: int) = + # we use shellsort here; fast enough and simple + var h = 1 + while true: + h = 3 * h + 1 + if h > N: break + while true: + h = h div 3 + for i in countup(h, N - 1): + var v = a[i] + var j = i + while a[j-h].total <= v.total: + a[j] = a[j-h] + j = j-h + if j < h: break + a[j] = v + if h == 1: break + +proc writeProfile() {.noconv.} = + const filename = "profile_results" + var i = 0 + var f: TFile + var j = 1 + while open(f, filename & $j & ".txt"): + close(f) + inc(j) + if open(f, filename & $j & ".txt", fmWrite): + var N = 0 + # we have to compute the actual length of the array: + while profileData[N].procname != nil: inc(N) + sortProfile(profileData, N) + writeln(f, "total running time of each proc" & + " (interpret these numbers relatively)") + while profileData[i].procname != nil: + write(f, profileData[i].procname) + write(f, ": ") + writeln(f, profileData[i].total) + inc(i) + close(f) + +addQuitProc(writeProfile) diff --git a/nimlib/system/repr.nim b/nimlib/system/repr.nim new file mode 100755 index 000000000..e340f1d7c --- /dev/null +++ b/nimlib/system/repr.nim @@ -0,0 +1,249 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2009 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +# The generic ``repr`` procedure. It is an invaluable debugging tool. + +#proc cstrToNimStrDummy(s: cstring): string {.inline.} = +# result = cast[string](cstrToNimStr(s)) + +proc reprInt(x: int64): string {.compilerproc.} = return $x +proc reprFloat(x: float): string {.compilerproc.} = return $x + +proc reprPointer(x: pointer): string {.compilerproc.} = + var buf: array [0..59, char] + c_sprintf(buf, "%p", x) + return $buf + +proc reprStrAux(result: var string, s: string) = + if cast[pointer](s) == nil: + add result, "nil" + return + add result, reprPointer(cast[pointer](s)) & "\"" + for c in items(s): + case c + of '"': add result, "\\\"" + of '\\': add result, "\\\\" # BUGFIX: forgotten + of '\10': add result, "\\10\"\n\"" # " \n " # better readability + of '\128' .. '\255', '\0'..'\9', '\11'..'\31': + add result, "\\" & reprInt(ord(c)) + else: result.add(c) + add result, "\"" + +proc reprStr(s: string): string {.compilerproc.} = + result = "" + reprStrAux(result, s) + +proc reprBool(x: bool): string {.compilerproc.} = + if x: result = "true" + else: result = "false" + +proc reprChar(x: char): string {.compilerproc.} = + result = "\'" + case x + of '"': add result, "\\\"" + of '\\': add result, "\\\\" + of '\128' .. '\255', '\0'..'\31': add result, "\\" & reprInt(ord(x)) + else: add result, x + add result, "\'" + +proc reprEnum(e: int, typ: PNimType): string {.compilerproc.} = + if e <% typ.node.len: # BUGFIX + result = $typ.node.sons[e].name + else: + result = $e & " (invalid data!)" + +type + pbyteArray = ptr array[0.. 0xffff, byte] + +proc addSetElem(result: var string, elem: int, typ: PNimType) = + case typ.kind + of tyEnum: add result, reprEnum(elem, typ) + of tyBool: add result, reprBool(bool(elem)) + of tyChar: add result, reprChar(chr(elem)) + of tyRange: addSetElem(result, elem, typ.base) + of tyInt..tyInt64: add result, reprInt(elem) + else: # data corrupt --> inform the user + add result, " (invalid data!)" + +proc reprSetAux(result: var string, p: pointer, typ: PNimType) = + # "typ.slots.len" field is for sets the "first" field + var elemCounter = 0 # we need this flag for adding the comma at + # the right places + add result, "{" + var u: int64 + case typ.size + of 1: u = ze64(cast[ptr int8](p)^) + of 2: u = ze64(cast[ptr int16](p)^) + of 4: u = ze64(cast[ptr int32](p)^) + of 8: u = cast[ptr int64](p)^ + else: + var a = cast[pbyteArray](p) + for i in 0 .. typ.size*8-1: + if (ze(a[i div 8]) and (1 shl (i mod 8))) != 0: + if elemCounter > 0: add result, ", " + addSetElem(result, i+typ.node.len, typ.base) + inc(elemCounter) + if typ.size <= 8: + for i in 0..sizeof(int64)*8-1: + if (u and (1 shl i)) != 0: + if elemCounter > 0: add result, ", " + addSetElem(result, i+typ.node.len, typ.base) + inc(elemCounter) + add result, "}" + +proc reprSet(p: pointer, typ: PNimType): string {.compilerproc.} = + result = "" + reprSetAux(result, p, typ) + +type + TReprClosure {.final.} = object # we cannot use a global variable here + # as this wouldn't be thread-safe + marked: TCellSet + recdepth: int # do not recurse endless + indent: int # indentation + +proc initReprClosure(cl: var TReprClosure) = + Init(cl.marked) + cl.recdepth = -1 # default is to display everything! + cl.indent = 0 + +proc deinitReprClosure(cl: var TReprClosure) = + Deinit(cl.marked) + +proc reprBreak(result: var string, cl: TReprClosure) = + add result, "\n" + for i in 0..cl.indent-1: add result, ' ' + +proc reprAux(result: var string, p: pointer, typ: PNimType, + cl: var TReprClosure) + +proc reprArray(result: var string, p: pointer, typ: PNimType, + cl: var TReprClosure) = + add result, "[" + var bs = typ.base.size + for i in 0..typ.size div bs - 1: + if i > 0: add result, ", " + reprAux(result, cast[pointer](cast[TAddress](p) + i*bs), typ.base, cl) + add result, "]" + +proc reprSequence(result: var string, p: pointer, typ: PNimType, + cl: var TReprClosure) = + if p == nil: + add result, "nil" + return + result.add(reprPointer(p) & "[") + var bs = typ.base.size + for i in 0..cast[PGenericSeq](p).len-1: + if i > 0: add result, ", " + reprAux(result, cast[pointer](cast[TAddress](p) + GenericSeqSize + i*bs), + typ.Base, cl) + add result, "]" + +proc reprRecordAux(result: var string, p: pointer, n: ptr TNimNode, + cl: var TReprClosure) = + case n.kind + of nkNone: assert(false) + of nkSlot: + add result, $n.name + add result, " = " + reprAux(result, cast[pointer](cast[TAddress](p) + n.offset), n.typ, cl) + of nkList: + for i in 0..n.len-1: + if i > 0: add result, ",\n" + reprRecordAux(result, p, n.sons[i], cl) + of nkCase: + var m = selectBranch(p, n) + reprAux(result, cast[pointer](cast[TAddress](p) + n.offset), n.typ, cl) + if m != nil: reprRecordAux(result, p, m, cl) + +proc reprRecord(result: var string, p: pointer, typ: PNimType, + cl: var TReprClosure) = + add result, "[" + reprRecordAux(result, p, typ.node, cl) + add result, "]" + +proc reprRef(result: var string, p: pointer, typ: PNimType, + cl: var TReprClosure) = + # we know that p is not nil here: + when defined(boehmGC) or defined(nogc): + var cell = cast[PCell](p) + else: + var cell = usrToCell(p) + add result, "ref " & reprPointer(p) + if cell notin cl.marked: + # only the address is shown: + incl(cl.marked, cell) + add result, " --> " + reprAux(result, p, typ.base, cl) + +proc reprAux(result: var string, p: pointer, typ: PNimType, + cl: var TReprClosure) = + if cl.recdepth == 0: + add result, "..." + return + dec(cl.recdepth) + case typ.kind + of tySet: reprSetAux(result, p, typ) + of tyArray: reprArray(result, p, typ, cl) + of tyTuple, tyPureObject: reprRecord(result, p, typ, cl) + of tyObject: + var t = cast[ptr PNimType](p)^ + reprRecord(result, p, t, cl) + of tyRef, tyPtr: + assert(p != nil) + if cast[ppointer](p)^ == nil: add result, "nil" + else: reprRef(result, cast[ppointer](p)^, typ, cl) + of tySequence: + reprSequence(result, cast[ppointer](p)^, typ, cl) + of tyInt: add result, $(cast[ptr int](p)^) + of tyInt8: add result, $int(cast[ptr Int8](p)^) + of tyInt16: add result, $int(cast[ptr Int16](p)^) + of tyInt32: add result, $int(cast[ptr Int32](p)^) + of tyInt64: add result, $(cast[ptr Int64](p)^) + of tyFloat: add result, $(cast[ptr float](p)^) + of tyFloat32: add result, $(cast[ptr float32](p)^) + of tyFloat64: add result, $(cast[ptr float64](p)^) + of tyEnum: add result, reprEnum(cast[ptr int](p)^, typ) + of tyBool: add result, reprBool(cast[ptr bool](p)^) + of tyChar: add result, reprChar(cast[ptr char](p)^) + of tyString: reprStrAux(result, cast[ptr string](p)^) + of tyCString: reprStrAux(result, $(cast[ptr cstring](p)^)) + of tyRange: reprAux(result, p, typ.base, cl) + of tyProc, tyPointer: + if cast[ppointer](p)^ == nil: add result, "nil" + else: add result, reprPointer(cast[ppointer](p)^) + else: + add result, "(invalid data!)" + inc(cl.recdepth) + +proc reprOpenArray(p: pointer, length: int, elemtyp: PNimType): string {. + compilerproc.} = + var + cl: TReprClosure + initReprClosure(cl) + result = "[" + var bs = elemtyp.size + for i in 0..length - 1: + if i > 0: add result, ", " + reprAux(result, cast[pointer](cast[TAddress](p) + i*bs), elemtyp, cl) + add result, "]" + deinitReprClosure(cl) + +proc reprAny(p: pointer, typ: PNimType): string = + var + cl: TReprClosure + initReprClosure(cl) + result = "" + if typ.kind in {tyObject, tyPureObject, tyTuple, tyArray, tySet}: + reprAux(result, p, typ, cl) + else: + var p = p + reprAux(result, addr(p), typ, cl) + add result, "\n" + deinitReprClosure(cl) diff --git a/nimlib/system/sets.nim b/nimlib/system/sets.nim new file mode 100755 index 000000000..f9f3eb32b --- /dev/null +++ b/nimlib/system/sets.nim @@ -0,0 +1,28 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2009 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +# set handling + +type + TNimSet = array [0..4*2048-1, int8] + +proc countBits32(n: int32): int {.compilerproc.} = + var v = n + v = v -% ((v shr 1'i32) and 0x55555555'i32) + v = (v and 0x33333333'i32) +% ((v shr 2'i32) and 0x33333333'i32) + result = ((v +% (v shr 4'i32) and 0xF0F0F0F'i32) *% 0x1010101'i32) shr 24'i32 + +proc countBits64(n: int64): int {.compilerproc.} = + result = countBits32(toU32(n and 0xffff'i64)) + + countBits32(toU32(n shr 16'i64)) + +proc cardSet(s: TNimSet, len: int): int {.compilerproc.} = + result = 0 + for i in countup(0, len-1): + inc(result, countBits32(int32(ze(s[i])))) diff --git a/nimlib/system/sysio.nim b/nimlib/system/sysio.nim new file mode 100755 index 000000000..8b6d0e285 --- /dev/null +++ b/nimlib/system/sysio.nim @@ -0,0 +1,184 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2009 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + + +## Nimrod's standard IO library. It contains high-performance +## routines for reading and writing data to (buffered) files or +## TTYs. + +{.push debugger:off .} # the user does not want to trace a part + # of the standard library! + + +proc fputs(c: cstring, f: TFile) {.importc: "fputs", noDecl.} +proc fgets(c: cstring, n: int, f: TFile): cstring {.importc: "fgets", noDecl.} +proc fgetc(stream: TFile): cint {.importc: "fgetc", nodecl.} +proc ungetc(c: cint, f: TFile) {.importc: "ungetc", nodecl.} +proc putc(c: Char, stream: TFile) {.importc: "putc", nodecl.} +proc fprintf(f: TFile, frmt: CString) {.importc: "fprintf", nodecl, varargs.} +proc strlen(c: cstring): int {.importc: "strlen", nodecl.} + +proc setvbuf(stream: TFile, buf: pointer, typ, size: cint): cint {. + importc, nodecl.} + +proc write(f: TFile, c: cstring) = fputs(c, f) + +var + IOFBF {.importc: "_IOFBF", nodecl.}: cint + IONBF {.importc: "_IONBF", nodecl.}: cint + +proc rawReadLine(f: TFile, result: var string) = + # of course this could be optimized a bit; but IO is slow anyway... + # and it was difficult to get this CORRECT with Ansi C's methods + setLen(result, 0) # reuse the buffer! + while True: + var c = fgetc(f) + if c < 0'i32: break # EOF + if c == 10'i32: break # LF + if c == 13'i32: # CR + c = fgetc(f) # is the next char LF? + if c != 10'i32: ungetc(c, f) # no, put the character back + break + add result, chr(int(c)) + +proc readLine(f: TFile): string = + result = "" + rawReadLine(f, result) + +proc write(f: TFile, s: string) = fputs(s, f) +proc write(f: TFile, i: int) = + when sizeof(int) == 8: + fprintf(f, "%lld", i) + else: + fprintf(f, "%ld", i) + +proc write(f: TFile, b: bool) = + if b: write(f, "true") + else: write(f, "false") +proc write(f: TFile, r: float) = fprintf(f, "%g", r) +proc write(f: TFile, c: Char) = putc(c, f) +proc write(f: TFile, a: openArray[string]) = + for x in items(a): write(f, x) + +#{.error: "for debugging.".} + +proc readFile(filename: string): string = + var f: TFile + try: + if open(f, filename): + var len = getFileSize(f) + if len < high(int): + result = newString(int(len)) + if readBuffer(f, addr(result[0]), int(len)) != len: + result = nil + close(f) + else: + result = nil + except EIO: + result = nil + +proc EndOfFile(f: TFile): bool = + # do not blame me; blame the ANSI C standard this is so brain-damaged + var c = fgetc(f) + ungetc(c, f) + return c == -1'i32 + +proc writeln[Ty](f: TFile, x: Ty) = + write(f, x) + write(f, "\n") + +proc writeln[Ty](f: TFile, x: openArray[Ty]) = + for i in items(x): write(f, i) + write(f, "\n") + +proc rawEcho(x: string) {.inline, compilerproc.} = write(stdout, x) +proc rawEchoNL() {.inline, compilerproc.} = write(stdout, "\n") + +# interface to the C procs: +proc fopen(filename, mode: CString): pointer {.importc: "fopen", noDecl.} + +const + FormatOpen: array [TFileMode, string] = ["rb", "wb", "w+b", "r+b", "ab"] + #"rt", "wt", "w+t", "r+t", "at" + # we always use binary here as for Nimrod the OS line ending + # should not be translated. + + +proc Open(f: var TFile, filename: string, + mode: TFileMode = fmRead, + bufSize: int = -1): Bool = + var + p: pointer + p = fopen(filename, FormatOpen[mode]) + result = (p != nil) + f = cast[TFile](p) + if bufSize > 0: + if setvbuf(f, nil, IOFBF, bufSize) != 0'i32: + raise newException(EOutOfMemory, "out of memory") + elif bufSize == 0: + discard setvbuf(f, nil, IONBF, 0) + +proc fdopen(filehandle: TFileHandle, mode: cstring): TFile {. + importc: pccHack & "fdopen", header: "<stdio.h>".} + +proc open(f: var TFile, filehandle: TFileHandle, mode: TFileMode): bool = + f = fdopen(filehandle, FormatOpen[mode]) + result = f != nil + +proc OpenFile(f: var TFile, filename: string, + mode: TFileMode = fmRead, + bufSize: int = -1): Bool = + result = open(f, filename, mode, bufSize) + +proc openFile(f: var TFile, filehandle: TFileHandle, mode: TFileMode): bool = + result = open(f, filehandle, mode) + +# C routine that is used here: +proc fread(buf: Pointer, size, n: int, f: TFile): int {. + importc: "fread", noDecl.} +proc fseek(f: TFile, offset: clong, whence: int): int {. + importc: "fseek", noDecl.} +proc ftell(f: TFile): int {.importc: "ftell", noDecl.} + +proc fwrite(buf: Pointer, size, n: int, f: TFile): int {. + importc: "fwrite", noDecl.} + +proc readBuffer(f: TFile, buffer: pointer, len: int): int = + result = fread(buffer, 1, len, f) + +proc ReadBytes(f: TFile, a: var openarray[byte], start, len: int): int = + result = readBuffer(f, addr(a[start]), len) + +proc ReadChars(f: TFile, a: var openarray[char], start, len: int): int = + result = readBuffer(f, addr(a[start]), len) + +proc writeBytes(f: TFile, a: openarray[byte], start, len: int): int = + var x = cast[ptr array[0..1000_000_000, byte]](a) + result = writeBuffer(f, addr(x[start]), len) +proc writeChars(f: TFile, a: openarray[char], start, len: int): int = + var x = cast[ptr array[0..1000_000_000, byte]](a) + result = writeBuffer(f, addr(x[start]), len) +proc writeBuffer(f: TFile, buffer: pointer, len: int): int = + result = fwrite(buffer, 1, len, f) + +proc setFilePos(f: TFile, pos: int64) = + if fseek(f, clong(pos), 0) != 0: + raise newException(EIO, "cannot set file position") + +proc getFilePos(f: TFile): int64 = + result = ftell(f) + if result < 0: raise newException(EIO, "cannot retrieve file position") + +proc getFileSize(f: TFile): int64 = + var oldPos = getFilePos(f) + discard fseek(f, 0, 2) # seek the end of the file + result = getFilePos(f) + setFilePos(f, oldPos) + +{.pop.} diff --git a/nimlib/system/sysstr.nim b/nimlib/system/sysstr.nim new file mode 100755 index 000000000..808941c06 --- /dev/null +++ b/nimlib/system/sysstr.nim @@ -0,0 +1,289 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2009 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +# string & sequence handling procedures needed by the code generator + +# strings are dynamically resized, have a length field +# and are zero-terminated, so they can be casted to C +# strings easily +# we don't use refcounts because that's a behaviour +# the programmer may not want + +# implementation: + +proc resize(old: int): int {.inline.} = + if old <= 0: return 4 + elif old < 65536: return old * 2 + else: return old * 3 div 2 # for large arrays * 3/2 is better + +proc cmpStrings(a, b: NimString): int {.inline, compilerProc.} = + if a == b: return 0 + if a == nil: return -1 + if b == nil: return 1 + return c_strcmp(a.data, b.data) + +proc eqStrings(a, b: NimString): bool {.inline, compilerProc.} = + if a == b: return true + if a == nil or b == nil: return false + return a.len == b.len and + c_memcmp(a.data, b.data, a.len * sizeof(char)) == 0'i32 + +proc rawNewString(space: int): NimString {.compilerProc.} = + var s = space + if s < 8: s = 7 + result = cast[NimString](newObj(addr(strDesc), sizeof(TGenericSeq) + + (s+1) * sizeof(char))) + result.space = s + +proc mnewString(len: int): NimString {.exportc.} = + #c_fprintf(c_stdout, "[NEWSTRING] len: %ld\n", len) + result = rawNewString(len) + result.len = len + +proc toNimStr(str: CString, len: int): NimString {.compilerProc.} = + result = rawNewString(len) + result.len = len + c_memcpy(result.data, str, (len+1) * sizeof(Char)) + result.data[len] = '\0' # readline relies on this! + +proc cstrToNimstr(str: CString): NimString {.compilerProc.} = + return toNimstr(str, c_strlen(str)) + +proc copyString(src: NimString): NimString {.compilerProc.} = + if src == nil: return nil + result = rawNewString(src.space) + result.len = src.len + c_memcpy(result.data, src.data, (src.len + 1) * sizeof(Char)) + +proc hashString(s: string): int {.compilerproc.} = + # the compiler needs exactly the same hash function! + # this used to be used for efficient generation of string case statements + var h = 0 + for i in 0..Len(s)-1: + h = h +% Ord(s[i]) + h = h +% h shl 10 + h = h xor (h shr 6) + h = h +% h shl 3 + h = h xor (h shr 11) + h = h +% h shl 15 + result = h + +proc copyStrLast(s: NimString, start, last: int): NimString {.exportc.} = + var start = max(start, 0) + var len = min(last, s.len-1) - start + 1 + if len > 0: + result = rawNewString(len) + result.len = len + c_memcpy(result.data, addr(s.data[start]), len * sizeof(Char)) + result.data[len] = '\0' + else: + result = mnewString(0) + +proc copyStr(s: NimString, start: int): NimString {.exportc.} = + return copyStrLast(s, start, s.len-1) + +proc addChar(s: NimString, c: char): NimString {.compilerProc.} = + result = s + if result.len >= result.space: + result.space = resize(result.space) + result = cast[NimString](growObj(result, + sizeof(TGenericSeq) + (result.space+1) * sizeof(char))) + #var space = resize(result.space) + #result = rawNewString(space) + #copyMem(result, s, s.len * sizeof(char) + sizeof(TGenericSeq)) + #result.space = space + result.data[result.len] = c + result.data[result.len+1] = '\0' + inc(result.len) + +# These routines should be used like following: +# <Nimrod code> +# s &= "hallo " & name & " how do you feel?" +# +# <generated C code> +# { +# s = resizeString(s, 6 + name->len + 17); +# appendString(s, strLit1); +# appendString(s, strLit2); +# appendString(s, strLit3); +# } +# +# <Nimrod code> +# s = "hallo " & name & " how do you feel?" +# +# <generated C code> +# { +# string tmp0; +# tmp0 = rawNewString(6 + name->len + 17); +# appendString(s, strLit1); +# appendString(s, strLit2); +# appendString(s, strLit3); +# s = tmp0; +# } +# +# <Nimrod code> +# s = "" +# +# <generated C code> +# s = rawNewString(0); + +proc resizeString(dest: NimString, addlen: int): NimString {.compilerproc.} = + if dest.len + addLen + 1 <= dest.space: # BUGFIX: this is horrible! + result = dest + else: # slow path: + var sp = max(resize(dest.space), dest.len + addLen + 1) + result = cast[NimString](growObj(dest, sizeof(TGenericSeq) + + (sp+1) * sizeof(Char))) + result.space = sp + #result = rawNewString(sp) + #copyMem(result, dest, dest.len * sizeof(char) + sizeof(TGenericSeq)) + # DO NOT UPDATE LEN YET: dest.len = newLen + +proc appendString(dest, src: NimString) {.compilerproc, inline.} = + c_memcpy(addr(dest.data[dest.len]), src.data, (src.len + 1) * sizeof(Char)) + inc(dest.len, src.len) + +proc appendChar(dest: NimString, c: char) {.compilerproc, inline.} = + dest.data[dest.len] = c + dest.data[dest.len+1] = '\0' + inc(dest.len) + +proc setLengthStr(s: NimString, newLen: int): NimString {.compilerProc.} = + var n = max(newLen, 0) + if n <= s.space: + result = s + else: + result = resizeString(s, n) + result.len = n + result.data[n] = '\0' + +# ----------------- sequences ---------------------------------------------- + +proc incrSeq(seq: PGenericSeq, elemSize: int): PGenericSeq {.compilerProc.} = + # increments the length by one: + # this is needed for supporting ``add``; + # + # add(seq, x) generates: + # seq = incrSeq(seq, sizeof(x)); + # seq[seq->len-1] = x; + when false: + # broken version: + result = seq + if result.len >= result.space: + var s = resize(result.space) + result = cast[PGenericSeq](newSeq(extGetCellType(seq), s)) + genericSeqAssign(result, seq, XXX) + #copyMem(result, seq, seq.len * elemSize + GenericSeqSize) + inc(result.len) + else: + result = seq + if result.len >= result.space: + result.space = resize(result.space) + result = cast[PGenericSeq](growObj(result, elemSize * result.space + + GenericSeqSize)) + # set new elements to zero: + #var s = cast[TAddress](result) + #zeroMem(cast[pointer](s + GenericSeqSize + (result.len * elemSize)), + # (result.space - result.len) * elemSize) + # for i in len .. space-1: + # seq->data[i] = 0 + inc(result.len) + +proc setLengthSeq(seq: PGenericSeq, elemSize, newLen: int): PGenericSeq {. + compilerProc.} = + when false: + # broken version: + result = seq + if result.space < newLen: + var s = max(resize(result.space), newLen) + result = cast[PGenericSeq](newSeq(extGetCellType(seq), s)) + result.len = newLen + else: + result = seq + if result.space < newLen: + result.space = max(resize(result.space), newLen) + result = cast[PGenericSeq](growObj(result, elemSize * result.space + + GenericSeqSize)) + elif newLen < result.len: + # we need to decref here, otherwise the GC leaks! + when not defined(boehmGC) and not defined(nogc): + for i in newLen..result.len-1: + forAllChildrenAux(cast[pointer](cast[TAddress](result) +% + GenericSeqSize +% (i*%elemSize)), + extGetCellType(result).base, waZctDecRef) + # and set the memory to nil: + zeroMem(cast[pointer](cast[TAddress](result) +% GenericSeqSize +% + (newLen*%elemSize)), (result.len-%newLen) *% elemSize) + result.len = newLen + +# --------------- other string routines ---------------------------------- +proc nimIntToStr(x: int): string {.compilerproc.} = + result = newString(sizeof(x)*4) + var i = 0 + var y = x + while True: + var d = y div 10 + result[i] = chr(abs(int(y - d*10)) + ord('0')) + inc(i) + y = d + if y == 0: break + if x < 0: + result[i] = '-' + inc(i) + setLen(result, i) + # mirror the string: + for j in 0..i div 2 - 1: + swap(result[j], result[i-j-1]) + +proc nimFloatToStr(x: float): string {.compilerproc.} = + var buf: array [0..59, char] + c_sprintf(buf, "%#g", x) + return $buf + +proc nimInt64ToStr(x: int64): string {.compilerproc.} = + # we don't rely on C's runtime here as some C compiler's + # int64 support is weak + result = newString(sizeof(x)*4) + var i = 0 + var y = x + while True: + var d = y div 10 + result[i] = chr(abs(int(y - d*10)) + ord('0')) + inc(i) + y = d + if y == 0: break + if x < 0: + result[i] = '-' + inc(i) + setLen(result, i) + # mirror the string: + for j in 0..i div 2 - 1: + swap(result[j], result[i-j-1]) + +proc nimBoolToStr(x: bool): string {.compilerproc.} = + return if x: "true" else: "false" + +proc nimCharToStr(x: char): string {.compilerproc.} = + result = newString(1) + result[0] = x + +proc binaryStrSearch(x: openarray[string], y: string): int {.compilerproc.} = + var + a = 0 + b = len(x) + while a < b: + var mid = (a + b) div 2 + if x[mid] < y: + a = mid + 1 + else: + b = mid + if (a < len(x)) and (x[a] == y): + return a + else: + return -1 diff --git a/nimlib/windows/winlean.nim b/nimlib/windows/winlean.nim new file mode 100755 index 000000000..40e6e7b11 --- /dev/null +++ b/nimlib/windows/winlean.nim @@ -0,0 +1,192 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2009 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## This module implements a small wrapper for some needed Win API procedures, +## so that the Nimrod compiler does not depend on the huge Windows module. + +type + THandle* = int + WINBOOL* = int32 + + TSECURITY_ATTRIBUTES* {.final, pure.} = object + nLength*: int32 + lpSecurityDescriptor*: pointer + bInheritHandle*: WINBOOL + + TSTARTUPINFO* {.final, pure.} = object + cb*: int32 + lpReserved*: cstring + lpDesktop*: cstring + lpTitle*: cstring + dwX*: int32 + dwY*: int32 + dwXSize*: int32 + dwYSize*: int32 + dwXCountChars*: int32 + dwYCountChars*: int32 + dwFillAttribute*: int32 + dwFlags*: int32 + wShowWindow*: int16 + cbReserved2*: int16 + lpReserved2*: pointer + hStdInput*: THANDLE + hStdOutput*: THANDLE + hStdError*: THANDLE + + TPROCESS_INFORMATION* {.final, pure.} = object + hProcess*: THANDLE + hThread*: THANDLE + dwProcessId*: int32 + dwThreadId*: int32 + +const + STARTF_USESHOWWINDOW* = 1'i32 + STARTF_USESTDHANDLES* = 256'i32 + HIGH_PRIORITY_CLASS* = 128'i32 + IDLE_PRIORITY_CLASS* = 64'i32 + NORMAL_PRIORITY_CLASS* = 32'i32 + REALTIME_PRIORITY_CLASS* = 256'i32 + WAIT_TIMEOUT* = 0x00000102'i32 + INFINITE* = -1'i32 + + STD_INPUT_HANDLE* = -10'i32 + STD_OUTPUT_HANDLE* = -11'i32 + STD_ERROR_HANDLE* = -12'i32 + + DETACHED_PROCESS* = 8'i32 + +proc CloseHandle*(hObject: THANDLE): WINBOOL {.stdcall, dynlib: "kernel32", + importc: "CloseHandle".} + +proc ReadFile*(hFile: THandle, Buffer: pointer, nNumberOfBytesToRead: int32, + lpNumberOfBytesRead: var int32, lpOverlapped: pointer): WINBOOL{. + stdcall, dynlib: "kernel32", importc: "ReadFile".} + +proc WriteFile*(hFile: THandle, Buffer: pointer, nNumberOfBytesToWrite: int32, + lpNumberOfBytesWritten: var int32, + lpOverlapped: pointer): WINBOOL{. + stdcall, dynlib: "kernel32", importc: "WriteFile".} + +proc CreatePipe*(hReadPipe, hWritePipe: var THandle, + lpPipeAttributes: var TSECURITY_ATTRIBUTES, + nSize: int32): WINBOOL{. + stdcall, dynlib: "kernel32", importc: "CreatePipe".} + +proc CreateProcess*(lpApplicationName, lpCommandLine: cstring, + lpProcessAttributes: ptr TSECURITY_ATTRIBUTES, + lpThreadAttributes: ptr TSECURITY_ATTRIBUTES, + bInheritHandles: WINBOOL, dwCreationFlags: int32, + lpEnvironment: pointer, lpCurrentDirectory: cstring, + lpStartupInfo: var TSTARTUPINFO, + lpProcessInformation: var TPROCESS_INFORMATION): WINBOOL{. + stdcall, dynlib: "kernel32", importc: "CreateProcessA".} + +proc SuspendThread*(hThread: THANDLE): int32 {.stdcall, dynlib: "kernel32", + importc: "SuspendThread".} +proc ResumeThread*(hThread: THANDLE): int32 {.stdcall, dynlib: "kernel32", + importc: "ResumeThread".} + +proc WaitForSingleObject*(hHandle: THANDLE, dwMilliseconds: int32): int32 {. + stdcall, dynlib: "kernel32", importc: "WaitForSingleObject".} + +proc TerminateProcess*(hProcess: THANDLE, uExitCode: int): WINBOOL {.stdcall, + dynlib: "kernel32", importc: "TerminateProcess".} + +proc GetExitCodeProcess*(hProcess: THANDLE, lpExitCode: var int32): WINBOOL {. + stdcall, dynlib: "kernel32", importc: "GetExitCodeProcess".} + +proc GetStdHandle*(nStdHandle: int32): THANDLE {.stdcall, dynlib: "kernel32", + importc: "GetStdHandle".} +proc SetStdHandle*(nStdHandle: int32, hHandle: THANDLE): WINBOOL {.stdcall, + dynlib: "kernel32", importc: "SetStdHandle".} +proc FlushFileBuffers*(hFile: THANDLE): WINBOOL {.stdcall, dynlib: "kernel32", + importc: "FlushFileBuffers".} + +proc GetLastError*(): int32 {.importc, stdcall, dynlib: "kernel32".} +proc FormatMessageA*(dwFlags: int32, lpSource: pointer, + dwMessageId, dwLanguageId: int32, + lpBuffer: pointer, nSize: int32, + Arguments: pointer): int32 {. + importc, stdcall, dynlib: "kernel32".} +proc LocalFree*(p: pointer) {.importc, stdcall, dynlib: "kernel32".} + +proc GetCurrentDirectoryA*(nBufferLength: int32, lpBuffer: cstring): int32 {. + importc, dynlib: "kernel32", stdcall.} +proc SetCurrentDirectoryA*(lpPathName: cstring): int32 {. + importc, dynlib: "kernel32", stdcall.} +proc CreateDirectoryA*(pathName: cstring, security: Pointer): int32 {. + importc: "CreateDirectoryA", dynlib: "kernel32", stdcall.} +proc RemoveDirectoryA*(lpPathName: cstring): int32 {. + importc, dynlib: "kernel32", stdcall.} +proc SetEnvironmentVariableA*(lpName, lpValue: cstring): int32 {. + stdcall, dynlib: "kernel32", importc.} + +proc GetModuleFileNameA*(handle: THandle, buf: CString, size: int32): int32 {. + importc, dynlib: "kernel32", stdcall.} + +const + FILE_ATTRIBUTE_ARCHIVE* = 32'i32 + FILE_ATTRIBUTE_COMPRESSED* = 2048'i32 + FILE_ATTRIBUTE_NORMAL* = 128'i32 + FILE_ATTRIBUTE_DIRECTORY* = 16'i32 + FILE_ATTRIBUTE_HIDDEN* = 2'i32 + FILE_ATTRIBUTE_READONLY* = 1'i32 + FILE_ATTRIBUTE_SYSTEM* = 4'i32 + + MAX_PATH* = 260 +type + FILETIME* {.final, pure.} = object ## CANNOT BE int64 BECAUSE OF ALIGNMENT + dwLowDateTime*: int32 + dwHighDateTime*: int32 + TWIN32_FIND_DATA* {.pure.} = object + dwFileAttributes*: int32 + ftCreationTime*: FILETIME + ftLastAccessTime*: FILETIME + ftLastWriteTime*: FILETIME + nFileSizeHigh*: int32 + nFileSizeLow*: int32 + dwReserved0: int32 + dwReserved1: int32 + cFileName*: array[0..(MAX_PATH) - 1, char] + cAlternateFileName*: array[0..13, char] +proc FindFirstFileA*(lpFileName: cstring, + lpFindFileData: var TWIN32_FIND_DATA): THANDLE {. + stdcall, dynlib: "kernel32", importc: "FindFirstFileA".} +proc FindNextFileA*(hFindFile: THANDLE, + lpFindFileData: var TWIN32_FIND_DATA): int32 {. + stdcall, dynlib: "kernel32", importc: "FindNextFileA".} +proc FindClose*(hFindFile: THANDLE) {.stdcall, dynlib: "kernel32", + importc: "FindClose".} + +proc GetFullPathNameA*(lpFileName: cstring, nBufferLength: int32, + lpBuffer: cstring, lpFilePart: var cstring): int32 {. + stdcall, dynlib: "kernel32", importc.} +proc GetFileAttributesA*(lpFileName: cstring): int32 {. + stdcall, dynlib: "kernel32", importc.} +proc SetFileAttributesA*(lpFileName: cstring, dwFileAttributes: int32): WINBOOL {. + stdcall, dynlib: "kernel32", importc: "SetFileAttributesA".} + +proc CopyFileA*(lpExistingFileName, lpNewFileName: CString, + bFailIfExists: cint): cint {. + importc, stdcall, dynlib: "kernel32".} + +proc GetEnvironmentStringsA*(): cstring {. + stdcall, dynlib: "kernel32", importc.} +proc FreeEnvironmentStringsA*(para1: cstring): int32 {. + stdcall, dynlib: "kernel32", importc.} + +proc GetCommandLineA*(): CString {.importc, stdcall, dynlib: "kernel32".} + +proc rdFileTime*(f: FILETIME): int64 = + result = ze64(f.dwLowDateTime) or (ze64(f.dwHighDateTime) shl 32) + +proc Sleep*(dwMilliseconds: int32){.stdcall, dynlib: "kernel32", + importc: "Sleep".} + + |