module std+, utf8 {
    private import rt::c;

    // Lookup table indexed by byte value (0..255).
    // NOTE(review): the values look like "sequence length implied by this lead
    // byte" (0 for NUL, 1 for 0x01..0xBF, 2 for 0xC0..0xDF, 3 for 0xE0..0xEF,
    // 4 for 0xF0..0xF7, 5 for 0xF8..0xFB, 6 for 0xFC..0xFD, 1 for 0xFE..0xFF).
    // No function in this module reads it; confirm the intended use with its
    // external users before changing it.
    var utf8table() int = {
        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1
    };

    // See Copyright Notice at the end of this file.
    //
    // Decode one UTF-8 sequence starting at `o`.
    //   o    - pointer to the first byte of the sequence.
    //   val  - optional out-parameter; when not null it receives the decoded
    //          code point.
    // Returns a pointer to the byte following the sequence, or null when the
    // bytes are not an acceptable sequence (bad continuation byte, more than
    // three continuation bytes, value above MAXUNICODE, or an overlong form).
    // The function has no length argument: it reads continuation bytes until
    // it meets one that is not of the form 10xxxxxx, so the caller must make
    // sure the bytes it may touch are readable.
    func utf8decode(o. char, val. int = null) .char {
        define { MAXUNICODE = 0x10ffff; }
        // limits[n] is the largest value that must NOT be encoded with n
        // continuation bytes; anything <= limits[n] is rejected as overlong.
        // limits[0] = 0xff rejects every non-ASCII byte that has no
        // continuation bytes (i.e. a stray continuation byte used as a lead).
        static limits() uint = { 0xff, 0x7f, 0x7ff, 0xffff };
        var s. char = o;
        var c uint = s[0];
        var res uint = 0;
        if c < 0x80 {
            // Plain ASCII: the byte is the code point.
            res = c;
        else
            var count int = 0;              // continuation bytes consumed so far
            // Each leading 1 bit after the first announces one more
            // continuation byte; shift the lead byte left to test them in turn.
            while c & 0x40 {
                var cc int = s[++count];
                if (cc & 0xc0) ~= 0x80; return null;    // not 10xxxxxx
                res = (res << 6) | (cc & 0x3f);         // append its low 6 bits
                c <<= 1;
            }
            // Merge the payload bits that remain from the lead byte.
            res |= ((c & 0x7f) << (count * 5));
            if count > 3 or res > MAXUNICODE or res <= limits[count]; return null;
            s += count;                     // skip the continuation bytes
        }
        if val ~= null; .val = res;
        return s + 1;
    }

    // Count the UTF-8 characters stored in the first `size` bytes of `s`.
    //   s    - start of the buffer (does not need to be NUL-terminated).
    //   size - number of bytes to examine.
    //   tail - optional out-parameter; set to 0 on entry and then to the byte
    //          length of the most recently decoded character.
    // Returns the character count, or -1 when the buffer holds an invalid
    // sequence or ends in the middle of a multi-byte sequence.
    func utf8len_s(s. char, size int, tail. int = null) int {
        func = 0;
        if tail ~= null; .tail = 0;
        forvar pos int = 0; pos < size; func++ {
            // Work out how many bytes the lead byte announces BEFORE decoding,
            // so utf8decode is never allowed to read past s + size.
            var lead int = s[pos] & 0xff;
            var need int = 1;
            if lead >= 0xc0; need = 2;
            if lead >= 0xe0; need = 3;
            if lead >= 0xf0; need = 4;
            // Lead bytes >= 0xf8 are always rejected by utf8decode (more than
            // three continuation bytes) but only after it has read them, so
            // refuse them here; also refuse a sequence cut off by the buffer end.
            if lead >= 0xf8 or need > size - pos; return -1;
            var s1. char = utf8decode(s + pos);
            if s1 == null; return -1;
            if tail ~= null; .tail = s1 - (s + pos);
            pos = s1 - s;
        }
    }

    // True when the first `size` bytes of `s` form valid UTF-8
    // (i.e. utf8len_s does not report -1).
    inline utf8chk(s. char, size int) bool {
        return utf8len_s(s, size) ~= -1;
    }

    // Count the UTF-8 characters in the NUL-terminated string `s`.
    //   tail - optional out-parameter; set to 0 on entry and then to the byte
    //          length of the most recently decoded character (the terminator
    //          itself is not recorded).
    // Returns the character count (terminator excluded), or -1 on an invalid
    // sequence.
    func utf8len(s. char, tail. int = null) int {
        if tail ~= null; .tail = 0;
        for func = 0;; func++ {
            var val int;
            var s1. char = utf8decode(s, &val);
            if s1 == null; return -1;
            if !val; return;                // reached the terminator: return the count
            if tail ~= null; .tail = s1 - s;
            s = s1;
        }
    }

    // Validate the NUL-terminated string `s` and return its size in bytes.
    //   tail - optional out-parameter; set to 0 on entry and then to the byte
    //          length of the most recently decoded character.
    // Returns the number of bytes before the terminator, or -1 on an invalid
    // sequence.
    func utf8size(s. char, tail. int = null) int {
        var o. char = s;
        if tail ~= null; .tail = 0;
        for ;; {
            var val int;
            var s1. char = utf8decode(s, &val);
            if s1 == null; return -1;
            // s1 points one past the terminator, so step back over it.
            if !val; return (s1 - 1) - o;
            if tail ~= null; .tail = s1 - s;
            s = s1;
        }
    }

    // Fetch the character at character index `idx` of the NUL-terminated
    // string `s`.
    //   idx - zero-based character index; an index <= 0 selects the first
    //         character.
    //   c   - out-parameter receiving the decoded code point (must not be null).
    // Returns the byte length of the selected character, or -1 when an invalid
    // sequence is met or the string ends before `idx` is reached. An `idx`
    // equal to the string length selects the terminator (code point 0,
    // length 1).
    func utf8at(s. char, idx int, c. uint32) int {
        forvar i int = 0;; i++ {
            var val int;
            var s1. char = utf8decode(s, &val);
            if s1 == null; return -1;
            if i >= idx {
                .c = val;
                return s1 - s;
            }
            // Terminator reached before idx: stop instead of scanning the
            // memory that follows the string.
            if !val; return -1;
            s = s1;
        }
    }

    // Equality test for two NUL-terminated UTF-8 strings.
    // Returns true only when s1 is valid UTF-8 and both strings have the same
    // byte size and identical bytes; false otherwise (including invalid input).
    func utf8cmp(s1. char, s2. char) bool {
        var ss1 int = utf8size(s1);
        var ss2 int = utf8size(s2);
        // ss1 == ss2 together with ss1 ~= -1 also guarantees s2 is valid.
        if ss1 ~= -1 and ss1 == ss2 and !memcmp(s1, s2, ss1); return true;
        return false;
    }

    // Find the first occurrence of `sub` inside `s` (both NUL-terminated).
    // Both strings are validated first; returns null when either one is
    // invalid UTF-8, otherwise whatever memmem returns for the two byte ranges.
    func utf8str(s. char, sub. char) .char {
        var s1 int = utf8size(s), s2 int = utf8size(sub);
        if s1 == -1 or s2 == -1; return null;
        return memmem(s, s1, sub, s2);
    }
}

/******************************************************************************
* Copyright (C) 1994-2018 Lua.org, PUC-Rio.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
******************************************************************************/