Server : Apache/2.4.43 (Win64) OpenSSL/1.1.1g PHP/7.4.6 System : Windows NT USER-PC 6.1 build 7601 (Windows 7 Professional Edition Service Pack 1) AMD64 User : User ( 0) PHP Version : 7.4.6 Disable Function : NONE Directory : C:/xampp/phpMyAdmin/vendor/phpmyadmin/sql-parser/src/ |
<?php /** * Implementation for UTF-8 strings. * * The subscript operator in PHP, when used with string will return a byte * and not a character. Because in UTF-8 strings a character may occupy more * than one byte, the subscript operator may return an invalid character. * * Because the lexer relies on the subscript operator this class had to be * implemented. */ declare(strict_types=1); namespace PhpMyAdmin\SqlParser; use ArrayAccess; use Exception; use function mb_check_encoding; use function mb_strlen; use function ord; /** * Implements array-like access for UTF-8 strings. * * In this library, this class should be used to parse UTF-8 queries. */ class UtfString implements ArrayAccess { /** * The raw, multi-byte string. * * @var string */ public $str = ''; /** * The index of current byte. * * For ASCII strings, the byte index is equal to the character index. * * @var int */ public $byteIdx = 0; /** * The index of current character. * * For non-ASCII strings, some characters occupy more than one byte and * the character index will have a lower value than the byte index. * * @var int */ public $charIdx = 0; /** * The length of the string (in bytes). * * @var int */ public $byteLen = 0; /** * The length of the string (in characters). * * @var int */ public $charLen = 0; /** * @param string $str the string */ public function __construct($str) { $this->str = $str; $this->byteIdx = 0; $this->charIdx = 0; $this->byteLen = mb_strlen($str, '8bit'); if (! mb_check_encoding($str, 'UTF-8')) { $this->charLen = 0; } else { $this->charLen = mb_strlen($str, 'UTF-8'); } } /** * Checks if the given offset exists. * * @param int $offset the offset to be checked * * @return bool */ public function offsetExists($offset) { return ($offset >= 0) && ($offset < $this->charLen); } /** * Gets the character at given offset. * * @param int $offset the offset to be returned * * @return string|null */ public function offsetGet($offset) { if (($offset < 0) || ($offset >= $this->charLen)) { return null; } $delta = $offset - $this->charIdx; if ($delta > 0) { // Fast forwarding. while ($delta-- > 0) { $this->byteIdx += static::getCharLength($this->str[$this->byteIdx]); ++$this->charIdx; } } elseif ($delta < 0) { // Rewinding. while ($delta++ < 0) { do { $byte = ord($this->str[--$this->byteIdx]); } while (($byte >= 128) && ($byte < 192)); --$this->charIdx; } } $bytesCount = static::getCharLength($this->str[$this->byteIdx]); $ret = ''; for ($i = 0; $bytesCount-- > 0; ++$i) { $ret .= $this->str[$this->byteIdx + $i]; } return $ret; } /** * Sets the value of a character. * * @param int $offset the offset to be set * @param string $value the value to be set * * @throws Exception not implemented. */ public function offsetSet($offset, $value) { throw new Exception('Not implemented.'); } /** * Unsets an index. * * @param int $offset the value to be unset * * @throws Exception not implemented. */ public function offsetUnset($offset) { throw new Exception('Not implemented.'); } /** * Gets the length of an UTF-8 character. * * According to RFC 3629, a UTF-8 character can have at most 4 bytes. * However, this implementation supports UTF-8 characters containing up to 6 * bytes. * * @see https://tools.ietf.org/html/rfc3629 * * @param string $byte the byte to be analyzed * * @return int */ public static function getCharLength($byte) { $byte = ord($byte); if ($byte < 128) { return 1; } elseif ($byte < 224) { return 2; } elseif ($byte < 240) { return 3; } elseif ($byte < 248) { return 4; } elseif ($byte < 252) { return 5; // unofficial } return 6; // unofficial } /** * Returns the length in characters of the string. * * @return int */ public function length() { return $this->charLen; } /** * Returns the contained string. * * @return string */ public function __toString() { return $this->str; } }