[ Index ] |
PHP Cross Reference of MyBB 1.8.38 |
[Summary view] [Print] [Text view]
<?php
/**
 * Provides static methods for charset and locale safe string manipulation.
 *
 * Copyright 2003-2017 Horde LLC (http://www.horde.org/)
 *
 * See the enclosed file COPYING for license information (LGPL). If you
 * did not receive this file, see http://www.horde.org/licenses/lgpl21.
 *
 * @todo Split up in Horde_String_Multibyte for multibyte-safe methods and
 *       Horde_String_Locale for locale-safe methods.
 *
 * @author    Jan Schneider <jan@horde.org>
 * @category  Horde
 * @license   http://www.horde.org/licenses/lgpl21 LGPL 2.1
 * @package   Util
 */

// Disallow direct access to this file for security reasons
if(!defined("IN_MYBB"))
{
    die("Direct initialization of this file is not allowed.<br /><br />Please make sure IN_MYBB is defined.");
}

class Horde_String
{
    /**
     * lower() cache, keyed by the original string.
     *
     * @var array
     */
    protected static $_lowers = array();

    /**
     * upper() cache, keyed by the original string.
     *
     * @var array
     */
    protected static $_uppers = array();

    /**
     * Converts a string from one charset to another.
     *
     * Uses the iconv or the mbstring extensions.
     * The original string is returned if conversion failed or none
     * of the extensions were available.
     *
     * @param mixed $input    The data to be converted. If $input is an
     *                        array, the array's keys and values get
     *                        converted recursively. Objects are cloned and
     *                        their accessible properties converted.
     * @param string $from    The string's current charset.
     * @param string $to      The charset to convert the string to.
     * @param boolean $force  Force conversion?
     *
     * @return mixed  The converted input data.
     */
    public static function convertCharset($input, $from, $to, $force = false)
    {
        /* Don't bother converting numbers. */
        if (is_numeric($input)) {
            return $input;
        }

        /* If the from and to character sets are identical, return now. */
        if (!$force && $from == $to) {
            return $input;
        }
        /* Compare again after case normalization ('UTF-8' vs. 'utf-8'). */
        $from = self::lower($from);
        $to = self::lower($to);
        if (!$force && $from == $to) {
            return $input;
        }

        if (is_array($input)) {
            $tmp = array();
            foreach ($input as $key => $val) {
                $tmp[self::_convertCharset($key, $from, $to)] = self::convertCharset($val, $from, $to, $force);
            }
            return $tmp;
        }

        if (is_object($input)) {
            // PEAR_Error/Exception objects are almost guaranteed to contain
            // recursion, which will cause a segfault in PHP. We should never
            // reach this line, but add a check.
            if (($input instanceof Exception) ||
                ($input instanceof PEAR_Error)) {
                return '';
            }

            /* Work on a copy so the caller's object is left untouched. */
            $input = clone $input;
            $vars = get_object_vars($input);
            foreach ($vars as $key => $val) {
                $input->$key = self::convertCharset($val, $from, $to, $force);
            }
            return $input;
        }

        if (!is_string($input)) {
            return $input;
        }

        return self::_convertCharset($input, $from, $to);
    }

    /**
     * Internal function used to do charset conversion.
     *
     * Expects $from and $to to be lowercased already (the comparisons below
     * are against lowercase literals).
     *
     * @param string $input  See self::convertCharset().
     * @param string $from   See self::convertCharset().
     * @param string $to     See self::convertCharset().
     *
     * @return string  The converted string, or $input unchanged if no
     *                 conversion method succeeded.
     */
    protected static function _convertCharset($input, $from, $to)
    {
        /* Use utf8_[en|de]code() if possible and if the string isn't too
         * large (less than 16 MB = 16 * 1024 * 1024 = 16777216 bytes) - these
         * functions use more memory.
         * NOTE(review): utf8_encode()/utf8_decode() are deprecated as of
         * PHP 8.2 - confirm whether this shortcut should be retired. */
        if (Horde_Util::extensionExists('xml') &&
            ((strlen($input) < 16777216) ||
             !Horde_Util::extensionExists('iconv') ||
             !Horde_Util::extensionExists('mbstring'))) {
            if (($to == 'utf-8') &&
                in_array($from, array('iso-8859-1', 'us-ascii', 'utf-8'))) {
                return utf8_encode($input);
            }

            if (($from == 'utf-8') &&
                in_array($to, array('iso-8859-1', 'us-ascii', 'utf-8'))) {
                return utf8_decode($input);
            }
        }

        /* Try UTF7-IMAP conversions. */
        if (($from == 'utf7-imap') || ($to == 'utf7-imap')) {
            try {
                if ($from == 'utf7-imap') {
                    return self::convertCharset(Horde_Imap_Client_Utf7imap::Utf7ImapToUtf8($input), 'UTF-8', $to);
                } else {
                    if ($from == 'utf-8') {
                        $conv = $input;
                    } else {
                        $conv = self::convertCharset($input, $from, 'UTF-8');
                    }
                    return Horde_Imap_Client_Utf7imap::Utf8ToUtf7Imap($conv);
                }
            } catch (Horde_Imap_Client_Exception $e) {
                return $input;
            }
        }

        /* Try iconv with transliteration.
         * NOTE(review): the track_errors/$php_errormsg mechanism was removed
         * in PHP 8.0; there only the "$out !== false" check is effective. */
        if (Horde_Util::extensionExists('iconv')) {
            unset($php_errormsg);
            ini_set('track_errors', 1);
            $out = @iconv($from, $to . '//TRANSLIT', $input);
            $errmsg = isset($php_errormsg);
            ini_restore('track_errors');
            if (!$errmsg && $out !== false) {
                return $out;
            }
        }

        /* Try mbstring. */
        if (Horde_Util::extensionExists('mbstring')) {
            $out = @mb_convert_encoding($input, $to, self::_mbstringCharset($from));
            if (!empty($out)) {
                return $out;
            }
        }

        return $input;
    }

    /**
     * Makes a string lowercase.
     *
     * @param string $string   The string to be converted.
     * @param boolean $locale  If true the string will be converted based on
     *                         a given charset, locale independent else.
     * @param string $charset  If $locale is true, the charset to use when
     *                         converting.
     *
     * @return string  The string with lowercase characters.
     * @throws InvalidArgumentException  If $locale is true, mbstring is
     *                                   available and $charset is null.
     */
    public static function lower($string, $locale = false, $charset = null)
    {
        if ($locale) {
            if (Horde_Util::extensionExists('mbstring')) {
                if (is_null($charset)) {
                    throw new InvalidArgumentException('$charset argument must not be null');
                }
                $ret = @mb_strtolower($string, self::_mbstringCharset($charset));
                if (!empty($ret)) {
                    return $ret;
                }
            }
            return strtolower($string);
        }

        if (!isset(self::$_lowers[$string])) {
            /* Temporarily switch to the "C" locale so strtolower() only
             * touches ASCII letters, then restore the previous locale. */
            $language = setlocale(LC_CTYPE, 0);
            setlocale(LC_CTYPE, 'C');
            self::$_lowers[$string] = strtolower($string);
            setlocale(LC_CTYPE, $language);
        }

        return self::$_lowers[$string];
    }

    /**
     * Makes a string uppercase.
     *
     * @param string $string   The string to be converted.
     * @param boolean $locale  If true the string will be converted based on a
     *                         given charset, locale independent else.
     * @param string $charset  If $locale is true, the charset to use when
     *                         converting.
     *
     * @return string  The string with uppercase characters.
     * @throws InvalidArgumentException  If $locale is true, mbstring is
     *                                   available and $charset is null.
     */
    public static function upper($string, $locale = false, $charset = null)
    {
        if ($locale) {
            if (Horde_Util::extensionExists('mbstring')) {
                if (is_null($charset)) {
                    throw new InvalidArgumentException('$charset argument must not be null');
                }
                $ret = @mb_strtoupper($string, self::_mbstringCharset($charset));
                if (!empty($ret)) {
                    return $ret;
                }
            }
            return strtoupper($string);
        }

        if (!isset(self::$_uppers[$string])) {
            /* Temporarily switch to the "C" locale so strtoupper() only
             * touches ASCII letters, then restore the previous locale. */
            $language = setlocale(LC_CTYPE, 0);
            setlocale(LC_CTYPE, 'C');
            self::$_uppers[$string] = strtoupper($string);
            setlocale(LC_CTYPE, $language);
        }

        return self::$_uppers[$string];
    }

    /**
     * Returns a string with the first letter capitalized if it is
     * alphabetic.
     *
     * @param string $string   The string to be capitalized.
     * @param boolean $locale  If true the string will be converted based on a
     *                         given charset, locale independent else.
     * @param string $charset  The charset to use; required when $locale is
     *                         true.
     *
     * @return string  The capitalized string.
     * @throws InvalidArgumentException  If $locale is true and $charset is
     *                                   null.
     */
    public static function ucfirst($string, $locale = false, $charset = null)
    {
        if ($locale) {
            if (is_null($charset)) {
                throw new InvalidArgumentException('$charset argument must not be null');
            }
            $first = self::substr($string, 0, 1, $charset);
            if (self::isAlpha($first, $charset)) {
                $string = self::upper($first, true, $charset) . self::substr($string, 1, null, $charset);
            }
        } else {
            $string = self::upper(substr($string, 0, 1), false) . substr($string, 1);
        }

        return $string;
    }

    /**
     * Returns a string with the first letter of each word capitalized if it is
     * alphabetic.
     *
     * Sentences are split into words at whitespace.
     *
     * @param string $string   The string to be capitalized.
     * @param boolean $locale  If true the string will be converted based on a
     *                         given charset, locale independent else.
     * @param string $charset  The charset to use; required when $locale is
     *                         true.
     *
     * @return string  The capitalized string.
     */
    public static function ucwords($string, $locale = false, $charset = null)
    {
        /* The delimiters are captured too, so words sit at the even indexes
         * and the original whitespace at the odd ones. */
        $words = preg_split('/(\s+)/', $string, -1, PREG_SPLIT_DELIM_CAPTURE);
        for ($i = 0, $c = count($words); $i < $c; $i += 2) {
            $words[$i] = self::ucfirst($words[$i], $locale, $charset);
        }
        return implode('', $words);
    }

    /**
     * Returns part of a string.
     *
     * @param string $string   The string to be converted.
     * @param integer $start   The part's start position, zero based.
     * @param integer $length  The part's length. If null, everything from
     *                         $start to the end of the string.
     * @param string $charset  The charset to use when calculating the part's
     *                         position and length.
     *
     * @return string  The string's part.
     */
    public static function substr($string, $start, $length = null,
                                  $charset = 'UTF-8')
    {
        if (is_null($length)) {
            $length = self::length($string, $charset) - $start;
        }

        if ($length === 0) {
            return '';
        }

        $error = false;

        /* Try mbstring. */
        if (Horde_Util::extensionExists('mbstring')) {
            $ret = @mb_substr($string, $start, $length, self::_mbstringCharset($charset));

            /* mb_substr() returns empty string on failure. */
            if (strlen($ret)) {
                return $ret;
            }
            $error = true;
        }

        /* Try iconv. */
        if (Horde_Util::extensionExists('iconv')) {
            $ret = @iconv_substr($string, $start, $length, $charset);

            /* iconv_substr() returns false on failure. */
            if ($ret !== false) {
                return $ret;
            }
            $error = true;
        }

        /* Try intl. */
        if (Horde_Util::extensionExists('intl')) {
            $ret = self::convertCharset(
                @grapheme_substr(
                    self::convertCharset($string, $charset, 'UTF-8'),
                    $start,
                    $length
                ),
                'UTF-8',
                $charset
            );

            /* grapheme_substr() returns false on failure. */
            if ($ret !== false) {
                return $ret;
            }
            $error = true;
        }

        /* Only fall back to the byte-wise substr() if no multibyte-aware
         * extension was available at all. */
        return $error
            ? ''
            : substr($string, $start, $length);
    }

    /**
     * Returns the character (not byte) length of a string.
     *
     * @param string $string   The string to return the length of.
     * @param string $charset  The charset to use when calculating the string's
     *                         length.
     *
     * @return integer  The string's length.
     */
    public static function length($string, $charset = 'UTF-8')
    {
        $charset = self::lower($charset);

        /* NOTE(review): utf8_decode() is deprecated as of PHP 8.2 - confirm
         * whether this shortcut should move to mbstring. */
        if ($charset == 'utf-8' || $charset == 'utf8') {
            return strlen(utf8_decode($string));
        }

        if (Horde_Util::extensionExists('mbstring')) {
            $ret = @mb_strlen($string, self::_mbstringCharset($charset));
            if (!empty($ret)) {
                return $ret;
            }
        }
        if (Horde_Util::extensionExists('intl')) {
            return grapheme_strlen(
                self::convertCharset($string, $charset, 'UTF-8')
            );
        }

        return strlen($string);
    }

    /**
     * Returns the numeric position of the first occurrence of $needle
     * in the $haystack string.
     *
     * @param string $haystack  The string to search through.
     * @param string $needle    The string to search for.
     * @param integer $offset   Character in $haystack to start searching at.
     * @param string $charset   Charset of $needle.
     *
     * @return integer  The position of first occurrence.
     */
    public static function pos(
        $haystack, $needle, $offset = 0, $charset = 'UTF-8'
    )
    {
        return self::_pos($haystack, $needle, $offset, $charset, 'strpos');
    }

    /**
     * Returns the numeric position of the first case-insensitive occurrence
     * of $needle in the $haystack string.
     *
     * @since 2.5.0
     *
     * @param string $haystack  The string to search through.
     * @param string $needle    The string to search for.
     * @param integer $offset   Character in $haystack to start searching at.
     * @param string $charset   Charset of $needle.
     *
     * @return integer  The position of first case-insensitive occurrence.
     */
    public static function ipos(
        $haystack, $needle, $offset = 0, $charset = 'UTF-8'
    )
    {
        return self::_pos($haystack, $needle, $offset, $charset, 'stripos');
    }

    /**
     * Returns the numeric position of the last occurrence of $needle
     * in the $haystack string.
     *
     * @param string $haystack  The string to search through.
     * @param string $needle    The string to search for.
     * @param integer $offset   Character in $haystack to start searching at.
     * @param string $charset   Charset of $needle.
     *
     * @return integer  The position of last occurrence.
     */
    public static function rpos(
        $haystack, $needle, $offset = 0, $charset = 'UTF-8'
    )
    {
        return self::_pos($haystack, $needle, $offset, $charset, 'strrpos');
    }

    /**
     * Returns the numeric position of the last case-insensitive occurrence of
     * $needle in the $haystack string.
     *
     * @since 2.5.0
     *
     * @param string $haystack  The string to search through.
     * @param string $needle    The string to search for.
     * @param integer $offset   Character in $haystack to start searching at.
     * @param string $charset   Charset of $needle.
     *
     * @return integer  The position of last case-insensitive occurrence.
     */
    public static function ripos(
        $haystack, $needle, $offset = 0, $charset = 'UTF-8'
    )
    {
        return self::_pos($haystack, $needle, $offset, $charset, 'strripos');
    }

    /**
     * Perform string position searches.
     *
     * Tries the mb_* variant of $func first, then the grapheme_* variant,
     * and finally the plain byte-wise function.
     *
     * @param string $haystack  The string to search through.
     * @param string $needle    The string to search for.
     * @param integer $offset   Character in $haystack to start searching at.
     * @param string $charset   Charset of $needle.
     * @param string $func      Function to use ('strpos', 'stripos',
     *                          'strrpos' or 'strripos').
     *
     * @return integer  The position of occurrence.
     */
    protected static function _pos(
        $haystack, $needle, $offset, $charset, $func
    )
    {
        /* NOTE(review): the track_errors/$php_errormsg mechanism was removed
         * in PHP 8.0, where $php_errormsg is never set and the first
         * available extension's result is always returned. */
        if (Horde_Util::extensionExists('mbstring')) {
            unset($php_errormsg);
            $track_errors = ini_set('track_errors', 1);
            $ret = @call_user_func('mb_' . $func, $haystack, $needle, $offset, self::_mbstringCharset($charset));
            ini_set('track_errors', $track_errors);
            if (!isset($php_errormsg)) {
                return $ret;
            }
        }

        if (Horde_Util::extensionExists('intl')) {
            unset($php_errormsg);
            $track_errors = ini_set('track_errors', 1);
            $ret = self::convertCharset(
                @call_user_func(
                    'grapheme_' . $func,
                    self::convertCharset($haystack, $charset, 'UTF-8'),
                    self::convertCharset($needle, $charset, 'UTF-8'),
                    $offset
                ),
                'UTF-8',
                $charset
            );
            ini_set('track_errors', $track_errors);
            if (!isset($php_errormsg)) {
                return $ret;
            }
        }

        return $func($haystack, $needle, $offset);
    }

    /**
     * Returns a string padded to a certain length with another string.
     * This method behaves exactly like str_pad() but is multibyte safe.
     *
     * @param string $input    The string to be padded.
     * @param integer $length  The length of the resulting string.
     * @param string $pad      The string to pad the input string with. Must
     *                         be in the same charset like the input string.
     * @param const $type      The padding type. One of STR_PAD_LEFT,
     *                         STR_PAD_RIGHT, or STR_PAD_BOTH.
     * @param string $charset  The charset of the input and the padding
     *                         strings.
     *
     * @return string  The padded string.
     */
    public static function pad($input, $length, $pad = ' ',
                               $type = STR_PAD_RIGHT, $charset = 'UTF-8')
    {
        $mb_length = self::length($input, $charset);
        $sb_length = strlen($input);
        $pad_length = self::length($pad, $charset);

        /* Return if we already have the length. */
        if ($mb_length >= $length) {
            return $input;
        }

        /* Shortcut for single byte strings. An empty pad string is handed to
         * str_pad() as well, so that it reports the invalid argument instead
         * of this method dividing by zero below. */
        if (!$pad_length ||
            ($mb_length == $sb_length && $pad_length == strlen($pad))) {
            return str_pad($input, $length, $pad, $type);
        }

        /* The character counts are cast to integers so that substr() can
         * recognize a zero length with its strict comparison. */
        switch ($type) {
        case STR_PAD_LEFT:
            $left = $length - $mb_length;
            $output = self::substr(str_repeat($pad, (int) ceil($left / $pad_length)), 0, $left, $charset) . $input;
            break;

        case STR_PAD_BOTH:
            $left = (int) floor(($length - $mb_length) / 2);
            $right = (int) ceil(($length - $mb_length) / 2);
            $output = self::substr(str_repeat($pad, (int) ceil($left / $pad_length)), 0, $left, $charset) .
                $input .
                self::substr(str_repeat($pad, (int) ceil($right / $pad_length)), 0, $right, $charset);
            break;

        case STR_PAD_RIGHT:
            $right = $length - $mb_length;
            $output = $input . self::substr(str_repeat($pad, (int) ceil($right / $pad_length)), 0, $right, $charset);
            break;

        default:
            /* Unknown padding type: let str_pad() report it rather than
             * returning an undefined variable. */
            return str_pad($input, $length, $pad, $type);
        }

        return $output;
    }

    /**
     * Wraps the text of a message.
     *
     * @param string $string         String containing the text to wrap
     *                               (handled as UTF-8).
     * @param integer $width         Wrap the string at this number of
     *                               characters.
     * @param string $break          Character(s) to use when breaking lines.
     * @param boolean $cut           Whether to cut inside words if a line
     *                               can't be wrapped.
     * @param boolean $line_folding  Whether to apply line folding rules per
     *                               RFC 822 or similar. The correct break
     *                               characters including leading whitespace
     *                               have to be specified too.
     *
     * @return string  String containing the wrapped text.
     */
    public static function wordwrap($string, $width = 75, $break = "\n",
                                    $cut = false, $line_folding = false)
    {
        /* Quote for the '/' delimiter used by every pattern below, so that a
         * break string containing '/' cannot terminate the pattern early. */
        $breakRegex = '(?:' . preg_quote($break, '/') . ')';

        /* Number of characters of $break up to and including its last
         * newline; a continuation line is allowed to be that much longer
         * because it starts with the break string. */
        $rpos = self::rpos($break, "\n");
        if ($rpos === false) {
            $rpos = 0;
        } else {
            $rpos++;
        }
        $wrapped = '';
        $hasWrapped = false;

        while (self::length($string, 'UTF-8') > $width) {
            $line = self::substr($string, 0, $width + ($hasWrapped ? $rpos : 0), 'UTF-8');
            $string = self::substr($string, self::length($line, 'UTF-8'), null, 'UTF-8');

            // Make sure we didn't cut a word, unless we want hard breaks
            // anyway.
            if (!$cut && preg_match('/^(.+?)((\s|\r?\n).*)/us', $string, $match)) {
                $line .= $match[1];
                $string = $match[2];
            }

            // Wrap at existing line breaks.
            $regex = '/^(' . ($hasWrapped ? $breakRegex : '') . '.*?)(\r?\n)(.*)$/us';
            if (preg_match($regex, $line, $match)) {
                $wrapped .= $match[1] . $match[2];
                $string = $match[3] . $string;
                $hasWrapped = false;
                continue;
            }

            // Wrap at the first colon or semicolon followed by a whitespace
            // if doing line folding.
            if ($line_folding &&
                preg_match('/^(.*?)(;|:)(\s+.*)$/us', $line, $match)) {
                $wrapped .= $match[1] . $match[2];
                $string = $break . $match[3] . $string;
                $hasWrapped = true;
                continue;
            }

            // Wrap at the last whitespace of $line.
            $sub = $line_folding
                ? '(' . ($hasWrapped ? $breakRegex : '') . '.+[^\s])'
                : '(' . ($hasWrapped ? $breakRegex : '') . '.*)';

            if (preg_match('/^' . $sub . '(\s+)(.*)$/u', $line, $match)) {
                $wrapped .= $match[1];
                $string = $break . ($line_folding ? $match[2] : '')
                    . $match[3] . $string;
                $hasWrapped = true;
                continue;
            }

            // Hard wrap if necessary.
            if ($cut) {
                $wrapped .= $line;
                $string = $break . $string;
                $hasWrapped = true;
                continue;
            }

            $wrapped .= $line;
            $hasWrapped = false;
        }

        return $wrapped . $string;
    }

    /**
     * Wraps the text of a message.
     *
     * @param string $text        String containing the text to wrap.
     * @param integer $length     Wrap $text at this number of characters.
     * @param string $break_char  Character(s) to use when breaking lines.
     * @param boolean $quote      Ignore lines that are wrapped with the '>'
     *                            character (RFC 2646)? If true, we don't
     *                            remove any padding whitespace at the end of
     *                            the string.
     *
     * @return string  String containing the wrapped text.
     */
    public static function wrap($text, $length = 80, $break_char = "\n",
                                $quote = false)
    {
        $paragraphs = array();

        foreach (preg_split('/\r?\n/', $text) as $input) {
            if ($quote && (strpos($input, '>') === 0)) {
                $line = $input;
            } else {
                /* We need to handle the Usenet-style signature line
                 * separately; since the space after the two dashes is
                 * REQUIRED, we don't want to trim the line. */
                if ($input != '-- ') {
                    $input = rtrim($input);
                }
                $line = self::wordwrap($input, $length, $break_char);
            }

            $paragraphs[] = $line;
        }

        return implode($break_char, $paragraphs);
    }

    /**
     * Return a truncated string, suitable for notifications.
     *
     * @param string $text     The original string.
     * @param integer $length  The maximum length.
     *
     * @return string  The truncated string, if longer than $length.
     */
    public static function truncate($text, $length = 100)
    {
        return (self::length($text) > $length)
            ? rtrim(self::substr($text, 0, $length - 3)) . '...'
            : $text;
    }

    /**
     * Return an abbreviated string, with characters in the middle of the
     * excessively long string replaced by '...'.
     *
     * @param string $text     The original string.
     * @param integer $length  The length at which to abbreviate.
     *
     * @return string  The abbreviated string, if longer than $length.
     */
    public static function abbreviate($text, $length = 20)
    {
        if (self::length($text) <= $length) {
            return $text;
        }

        /* Split the characters left after the ellipsis between head and
         * tail; for an odd remainder the head gets the extra character.
         * Explicit integers avoid passing fractional floats as offsets. */
        $head = max(0, (int) round(($length - 3) / 2));
        $tail = max(0, (int) (($length - 3) / 2));

        /* A zero-width tail must not be expressed as offset -0, which would
         * select the whole string. */
        return ($head > 0 ? rtrim(self::substr($text, 0, $head)) : '')
            . '...'
            . ($tail > 0 ? ltrim(self::substr($text, -$tail)) : '');
    }

    /**
     * Returns the common leading part of two strings.
     *
     * The comparison is done byte by byte.
     *
     * @param string $str1  A string.
     * @param string $str2  Another string.
     *
     * @return string  The start of $str1 and $str2 that is identical in both.
     */
    public static function common($str1, $str2)
    {
        for ($result = '', $i = 0;
             isset($str1[$i]) && isset($str2[$i]) && $str1[$i] == $str2[$i];
             $i++) {
            $result .= $str1[$i];
        }
        return $result;
    }

    /**
     * Returns true if every character in the parameter is an alphabetic
     * character.
     *
     * @param string $string   The string to test.
     * @param string $charset  The charset to use when testing the string.
     *
     * @return boolean  True if the parameter was alphabetic only.
     */
    public static function isAlpha($string, $charset)
    {
        if (!Horde_Util::extensionExists('mbstring')) {
            return ctype_alpha($string);
        }

        $charset = self::_mbstringCharset($charset);
        $old_charset = mb_regex_encoding();

        if ($charset != $old_charset) {
            @mb_regex_encoding($charset);
        }
        /* Search the whole string for a non-alphabetic character.
         * mb_ereg_match() only matches at the beginning of the string and
         * would therefore test nothing but the first character. */
        $alpha = !@mb_ereg('[^[:alpha:]]', $string);
        if ($charset != $old_charset) {
            @mb_regex_encoding($old_charset);
        }

        return $alpha;
    }

    /**
     * Returns true if every character in the parameter is a lowercase letter
     * in the current locale.
     *
     * @param string $string   The string to test.
     * @param string $charset  The charset to use when testing the string.
     *
     * @return boolean  True if the parameter was lowercase.
     */
    public static function isLower($string, $charset)
    {
        return ((self::lower($string, true, $charset) === $string) &&
                self::isAlpha($string, $charset));
    }

    /**
     * Returns true if every character in the parameter is an uppercase letter
     * in the current locale.
     *
     * @param string $string   The string to test.
     * @param string $charset  The charset to use when testing the string.
     *
     * @return boolean  True if the parameter was uppercase.
     */
    public static function isUpper($string, $charset)
    {
        return ((self::upper($string, true, $charset) === $string) &&
                self::isAlpha($string, $charset));
    }

    /**
     * Performs a multibyte safe regex match search on the text provided.
     *
     * @param string $text     The text to search.
     * @param array $regex     The regular expressions to use, without perl
     *                         regex delimiters (e.g. '/' or '|').
     * @param string $charset  The character set of the text.
     *
     * @return array  The matches array from the first regex that matches.
     */
    public static function regexMatch($text, $regex, $charset = null)
    {
        if (!empty($charset)) {
            $regex = self::convertCharset($regex, $charset, 'utf-8');
            $text = self::convertCharset($text, $charset, 'utf-8');
        }

        $matches = array();
        foreach ($regex as $val) {
            if (preg_match('/' . $val . '/u', $text, $matches)) {
                break;
            }
        }

        if (!empty($charset)) {
            $matches = self::convertCharset($matches, 'utf-8', $charset);
        }

        return $matches;
    }

    /**
     * Check to see if a string is valid UTF-8.
     *
     * @param string $text  The text to check.
     *
     * @return boolean  True if valid UTF-8.
     */
    public static function validUtf8($text)
    {
        $text = strval($text);

        // First check for overlong 3-byte sequences (E0 80..9F) and for
        // encoded UTF-16 surrogates (ED A0..BF). See RFC 3629.
        if (preg_match('/\xE0[\x80-\x9F][\x80-\xBF]|\xED[\xA0-\xBF][\x80-\xBF]/S', $text)) {
            return false;
        }

        for ($i = 0, $len = strlen($text); $i < $len; ++$i) {
            $c = ord($text[$i]);
            // Every byte with the high bit set (0x80 and up) starts or
            // belongs to a multibyte sequence.
            if ($c > 127) {
                // $j is the number of continuation bytes the lead byte
                // announces.
                if ($c > 247) {
                    // STD 63 (RFC 3629) eliminates 5 & 6-byte characters.
                    return false;
                } elseif ($c > 239) {
                    $j = 3;
                } elseif ($c > 223) {
                    $j = 2;
                } elseif ($c > 191) {
                    $j = 1;
                } else {
                    // A continuation byte (0x80-0xBF) in lead position.
                    return false;
                }

                // The last continuation byte sits at index $i + $j, which
                // has to exist.
                if (($i + $j) >= $len) {
                    return false;
                }

                do {
                    $c = ord($text[++$i]);
                    if (($c < 128) || ($c > 191)) {
                        return false;
                    }
                } while (--$j);
            }
        }

        return true;
    }

    /**
     * Workaround charsets that don't work with mbstring functions.
     *
     * @param string $charset  The original charset.
     *
     * @return string  The charset to use with mbstring functions.
     */
    protected static function _mbstringCharset($charset)
    {
        /* mbstring functions do not handle the 'ks_c_5601-1987' &
         * 'ks_c_5601-1989' charsets. However, these charsets are used, for
         * example, by various versions of Outlook to send Korean characters.
         * Use UHC (CP949) encoding instead. See, e.g.,
         * http://lists.w3.org/Archives/Public/ietf-charsets/2001AprJun/0030.html */
        return in_array(self::lower($charset), array('ks_c_5601-1987', 'ks_c_5601-1989'))
            ? 'UHC'
            : $charset;
    }

    /**
     * Strip UTF-8 byte order mark (BOM) from string data.
     *
     * @param string $str  Input string (UTF-8).
     *
     * @return string  Stripped string (UTF-8).
     */
    public static function trimUtf8Bom($str)
    {
        /* The UTF-8 BOM is the byte sequence EF BB BF. */
        return (substr($str, 0, 3) == pack('CCC', 239, 187, 191))
            ? substr($str, 3)
            : $str;
    }

}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
2005 - 2021 © MyBB.de | Alle Rechte vorbehalten! | Sponsor: netcup | Cross-referenced by PHPXref |