PHPXRef 0.7.1 : MyBB 1.8.39 : /inc/3rdparty/diff/Diff/String.php source

[Summary view] [Print] [Text view]
   1  <?php
   2  /**
   3   * Provides static methods for charset and locale safe string manipulation.
   4   *
   5   * Copyright 2003-2017 Horde LLC (http://www.horde.org/)
   6   *
   7   * See the enclosed file COPYING for license information (LGPL). If you
   8   * did not receive this file, see http://www.horde.org/licenses/lgpl21.
   9   *
  10   * @todo Split up in Horde_String_Multibyte for multibyte-safe methods and
  11   *       Horde_String_Locale for locale-safe methods.
  12   *
  13   * @author   Jan Schneider <jan@horde.org>
  14   * @category Horde
  15   * @license  http://www.horde.org/licenses/lgpl21 LGPL 2.1
  16   * @package  Util
  17   */
  18  
  19  // Disallow direct access to this file for security reasons
  20  if(!defined("IN_MYBB"))
  21  {
  22      die("Direct initialization of this file is not allowed.<br /><br />Please make sure IN_MYBB is defined.");
  23  }
  24  
  25  class Horde_String
  26  {
  27      /**
  28       * lower() cache.
  29       *
  30       * @var array
  31       */
  32      protected static $_lowers = array();
  33  
  34      /**
  35       * upper() cache.
  36       *
  37       * @var array
  38       */
  39      protected static $_uppers = array();
  40  
  41      /**
  42       * Converts a string from one charset to another.
  43       *
  44       * Uses the iconv or the mbstring extensions.
  45       * The original string is returned if conversion failed or none
  46       * of the extensions were available.
  47       *
  48       * @param mixed $input    The data to be converted. If $input is an an
  49       *                        array, the array's values get converted
  50       *                        recursively.
  51       * @param string $from    The string's current charset.
  52       * @param string $to      The charset to convert the string to.
  53       * @param boolean $force  Force conversion?
  54       *
  55       * @return mixed  The converted input data.
  56       */
  57      public static function convertCharset($input, $from, $to, $force = false)
  58      {
  59          /* Don't bother converting numbers. */
  60          if (is_numeric($input)) {
  61              return $input;
  62          }
  63  
  64          /* If the from and to character sets are identical, return now. */
  65          if (!$force && $from == $to) {
  66              return $input;
  67          }
  68          $from = self::lower($from);
  69          $to = self::lower($to);
  70          if (!$force && $from == $to) {
  71              return $input;
  72          }
  73  
  74          if (is_array($input)) {
  75              $tmp = array();
  76              foreach ($input as $key => $val) {
  77                  $tmp[self::_convertCharset($key, $from, $to)] = self::convertCharset($val, $from, $to, $force);
  78              }
  79              return $tmp;
  80          }
  81  
  82          if (is_object($input)) {
  83              // PEAR_Error/Exception objects are almost guaranteed to contain
  84              // recursion, which will cause a segfault in PHP. We should never
  85              // reach this line, but add a check.
  86              if (($input instanceof Exception) ||
  87                  ($input instanceof PEAR_Error)) {
  88                  return '';
  89              }
  90  
  91              $input = clone $input;
  92              $vars = get_object_vars($input);
  93              foreach ($vars as $key => $val) {
  94                  $input->$key = self::convertCharset($val, $from, $to, $force);
  95              }
  96              return $input;
  97          }
  98  
  99          if (!is_string($input)) {
 100              return $input;
 101          }
 102  
 103          return self::_convertCharset($input, $from, $to);
 104      }
 105  
 106      /**
 107       * Internal function used to do charset conversion.
 108       *
 109       * @param string $input  See self::convertCharset().
 110       * @param string $from   See self::convertCharset().
 111       * @param string $to     See self::convertCharset().
 112       *
 113       * @return string  The converted string.
 114       */
 115      protected static function _convertCharset($input, $from, $to)
 116      {
 117          /* Use utf8_[en|de]code() if possible and if the string isn't too
 118           * large (less than 16 MB = 16 * 1024 * 1024 = 16777216 bytes) - these
 119           * functions use more memory. */
 120          if (Horde_Util::extensionExists('xml') &&
 121              ((strlen($input) < 16777216) ||
 122               !Horde_Util::extensionExists('iconv') ||
 123               !Horde_Util::extensionExists('mbstring'))) {
 124              if (($to == 'utf-8') &&
 125                  in_array($from, array('iso-8859-1', 'us-ascii', 'utf-8'))) {
 126                  return utf8_encode($input);
 127              }
 128  
 129              if (($from == 'utf-8') &&
 130                  in_array($to, array('iso-8859-1', 'us-ascii', 'utf-8'))) {
 131                  return utf8_decode($input);
 132              }
 133          }
 134  
 135          /* Try UTF7-IMAP conversions. */
 136          if (($from == 'utf7-imap') || ($to == 'utf7-imap')) {
 137              try {
 138                  if ($from == 'utf7-imap') {
 139                      return self::convertCharset(Horde_Imap_Client_Utf7imap::Utf7ImapToUtf8($input), 'UTF-8', $to);
 140                  } else {
 141                      if ($from == 'utf-8') {
 142                          $conv = $input;
 143                      } else {
 144                          $conv = self::convertCharset($input, $from, 'UTF-8');
 145                      }
 146                      return Horde_Imap_Client_Utf7imap::Utf8ToUtf7Imap($conv);
 147                  }
 148              } catch (Horde_Imap_Client_Exception $e) {
 149                  return $input;
 150              }
 151          }
 152  
 153          /* Try iconv with transliteration. */
 154          if (Horde_Util::extensionExists('iconv')) {
 155              unset($php_errormsg);
 156              ini_set('track_errors', 1);
 157              $out = @iconv($from, $to . '//TRANSLIT', $input);
 158              $errmsg = isset($php_errormsg);
 159              ini_restore('track_errors');
 160              if (!$errmsg && $out !== false) {
 161                  return $out;
 162              }
 163          }
 164  
 165          /* Try mbstring. */
 166          if (Horde_Util::extensionExists('mbstring')) {
 167              $out = @mb_convert_encoding($input, $to, self::_mbstringCharset($from));
 168              if (!empty($out)) {
 169                  return $out;
 170              }
 171          }
 172  
 173          return $input;
 174      }
 175  
 176      /**
 177       * Makes a string lowercase.
 178       *
 179       * @param string $string   The string to be converted.
 180       * @param boolean $locale  If true the string will be converted based on
 181       *                         a given charset, locale independent else.
 182       * @param string $charset  If $locale is true, the charset to use when
 183       *                         converting.
 184       *
 185       * @return string  The string with lowercase characters.
 186       */
 187      public static function lower($string, $locale = false, $charset = null)
 188      {
 189          if ($locale) {
 190              if (Horde_Util::extensionExists('mbstring')) {
 191                  if (is_null($charset)) {
 192                      throw new InvalidArgumentException('$charset argument must not be null');
 193                  }
 194                  $ret = @mb_strtolower($string, self::_mbstringCharset($charset));
 195                  if (!empty($ret)) {
 196                      return $ret;
 197                  }
 198              }
 199              return strtolower($string);
 200          }
 201  
 202          if (!isset(self::$_lowers[$string])) {
 203              $language = setlocale(LC_CTYPE, 0);
 204              setlocale(LC_CTYPE, 'C');
 205              self::$_lowers[$string] = strtolower($string);
 206              setlocale(LC_CTYPE, $language);
 207          }
 208  
 209          return self::$_lowers[$string];
 210      }
 211  
 212      /**
 213       * Makes a string uppercase.
 214       *
 215       * @param string $string   The string to be converted.
 216       * @param boolean $locale  If true the string will be converted based on a
 217       *                         given charset, locale independent else.
 218       * @param string $charset  If $locale is true, the charset to use when
 219       *                         converting. If not provided the current charset.
 220       *
 221       * @return string  The string with uppercase characters.
 222       */
 223      public static function upper($string, $locale = false, $charset = null)
 224      {
 225          if ($locale) {
 226              if (Horde_Util::extensionExists('mbstring')) {
 227                  if (is_null($charset)) {
 228                      throw new InvalidArgumentException('$charset argument must not be null');
 229                  }
 230                  $ret = @mb_strtoupper($string, self::_mbstringCharset($charset));
 231                  if (!empty($ret)) {
 232                      return $ret;
 233                  }
 234              }
 235              return strtoupper($string);
 236          }
 237  
 238          if (!isset(self::$_uppers[$string])) {
 239              $language = setlocale(LC_CTYPE, 0);
 240              setlocale(LC_CTYPE, 'C');
 241              self::$_uppers[$string] = strtoupper($string);
 242              setlocale(LC_CTYPE, $language);
 243          }
 244  
 245          return self::$_uppers[$string];
 246      }
 247  
 248      /**
 249       * Returns a string with the first letter capitalized if it is
 250       * alphabetic.
 251       *
 252       * @param string $string   The string to be capitalized.
 253       * @param boolean $locale  If true the string will be converted based on a
 254       *                         given charset, locale independent else.
 255       * @param string $charset  The charset to use, defaults to current charset.
 256       *
 257       * @return string  The capitalized string.
 258       */
 259      public static function ucfirst($string, $locale = false, $charset = null)
 260      {
 261          if ($locale) {
 262              if (is_null($charset)) {
 263                  throw new InvalidArgumentException('$charset argument must not be null');
 264              }
 265              $first = self::substr($string, 0, 1, $charset);
 266              if (self::isAlpha($first, $charset)) {
 267                  $string = self::upper($first, true, $charset) . self::substr($string, 1, null, $charset);
 268              }
 269          } else {
 270              $string = self::upper(substr($string, 0, 1), false) . substr($string, 1);
 271          }
 272  
 273          return $string;
 274      }
 275  
 276      /**
 277       * Returns a string with the first letter of each word capitalized if it is
 278       * alphabetic.
 279       *
 280       * Sentences are splitted into words at whitestrings.
 281       *
 282       * @param string $string   The string to be capitalized.
 283       * @param boolean $locale  If true the string will be converted based on a
 284       *                         given charset, locale independent else.
 285       * @param string $charset  The charset to use, defaults to current charset.
 286       *
 287       * @return string  The capitalized string.
 288       */
 289      public static function ucwords($string, $locale = false, $charset = null)
 290      {
 291          $words = preg_split('/(\s+)/', $string, -1, PREG_SPLIT_DELIM_CAPTURE);
 292          for ($i = 0, $c = count($words); $i < $c; $i += 2) {
 293              $words[$i] = self::ucfirst($words[$i], $locale, $charset);
 294          }
 295          return implode('', $words);
 296      }
 297  
 298      /**
 299       * Returns part of a string.
 300       *
 301       * @param string $string   The string to be converted.
 302       * @param integer $start   The part's start position, zero based.
 303       * @param integer $length  The part's length.
 304       * @param string $charset  The charset to use when calculating the part's
 305       *                         position and length, defaults to current
 306       *                         charset.
 307       *
 308       * @return string  The string's part.
 309       */
 310      public static function substr($string, $start, $length = null,
 311                                    $charset = 'UTF-8')
 312      {
 313          if (is_null($length)) {
 314              $length = self::length($string, $charset) - $start;
 315          }
 316  
 317          if ($length === 0) {
 318              return '';
 319          }
 320  
 321          $error = false;
 322  
 323          /* Try mbstring. */
 324          if (Horde_Util::extensionExists('mbstring')) {
 325              $ret = @mb_substr($string, $start, $length, self::_mbstringCharset($charset));
 326  
 327              /* mb_substr() returns empty string on failure. */
 328              if (strlen($ret)) {
 329                  return $ret;
 330              }
 331              $error = true;
 332          }
 333  
 334          /* Try iconv. */
 335          if (Horde_Util::extensionExists('iconv')) {
 336              $ret = @iconv_substr($string, $start, $length, $charset);
 337  
 338              /* iconv_substr() returns false on failure. */
 339              if ($ret !== false) {
 340                  return $ret;
 341              }
 342              $error = true;
 343          }
 344  
 345          /* Try intl. */
 346          if (Horde_Util::extensionExists('intl')) {
 347              $ret = self::convertCharset(
 348                  @grapheme_substr(
 349                      self::convertCharset($string, $charset, 'UTF-8'),
 350                      $start,
 351                      $length
 352                  ),
 353                  'UTF-8',
 354                  $charset
 355              );
 356  
 357              /* grapheme_substr() returns false on failure. */
 358              if ($ret !== false) {
 359                  return $ret;
 360              }
 361              $error = true;
 362          }
 363  
 364          return $error
 365              ? ''
 366              : substr($string, $start, $length);
 367      }
 368  
 369      /**
 370       * Returns the character (not byte) length of a string.
 371       *
 372       * @param string $string  The string to return the length of.
 373       * @param string $charset The charset to use when calculating the string's
 374       *                        length.
 375       *
 376       * @return integer  The string's length.
 377       */
 378      public static function length($string, $charset = 'UTF-8')
 379      {
 380          $charset = self::lower($charset);
 381  
 382          if ($charset == 'utf-8' || $charset == 'utf8') {
 383              return strlen(utf8_decode($string));
 384          }
 385  
 386          if (Horde_Util::extensionExists('mbstring')) {
 387              $ret = @mb_strlen($string, self::_mbstringCharset($charset));
 388              if (!empty($ret)) {
 389                  return $ret;
 390              }
 391          }
 392          if (Horde_Util::extensionExists('intl')) {
 393              return grapheme_strlen(
 394                  self::convertCharset($string, $charset, 'UTF-8')
 395              );
 396          }
 397  
 398          return strlen($string);
 399      }
 400  
 401      /**
 402       * Returns the numeric position of the first occurrence of $needle
 403       * in the $haystack string.
 404       *
 405       * @param string $haystack  The string to search through.
 406       * @param string $needle    The string to search for.
 407       * @param integer $offset   Character in $haystack to start searching at.
 408       * @param string $charset   Charset of $needle.
 409       *
 410       * @return integer  The position of first occurrence.
 411       */
 412      public static function pos(
 413          $haystack, $needle, $offset = 0, $charset = 'UTF-8'
 414      )
 415      {
 416          return self::_pos($haystack, $needle, $offset, $charset, 'strpos');
 417      }
 418  
 419      /**
 420       * Returns the numeric position of the first case-insensitive occurrence
 421       * of $needle in the $haystack string.
 422       *
 423       * @since 2.5.0
 424       *
 425       * @param string $haystack  The string to search through.
 426       * @param string $needle    The string to search for.
 427       * @param integer $offset   Character in $haystack to start searching at.
 428       * @param string $charset   Charset of $needle.
 429       *
 430       * @return integer  The position of first case-insensitive occurrence.
 431       */
 432      public static function ipos(
 433          $haystack, $needle, $offset = 0, $charset = 'UTF-8'
 434      )
 435      {
 436          return self::_pos($haystack, $needle, $offset, $charset, 'stripos');
 437      }
 438  
 439      /**
 440       * Returns the numeric position of the last occurrence of $needle
 441       * in the $haystack string.
 442       *
 443       * @param string $haystack  The string to search through.
 444       * @param string $needle    The string to search for.
 445       * @param integer $offset   Character in $haystack to start searching at.
 446       * @param string $charset   Charset of $needle.
 447       *
 448       * @return integer  The position of last occurrence.
 449       */
 450      public static function rpos(
 451          $haystack, $needle, $offset = 0, $charset = 'UTF-8'
 452      )
 453      {
 454          return self::_pos($haystack, $needle, $offset, $charset, 'strrpos');
 455      }
 456  
 457      /**
 458       * Returns the numeric position of the last case-insensitive occurrence of
 459       * $needle in the $haystack string.
 460       *
 461       * @since 2.5.0
 462       *
 463       * @param string $haystack  The string to search through.
 464       * @param string $needle    The string to search for.
 465       * @param integer $offset   Character in $haystack to start searching at.
 466       * @param string $charset   Charset of $needle.
 467       *
 468       * @return integer  The position of last case-insensitive occurrence.
 469       */
 470      public static function ripos(
 471          $haystack, $needle, $offset = 0, $charset = 'UTF-8'
 472      )
 473      {
 474          return self::_pos($haystack, $needle, $offset, $charset, 'strripos');
 475      }
 476  
 477      /**
 478       * Perform string position searches.
 479       *
 480       * @param string $haystack  The string to search through.
 481       * @param string $needle    The string to search for.
 482       * @param integer $offset   Character in $haystack to start searching at.
 483       * @param string $charset   Charset of $needle.
 484       * @param string $func      Function to use.
 485       *
 486       * @return integer  The position of occurrence.
 487       *
 488       */
 489      protected static function _pos(
 490          $haystack, $needle, $offset, $charset, $func
 491      )
 492      {
 493          if (Horde_Util::extensionExists('mbstring')) {
 494              unset($php_errormsg);
 495              $track_errors = ini_set('track_errors', 1);
 496              $ret = @call_user_func('mb_' . $func, $haystack, $needle, $offset, self::_mbstringCharset($charset));
 497              ini_set('track_errors', $track_errors);
 498              if (!isset($php_errormsg)) {
 499                  return $ret;
 500              }
 501          }
 502  
 503          if (Horde_Util::extensionExists('intl')) {
 504              unset($php_errormsg);
 505              $track_errors = ini_set('track_errors', 1);
 506              $ret = self::convertCharset(
 507                  @call_user_func(
 508                      'grapheme_' . $func,
 509                      self::convertCharset($haystack, $charset, 'UTF-8'),
 510                      self::convertCharset($needle, $charset, 'UTF-8'),
 511                      $offset
 512                  ),
 513                  'UTF-8',
 514                  $charset
 515              );
 516              ini_set('track_errors', $track_errors);
 517              if (!isset($php_errormsg)) {
 518                  return $ret;
 519              }
 520          }
 521  
 522          return $func($haystack, $needle, $offset);
 523      }
 524  
 525      /**
 526       * Returns a string padded to a certain length with another string.
 527       * This method behaves exactly like str_pad() but is multibyte safe.
 528       *
 529       * @param string $input    The string to be padded.
 530       * @param integer $length  The length of the resulting string.
 531       * @param string $pad      The string to pad the input string with. Must
 532       *                         be in the same charset like the input string.
 533       * @param const $type      The padding type. One of STR_PAD_LEFT,
 534       *                         STR_PAD_RIGHT, or STR_PAD_BOTH.
 535       * @param string $charset  The charset of the input and the padding
 536       *                         strings.
 537       *
 538       * @return string  The padded string.
 539       */
 540      public static function pad($input, $length, $pad = ' ',
 541                                 $type = STR_PAD_RIGHT, $charset = 'UTF-8')
 542      {
 543          $mb_length = self::length($input, $charset);
 544          $sb_length = strlen($input);
 545          $pad_length = self::length($pad, $charset);
 546  
 547          /* Return if we already have the length. */
 548          if ($mb_length >= $length) {
 549              return $input;
 550          }
 551  
 552          /* Shortcut for single byte strings. */
 553          if ($mb_length == $sb_length && $pad_length == strlen($pad)) {
 554              return str_pad($input, $length, $pad, $type);
 555          }
 556  
 557          switch ($type) {
 558          case STR_PAD_LEFT:
 559              $left = $length - $mb_length;
 560              $output = self::substr(str_repeat($pad, ceil($left / $pad_length)), 0, $left, $charset) . $input;
 561              break;
 562  
 563          case STR_PAD_BOTH:
 564              $left = floor(($length - $mb_length) / 2);
 565              $right = ceil(($length - $mb_length) / 2);
 566              $output = self::substr(str_repeat($pad, ceil($left / $pad_length)), 0, $left, $charset) .
 567                  $input .
 568                  self::substr(str_repeat($pad, ceil($right / $pad_length)), 0, $right, $charset);
 569              break;
 570  
 571          case STR_PAD_RIGHT:
 572              $right = $length - $mb_length;
 573              $output = $input . self::substr(str_repeat($pad, ceil($right / $pad_length)), 0, $right, $charset);
 574              break;
 575          }
 576  
 577          return $output;
 578      }
 579  
 580      /**
 581       * Wraps the text of a message.
 582       *
 583       * @param string $string         String containing the text to wrap.
 584       * @param integer $width         Wrap the string at this number of
 585       *                               characters.
 586       * @param string $break          Character(s) to use when breaking lines.
 587       * @param boolean $cut           Whether to cut inside words if a line
 588       *                               can't be wrapped.
 589       * @param boolean $line_folding  Whether to apply line folding rules per
 590       *                               RFC 822 or similar. The correct break
 591       *                               characters including leading whitespace
 592       *                               have to be specified too.
 593       *
 594       * @return string  String containing the wrapped text.
 595       */
 596      public static function wordwrap($string, $width = 75, $break = "\n",
 597                                      $cut = false, $line_folding = false)
 598      {
 599          $breakRegex = '(?:' . preg_quote($break) . ')';
 600          $rpos = self::rpos($break, "\n");
 601          if ($rpos === false) {
 602              $rpos = 0;
 603          } else {
 604              $rpos++;
 605          }
 606          $wrapped = '';
 607          $hasWrapped = false;
 608  
 609          while (self::length($string, 'UTF-8') > $width) {
 610              $line = self::substr($string, 0, $width + ($hasWrapped ? $rpos : 0), 'UTF-8');
 611              $string = self::substr($string, self::length($line, 'UTF-8'), null, 'UTF-8');
 612  
 613              // Make sure we didn't cut a word, unless we want hard breaks
 614              // anyway.
 615              if (!$cut && preg_match('/^(.+?)((\s|\r?\n).*)/us', $string, $match)) {
 616                  $line .= $match[1];
 617                  $string = $match[2];
 618              }
 619  
 620              // Wrap at existing line breaks.
 621              $regex = '/^(' . ($hasWrapped ? $breakRegex : '') . '.*?)(\r?\n)(.*)$/us';
 622              if (preg_match($regex, $line, $match)) {
 623                  $wrapped .= $match[1] . $match[2];
 624                  $string = $match[3] . $string;
 625                  $hasWrapped = false;
 626                  continue;
 627              }
 628  
 629              // Wrap at the last colon or semicolon followed by a whitespace if
 630              // doing line folding.
 631              if ($line_folding &&
 632                  preg_match('/^(.*?)(;|:)(\s+.*)$/us', $line, $match)) {
 633                  $wrapped .= $match[1] . $match[2];
 634                  $string = $break . $match[3] . $string;
 635                  $hasWrapped = true;
 636                  continue;
 637              }
 638  
 639              // Wrap at the last whitespace of $line.
 640              $sub = $line_folding
 641                  ? '(' . ($hasWrapped ? $breakRegex : '') . '.+[^\s])'
 642                  : '(' . ($hasWrapped ? $breakRegex : '') . '.*)';
 643  
 644              if (preg_match('/^' . $sub . '(\s+)(.*)$/u', $line, $match)) {
 645                  $wrapped .= $match[1];
 646                  $string = $break . ($line_folding ? $match[2] : '')
 647                      . $match[3] . $string;
 648                  $hasWrapped = true;
 649                  continue;
 650              }
 651  
 652              // Hard wrap if necessary.
 653              if ($cut) {
 654                  $wrapped .= $line;
 655                  $string = $break . $string;
 656                  $hasWrapped = true;
 657                  continue;
 658              }
 659  
 660              $wrapped .= $line;
 661              $hasWrapped = false;
 662          }
 663  
 664          return $wrapped . $string;
 665      }
 666  
 667      /**
 668       * Wraps the text of a message.
 669       *
 670       * @param string $text        String containing the text to wrap.
 671       * @param integer $length     Wrap $text at this number of characters.
 672       * @param string $break_char  Character(s) to use when breaking lines.
 673       * @param boolean $quote      Ignore lines that are wrapped with the '>'
 674       *                            character (RFC 2646)? If true, we don't
 675       *                            remove any padding whitespace at the end of
 676       *                            the string.
 677       *
 678       * @return string  String containing the wrapped text.
 679       */
 680      public static function wrap($text, $length = 80, $break_char = "\n",
 681                                  $quote = false)
 682      {
 683          $paragraphs = array();
 684  
 685          foreach (preg_split('/\r?\n/', $text) as $input) {
 686              if ($quote && (strpos($input, '>') === 0)) {
 687                  $line = $input;
 688              } else {
 689                  /* We need to handle the Usenet-style signature line
 690                   * separately; since the space after the two dashes is
 691                   * REQUIRED, we don't want to trim the line. */
 692                  if ($input != '-- ') {
 693                      $input = rtrim($input);
 694                  }
 695                  $line = self::wordwrap($input, $length, $break_char);
 696              }
 697  
 698              $paragraphs[] = $line;
 699          }
 700  
 701          return implode($break_char, $paragraphs);
 702      }
 703  
 704      /**
 705       * Return a truncated string, suitable for notifications.
 706       *
 707       * @param string $text     The original string.
 708       * @param integer $length  The maximum length.
 709       *
 710       * @return string  The truncated string, if longer than $length.
 711       */
 712      public static function truncate($text, $length = 100)
 713      {
 714          return (self::length($text) > $length)
 715              ? rtrim(self::substr($text, 0, $length - 3)) . '...'
 716              : $text;
 717      }
 718  
 719      /**
 720       * Return an abbreviated string, with characters in the middle of the
 721       * excessively long string replaced by '...'.
 722       *
 723       * @param string $text     The original string.
 724       * @param integer $length  The length at which to abbreviate.
 725       *
 726       * @return string  The abbreviated string, if longer than $length.
 727       */
 728      public static function abbreviate($text, $length = 20)
 729      {
 730          return (self::length($text) > $length)
 731              ? rtrim(self::substr($text, 0, round(($length - 3) / 2))) . '...' . ltrim(self::substr($text, (($length - 3) / 2) * -1))
 732              : $text;
 733      }
 734  
 735      /**
 736       * Returns the common leading part of two strings.
 737       *
 738       * @param string $str1  A string.
 739       * @param string $str2  Another string.
 740       *
 741       * @return string  The start of $str1 and $str2 that is identical in both.
 742       */
 743      public static function common($str1, $str2)
 744      {
 745          for ($result = '', $i = 0;
 746               isset($str1[$i]) && isset($str2[$i]) && $str1[$i] == $str2[$i];
 747               $i++) {
 748              $result .= $str1[$i];
 749          }
 750          return $result;
 751      }
 752  
 753      /**
 754       * Returns true if the every character in the parameter is an alphabetic
 755       * character.
 756       *
 757       * @param string $string   The string to test.
 758       * @param string $charset  The charset to use when testing the string.
 759       *
 760       * @return boolean  True if the parameter was alphabetic only.
 761       */
 762      public static function isAlpha($string, $charset)
 763      {
 764          if (!Horde_Util::extensionExists('mbstring')) {
 765              return ctype_alpha($string);
 766          }
 767  
 768          $charset = self::_mbstringCharset($charset);
 769          $old_charset = mb_regex_encoding();
 770  
 771          if ($charset != $old_charset) {
 772              @mb_regex_encoding($charset);
 773          }
 774          $alpha = !@mb_ereg_match('[^[:alpha:]]', $string);
 775          if ($charset != $old_charset) {
 776              @mb_regex_encoding($old_charset);
 777          }
 778  
 779          return $alpha;
 780      }
 781  
 782      /**
 783       * Returns true if ever character in the parameter is a lowercase letter in
 784       * the current locale.
 785       *
 786       * @param string $string   The string to test.
 787       * @param string $charset  The charset to use when testing the string.
 788       *
 789       * @return boolean  True if the parameter was lowercase.
 790       */
 791      public static function isLower($string, $charset)
 792      {
 793          return ((self::lower($string, true, $charset) === $string) &&
 794                  self::isAlpha($string, $charset));
 795      }
 796  
 797      /**
 798       * Returns true if every character in the parameter is an uppercase letter
 799       * in the current locale.
 800       *
 801       * @param string $string   The string to test.
 802       * @param string $charset  The charset to use when testing the string.
 803       *
 804       * @return boolean  True if the parameter was uppercase.
 805       */
 806      public static function isUpper($string, $charset)
 807      {
 808          return ((self::upper($string, true, $charset) === $string) &&
 809                  self::isAlpha($string, $charset));
 810      }
 811  
 812      /**
 813       * Performs a multibyte safe regex match search on the text provided.
 814       *
 815       * @param string $text     The text to search.
 816       * @param array $regex     The regular expressions to use, without perl
 817       *                         regex delimiters (e.g. '/' or '|').
 818       * @param string $charset  The character set of the text.
 819       *
 820       * @return array  The matches array from the first regex that matches.
 821       */
 822      public static function regexMatch($text, $regex, $charset = null)
 823      {
 824          if (!empty($charset)) {
 825              $regex = self::convertCharset($regex, $charset, 'utf-8');
 826              $text = self::convertCharset($text, $charset, 'utf-8');
 827          }
 828  
 829          $matches = array();
 830          foreach ($regex as $val) {
 831              if (preg_match('/' . $val . '/u', $text, $matches)) {
 832                  break;
 833              }
 834          }
 835  
 836          if (!empty($charset)) {
 837              $matches = self::convertCharset($matches, 'utf-8', $charset);
 838          }
 839  
 840          return $matches;
 841      }
 842  
 843      /**
 844       * Check to see if a string is valid UTF-8.
 845       *
 846       * @param string $text  The text to check.
 847       *
 848       * @return boolean  True if valid UTF-8.
 849       */
 850      public static function validUtf8($text)
 851      {
 852          $text = strval($text);
 853  
 854          // First check for illegal surrogate pair sequences. See RFC 3629.
 855          if (preg_match('/\xE0[\x80-\x9F][\x80-\xBF]|\xED[\xA0-\xBF][\x80-\xBF]/S', $text)) {
 856              return false;
 857          }
 858  
 859          for ($i = 0, $len = strlen($text); $i < $len; ++$i) {
 860              $c = ord($text[$i]);
 861              if ($c > 128) {
 862                  if ($c > 247) {
 863                      // STD 63 (RFC 3629) eliminates 5 & 6-byte characters.
 864                      return false;
 865                  } elseif ($c > 239) {
 866                      $j = 3;
 867                  } elseif ($c > 223) {
 868                      $j = 2;
 869                  } elseif ($c > 191) {
 870                      $j = 1;
 871                  } else {
 872                      return false;
 873                  }
 874  
 875                  if (($i + $j) > $len) {
 876                      return false;
 877                  }
 878  
 879                  do {
 880                      $c = ord($text[++$i]);
 881                      if (($c < 128) || ($c > 191)) {
 882                          return false;
 883                      }
 884                  } while (--$j);
 885              }
 886          }
 887  
 888          return true;
 889      }
 890  
 891      /**
 892       * Workaround charsets that don't work with mbstring functions.
 893       *
 894       * @param string $charset  The original charset.
 895       *
 896       * @return string  The charset to use with mbstring functions.
 897       */
 898      protected static function _mbstringCharset($charset)
 899      {
 900          /* mbstring functions do not handle the 'ks_c_5601-1987' &
 901           * 'ks_c_5601-1989' charsets. However, these charsets are used, for
 902           * example, by various versions of Outlook to send Korean characters.
 903           * Use UHC (CP949) encoding instead. See, e.g.,
 904           * http://lists.w3.org/Archives/Public/ietf-charsets/2001AprJun/0030.html */
 905          return in_array(self::lower($charset), array('ks_c_5601-1987', 'ks_c_5601-1989'))
 906              ? 'UHC'
 907              : $charset;
 908      }
 909  
 910      /**
 911       * Strip UTF-8 byte order mark (BOM) from string data.
 912       *
 913       * @param string $str  Input string (UTF-8).
 914       *
 915       * @return string  Stripped string (UTF-8).
 916       */
 917      public static function trimUtf8Bom($str)
 918      {
 919          return (substr($str, 0, 3) == pack('CCC', 239, 187, 191))
 920              ? substr($str, 3)
 921              : $str;
 922      }
 923  
 924  }
PHP Cross Reference of MyBB 1.8.39

/inc/3rdparty/diff/Diff/ -> String.php (source)