%PDF- %PDF-
Direktori : /home1/lightco1/public_html/lightingrepublic.com.au/libraries/koowa/helper/ |
Current File : //home1/lightco1/public_html/lightingrepublic.com.au/libraries/koowa/helper/string.php |
<?php
/**
 * @version     $Id$
 * @package     Koowa_Helper
 * @copyright   Copyright (C) 2007 - 2012 Johan Janssens. All rights reserved.
 * @license     GNU GPLv3 <http://www.gnu.org/licenses/gpl.html>
 * @link        http://www.nooku.org
 */

/**
 * PHP mbstring and iconv local configuration
 */

// Configure mbstring when it is available. If it is not loaded, try to load it at
// runtime, except on Windows. dl() is not available in every SAPI, hence the guard.
if (extension_loaded('mbstring')
    || (strtoupper(substr(PHP_OS, 0, 3)) !== 'WIN' && function_exists('dl') && @dl('mbstring.so')))
{
    // Make sure to suppress the output in case ini_set is disabled
    @ini_set('mbstring.internal_encoding', 'UTF-8');
    @ini_set('mbstring.http_input', 'UTF-8');
    @ini_set('mbstring.http_output', 'UTF-8');
}

// Configure iconv when it is available, with the same runtime-load fallback.
if (function_exists('iconv')
    || (strtoupper(substr(PHP_OS, 0, 3)) !== 'WIN' && function_exists('dl') && @dl('iconv.so')))
{
    if (version_compare(PHP_VERSION, '5.6.0', '>='))
    {
        // The per-extension iconv encoding settings are deprecated as of PHP 5.6;
        // default_charset is the documented replacement.
        @ini_set('default_charset', 'UTF-8');
    }
    else
    {
        // these are settings that can be set inside code
        iconv_set_encoding("internal_encoding", "UTF-8");
        iconv_set_encoding("input_encoding", "UTF-8");
        iconv_set_encoding("output_encoding", "UTF-8");
    }
}

/**
 * String helper class for utf-8 data
 *
 * All functions assume the validity of utf-8 strings.
 *
 * @author      Johan Janssens <johan@nooku.org>
 * @category    Koowa
 * @package     Koowa_Helper
 * @subpackage  String
 * @static
 */
class KHelperString
{
    /**
     * UTF-8 aware alternative to strpos
     *
     * Find position of first occurrence of a string
     *
     * @param  string    $str    String being examined
     * @param  string    $search String being searched for
     * @param  int|false $offset Optional, the character position from which the search starts
     * @return mixed Number of characters before the first match or FALSE on failure
     * @see http://www.php.net/strpos
     */
    public static function strpos($str, $search, $offset = FALSE)
    {
        // An empty haystack or needle can never match
        if (!strlen($str) || !strlen($search)) {
            return FALSE;
        }

        if ($offset === FALSE) {
            return mb_strpos($str, $search);
        }

        return mb_strpos($str, $search, $offset);
    }

    /**
     * UTF-8 aware alternative to strrpos
     *
     * Finds position of last occurrence of a string
     *
     * @param  string    $str    String being examined
     * @param  string    $search String being searched for
     * @param  int|false $offset Optional, the character position from which the search starts
     * @return mixed Number of characters before the last match or FALSE on failure
     * @see http://www.php.net/strrpos
     */
    public static function strrpos($str, $search, $offset = FALSE)
    {
        if ($offset === FALSE)
        {
            // Emulate behaviour of strrpos rather than raising a warning. The check is
            // on length so that the haystack "0" is still searched.
            if (strlen((string) $str) == 0) {
                return FALSE;
            }

            return mb_strrpos($str, $search);
        }

        if (!is_int($offset))
        {
            trigger_error('utf8_strrpos expects parameter 3 to be long',E_USER_WARNING);
            return FALSE;
        }

        // Search only the part from $offset onwards, then translate the position
        // back into the coordinates of the full string
        $str = mb_substr($str, $offset);
        $pos = mb_strrpos($str, $search);

        if ($pos !== FALSE) {
            return $pos + $offset;
        }

        return FALSE;
    }

    /**
     * UTF-8 aware alternative to substr
     *
     * Return part of a string given character offset (and optionally length)
     *
     * @param  string    $str
     * @param  int       $offset Number of UTF-8 characters offset (from left)
     * @param  int|false $length Optional, length in UTF-8 characters from offset
     * @return mixed String or FALSE if failure
     * @see http://www.php.net/substr
     */
    public static function substr($str, $offset, $length = FALSE)
    {
        if ($length === FALSE) {
            return mb_substr($str, $offset);
        }

        return mb_substr($str, $offset, $length);
    }

    /**
     * UTF-8 aware alternative to strtolower
     *
     * Make a string lowercase
     *
     * Note: The concept of a character's "case" only exists in some alphabets
     * such as Latin, Greek, Cyrillic, Armenian and archaic Georgian - it does
     * not exist in the Chinese alphabet, for example. See Unicode Standard
     * Annex #21: Case Mappings
     *
     * @param  string $str
     * @return mixed Either string in lowercase or FALSE if UTF-8 invalid
     * @see http://www.php.net/strtolower
     */
    public static function strtolower($str)
    {
        return mb_strtolower($str);
    }

    /**
     * UTF-8 aware alternative to strtoupper
     *
     * Make a string uppercase
     *
     * Note: The concept of a character's "case" only exists in some alphabets
     * such as Latin, Greek, Cyrillic, Armenian and archaic Georgian - it does
     * not exist in the Chinese alphabet, for example. See Unicode Standard
     * Annex #21: Case Mappings
     *
     * @param  string $str
     * @return mixed Either string in uppercase or FALSE if UTF-8 invalid
     * @see http://www.php.net/strtoupper
     */
    public static function strtoupper($str)
    {
        return mb_strtoupper($str);
    }

    /**
     * UTF-8 aware alternative to strlen
     *
     * Returns the number of characters in the string (NOT THE NUMBER OF BYTES)
     *
     * @param  string $str UTF-8 string
     * @return int Number of UTF-8 characters in string
     * @see http://www.php.net/strlen
     */
    public static function strlen($str)
    {
        return mb_strlen($str);
    }

    /**
     * UTF-8 aware alternative to str_ireplace
     *
     * Case-insensitive version of str_replace
     *
     * @param  string|array $search  String(s) to search for
     * @param  string|array $replace Replacement string(s)
     * @param  string       $str     String to perform the replacements on
     * @param  int|null     $count   Optional, maximum number of replacements per search
     *                               term. It is not written back to the caller.
     * @return string The string with replacements applied
     * @see http://www.php.net/str_ireplace
     */
    public static function str_ireplace($search, $replace, $str, $count = NULL)
    {
        if (is_array($search))
        {
            // Apply each search term in turn. A missing replacement counts as ''.
            foreach (array_keys($search) as $k)
            {
                if (!is_array($replace)) {
                    $with = $replace;
                } elseif (array_key_exists($k, $replace)) {
                    $with = $replace[$k];
                } else {
                    $with = '';
                }

                $str = self::str_ireplace($search[$k], $with, $str, $count);
            }

            return $str;
        }

        // Byte lengths are used throughout: matching is done on a lowercased copy
        // and the resulting byte offsets are applied to the original string.
        $slen   = strlen($search);
        $lendif = strlen($replace) - $slen;

        if ($slen == 0) {
            return $str;
        }

        $search  = self::strtolower($search);
        $search  = preg_quote($search, '/');
        $lstr    = self::strtolower($str);
        $i       = 0;
        $matched = 0;

        while (preg_match('/(.*)'.$search.'/Us',$lstr, $matches))
        {
            if ($i === $count) {
                break;
            }

            $mlen     = strlen($matches[0]);
            $lstr     = substr($lstr, $mlen);
            $str      = substr_replace($str, $replace, $matched+strlen($matches[1]), $slen);
            $matched += $mlen + $lendif;
            $i++;
        }

        return $str;
    }

    /**
     * UTF-8 aware alternative to str_split
     *
     * Convert a string to an array
     *
     * @param  string $str       UTF-8 encoded
     * @param  int    $split_len Number of characters to split the string by
     * @return array|false Array of chunks, or FALSE when $split_len is not a positive integer
     * @see http://www.php.net/str_split
     */
    public static function str_split($str, $split_len = 1)
    {
        if (!preg_match('/^[0-9]+$/',$split_len) || $split_len < 1) {
            return FALSE;
        }

        $len = self::strlen($str);
        if ($len <= $split_len) {
            return array($str);
        }

        preg_match_all('/.{'.$split_len.'}|[^\x00]{1,'.$split_len.'}$/us', $str, $ar);
        return $ar[0];
    }

    /**
     * UTF-8 aware alternative to strcasecmp
     *
     * A case insensitive string comparison
     *
     * @param  string $str1 String 1 to compare
     * @param  string $str2 String 2 to compare
     * @return int < 0 if str1 is less than str2; > 0 if str1 is greater than str2, and 0 if they are equal.
     * @see http://www.php.net/strcasecmp
     */
    public static function strcasecmp($str1, $str2)
    {
        $strX = self::strtolower($str1);
        $strY = self::strtolower($str2);

        return strcmp($strX, $strY);
    }

    /**
     * UTF-8 aware alternative to strcspn
     *
     * Find length of initial segment not matching mask
     *
     * @param  string   $str
     * @param  string   $mask   The mask
     * @param  int|null $start  Optional starting character position (in characters)
     * @param  int|null $length Optional length
     * @return int|null Length of the initial segment of $str that contains none of
     *                  the characters in $mask, or NULL when the mask is empty
     * @see http://www.php.net/strcspn
     */
    public static function strcspn($str, $mask, $start = NULL, $length = NULL)
    {
        // Checked on length so that the mask "0" is not treated as empty
        if (strlen((string) $mask) == 0) {
            return NULL;
        }

        // Quote the mask for use inside a character class
        $mask = preg_replace('!([\\\\\\-\\]\\[/^])!','\\\${1}',$mask);

        if ($start !== NULL || $length !== NULL)
        {
            // Map missing arguments onto the defaults substr() understands
            $str = self::substr(
                $str,
                $start === NULL ? 0 : $start,
                $length === NULL ? FALSE : $length
            );
        }

        preg_match('/^[^'.$mask.']+/u',$str, $matches);

        if (isset($matches[0])) {
            return self::strlen($matches[0]);
        }

        return 0;
    }

    /**
     * UTF-8 aware alternative to stristr
     *
     * Returns all of haystack from the first occurrence of needle to the end.
     * Needle and haystack are examined in a case-insensitive manner.
     *
     * @param  string $str    The haystack
     * @param  string $search The needle
     * @return string|false The sub string, or FALSE if the needle is not found
     * @see http://www.php.net/stristr
     */
    public static function stristr($str, $search)
    {
        if (strlen($search) == 0) {
            return $str;
        }

        $lstr    = self::strtolower($str);
        $lsearch = self::strtolower($search);

        // Group 1 captures everything before the first match. Its byte length is
        // the byte offset of the match in the original string.
        preg_match('|^(.*)'.preg_quote($lsearch).'|Us',$lstr, $matches);

        if (count($matches) == 2) {
            return substr($str, strlen($matches[1]));
        }

        return FALSE;
    }

    /**
     * UTF-8 aware alternative to strrev
     *
     * Reverse a string
     *
     * @param  string $str String to be reversed
     * @return string The string in reverse character order
     * @see http://www.php.net/strrev
     */
    public static function strrev($str)
    {
        preg_match_all('/./us', $str, $ar);
        return join('',array_reverse($ar[0]));
    }

    /**
     * UTF-8 aware alternative to strspn
     *
     * Find length of initial segment matching mask
     *
     * @param  string   $str    The haystack
     * @param  string   $mask   The mask
     * @param  int|null $start  Optional starting character position
     * @param  int|null $length Optional length
     * @return int Length of the initial segment of $str made up only of characters in $mask
     * @see http://www.php.net/strspn
     */
    public static function strspn($str, $mask, $start = NULL, $length = NULL)
    {
        // Quote the mask for use inside a character class
        $mask = preg_replace('!([\\\\\\-\\]\\[/^])!','\\\${1}',$mask);

        if ($start !== NULL || $length !== NULL)
        {
            // Map missing arguments onto the defaults substr() understands
            $str = self::substr(
                $str,
                $start === NULL ? 0 : $start,
                $length === NULL ? FALSE : $length
            );
        }

        preg_match('/^['.$mask.']+/u',$str, $matches);

        if (isset($matches[0])) {
            return self::strlen($matches[0]);
        }

        return 0;
    }

    /**
     * UTF-8 aware substr_replace
     *
     * Replace text within a portion of a string
     *
     * @param  string   $str    The haystack
     * @param  string   $repl   The replacement string
     * @param  int      $start  Start position in characters
     * @param  int|null $length Optional number of characters to replace; defaults
     *                          to the character length of $str
     * @return string The string with the replacement applied
     * @see http://www.php.net/substr_replace
     */
    public static function substr_replace($str, $repl, $start, $length = NULL)
    {
        // Work on arrays of characters so that offsets count characters, not bytes
        preg_match_all('/./us', $str, $ar);
        preg_match_all('/./us', $repl, $rar);

        if ($length === NULL) {
            $length = self::strlen($str);
        }

        array_splice($ar[0], $start, $length, $rar[0]);
        return join('',$ar[0]);
    }

    /**
     * UTF-8 aware replacement for ltrim()
     *
     * Strip whitespace (or other characters) from the beginning of a string
     * Note: you only need to use this if you are supplying the charlist
     * optional arg and it contains UTF-8 characters. Otherwise ltrim will
     * work normally on a UTF-8 string
     *
     * @param  string       $str      The string to be trimmed
     * @param  string|false $charlist The optional charlist of additional characters to trim
     * @return string The trimmed string
     * @see http://www.php.net/ltrim
     */
    public static function ltrim($str, $charlist = FALSE)
    {
        if ($charlist === FALSE) {
            return ltrim($str);
        }

        // Quote charlist for use in a character class
        $charlist = preg_replace('!([\\\\\\-\\]\\[/^])!','\\\${1}',$charlist);

        return preg_replace('/^['.$charlist.']+/u','',$str);
    }

    /**
     * UTF-8 aware replacement for rtrim()
     *
     * Strip whitespace (or other characters) from the end of a string
     * Note: you only need to use this if you are supplying the charlist
     * optional arg and it contains UTF-8 characters. Otherwise rtrim will
     * work normally on a UTF-8 string
     *
     * @param  string       $str      The string to be trimmed
     * @param  string|false $charlist The optional charlist of additional characters to trim
     * @return string The trimmed string
     * @see http://www.php.net/rtrim
     */
    public static function rtrim($str, $charlist = FALSE)
    {
        if ($charlist === FALSE) {
            return rtrim($str);
        }

        // Quote charlist for use in a character class
        $charlist = preg_replace('!([\\\\\\-\\]\\[/^])!','\\\${1}',$charlist);

        return preg_replace('/['.$charlist.']+$/u','',$str);
    }

    /**
     * UTF-8 aware replacement for trim()
     *
     * Strip whitespace (or other characters) from the beginning and end of a string
     * Note: you only need to use this if you are supplying the charlist
     * optional arg and it contains UTF-8 characters. Otherwise trim will
     * work normally on a UTF-8 string
     *
     * @param  string       $str      The string to be trimmed
     * @param  string|false $charlist The optional charlist of additional characters to trim
     * @return string The trimmed string
     * @see http://www.php.net/trim
     */
    public static function trim($str, $charlist = FALSE)
    {
        if ($charlist === FALSE) {
            return trim($str);
        }

        return self::ltrim(self::rtrim($str, $charlist), $charlist);
    }

    /**
     * UTF-8 aware alternative to ucfirst
     *
     * Make a string's first character uppercase
     *
     * @param  string $str
     * @return string String with first character as upper case (if applicable)
     * @see http://www.php.net/ucfirst
     */
    public static function ucfirst($str)
    {
        $len = self::strlen($str);

        if ($len == 0) {
            return '';
        }

        if ($len == 1) {
            return self::strtoupper($str);
        }

        // Split into the first character and the remainder
        preg_match('/^(.{1})(.*)$/us', $str, $matches);
        return self::strtoupper($matches[1]).$matches[2];
    }

    /**
     * UTF-8 aware alternative to ucwords
     *
     * Uppercase the first character of each word in a string
     *
     * @param  string $str
     * @return string String with first char of each word uppercase
     * @see http://www.php.net/ucwords
     */
    public static function ucwords($str)
    {
        // Note: [\x0c\x09\x0b\x0a\x0d\x20] matches;
        // form feeds, horizontal tabs, vertical tabs, linefeeds and carriage returns
        // This corresponds to the definition of a "word" defined at http://www.php.net/ucwords
        $pattern = '/(^|([\x0c\x09\x0b\x0a\x0d\x20]+))([^\x0c\x09\x0b\x0a\x0d\x20]{1})[^\x0c\x09\x0b\x0a\x0d\x20]*/u';

        return preg_replace_callback($pattern, array(__CLASS__, 'ucwords_callback'), $str);
    }

    /**
     * Callback function for the preg_replace_callback call in ucwords
     *
     * You don't need to call this yourself
     *
     * @param  array $matches Matches corresponding to a single word
     * @return string The word with its first char in uppercase
     * @see ucwords
     * @see strtoupper
     */
    public static function ucwords_callback($matches)
    {
        $leadingws = $matches[2];
        $ucfirst   = self::strtoupper($matches[3]);
        $ucword    = self::substr_replace(ltrim($matches[0]),$ucfirst,0,1);

        return $leadingws . $ucword;
    }

    /**
     * Transcode a string.
     *
     * @param  string $source        The string to transcode.
     * @param  string $from_encoding The source encoding.
     * @param  string $to_encoding   The target encoding.
     * @return string|null Transcoded string; nothing is returned when $source is not a string
     */
    public static function transcode($source, $from_encoding, $to_encoding)
    {
        if (is_string($source))
        {
            /*
             * "//TRANSLIT" is appended to the $to_encoding to ensure that when iconv comes
             * across a character that cannot be represented in the target charset, it can
             * be approximated through one or several similarly looking characters.
             */
            return iconv($from_encoding, $to_encoding.'//TRANSLIT', $source);
        }
    }

    /**
     * Tests a string as to whether it's valid UTF-8 and supported by the Unicode standard
     *
     * Note: this function has been modified to simply return true or false
     *
     * @author <hsivonen@iki.fi>
     * @param  string $str UTF-8 encoded string
     * @return boolean TRUE if valid
     * @see http://hsivonen.iki.fi/php-utf8/
     * @see compliant
     */
    public static function valid($str)
    {
        $mState = 0; // cached expected number of octets after the current octet
                     // until the beginning of the next UTF8 character sequence
        $mUcs4  = 0; // cached Unicode character
        $mBytes = 1; // cached expected number of octets in the current sequence

        $len = strlen($str);

        for ($i = 0; $i < $len; $i++)
        {
            $in = ord($str[$i]);

            if ($mState == 0)
            {
                // When mState is zero we expect either a US-ASCII character or a
                // multi-octet sequence.
                if (0 == (0x80 & ($in)))
                {
                    // US-ASCII, pass straight through.
                    $mBytes = 1;
                }
                else if (0xC0 == (0xE0 & ($in)))
                {
                    // First octet of 2 octet sequence
                    $mUcs4  = ($in);
                    $mUcs4  = ($mUcs4 & 0x1F) << 6;
                    $mState = 1;
                    $mBytes = 2;
                }
                else if (0xE0 == (0xF0 & ($in)))
                {
                    // First octet of 3 octet sequence
                    $mUcs4  = ($in);
                    $mUcs4  = ($mUcs4 & 0x0F) << 12;
                    $mState = 2;
                    $mBytes = 3;
                }
                else if (0xF0 == (0xF8 & ($in)))
                {
                    // First octet of 4 octet sequence
                    $mUcs4  = ($in);
                    $mUcs4  = ($mUcs4 & 0x07) << 18;
                    $mState = 3;
                    $mBytes = 4;
                }
                else if (0xF8 == (0xFC & ($in)))
                {
                    /* First octet of 5 octet sequence.
                     *
                     * This is illegal because the encoded codepoint must be either
                     * (a) not the shortest form or
                     * (b) outside the Unicode range of 0-0x10FFFF.
                     * Rather than trying to resynchronize, we will carry on until the end
                     * of the sequence and let the later error handling code catch it.
                     */
                    $mUcs4  = ($in);
                    $mUcs4  = ($mUcs4 & 0x03) << 24;
                    $mState = 4;
                    $mBytes = 5;
                }
                else if (0xFC == (0xFE & ($in)))
                {
                    // First octet of 6 octet sequence, see comments for 5 octet sequence.
                    $mUcs4  = ($in);
                    $mUcs4  = ($mUcs4 & 1) << 30;
                    $mState = 5;
                    $mBytes = 6;
                }
                else
                {
                    /* Current octet is neither in the US-ASCII range nor a legal first
                     * octet of a multi-octet sequence.
                     */
                    return FALSE;
                }
            }
            else
            {
                // When mState is non-zero, we expect a continuation of the multi-octet
                // sequence
                if (0x80 == (0xC0 & ($in)))
                {
                    // Legal continuation.
                    $shift  = ($mState - 1) * 6;
                    $tmp    = $in;
                    $tmp    = ($tmp & 0x0000003F) << $shift;
                    $mUcs4 |= $tmp;

                    /**
                     * End of the multi-octet sequence. mUcs4 now contains the final
                     * Unicode codepoint to be output
                     */
                    if (0 == --$mState)
                    {
                        /*
                         * Check for illegal sequences and codepoints.
                         */
                        // From Unicode 3.1, non-shortest form is illegal
                        if (((2 == $mBytes) && ($mUcs4 < 0x0080)) ||
                            ((3 == $mBytes) && ($mUcs4 < 0x0800)) ||
                            ((4 == $mBytes) && ($mUcs4 < 0x10000)) ||
                            (4 < $mBytes) ||
                            // From Unicode 3.2, surrogate characters are illegal
                            (($mUcs4 & 0xFFFFF800) == 0xD800) ||
                            // Codepoints outside the Unicode range are illegal
                            ($mUcs4 > 0x10FFFF))
                        {
                            return FALSE;
                        }

                        // initialize UTF8 cache
                        $mState = 0;
                        $mUcs4  = 0;
                        $mBytes = 1;
                    }
                }
                else
                {
                    /**
                     * ((0xC0 & (*in) != 0x80) && (mState != 0))
                     * Incomplete multi-octet sequence.
                     */
                    return FALSE;
                }
            }
        }

        // A non-zero state here means the string ended in the middle of a
        // multi-octet sequence, which is not valid UTF-8.
        return $mState == 0;
    }

    /**
     * Tests whether a string complies as UTF-8. This will be much
     * faster than valid() but will pass five and six octet
     * UTF-8 sequences, which are not supported by Unicode and
     * so cannot be displayed correctly in a browser. In other words
     * it is not as strict as valid() but it's faster. If you use
     * it to validate user input, you place yourself at the risk that
     * attackers will be able to inject 5 and 6 byte sequences (which
     * may or may not be a significant risk, depending on what you
     * are doing)
     *
     * @see valid
     * @see http://www.php.net/manual/en/reference.pcre.pattern.modifiers.php#54805
     * @param  string $str UTF-8 string to check
     * @return boolean TRUE if string is valid UTF-8
     */
    public static function compliant($str)
    {
        if (strlen($str) == 0) {
            return TRUE;
        }

        // If even just the first character can be matched, when the /u
        // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
        // invalid, nothing at all will match, even if the string contains
        // some valid sequences
        return (preg_match('/^.{1}/us',$str,$ar) == 1);
    }
}