这个类相当好用。它的作用是：用 PHP 通过 JSON 传递 GBK 编码的字符（比如中文、日文、韩文等）时，先把它们转成 Unicode 转义形式（%uXXXX）最合适不过了。
<?php
/**
 * Mimics the behaviour of JavaScript's escape() and unescape() functions
 * for multi-byte text (GB2312 by default).
 *
 * Only code points that fit in four hex digits (U+0000..U+FFFF) are handled;
 * surrogate halves are encoded/decoded individually and are not recombined.
 */
class coding
{
    /**
     * Decode a string containing "%uXXXX" and "%XX" escape sequences.
     *
     * Each "%uXXXX" becomes the code point U+XXXX and each "%XX" becomes the
     * code point U+00XX. Everything else in $str is passed through unchanged
     * and is treated as UTF-8 by the final conversion step. Sequences whose
     * digits are not valid hexadecimal are left as literal text.
     *
     * @param string $str      Escaped input.
     * @param string $encoding Target encoding of the returned string.
     *
     * @return string The decoded text converted from UTF-8 to $encoding.
     */
    public function unescape($str, $encoding = 'gb2312')
    {
        // "%uXXXX" is listed first; "%XX" covers the single-byte output of escape().
        $text = preg_replace_callback(
            '/%u[0-9A-Fa-f]{4}|%[0-9A-Fa-f]{2}/',
            array($this, 'toUtf8'),
            $str
        );

        return mb_convert_encoding($text, $encoding, 'utf-8');
    }

    /**
     * Convert escape tokens to their UTF-8 byte sequences.
     *
     * Used as the preg_replace_callback() callback of unescape(), so it takes
     * the match array and returns the replacement string.
     *
     * @param array $ar Tokens of the form "%uXXXX" (6 chars) or "%XX" (3 chars).
     *
     * @return string Concatenated UTF-8 encoding of every token in $ar.
     */
    public function toUtf8($ar)
    {
        $c = '';

        foreach ($ar as $val) {
            // A 3-character token is "%XX"; anything else keeps the historical
            // behaviour of skipping a two-character prefix ("%u").
            $hex = strlen($val) === 3 ? substr($val, 1) : substr($val, 2);
            $code = intval($hex, 16);

            if ($code <= 0x7F) {
                // 0000-007F: one byte
                $c .= chr($code);
            } elseif ($code < 0x800) {
                // 0080-07FF: two bytes
                $c .= chr(0xC0 | ($code >> 6));
                $c .= chr(0x80 | ($code & 0x3F));
            } else {
                // 0800-FFFF: three bytes
                $c .= chr(0xE0 | ($code >> 12));
                $c .= chr(0x80 | (($code >> 6) & 0x3F));
                $c .= chr(0x80 | ($code & 0x3F));
            }
        }

        return $c;
    }

    /**
     * Encode a string as "%uXXXX" / "%XX" escape sequences.
     *
     * Characters that occupy more than one byte in $encoding are written as
     * "%u" followed by their big-endian UCS-2 value in upper-case hex; every
     * single-byte character is written as "%" followed by its byte value in
     * upper-case hex.
     *
     * @param string $string   Text to escape.
     * @param string $encoding Encoding of $string.
     *
     * @return string The escaped text.
     */
    public function escape($string, $encoding = 'gb2312')
    {
        $return = '';
        // The length does not change inside the loop, so compute it once.
        $length = mb_strlen($string, $encoding);

        for ($x = 0; $x < $length; $x++) {
            $str = mb_substr($string, $x, 1, $encoding);

            if (strlen($str) > 1) {
                // multi-byte character
                $return .= '%u' . strtoupper(bin2hex(mb_convert_encoding($str, 'UCS-2BE', $encoding)));
            } else {
                $return .= '%' . strtoupper(bin2hex($str));
            }
        }

        return $return;
    }

    /**
     * Convert a string between encodings (GB2312 to UTF-8 by default).
     *
     * @param string $string      Text to convert.
     * @param string $encoding    Target encoding.
     * @param string $from_encode Encoding of $string.
     *
     * @return string The converted text.
     */
    public function gb2utf8($string, $encoding = 'utf-8', $from_encode = 'gb2312')
    {
        return mb_convert_encoding($string, $encoding, $from_encode);
    }
}
?>
在 Google Code 上找到的另一个类似脚本：
<?php
/**
 * Encode a Unicode code point as UTF-8.
 *
 * Helper for unescape().
 *
 * @param int $cp Code point.
 *
 * @return string|null UTF-8 bytes, or null when $cp is a surrogate or lies
 *                     outside U+0000..U+10FFFF.
 */
function phpescape_codepoint_to_utf8($cp)
{
    if ($cp < 0 || $cp > 0x10FFFF || ($cp >= 0xD800 && $cp <= 0xDFFF)) {
        return null;
    }
    if ($cp < 0x80) {
        return chr($cp);
    }
    if ($cp < 0x800) {
        return chr(0xC0 | ($cp >> 6)) . chr(0x80 | ($cp & 0x3F));
    }
    if ($cp < 0x10000) {
        return chr(0xE0 | ($cp >> 12))
            . chr(0x80 | (($cp >> 6) & 0x3F))
            . chr(0x80 | ($cp & 0x3F));
    }

    return chr(0xF0 | ($cp >> 18))
        . chr(0x80 | (($cp >> 12) & 0x3F))
        . chr(0x80 | (($cp >> 6) & 0x3F))
        . chr(0x80 | ($cp & 0x3F));
}

/**
 * Escape a GBK-encoded string as "%uxxxx" / "%xx" sequences (lower-case hex).
 *
 * A byte >= 0x80 together with the following byte is converted from GBK to
 * big-endian UCS-2 and written as "%uxxxx". Every other byte, and any high
 * byte that cannot be converted as part of a pair, is written as a
 * zero-padded "%xx".
 *
 * @param string $str GBK-encoded input.
 *
 * @return string The escaped text.
 */
function phpescape($str)
{
    $length = strlen($str);
    $result = '';

    for ($i = 0; $i < $length; $i++) {
        $byte = ord($str[$i]);

        if ($byte >= 0x80 && $i + 1 < $length) {
            // Explicit big-endian output: no dependence on the platform's
            // default byte order for "UCS-2", hence no manual byte swap.
            $ucs2 = iconv('GBK', 'UCS-2BE', substr($str, $i, 2));
            if ($ucs2 !== false && strlen($ucs2) === 2) {
                $result .= '%u' . bin2hex($ucs2);
                $i++; // the trail byte has been consumed as well
                continue;
            }
        }

        // Always two hex digits, so "\n" becomes "%0a" rather than "%a".
        $result .= sprintf('%%%02x', $byte);
    }

    return $result;
}

/**
 * Escape a UTF-8 string: non-ASCII characters become "%uxxxx" (UTF-16BE code
 * units in lower-case hex, two units for characters above U+FFFF), and runs
 * of ASCII are passed through rawurlencode().
 *
 * If $str is not valid UTF-8 the whole string is percent-encoded byte by byte
 * with rawurlencode() instead.
 *
 * @param string $str UTF-8 input.
 *
 * @return string The escaped text.
 */
function escape($str)
{
    // The /u flag makes "[^\x00-\x7F]" match one complete UTF-8 character
    // rather than a fixed two-byte chunk.
    if (!preg_match_all('/[^\x00-\x7F]|[\x00-\x7F]+/u', $str, $r)) {
        return rawurlencode($str);
    }

    $ar = $r[0];
    foreach ($ar as $k => $v) {
        if (ord($v[0]) < 0x80) {
            $ar[$k] = rawurlencode($v);
        } else {
            $units = str_split(bin2hex(iconv('UTF-8', 'UTF-16BE', $v)), 4);
            $ar[$k] = '%u' . implode('%u', $units);
        }
    }

    return implode('', $ar);
}

/**
 * Decode "%uXXXX" and "%XX" sequences for display in HTML.
 *
 * "%uXXXX" becomes the decimal numeric character reference "&#N;" and "%XX"
 * becomes the single byte with that value. Any "%" that is not followed by a
 * complete, valid hex sequence is kept as literal text.
 *
 * @param string $source Escaped input.
 *
 * @return string The decoded text.
 */
function phpunescape($source)
{
    return preg_replace_callback(
        '/%u([0-9A-Fa-f]{4})|%([0-9A-Fa-f]{2})/',
        function ($m) {
            if (isset($m[2])) {
                // escaped single byte
                return chr(hexdec($m[2]));
            }

            // Unicode escape: the entity text is pure ASCII, so it needs no
            // further encoding step.
            return '&#' . hexdec($m[1]) . ';';
        },
        $source
    );
}

/**
 * Decode "%uXXXX", "%XX", "&#xXXXX;" and "&#N;" sequences to UTF-8.
 *
 * The input is scanned once, so decoded output is never decoded a second time
 * (for example "%25u4e2d" yields the literal text "%u4e2d"). A high/low
 * surrogate pair written as two consecutive "%uXXXX" tokens is combined into
 * one character. "%XX" yields the raw byte. Sequences that do not describe a
 * valid code point are left untouched, as is all other text including
 * newlines.
 *
 * @param string $str Escaped input.
 *
 * @return string The decoded text.
 */
function unescape($str)
{
    $pattern = '/%u([Dd][89ABab][0-9A-Fa-f]{2})%u([Dd][C-Fc-f][0-9A-Fa-f]{2})' // 1,2: surrogate pair
        . '|%u([0-9A-Fa-f]{4})'                                                // 3: %uXXXX
        . '|&#x([0-9A-Fa-f]{4});'                                              // 4: &#xXXXX;
        . '|&#([0-9]{1,7});'                                                   // 5: &#N;
        . '|%([0-9A-Fa-f]{2})/';                                               // 6: %XX

    return preg_replace_callback(
        $pattern,
        function ($m) {
            if (isset($m[6]) && $m[6] !== '') {
                return chr(hexdec($m[6]));
            }

            if (isset($m[5]) && $m[5] !== '') {
                $cp = (int) $m[5];
            } elseif (isset($m[4]) && $m[4] !== '') {
                $cp = hexdec($m[4]);
            } elseif (isset($m[3]) && $m[3] !== '') {
                $cp = hexdec($m[3]);
            } else {
                $cp = 0x10000
                    + ((hexdec($m[1]) - 0xD800) << 10)
                    + (hexdec($m[2]) - 0xDC00);
            }

            $utf8 = phpescape_codepoint_to_utf8($cp);

            // Not a valid code point (e.g. a lone surrogate): keep the source text.
            return $utf8 === null ? $m[0] : $utf8;
        },
        $str
    );
}
?>