一.背景
二.代码部分
/**
 * Convert a BOM-prefixed UTF-16 byte string to UTF-8.
 *
 * The encoding is chosen from the leading byte-order mark:
 *   - UTF-16LE BOM [0xFF, 0xFE]: the payload is decoded as little-endian.
 *   - UTF-16BE BOM [0xFE, 0xFF]: the payload is decoded as big-endian.
 *   - UTF-8 BOM [0xEF, 0xBB, 0xBF]: the BOM is stripped and the remaining
 *     bytes are returned unchanged.
 * Input that starts with none of these BOMs (including empty input) is
 * returned unchanged, as is any non-string value (e.g. the false returned
 * by a failed file_get_contents()).
 *
 * Surrogate pairs are combined into a single 4-byte UTF-8 sequence. Malformed
 * input (an unpaired surrogate, or a dangling odd byte at the end) is replaced
 * with U+FFFD so that the result is always well-formed UTF-8.
 *
 * @param string $str Raw bytes, typically file contents read with file_get_contents().
 * @return string The UTF-8 encoded text, without a BOM.
 */
public static function utf16_to_utf8($str)
{
    // Pass non-strings through untouched instead of indexing into them.
    if (!is_string($str)) {
        return $str;
    }

    // strncmp() is safe on inputs shorter than the BOM, unlike $str[n].
    if (strncmp($str, "\xFE\xFF", 2) === 0) {
        $bigEndian = true;
    } elseif (strncmp($str, "\xFF\xFE", 2) === 0) {
        $bigEndian = false;
    } elseif (strncmp($str, "\xEF\xBB\xBF", 3) === 0) {
        // Already UTF-8: only drop the BOM. The cast covers PHP versions
        // where substr() yields false for a BOM-only string.
        return (string) substr($str, 3);
    } else {
        return $str;
    }

    $len = strlen($str);
    $out = '';

    // Start right after the 2-byte BOM and consume one 16-bit code unit per step.
    for ($i = 2; $i + 1 < $len; $i += 2) {
        $cp = $bigEndian
            ? (ord($str[$i]) << 8) | ord($str[$i + 1])
            : (ord($str[$i + 1]) << 8) | ord($str[$i]);

        // High surrogate: try to combine it with the following low surrogate.
        if ($cp >= 0xD800 && $cp <= 0xDBFF && $i + 3 < $len) {
            $next = $bigEndian
                ? (ord($str[$i + 2]) << 8) | ord($str[$i + 3])
                : (ord($str[$i + 3]) << 8) | ord($str[$i + 2]);
            if ($next >= 0xDC00 && $next <= 0xDFFF) {
                $cp = 0x10000 + (($cp - 0xD800) << 10) + ($next - 0xDC00);
                $i += 2; // the low surrogate has been consumed as well
            }
        }

        // Anything still in the surrogate range is unpaired and cannot be
        // represented in UTF-8; substitute the replacement character.
        if ($cp >= 0xD800 && $cp <= 0xDFFF) {
            $cp = 0xFFFD;
        }

        if ($cp <= 0x7F) {
            // Includes U+0000, which must be the single byte 0x00 (not C0 80).
            $out .= chr($cp);
        } elseif ($cp <= 0x7FF) {
            $out .= chr(0xC0 | ($cp >> 6))
                . chr(0x80 | ($cp & 0x3F));
        } elseif ($cp <= 0xFFFF) {
            $out .= chr(0xE0 | ($cp >> 12))
                . chr(0x80 | (($cp >> 6) & 0x3F))
                . chr(0x80 | ($cp & 0x3F));
        } else {
            $out .= chr(0xF0 | ($cp >> 18))
                . chr(0x80 | (($cp >> 12) & 0x3F))
                . chr(0x80 | (($cp >> 6) & 0x3F))
                . chr(0x80 | ($cp & 0x3F));
        }
    }

    // The BOM is 2 bytes, so an odd total length means a truncated code unit.
    if ($len % 2 !== 0) {
        $out .= "\xEF\xBF\xBD";
    }

    return $out;
}