If you need to convert UTF-8/Unicode Vietnamese text with tone and accent marks (acute, grave, circumflex, tilde, dot below, hook above, and more) to plain ASCII without any diacritics, or to the VIQR format (spelled 'VIRQ' in the code below), here is a PHP class that performs the conversion. Some examples:
an toàn => an toan; áo giáp => ao giap; xúc phạm => xuc pham
Source Code:
/**
 * Populate $this->mMap with three parallel lookup tables
 * (data taken from file vumaps.js in package vietuni8).
 *
 * The tables are index-aligned: entry N of 'Unicode' (a code point, as an
 * integer) corresponds to entry N of 'ASCII' (the letter with every mark
 * removed) and entry N of 'VIRQ' (the letter followed by its mark symbols).
 *
 * Each table holds twelve rows of twelve vowels, alternating lower-case and
 * upper-case rows (unmarked, acute, grave, dot below, hook above, tilde),
 * followed by a final row for d / d-with-stroke in both cases.
 */
function initMapping() {
    $this->mMap['Unicode'] = array(
        // no tone mark
        97,226,259,101,234,105,111,244,417,117,432,121,
        65,194,258,69,202,73,79,212,416,85,431,89,
        // acute
        225,7845,7855,233,7871,237,243,7889,7899,250,7913,253,
        193,7844,7854,201,7870,205,211,7888,7898,218,7912,221,
        // grave
        224,7847,7857,232,7873,236,242,7891,7901,249,7915,7923,
        192,7846,7856,200,7872,204,210,7890,7900,217,7914,7922,
        // dot below
        7841,7853,7863,7865,7879,7883,7885,7897,7907,7909,7921,7925,
        7840,7852,7862,7864,7878,7882,7884,7896,7906,7908,7920,7924,
        // hook above
        7843,7849,7859,7867,7875,7881,7887,7893,7903,7911,7917,7927,
        7842,7848,7858,7866,7874,7880,7886,7892,7902,7910,7916,7926,
        // tilde
        227,7851,7861,7869,7877,297,245,7895,7905,361,7919,7929,
        195,7850,7860,7868,7876,296,213,7894,7904,360,7918,7928,
        // d, d with stroke (lower and upper case)
        100,273,68,272
    );
    $this->mMap['ASCII'] = array(
        'a','a','a','e','e','i','o','o','o','u','u','y',
        'A','A','A','E','E','I','O','O','O','U','U','Y',
        'a','a','a','e','e','i','o','o','o','u','u','y',
        'A','A','A','E','E','I','O','O','O','U','U','Y',
        'a','a','a','e','e','i','o','o','o','u','u','y',
        'A','A','A','E','E','I','O','O','O','U','U','Y',
        'a','a','a','e','e','i','o','o','o','u','u','y',
        'A','A','A','E','E','I','O','O','O','U','U','Y',
        'a','a','a','e','e','i','o','o','o','u','u','y',
        'A','A','A','E','E','I','O','O','O','U','U','Y',
        'a','a','a','e','e','i','o','o','o','u','u','y',
        'A','A','A','E','E','I','O','O','O','U','U','Y',
        'd','d','D','D'
    );
    $this->mMap['VIRQ'] = array(
        "a","a^","a(","e","e^","i","o","o^","o+","u","u+","y",
        "A","A^","A(","E","E^","I","O","O^","O+","U","U+","Y",
        "a'","a^'","a('","e'","e^'","i'","o'","o^'","o+'","u'","u+'","y'",
        "A'","A^'","A('","E'","E^'","I'","O'","O^'","O+'","U'","U+'","Y'",
        "a`","a^`","a(`","e`","e^`","i`","o`","o^`","o+`","u`","u+`","y`",
        "A`","A^`","A(`","E`","E^`","I`","O`","O^`","O+`","U`","U+`","Y`",
        "a.","a^.","a(.","e.","e^.","i.","o.","o^.","o+.","u.","u+.","y.",
        "A.","A^.","A(.","E.","E^.","I.","O.","O^.","O+.","U.","U+.","Y.",
        "a?","a^?","a(?","e?","e^?","i?","o?","o^?","o+?","u?","u+?","y?",
        "A?","A^?","A(?","E?","E^?","I?","O?","O^?","O+?","U?","U+?","Y?",
        "a~","a^~","a(~","e~","e^~","i~","o~","o^~","o+~","u~","u+~","y~",
        "A~","A^~","A(~","E~","E^~","I~","O~","O^~","O+~","U~","U+~","Y~",
        "d","dd","D","DD"
    );
}
/**
 * Report whether a value lies inside a closed interval.
 *
 * @param mixed $pStart lower bound (inclusive)
 * @param mixed $pVar   value being tested
 * @param mixed $pEnd   upper bound (inclusive)
 * @return bool true when $pStart <= $pVar <= $pEnd
 */
private function between($pStart,$pVar,$pEnd) {
    // Guard clause: anything below the lower bound is out of range.
    if (!($pVar >= $pStart)) {
        return false;
    }
    return $pVar <= $pEnd;
}
/**
 * Map the UTF-8 text in $this->mTxt from one charset table to another.
 *
 * The text is decoded one UTF-8 sequence at a time. Each non-ASCII code
 * point is looked up in $this->mMap[$pFrom]; when found, the entry at the
 * same index of $this->mMap[$pTo] is appended to the output.
 *
 * Pass-through rules:
 *  - ASCII bytes (0-127) are copied unchanged and never looked up.
 *  - A valid sequence whose code point is not in the source table is copied
 *    unchanged (previously a failed search silently selected entry 0, so
 *    every unknown character turned into "a").
 *  - Bytes that do not form a valid sequence (stray continuation bytes,
 *    truncated sequences, lead bytes 248-255) are copied unchanged
 *    (previously they were emitted as NUL bytes or read undefined offsets).
 *
 * @param string $pFrom key of the source table in $this->mMap (code points)
 * @param string $pTo   key of the target table in $this->mMap
 * @return string the converted text
 */
function map($pFrom,$pTo) {
    $vStr = (string) $this->mTxt;
    $vLen = strlen($vStr);
    $vOutput = '';
    $i = 0;
    while ($i < $vLen) {
        $vLead = ord($vStr[$i]);

        // Single-byte (ASCII) characters need no decoding or lookup.
        if ($this->between(0,$vLead,127)) {
            $vOutput .= $vStr[$i];
            $i++;
            continue;
        }

        // Lead-byte ranges, see http://www1.tip.nl/~t876506/utf8tbl.html
        // $vSize is the sequence length, $vOrd starts as the lead's payload bits.
        if ($this->between(192,$vLead,223)) {
            $vSize = 2;
            $vOrd = $vLead-192;
        } elseif ($this->between(224,$vLead,239)) {
            $vSize = 3;
            $vOrd = $vLead-224;
        } elseif ($this->between(240,$vLead,247)) {
            $vSize = 4;
            $vOrd = $vLead-240;
        } else {
            // Stray continuation byte (128-191) or invalid lead byte (248-255).
            $vSize = 0;
            $vOrd = 0;
        }

        // Collect the continuation bytes, each contributing six payload bits.
        $vRaw = $vStr[$i];
        $vValid = ($vSize > 0 && $i+$vSize <= $vLen);
        for ($j=1; $vValid && $j<$vSize; $j++) {
            $vByte = $vStr[$i+$j];
            $vNext = ord($vByte);
            if ($this->between(128,$vNext,191)) {
                $vOrd = $vOrd*64+($vNext-128);
                $vRaw .= $vByte;
            } else {
                $vValid = false;
            }
        }

        // Malformed input: emit the offending byte as-is and resynchronise
        // on the next byte.
        if (!$vValid) {
            $vOutput .= $vStr[$i];
            $i++;
            continue;
        }

        // Strict search so a miss (false) is never confused with index 0.
        $vKey = array_search($vOrd,$this->mMap[$pFrom],true);
        if ($vKey !== false && isset($this->mMap[$pTo][$vKey])) {
            $vOutput .= $this->mMap[$pTo][$vKey];
        } else {
            $vOutput .= $vRaw;
        }
        $i += $vSize;
    }
    return $vOutput;
}
/**
 * Convert the UTF-8 text to plain ASCII by running map() from the
 * 'Unicode' table to the 'ASCII' table.
 *
 * @return string the text returned by map()
 */
function uni2ascii() {
    return $this->map('Unicode','ASCII');
}
}
?>
Sample Usage:
Leave a Reply