commit 1adaca818f02918f184c9b9b39eb9bf782483284 Author: Igor V Belousov Date: Tue Dec 15 20:18:00 2015 +0300 initial diff --git a/idna.php b/idna.php new file mode 100644 index 0000000..16106c8 --- /dev/null +++ b/idna.php @@ -0,0 +1,318 @@ + + * @copyright 2008 Nicolas Thouvenin + * @license http://opensource.org/licenses/LGPL-2.1 LGPL v2.1 + */ +function ordUTF8($c, $index = 0, &$bytes = null) + { + $len = strlen($c); + $bytes = 0; + if ($index >= $len) + return false; + $h = ord($c{$index}); + if ($h <= 0x7F) { + $bytes = 1; + return $h; + } + else if ($h < 0xC2) + return false; + else if ($h <= 0xDF && $index < $len - 1) { + $bytes = 2; + return ($h & 0x1F) << 6 | (ord($c{$index + 1}) & 0x3F); + } + else if ($h <= 0xEF && $index < $len - 2) { + $bytes = 3; + return ($h & 0x0F) << 12 | (ord($c{$index + 1}) & 0x3F) << 6 + | (ord($c{$index + 2}) & 0x3F); + } + else if ($h <= 0xF4 && $index < $len - 3) { + $bytes = 4; + return ($h & 0x0F) << 18 | (ord($c{$index + 1}) & 0x3F) << 12 + | (ord($c{$index + 2}) & 0x3F) << 6 + | (ord($c{$index + 3}) & 0x3F); + } + else + return false; + } + +/** + * Encode UTF-8 domain name to IDN Punycode + * + * @param string $value Domain name + * @return string Encoded Domain name + * + * @author Igor V Belousov + * @copyright 2013 Igor V Belousov + * @license http://opensource.org/licenses/LGPL-2.1 LGPL v2.1 + * @link http://belousovv.ru/myscript/phpIDN + */ +function EncodePunycodeIDN( $value ) + { + /* search subdomains */ + $sub_domain = explode( '.', $value ); + if ( count( $sub_domain ) > 1 ) { + $sub_result = ''; + foreach ( $sub_domain as $sub_value ) { + $sub_result .= '.' . EncodePunycodeIDN( $sub_value ); + } + return substr( $sub_result, 1 ); + } + + $mb_internal_enc = mb_internal_encoding(); + mb_internal_encoding( "UTF-8" ); + + $value = mb_strtolower( $value ); + + /* http://tools.ietf.org/html/rfc3492#section-6.3 */ + $n = 0x80; + $delta = 0; + $bias = 72; + $output = array(); + + $input = array(); + $str = $value; + while ( mb_strlen( $str ) > 0 ) + { + array_push( $input, mb_substr( $str, 0, 1 ) ); + $str = mb_substr( $str, 1 ); + } + + /* basic symbols */ + $basic = preg_grep( '/[\x00-\x7f]/', $input ); + $b = $basic; + + if ( $b == $input ) + { + mb_internal_encoding( $mb_internal_enc ); + return $value; + } + $b = count( $b ); + if ( $b > 0 ) { + $output = $basic; + /* add delimeter */ + $output[] = '-'; + } + unset($basic); + /* add prefix */ + array_unshift( $output, 'xn--' ); + + $input_len = count( $input ); + $h = $b; + + while ( $h < $input_len ) { + $m = 0x10FFFF; + for ( $i = 0; $i < $input_len; ++$i ) + { + $ord_input[ $i ] = ordUtf8( $input[ $i ] ); + if ( ( $ord_input[ $i ] >= $n ) && ( $ord_input[ $i ] < $m ) ) + { + $m = $ord_input[ $i ]; + } + } + if ( ( $m - $n ) > ( 0x10FFFF / ( $h + 1 ) ) ) + { + mb_internal_encoding( $mb_internal_enc ); + return $value; + } + $delta += ( $m - $n ) * ( $h + 1 ); + $n = $m; + + for ( $i = 0; $i < $input_len; ++$i ) + { + $c = $ord_input[ $i ]; + if ( $c < $n ) + { + ++$delta; + if ( $delta == 0 ) + { + mb_internal_encoding( $mb_internal_enc ); + return $value; + } + } + if ( $c == $n ) + { + $q = $delta; + for ( $k = 36;; $k += 36 ) + { + if ( $k <= $bias ) + { + $t = 1; + } + elseif ( $k >= ( $bias + 26 ) ) + { + $t = 26; + } + else + { + $t = $k - $bias; + } + if ( $q < $t ) + { + break; + } + $tmp_int = ( $t + ( ( $q - $t ) % ( 36 - $t ) ) ); + $output[] = chr( ( $tmp_int + 22 + 75 * ( $tmp_int < 26 ) ) ); + $q = ( $q - $t ) / ( 36 - $t ); + } + + $output[]= chr(($q + 22 + 75 * ($q < 26))); + /* http://tools.ietf.org/html/rfc3492#section-6.1 */ + $delta = ( $h == $b )?$delta/700:$delta>>1; + + $delta += ( $delta / ( $h + 1 ) ); + + $k2 = 0; + while ( $delta > 455 ) + { + $delta /= 35; + $k2 += 36; + } + $bias= intval( $k2 + ( ( 36 * $delta ) / ( $delta + 38 ) ) ); + /* end section-6.1 */ + $delta = 0; + ++$h; + } + } + ++$delta; + ++$n; + } + mb_internal_encoding( $mb_internal_enc ); + return implode( '', $output ); + } + +/** + * Decode IDN Punycode to UTF-8 domain name + * + * @param string $value Punycode + * @return string Domain name in UTF-8 charset + * + * @author Igor V Belousov + * @copyright 2013 Igor V Belousov + * @license http://opensource.org/licenses/LGPL-2.1 LGPL v2.1 + * @link http://belousovv.ru/myscript/phpIDN + */ +function DecodePunycodeIDN($value) + { + /* search subdomains */ + $sub_domain = explode('.', $value); + if (count($sub_domain)>1){ + $sub_result=''; + foreach ($sub_domain as $sub_value) { + $sub_result .= '.'.DecodePunycodeIDN($sub_value); + } + return substr($sub_result,1); + } + + /* search prefix */ + if (substr($value,0,4)!='xn--') + { + return $value; + } + else + { + $bad_input=$value; + $value=substr($value,4); + } + + $n = 0x80; + $i = 0; + $bias = 72; + $output = array(); + + /* search delimeter */ + $d = strrpos($value, '-'); + + if ($d > 0) { + for ( $j = 0; $j < $d; ++$j) { + $c = $value[$j]; + $output[]=$c; + if ($c > 0x7F) + { + return $bad_input; + } + } + ++$d; + } else { + $d = 0; + } + + while ($d < strlen($value)) + { + $oldi = $i; + $w = 1; + + for ($k = 36;; $k += 36) + { + if ($d == strlen($value)) + { + return $bad_input; + } + $c = $value[$d++]; + $c=ord($c); + + $digit=($c - 48 < 10) ? $c - 22 : + ( + ($c - 65 < 26) ? $c - 65 : + ( + ($c - 97 < 26) ? $c - 97 : 36 + ) + ); + if ($digit > (0x10FFFF - $i) / $w) + { + return $bad_input; + } + $i += $digit * $w; + + if ($k <= $bias) + { + $t = 1; + } + elseif ($k >= $bias + 26) + { + $t = 26; + } + else + { + $t = $k - $bias; + } + if ($digit < $t) { + break; + } + + $w *= (36 - $t); + + } + + $delta = $i - $oldi; + + /* http://tools.ietf.org/html/rfc3492#section-6.1 */ + $delta = ($oldi == 0)?$delta/700:$delta>>1; + + $count_output_plus_one=count($output)+1; + $delta += ($delta / ($count_output_plus_one+1)); + + $k2 = 0; + while ($delta > 455) + { + $delta /= 35; + $k2 += 36; + } + $bias= intval($k2 + (36 * $delta) / ($delta + 38)); + /* end section-6.1 */ + if ($i / $count_output_plus_one > 0x10FFFF - $n) + { + return $bad_input; + } + $n += intval($i / $count_output_plus_one); + $i = intval($i % $count_output_plus_one); + array_splice($output, $i, 0, + html_entity_decode('&#'.$n.';',ENT_NOQUOTES,'UTF-8') + ); + ++$i; + } + return implode('', $output); + } +