aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'phpBB/develop/unicode_testing.php')
-rw-r--r--phpBB/develop/unicode_testing.php120
1 files changed, 120 insertions, 0 deletions
diff --git a/phpBB/develop/unicode_testing.php b/phpBB/develop/unicode_testing.php
new file mode 100644
index 0000000000..25a13d1325
--- /dev/null
+++ b/phpBB/develop/unicode_testing.php
@@ -0,0 +1,120 @@
+<?php
+//
+// This file provides some useful functions for debugging the unicode/UTF-8 library
+// It requires utf_tools.php to be loaded
+//
+die("Please read the first lines of this script for instructions on how to enable it");
+
+if (!headers_sent())
+{
+ header('Content-type: text/html; charset=UTF-8');
+}
+
+/**
+ * Converts unicode escape sequences (\u0123) into UTF-8 characters
+ *
+ * @param string A unicode sequence
+ * @return string UTF-8 representation of the given unicode sequence
+ */
+function unicode_to_utf8($string)
+{
+ $utf8 = '';
+ $chars = array();
+ for ($i = 0; $i < strlen($string); $i++)
+ {
+ if (isset($string[$i + 5]) && substr($string, $i, 2) == '\\u' && ctype_xdigit(substr($string, $i + 2, 4)))
+ {
+ $utf8 .= utf8_from_unicode(array(base_convert(substr($string, $i + 2, 4), 16, 10)));
+ $i += 5;
+ }
+ else
+ {
+ $utf8 .= $string[$i];
+ }
+ }
+ return $utf8;
+}
+
+/**
+ * Takes an array of ints representing the Unicode characters and returns
+ * a UTF-8 string.
+ *
+ * @param array $array array of unicode code points representing a string
+ * @return string UTF-8 character string
+ */
+function utf8_from_unicode($array)
+{
+ $str = '';
+ foreach ($array as $value)
+ {
+ $str .= utf8_chr($value);
+ }
+ return $str;
+}
+
+/**
+* Converts a UTF-8 string to unicode code points
+*
+* @param string $text UTF-8 string
+* @return string Unicode code points
+*/
+function utf8_to_unicode($text)
+{
+ return preg_replace_callback(
+ '#[\\xC2-\\xF4][\\x80-\\xBF]?[\\x80-\\xBF]?[\\x80-\\xBF]#',
+ 'utf8_to_unicode_callback',
+ preg_replace_callback(
+ '#[\\x00-\\x7f]#',
+ 'utf8_to_unicode_callback',
+ $text
+ )
+ );
+}
+
+/**
+* Takes a UTF-8 char and replaces it with its unicode escape sequence. Attention, $m is an array
+*
+* @param array $m 0-based numerically indexed array passed by preg_replace_callback()
+* @return string A unicode escape sequence
+*/
+function utf8_to_unicode_callback($m)
+{
+ return '\u' . str_pad(base_convert(utf8_ord($m[0]), 10, 16), 4, '0', STR_PAD_LEFT) . '';
+}
+
+/**
+* A wrapper function for the normalizer which takes care of including the class if required and modifies the passed strings
+* to be in NFKC
+*
+* @param mixed $strings a string or an array of strings to normalize
+* @return mixed the normalized content, preserving array keys if array given.
+*/
+function utf8_normalize_nfkc($strings)
+{
+ if (empty($strings))
+ {
+ return $strings;
+ }
+
+ if (!class_exists('utf_normalizer'))
+ {
+ global $phpbb_root_path, $phpEx;
+ include($phpbb_root_path . 'includes/utf/utf_normalizer.' . $phpEx);
+ }
+
+ if (!is_array($strings))
+ {
+ utf_normalizer::nfkc($strings);
+ }
+ else if (is_array($strings))
+ {
+ foreach ($strings as $key => $string)
+ {
+ utf_normalizer::nfkc($strings[$key]);
+ }
+ }
+
+ return $strings;
+}
+
+?> \ No newline at end of file