whoami7 - Manager
:
/
home
/
kckglobal
/
www
/
portal
/
vendor
/
webklex
/
php-imap
/
src
/
Upload File:
files >> //home/kckglobal/www/portal/vendor/webklex/php-imap/src/EncodingAliases.php
<?php
/*
* File: EncodingAliases.php
* Category: -
* Author: S. Todorov (https://github.com/todorowww)
* Created: 23.04.18 14:16
* Updated: -
*
* Description:
*  Contains email encoding aliases that can occur when fetching emails. These can sometimes break iconv().
*  This file attempts to correct this by using a list of aliases and their mappings to supported iconv() encodings.
*/

namespace Webklex\PHPIMAP;

/**
 * Class EncodingAliases
 *
 * Static lookup table that maps charset names found in e-mails (lower-case keys) to a canonical
 * charset name, plus helpers to detect and convert string encodings based on that table.
 *
 * @package Webklex\PHPIMAP
 */
class EncodingAliases {

    /**
     * Contains email encoding mappings.
     *
     * Keys are lower-case alias names, values are the canonical charset names handed to the converters.
     * Purely numeric keys (e.g. "646") are stored by PHP as integer keys; lookups with the numeric
     * string still resolve to them.
     *
     * @var array<array-key, string>
     */
    private static array $aliases = [
        /*
        |--------------------------------------------------------------------------
        | Email encoding aliases
        |--------------------------------------------------------------------------
        |
        | Email encoding aliases used to convert to iconv supported charsets
        |
        |
        | This Source Code Form is subject to the terms of the Mozilla Public
        | License, v. 2.0. If a copy of the MPL was not distributed with this
        | file, You can obtain one at http://mozilla.org/MPL/2.0/.
        |
        | This Original Code has been modified by IBM Corporation.
        | Modifications made by IBM described herein are
        | Copyright (c) International Business Machines
        | Corporation, 1999
        |
        | Modifications to Mozilla code or documentation
        | identified per MPL Section 3.3
        |
        | Date         Modified by     Description of modification
        | 12/09/1999   IBM Corp.       Support for IBM codepages - 850,852,855,857,862,864
        |
        | Rule of this file:
        | 1. key should always be in lower case ascii so we can do case insensitive
        |    comparison in the code faster.
        | 2. value should be the one used in unicode converter
        |
        | 3. If the charset is not used for document charset, but font charset
        |    (e.g. XLFD charset- such as JIS x0201, JIS x0208), don't put here
        |
        */
        "ascii"                    => "us-ascii",
        "us-ascii"                 => "us-ascii",
        "ansi_x3.4-1968"           => "us-ascii",
        "646"                      => "us-ascii",
        "iso-8859-1"               => "ISO-8859-1",
        "iso-8859-2"               => "ISO-8859-2",
        "iso-8859-3"               => "ISO-8859-3",
        "iso-8859-4"               => "ISO-8859-4",
        "iso-8859-5"               => "ISO-8859-5",
        "iso-8859-6"               => "ISO-8859-6",
        "iso-8859-6-i"             => "ISO-8859-6-I",
        "iso-8859-6-e"             => "ISO-8859-6-E",
        "iso-8859-7"               => "ISO-8859-7",
        "iso-8859-8"               => "ISO-8859-8",
        "iso-8859-8-i"             => "ISO-8859-8-I",
        "iso-8859-8-e"             => "ISO-8859-8-E",
        "iso-8859-9"               => "ISO-8859-9",
        "iso-8859-10"              => "ISO-8859-10",
        "iso-8859-11"              => "ISO-8859-11",
        "iso-8859-13"              => "ISO-8859-13",
        "iso-8859-14"              => "ISO-8859-14",
        "iso-8859-15"              => "ISO-8859-15",
        "iso-8859-16"              => "ISO-8859-16",
        "iso-ir-111"               => "ISO-IR-111",
        "iso-2022-cn"              => "ISO-2022-CN",
        "iso-2022-cn-ext"          => "ISO-2022-CN",
        "iso-2022-kr"              => "ISO-2022-KR",
        "iso-2022-jp"              => "ISO-2022-JP",
        "utf-16be"                 => "UTF-16BE",
        "utf-16le"                 => "UTF-16LE",
        "utf-16"                   => "UTF-16",
        "windows-1250"             => "windows-1250",
        "windows-1251"             => "windows-1251",
        "windows-1252"             => "windows-1252",
        "windows-1253"             => "windows-1253",
        "windows-1254"             => "windows-1254",
        "windows-1255"             => "windows-1255",
        "windows-1256"             => "windows-1256",
        "windows-1257"             => "windows-1257",
        "windows-1258"             => "windows-1258",
        "ibm866"                   => "IBM866",
        "ibm850"                   => "IBM850",
        "ibm852"                   => "IBM852",
        "ibm855"                   => "IBM855",
        "ibm857"                   => "IBM857",
        "ibm862"                   => "IBM862",
        "ibm864"                   => "IBM864",
        "utf-8"                    => "UTF-8",
        "utf-7"                    => "UTF-7",
        "utf-7-imap"               => "UTF7-IMAP",
        "utf7-imap"                => "UTF7-IMAP",
        "shift_jis"                => "Shift_JIS",
        "big5"                     => "Big5",
        "euc-jp"                   => "EUC-JP",
        "euc-kr"                   => "EUC-KR",
        "gb2312"                   => "GB2312",
        "gb18030"                  => "gb18030",
        "viscii"                   => "VISCII",
        "koi8-r"                   => "KOI8-R",
        "koi8_r"                   => "KOI8-R",
        "cskoi8r"                  => "KOI8-R",
        "koi"                      => "KOI8-R",
        "koi8"                     => "KOI8-R",
        "koi8-u"                   => "KOI8-U",
        "tis-620"                  => "TIS-620",
        "t.61-8bit"                => "T.61-8bit",
        "hz-gb-2312"               => "HZ-GB-2312",
        "big5-hkscs"               => "Big5-HKSCS",
        "gbk"                      => "gbk",
        "cns11643"                 => "x-euc-tw",
        //
        // Aliases for ISO-8859-1
        //
        "latin1"                   => "ISO-8859-1",
        "iso_8859-1"               => "ISO-8859-1",
        "iso8859-1"                => "ISO-8859-1",
        "iso8859-2"                => "ISO-8859-2",
        "iso8859-3"                => "ISO-8859-3",
        "iso8859-4"                => "ISO-8859-4",
        "iso8859-5"                => "ISO-8859-5",
        "iso8859-6"                => "ISO-8859-6",
        "iso8859-7"                => "ISO-8859-7",
        "iso8859-8"                => "ISO-8859-8",
        "iso8859-9"                => "ISO-8859-9",
        "iso8859-10"               => "ISO-8859-10",
        "iso8859-11"               => "ISO-8859-11",
        "iso8859-13"               => "ISO-8859-13",
        "iso8859-14"               => "ISO-8859-14",
        "iso8859-15"               => "ISO-8859-15",
        "iso_8859-1:1987"          => "ISO-8859-1",
        "iso-ir-100"               => "ISO-8859-1",
        "l1"                       => "ISO-8859-1",
        "ibm819"                   => "ISO-8859-1",
        "cp819"                    => "ISO-8859-1",
        "csisolatin1"              => "ISO-8859-1",
        //
        // Aliases for ISO-8859-2
        //
        "latin2"                   => "ISO-8859-2",
        "iso_8859-2"               => "ISO-8859-2",
        "iso_8859-2:1987"          => "ISO-8859-2",
        "iso-ir-101"               => "ISO-8859-2",
        "l2"                       => "ISO-8859-2",
        "csisolatin2"              => "ISO-8859-2",
        //
        // Aliases for ISO-8859-3
        //
        "latin3"                   => "ISO-8859-3",
        "iso_8859-3"               => "ISO-8859-3",
        "iso_8859-3:1988"          => "ISO-8859-3",
        "iso-ir-109"               => "ISO-8859-3",
        "l3"                       => "ISO-8859-3",
        "csisolatin3"              => "ISO-8859-3",
        //
        // Aliases for ISO-8859-4
        //
        "latin4"                   => "ISO-8859-4",
        "iso_8859-4"               => "ISO-8859-4",
        "iso_8859-4:1988"          => "ISO-8859-4",
        "iso-ir-110"               => "ISO-8859-4",
        "l4"                       => "ISO-8859-4",
        "csisolatin4"              => "ISO-8859-4",
        //
        // Aliases for ISO-8859-5
        //
        "cyrillic"                 => "ISO-8859-5",
        "iso_8859-5"               => "ISO-8859-5",
        "iso_8859-5:1988"          => "ISO-8859-5",
        "iso-ir-144"               => "ISO-8859-5",
        "csisolatincyrillic"       => "ISO-8859-5",
        //
        // Aliases for ISO-8859-6
        //
        "arabic"                   => "ISO-8859-6",
        "iso_8859-6"               => "ISO-8859-6",
        "iso_8859-6:1987"          => "ISO-8859-6",
        "iso-ir-127"               => "ISO-8859-6",
        "ecma-114"                 => "ISO-8859-6",
        "asmo-708"                 => "ISO-8859-6",
        "csisolatinarabic"         => "ISO-8859-6",
        //
        // Aliases for ISO-8859-6-I
        //
        "csiso88596i"              => "ISO-8859-6-I",
        //
        // Aliases for ISO-8859-6-E
        //
        "csiso88596e"              => "ISO-8859-6-E",
        //
        // Aliases for ISO-8859-7
        //
        "greek"                    => "ISO-8859-7",
        "greek8"                   => "ISO-8859-7",
        "sun_eu_greek"             => "ISO-8859-7",
        "iso_8859-7"               => "ISO-8859-7",
        "iso_8859-7:1987"          => "ISO-8859-7",
        "iso-ir-126"               => "ISO-8859-7",
        "elot_928"                 => "ISO-8859-7",
        "ecma-118"                 => "ISO-8859-7",
        "csisolatingreek"          => "ISO-8859-7",
        //
        // Aliases for ISO-8859-8
        //
        "hebrew"                   => "ISO-8859-8",
        "iso_8859-8"               => "ISO-8859-8",
        "visual"                   => "ISO-8859-8",
        "iso_8859-8:1988"          => "ISO-8859-8",
        "iso-ir-138"               => "ISO-8859-8",
        "csisolatinhebrew"         => "ISO-8859-8",
        //
        // Aliases for ISO-8859-8-I
        //
        "csiso88598i"              => "ISO-8859-8-I",
        "iso-8859-8i"              => "ISO-8859-8-I",
        "logical"                  => "ISO-8859-8-I",
        //
        // Aliases for ISO-8859-8-E
        //
        "csiso88598e"              => "ISO-8859-8-E",
        //
        // Aliases for ISO-8859-9
        //
        "latin5"                   => "ISO-8859-9",
        "iso_8859-9"               => "ISO-8859-9",
        "iso_8859-9:1989"          => "ISO-8859-9",
        "iso-ir-148"               => "ISO-8859-9",
        "l5"                       => "ISO-8859-9",
        "csisolatin5"              => "ISO-8859-9",
        //
        // Aliases for UTF-8
        //
        "unicode-1-1-utf-8"        => "UTF-8",
        // nl_langinfo(CODESET) in HP/UX returns 'utf8' under UTF-8 locales
        "utf8"                     => "UTF-8",
        //
        // Aliases for Shift_JIS
        //
        "x-sjis"                   => "Shift_JIS",
        "shift-jis"                => "Shift_JIS",
        "ms_kanji"                 => "Shift_JIS",
        "csshiftjis"               => "Shift_JIS",
        "windows-31j"              => "Shift_JIS",
        "cp932"                    => "Shift_JIS",
        "sjis"                     => "Shift_JIS",
        //
        // Aliases for EUC_JP
        //
        "cseucpkdfmtjapanese"      => "EUC-JP",
        "x-euc-jp"                 => "EUC-JP",
        //
        // Aliases for ISO-2022-JP
        //
        "csiso2022jp"              => "ISO-2022-JP",
        // The following are really not aliases ISO-2022-JP, but sharing the same decoder
        "iso-2022-jp-2"            => "ISO-2022-JP",
        "csiso2022jp2"             => "ISO-2022-JP",
        //
        // Aliases for Big5
        //
        "csbig5"                   => "Big5",
        "cn-big5"                  => "Big5",
        // x-x-big5 is not really a alias for Big5, add it only for MS FrontPage
        "x-x-big5"                 => "Big5",
        // Sun Solaris
        "zh_tw-big5"               => "Big5",
        //
        // Aliases for EUC-KR
        //
        "cseuckr"                  => "EUC-KR",
        "ks_c_5601-1987"           => "EUC-KR",
        "iso-ir-149"               => "EUC-KR",
        "ks_c_5601-1989"           => "EUC-KR",
        "ksc_5601"                 => "EUC-KR",
        "ksc5601"                  => "EUC-KR",
        "korean"                   => "EUC-KR",
        "csksc56011987"            => "EUC-KR",
        "5601"                     => "EUC-KR",
        "windows-949"              => "EUC-KR",
        //
        // Aliases for GB2312
        //
        // The following are really not aliases GB2312, add them only for MS FrontPage
        "gb_2312-80"               => "GB2312",
        "iso-ir-58"                => "GB2312",
        "chinese"                  => "GB2312",
        "csiso58gb231280"          => "GB2312",
        "csgb2312"                 => "GB2312",
        "zh_cn.euc"                => "GB2312",
        // Sun Solaris
        "gb_2312"                  => "GB2312",
        //
        // Aliases for windows-125x
        //
        "x-cp1250"                 => "windows-1250",
        "x-cp1251"                 => "windows-1251",
        "x-cp1252"                 => "windows-1252",
        "x-cp1253"                 => "windows-1253",
        "x-cp1254"                 => "windows-1254",
        "x-cp1255"                 => "windows-1255",
        "x-cp1256"                 => "windows-1256",
        "x-cp1257"                 => "windows-1257",
        "x-cp1258"                 => "windows-1258",
        //
        // Aliases for windows-874
        //
        "windows-874"              => "windows-874",
        "ibm874"                   => "windows-874",
        "dos-874"                  => "windows-874",
        //
        // Aliases for macintosh
        //
        "macintosh"                => "macintosh",
        "x-mac-roman"              => "macintosh",
        "mac"                      => "macintosh",
        "csmacintosh"              => "macintosh",
        //
        // Aliases for IBM866
        //
        "cp866"                    => "IBM866",
        "cp-866"                   => "IBM866",
        "866"                      => "IBM866",
        "csibm866"                 => "IBM866",
        //
        // Aliases for IBM850
        //
        "cp850"                    => "IBM850",
        "850"                      => "IBM850",
        "csibm850"                 => "IBM850",
        //
        // Aliases for IBM852
        //
        "cp852"                    => "IBM852",
        "852"                      => "IBM852",
        "csibm852"                 => "IBM852",
        //
        // Aliases for IBM855
        //
        "cp855"                    => "IBM855",
        "855"                      => "IBM855",
        "csibm855"                 => "IBM855",
        //
        // Aliases for IBM857
        //
        "cp857"                    => "IBM857",
        "857"                      => "IBM857",
        "csibm857"                 => "IBM857",
        //
        // Aliases for IBM862
        //
        "cp862"                    => "IBM862",
        "862"                      => "IBM862",
        "csibm862"                 => "IBM862",
        //
        // Aliases for IBM864
        //
        "cp864"                    => "IBM864",
        "864"                      => "IBM864",
        "csibm864"                 => "IBM864",
        "ibm-864"                  => "IBM864",
        //
        // Aliases for T.61-8bit
        //
        "t.61"                     => "T.61-8bit",
        "iso-ir-103"               => "T.61-8bit",
        "csiso103t618bit"          => "T.61-8bit",
        //
        // Aliases for UTF-7
        //
        "x-unicode-2-0-utf-7"      => "UTF-7",
        "unicode-2-0-utf-7"        => "UTF-7",
        "unicode-1-1-utf-7"        => "UTF-7",
        "csunicode11utf7"          => "UTF-7",
        //
        // Aliases for ISO-10646-UCS-2
        //
        "csunicode"                => "UTF-16BE",
        "csunicode11"              => "UTF-16BE",
        "iso-10646-ucs-basic"      => "UTF-16BE",
        "csunicodeascii"           => "UTF-16BE",
        "iso-10646-unicode-latin1" => "UTF-16BE",
        "csunicodelatin1"          => "UTF-16BE",
        "iso-10646"                => "UTF-16BE",
        "iso-10646-j-1"            => "UTF-16BE",
        //
        // Aliases for ISO-8859-10
        //
        "latin6"                   => "ISO-8859-10",
        "iso-ir-157"               => "ISO-8859-10",
        "l6"                       => "ISO-8859-10",
        // Currently .properties cannot handle : in key
        //iso_8859-10:1992" => "ISO-8859-10",
        "csisolatin6"              => "ISO-8859-10",
        //
        // Aliases for ISO-8859-15
        //
        "iso_8859-15"              => "ISO-8859-15",
        "csisolatin9"              => "ISO-8859-15",
        "l9"                       => "ISO-8859-15",
        //
        // Aliases for ISO-IR-111
        //
        "ecma-cyrillic"            => "ISO-IR-111",
        "csiso111ecmacyrillic"     => "ISO-IR-111",
        //
        // Aliases for ISO-2022-KR
        //
        "csiso2022kr"              => "ISO-2022-KR",
        //
        // Aliases for VISCII
        //
        "csviscii"                 => "VISCII",
        //
        // Aliases for x-euc-tw
        //
        "zh_tw-euc"                => "x-euc-tw",
        //
        // Following names appears in unix nl_langinfo(CODESET)
        // They can be compiled as platform specific if necessary
        // DONT put things here if it does not look generic enough (like hp15CN)
        //
        "iso88591"                 => "ISO-8859-1",
        "iso88592"                 => "ISO-8859-2",
        "iso88593"                 => "ISO-8859-3",
        "iso88594"                 => "ISO-8859-4",
        "iso88595"                 => "ISO-8859-5",
        "iso88596"                 => "ISO-8859-6",
        "iso88597"                 => "ISO-8859-7",
        "iso88598"                 => "ISO-8859-8",
        "iso88599"                 => "ISO-8859-9",
        "iso885910"                => "ISO-8859-10",
        "iso885911"                => "ISO-8859-11",
        "iso885912"                => "ISO-8859-12",
        "iso885913"                => "ISO-8859-13",
        "iso885914"                => "ISO-8859-14",
        "iso885915"                => "ISO-8859-15",
        "cp1250"                   => "windows-1250",
        "cp1251"                   => "windows-1251",
        "cp1252"                   => "windows-1252",
        "cp1253"                   => "windows-1253",
        "cp1254"                   => "windows-1254",
        "cp1255"                   => "windows-1255",
        "cp1256"                   => "windows-1256",
        "cp1257"                   => "windows-1257",
        "cp1258"                   => "windows-1258",
        "x-gbk"                    => "gbk",
        "windows-936"              => "gbk",
        "ansi-1251"                => "windows-1251",
    ];

    /**
     * Returns the proper encoding mapping, if one exists.
     *
     * The lookup is case-insensitive. When no alias is known, a truthy $fallback is returned; otherwise
     * the unchanged $encoding is returned (an empty string when $encoding is null, so the declared
     * string return type always holds).
     *
     * @param string|null $encoding charset name to resolve
     * @param string|null $fallback value to return when $encoding is not a known alias
     *
     * @return string
     */
    public static function get(?string $encoding, ?string $fallback = null): string {
        $key = strtolower($encoding ?? '');
        if (isset(self::$aliases[$key])) {
            return self::$aliases[$key];
        }

        return $fallback ?: ($encoding ?? '');
    }

    /**
     * Convert the encoding of a string
     *
     * Both charset names are resolved through the alias table first; names that are not known aliases
     * are replaced by the encoding detected for $str. iconv() is preferred when it is available and
     * neither side is UTF-7 like; mb_convert_encoding() is used otherwise.
     *
     * @param $str
     * @param string $from source charset name or alias
     * @param string $to target charset name or alias
     *
     * @return mixed the input unchanged when no conversion is needed or the conversion threw, otherwise
     *               the converter's return value (iconv() returns false on failure)
     */
    public static function convert($str, string $from = "ISO-8859-2", string $to = "UTF-8"): mixed {
        // Detect once; the same fallback applies to both the source and the target charset.
        $detected = self::detectEncoding($str);
        $from = self::get($from, $detected);
        $to = self::get($to, $detected);

        if ($from === $to) {
            return $str;
        }

        // We don't need to do convertEncoding() if charset is ASCII (us-ascii):
        //     ASCII is a subset of UTF-8, so all ASCII files are already UTF-8 encoded
        //     https://stackoverflow.com/a/11303410
        //
        // us-ascii is the same as ASCII:
        //     ASCII is the traditional name for the encoding system; the Internet Assigned Numbers Authority (IANA)
        //     prefers the updated name US-ASCII, which clarifies that this system was developed in the US and
        //     based on the typographical symbols predominantly in use there.
        //     https://en.wikipedia.org/wiki/ASCII
        //
        // convertEncoding() function basically means convertToUtf8(), so when we convert ASCII string into UTF-8 it gets broken.
        if (strtolower($from) === 'us-ascii' && $to === 'UTF-8') {
            return $str;
        }

        try {
            if (function_exists('iconv') && !self::isUtf7($from) && !self::isUtf7($to)) {
                return iconv($from, $to, $str);
            }
            if (!$from) {
                return mb_convert_encoding($str, $to);
            }
            return mb_convert_encoding($str, $to, $from);
        } catch (\Exception | \ValueError $e) {
            // \ValueError is what mb_convert_encoding() throws for an unknown charset name on PHP 8;
            // it is not an \Exception, so it has to be listed explicitly to reach this fallback.
            if (str_contains($from, '-')) {
                $dashless = str_replace('-', '', $from);
                // Retry with the dash-less name only when it resolves to a different charset.
                // Otherwise (e.g. "ISO88592" maps straight back to "ISO-8859-2") the identical
                // failing conversion would be attempted again and recurse without end.
                if (self::get($dashless, $detected) !== $from) {
                    return self::convert($str, $dashless, $to);
                }
            }

            return $str;
        }
    }

    /**
     * Attempts to detect the encoding of a string
     *
     * Runs mb_detect_encoding() in strict mode against the canonical charsets of the alias table,
     * minus a fixed exclusion list, and falls back to "UTF-8" when nothing matches.
     *
     * @param string $string
     *
     * @return string
     */
    public static function detectEncoding(string $string): string {
        // Charsets kept out of the detection candidates.
        // NOTE(review): presumably these are not usable with mb_detect_encoding() - confirm.
        $excluded = [
            'ISO-8859-6-I', 'ISO-8859-6-E', 'ISO-8859-8-I', 'ISO-8859-8-E',
            'ISO-8859-11', 'ISO-8859-13', 'ISO-8859-14', 'ISO-8859-15', 'ISO-8859-16', 'ISO-IR-111', "ISO-2022-CN",
            "windows-1250", "windows-1253", "windows-1255", "windows-1256", "windows-1257", "windows-1258",
            "IBM852", "IBM855", "IBM857", "IBM866", "IBM864", "IBM862", "KOI8-R", "KOI8-U",
            "TIS-620", "ISO-8859-1", "ISO-8859-2", "ISO-8859-3", "ISO-8859-4",
            "VISCII", "T.61-8bit", "Big5-HKSCS", "windows-874", "macintosh", "ISO-8859-12", "ISO-8859-7",
            "IMAP-UTF-7",
        ];

        $candidates = array_filter(
            self::getEncodings(),
            static fn (string $value): bool => !in_array($value, $excluded, true)
        );

        $encoding = mb_detect_encoding($string, $candidates, true);
        if ($encoding === false) {
            $encoding = 'UTF-8';
        }

        return $encoding;
    }

    /**
     * Returns all available encodings
     *
     * The result is the list of distinct canonical charset names (the values of the alias table),
     * in order of first appearance.
     *
     * @return array
     */
    public static function getEncodings(): array {
        return array_values(array_unique(self::$aliases));
    }

    /**
     * Returns true if the encoding is UTF-7 like
     *
     * Dashes and letter case are ignored, so "UTF-7", "utf7" and "UTF7-IMAP" all match.
     *
     * @param string $encoding
     *
     * @return bool
     */
    public static function isUtf7(string $encoding): bool {
        return str_contains(str_replace("-", "", strtolower($encoding)), "utf7");
    }

    /**
     * Check if an encoding is supported
     *
     * An encoding counts as supported when its lower-cased name is a key of the alias table.
     *
     * @param string $encoding
     *
     * @return bool
     */
    public static function has(string $encoding): bool {
        return isset(self::$aliases[strtolower($encoding)]);
    }
}
Copyright ©2021 || Defacer Indonesia