本文介绍了 PHP glob() 处理包含 UTF-8 字符的目录路径的方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧! 问题描述 29岁程序员,3月因学历无情被辞! 我想获取某个可变路径下的所有文件……目前我使用 glob(),但一直找不到办法打开包含 ä、ü、ö 的路径……我已经尝试了很多种解码/编码方式,但似乎都不起作用。当我把文件名和变量中的 ä、ü、ö 换掉之后,就能得到正确的结果……所以我猜问题一定与 UTF-8 字符有关。 PHP 脚本 // 通过 ajax post 获取变量 $town = $_POST['town']; // 例如 "Bankdrücken" $pfad = 'maps/' . $country . '/' . $town . '/*.jpg'; $files = glob($pfad); $n_files = count($files); $erg = new stdClass(); $erg->files = $files; $erg->n_files = $n_files; echo json_encode($erg); return; Javascript .success(function(data){ console.log(data); ... 这不会返回 .../Bankdrücken,而是返回 .../Bankdr\u00fccken。当我执行 jQuery.parseJSON(data) 时,它返回 ./Bankdrücken,但这是在客户端,而服务器端此前就已经失败了……我就是没法把它弄对。 这里是一个真实的例子:如果文件路径中的 $variable 包含 ä ö ü,glob() 就不会返回任何文件…… // 现在我打印出 ajax 调用返回的内容... 试验一(带 ü): "stdClass Object( [files] => Array ( ) [n_files] => 0 [filename] => workout_uebungen/Brust/Liegestützen/ )" /**************************************/ 试验二(我把文件名从 Liegestützen 改为 Liegestuetzen,并把 $var 从 Liegestützen 改为 Liegestuetzen): "stdClass Object( [files] => Array ( [0] => workout_uebungen/Brust/Liegestuetzen/1.jpg [1] => workout_uebungen/Brust/Liegestuetzen/2.jpg ) [n_files] => 2 [filename] => workout_uebungen/Brust/Liegestuetzen/ )" 所以试验二给出了正确的答案,但我不想修改我所有的文件……而且对我来说这也是一个不干净的解决方案…… 
<?php $ b $ b class Encoding { protected static $ win1252ToUtf8 = array( 128 =>\xe2\x82\xac, 130 => ;\\\ xe2\x80\x9a, 131 =>\\\\\\\\ x92, 132 => 133 =>\ xe2\x80\xa6, 134 =>\ xe2\x80\xa0, 135 =>\\ \\ xe2 \x80 \xa1, 136 =>\ xcb\x86, 137 => b 138 =>\ xc5 \xa0, 139 =>\ xe2\x80\xb9, 140 =&g t;\xc5\x92, 142 =>\xc5\xbd, 145 =>\\ \\ xe2 \x80 \x98, 146 => \xe2\x80\x99, 147 => \xe2\x80\x9c, 148 => \xe2\x80\x9d, 149 => \xe2\x80\xa2, 150 => \xe2\x80\x93, 151 => \xe2\x80\x94, 152 => \xcb\x9c, 153 => \xe2\x84\xa2, 154 => \xc5\xa1, 155 => \xe2\x80\xba, 156 => \xc5\x93, 158 => \xc5\xbe, 159 => \xc5\xb8); $ b $ protected static $ brokenUtf8ToUtf8 = array(\xc2\x80=" \xe2\x82\xac, \xc2\x82=>\ xe2\x80\x9a,\xc2\x83=>\xc6\x92,\xc2\x84=>\ xe2\x80\x9e,\xc2\x85=>\ xe2\x80\xa6 ,\xc2\x86=>\xe2\x80\xa0,\xc2\x87=>\xe2\x80 \ xa1,\xc2\x88=>\xcb\x86,\xc2\x89=>\xe2\ x80 \xb0,\xc2\x8a=>\xc5\xa0,\xc2\x8b=>\xe2\\ \\ x80 \xb9,\xc2\x8c=>\xc5\x92, \xc2\x8e=> \xc5\xbd, \xc2\x91=> \xe2\x80\x98,\xc2\x92=>\xe2\x80\x99,\xc2\x93 = \\xe2 \x80 \x9c,\xc2\x94=>\ xe2\x80\x9d,\ xc2 \x95=>\xe2\x80\xa2,\xc2\x96=> \xe2\x80\x93,\xc2\x97=> \xe2\x80\x94,\xc2\x98=> \xcb\x9c,\xc2\x99=> \xe2\x84\xa2,\xc2\x9a=> \xc5\xa1,\xc2\x9b=> \xe2\x80\xba,\xc2\x9c=> \xc5\x93, \xc2\x9e=> \xc5\xbe,\xc2\x9f=> \xc5\xb8); protected static $ utf8ToWin1252 = array(\xe2\x82\xac=&; \x80, \ xe2 \x80\x9a=>\x82,\xc6\x92=>\x83,\xe2\x80\\ \\ x9e=>\x84,\xe2\x80\xa6=>\x85,\xe2\x80\xa0 =>\x86,\xe2\x80\xa1=>\x87,\xcb\x86=> \ x88,\xe2\x80\xb0=>\x89,\xc5\xa0=>\x8a, \xe2\x80\xb9=>\x8b,\xc5\x92=>\x8c, \xc5\xbd=>\x8e, \xe2\x80\x98=>\x91 ,\xe2\x80\x99=>\x92,\xe2\x80 \\ x9c=>\x93,\xe2\x80\x9d=>\x94,\xe2\x80\xa2 =>\x95,\xe2\x80\x93=>\x96,\xe2\x80\x94= >\x97,\xcb\x9c=> \x98,\xe2\x84\xa2=> \x99,\xc5\xa1=> \x9a,\xe2\x80\xba=> \x9b,\xc5\x93=> \x9c, \xc5\xbe=> \x9e,\xc5\xb8=> \x9f); 静态函数toUTF8($ text){ / ** *函数编码:: toUTF8 * *这个函数只保留UTF8字符,同时将几乎所有非UTF8转换为UTF8。 * *它假定原始字符串的编码是Windows-1252或ISO 8859-1。 * *如果这些字符中的任何一个出现这种情况,则可能无法将字符转换为UTF-8: * * 1) *之后是以下任何一个:(B组) 
*,£¤¥|§¨¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯° b $ b *例如:%ABREPRESENT%C9%BB。 «REPRESENTÉ»(%AB)字符将被转换,但是É后跟»(%C9%BB) *也是一个有效的Unicode字符,将保持不变。 * * 2)当其中任何一个:àáâããäæçèéêëìíîï后跟B组的两个字符, * 3),其中任何一个:ðñòó后跟B组中的三个字符。 b $ b * * @name toUTF8 * @param string $ text任何字符串。 * @return string相同的字符串,UTF8编码 * * / if(is_array($ text)) { foreach($ text为$ k => $ v) { $ text [$ k] = self :: toUTF8($ v); } 返回$ text; } elseif(is_string($ text)){ $ max = strlen($ text); $ buf =; ($ i = 0; $ i $ c1 = $ text {$ i}; if($ c1> =\xc0){//如果已经不是UTF8,应该转换为UTF8 $ c2 = $ i + 1> = $ max? \x00:$ text {$ i + 1}; $ c3 = $ i + 2> = $ max? \x00:$ text {$ i + 2}; $ c4 = $ i + 3> = $ max? \x00:$ text {$ i + 3}; if($ c1> =\xc0& $ c1< =\xdf){// //看起来像2个字节UTF8 if($ c2> =\\ \\ x80&& $ c2< =\xbf){//是的,几乎可以肯定的是UTF8已经是 $ buf。= $ c1了。 $ C2; $ i ++; } else {//无效的UTF8。转换它。 $ cc1 =(chr(ord($ c1)/ 64)|\xc0); $ cc2 =($ c1&\x3f)| \x80; $ buf。= $ cc1。 $ CC2; } } elseif($ c1> =\xe0& $ c1< =\ xef){//看起来像3个字节UTF8 if($ c2> =\x80&& $ c2< =\xbf&&& $ c3> =\x80&& $ c3< =\xbf ){//是的,几乎可以肯定它的UTF8已经是 $ buf。= $ c1。 $ c2。 $ C3; $ i = $ i + 2; } else {//无效的UTF8。转换它。 $ cc1 =(chr(ord($ c1)/ 64)|\xc0); $ cc2 =($ c1&\x3f)| \x80; $ buf。= $ cc1。 $ CC2; } } elseif($ c1> =\xf0& $ c1< =\xf7){//看起来像4个字节UTF8 if($ c2> =\x80&& $ c2< =\xbf&& $ c3> =\x80&& $ c3< =\xbf && $ c4> =\x80&& $ c4< =\xbf){//是的,几乎可以肯定的是UTF8已经 $ buf。= $ c1 。 $ c2。 $ C3; $ i = $ i + 2; } else {//无效的UTF8。转换它。 $ cc1 =(chr(ord($ c1)/ 64)|\xc0); $ cc2 =($ c1&\x3f)| \x80; $ buf。= $ cc1。 $ CC2; } } else {//看起来不像UTF8,但应该转换 $ cc1 =(chr(ord($ c1)/ 64)|\xc0) ; $ cc2 =(($ c1&\x3f)|\x80); $ buf。= $ cc1。 $ CC2; $(b $ b)elseif(($ c1&\xc0)==\x80){//需要转换 if(isset(self :: $ win1252ToUtf8 [ ord($ c1)])){//在Windows-1252中找到特殊情况 $ buf。= self :: $ win1252ToUtf8 [ord($ c1)]; } else { $ cc1 =(chr(ord($ c1)/ 64)|\xc0); $ cc2 =(($ c1&\x3f)|\x80); $ buf。= $ cc1。 $ CC2; } } else {//不需要convesion $ buf。= $ c1; } } return $ buf; } else { return $ text; $ b静态函数toWin1252($ text){ if(is_array($ text)){ foreach($ text as $ k => $ 
v){ $ text [$ k] = self :: toWin1252($ v); } 返回$ text; $ self_toUTF8($ text)} elseif(is_string($ text)){ return utf8_decode(str_replace(array_keys(self :: $ utf8ToWin1252),array_values(self :: $ utf8ToWin1252) ); } else { return $ text; 静态函数toISO8859($ text){返回self :: toWin1252($ text); 静态函数toLatin1($ text){ return self :: toWin1252($ text); static function fixUTF8($ text){ if(is_array($ text)){ foreach($ text as $ k => $ v ){ $ text [$ k] = self :: fixUTF8($ v); } 返回$ text; } $ last =; while($ last<> $ text){ $ last = $ text; $ text = self :: toUTF8(utf8_decode(str_replace(array_keys(self :: $ utf8ToWin1252),array_values(self :: $ utf8ToWin1252),$ text))); } $ text = self :: toUTF8(utf8_decode(str_replace(array_keys(self :: $ utf8ToWin1252),array_values(self :: $ utf8ToWin1252),$ text))); 返回$ text; 静态函数UTF8FixWin1252Chars($ text){ //如果您收到一个从Windows-1252转换的UTF-8字符串,因为它是ISO8859-1 //(忽略从80到9F的Windows-1252字符)使用这个函数来修复它。 //见:http://en.wikipedia.org/wiki/Windows-1252 返回str_replace(array_keys(self :: $ brokenUtf8ToUtf8),array_values(self :: $ brokenUtf8ToUtf8 ),$ text); 静态函数removeBOM($ str =){ if(substr($ str,0,3)== pack(CCC,0xef,0xbb ,0xbf)){ $ str = substr($ str,3); } return $ str; } } ?> 为了使用它,你需要包含这个类的脚本,并且像这样: Encoding :: toUtf8('Bankdrücken'); I am trying to get all files within a variable path ... atm i am using glob() but i just can not find a way to open a path which contains ä ü ö in it...I have tried a lot of de/encoding but none seem to work..When i change the ä ü ö in the filename and the variable i get the right answer... so it has to be smth with utf8 chars...i guessPHP script// get the variable via ajax post$town =$_POST['town']; // for example "Bankdrücken"$pfad = 'maps/'. $country.'/'. 
$town . '/*.jpg';
$files = glob($pfad);
$n_files = count($files);
$erg = new stdClass();
$erg->files = $files;
$erg->n_files = $n_files;
echo json_encode($erg);
return;

JavaScript:

.success(function(data){ console.log(data); ...

This will not return .../Bankdrücken but .../Bankdr\u00fccken. When I call jQuery.parseJSON(data) it returns ./Bankdrücken, but since that happens on the client side, the server side has already failed by then. I just can't get it right.

Here is a real example: if the $variable within the file path contains ä, ö or ü, glob() will not return any files.

// now I am printing out what I get back from the ajax call...

Trial one (with ü):
"stdClass Object( [files] => Array ( ) [n_files] => 0 [filename] => workout_uebungen/Brust/Liegestützen/ )"

/**************************************/

Trial two (I changed the file name from Liegestützen => Liegestuetzen and the $var from Liegestützen => Liegestuetzen):
"stdClass Object( [files] => Array ( [0] => workout_uebungen/Brust/Liegestuetzen/1.jpg [1] => workout_uebungen/Brust/Liegestuetzen/2.jpg ) [n_files] => 2 [filename] => workout_uebungen/Brust/Liegestuetzen/ )"

So trial two gives the right answer, but I don't want to change all my files; it would also be a dirty solution to me...
解决方案 In that case you can use:
<?php
/**
 * Converts text between Windows-1252 / ISO 8859-1 and UTF-8, and repairs
 * strings that were UTF-8 encoded more than once.
 *
 * toUTF8(), toWin1252() (and its aliases) and fixUTF8() accept either a
 * single value or an array, which is converted element by element.
 */
class Encoding
{
    /**
     * Windows-1252 bytes 0x80-0x9F (keyed by ordinal) that differ from
     * ISO 8859-1, mapped to the UTF-8 encoding of the character they stand for.
     */
    protected static $win1252ToUtf8 = array(
        128 => "\xe2\x82\xac",
        130 => "\xe2\x80\x9a",
        131 => "\xc6\x92",
        132 => "\xe2\x80\x9e",
        133 => "\xe2\x80\xa6",
        134 => "\xe2\x80\xa0",
        135 => "\xe2\x80\xa1",
        136 => "\xcb\x86",
        137 => "\xe2\x80\xb0",
        138 => "\xc5\xa0",
        139 => "\xe2\x80\xb9",
        140 => "\xc5\x92",
        142 => "\xc5\xbd",
        145 => "\xe2\x80\x98",
        146 => "\xe2\x80\x99",
        147 => "\xe2\x80\x9c",
        148 => "\xe2\x80\x9d",
        149 => "\xe2\x80\xa2",
        150 => "\xe2\x80\x93",
        151 => "\xe2\x80\x94",
        152 => "\xcb\x9c",
        153 => "\xe2\x84\xa2",
        154 => "\xc5\xa1",
        155 => "\xe2\x80\xba",
        156 => "\xc5\x93",
        158 => "\xc5\xbe",
        159 => "\xc5\xb8"
    );

    /**
     * UTF-8 sequences produced when a Windows-1252 byte 0x80-0x9F was encoded
     * as if it were ISO 8859-1, mapped to the UTF-8 sequence that was meant.
     */
    protected static $brokenUtf8ToUtf8 = array(
        "\xc2\x80" => "\xe2\x82\xac",
        "\xc2\x82" => "\xe2\x80\x9a",
        "\xc2\x83" => "\xc6\x92",
        "\xc2\x84" => "\xe2\x80\x9e",
        "\xc2\x85" => "\xe2\x80\xa6",
        "\xc2\x86" => "\xe2\x80\xa0",
        "\xc2\x87" => "\xe2\x80\xa1",
        "\xc2\x88" => "\xcb\x86",
        "\xc2\x89" => "\xe2\x80\xb0",
        "\xc2\x8a" => "\xc5\xa0",
        "\xc2\x8b" => "\xe2\x80\xb9",
        "\xc2\x8c" => "\xc5\x92",
        "\xc2\x8e" => "\xc5\xbd",
        "\xc2\x91" => "\xe2\x80\x98",
        "\xc2\x92" => "\xe2\x80\x99",
        "\xc2\x93" => "\xe2\x80\x9c",
        "\xc2\x94" => "\xe2\x80\x9d",
        "\xc2\x95" => "\xe2\x80\xa2",
        "\xc2\x96" => "\xe2\x80\x93",
        "\xc2\x97" => "\xe2\x80\x94",
        "\xc2\x98" => "\xcb\x9c",
        "\xc2\x99" => "\xe2\x84\xa2",
        "\xc2\x9a" => "\xc5\xa1",
        "\xc2\x9b" => "\xe2\x80\xba",
        "\xc2\x9c" => "\xc5\x93",
        "\xc2\x9e" => "\xc5\xbe",
        "\xc2\x9f" => "\xc5\xb8"
    );

    /**
     * UTF-8 sequences of the Windows-1252 specific characters, mapped back to
     * their single Windows-1252 byte (inverse of $win1252ToUtf8).
     */
    protected static $utf8ToWin1252 = array(
        "\xe2\x82\xac" => "\x80",
        "\xe2\x80\x9a" => "\x82",
        "\xc6\x92"     => "\x83",
        "\xe2\x80\x9e" => "\x84",
        "\xe2\x80\xa6" => "\x85",
        "\xe2\x80\xa0" => "\x86",
        "\xe2\x80\xa1" => "\x87",
        "\xcb\x86"     => "\x88",
        "\xe2\x80\xb0" => "\x89",
        "\xc5\xa0"     => "\x8a",
        "\xe2\x80\xb9" => "\x8b",
        "\xc5\x92"     => "\x8c",
        "\xc5\xbd"     => "\x8e",
        "\xe2\x80\x98" => "\x91",
        "\xe2\x80\x99" => "\x92",
        "\xe2\x80\x9c" => "\x93",
        "\xe2\x80\x9d" => "\x94",
        "\xe2\x80\xa2" => "\x95",
        "\xe2\x80\x93" => "\x96",
        "\xe2\x80\x94" => "\x97",
        "\xcb\x9c"     => "\x98",
        "\xe2\x84\xa2" => "\x99",
        "\xc5\xa1"     => "\x9a",
        "\xe2\x80\xba" => "\x9b",
        "\xc5\x93"     => "\x9c",
        "\xc5\xbe"     => "\x9e",
        "\xc5\xb8"     => "\x9f"
    );

    /**
     * Leaves UTF-8 sequences alone while converting every other byte to UTF-8.
     *
     * Bytes that are not part of a well-formed UTF-8 sequence are assumed to
     * be Windows-1252 or ISO 8859-1.
     *
     * Because the decision is made per byte sequence, a character is NOT
     * converted when the bytes happen to look like UTF-8 already:
     *
     * 1) when any of these characters: ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß
     *    is followed by any of these ("group B"):
     *    ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶•¸¹º»¼½¾¿
     *    For example: %ABREPRESENT%C9%BB. «REPRESENTÉ»
     *    The "«" (%AB) character will be converted, but the "É" followed by
     *    "»" (%C9%BB) is also a valid UTF-8 sequence and is left unchanged.
     * 2) when any of these: àáâãäåæçèéêëìíîï is followed by TWO chars from group B,
     * 3) when any of these: ðñòó is followed by THREE chars from group B.
     *
     * @param mixed $text A string, or an array whose elements are converted
     *                    recursively. Any other value is returned unchanged.
     * @return mixed The same value, with strings UTF-8 encoded.
     */
    public static function toUTF8($text)
    {
        if (is_array($text)) {
            foreach ($text as $k => $v) {
                $text[$k] = self::toUTF8($v);
            }
            return $text;
        }

        if (!is_string($text)) {
            return $text;
        }

        $length = strlen($text);
        $buf = '';

        for ($i = 0; $i < $length; $i++) {
            $byte = $text[$i];
            $ord = ord($byte);

            if ($ord < 0x80) {
                // Plain ASCII needs no conversion.
                $buf .= $byte;
                continue;
            }

            if ($ord < 0xc0) {
                // 0x80-0xBF outside a sequence: a Windows-1252 special
                // character if one is mapped, otherwise plain ISO 8859-1.
                $buf .= isset(self::$win1252ToUtf8[$ord])
                    ? self::$win1252ToUtf8[$ord]
                    : self::latin1ByteToUtf8($ord);
                continue;
            }

            // 0xC0-0xFF: possibly the lead byte of a 2-, 3- or 4-byte sequence.
            $trailing = self::trailingByteCount($ord);

            if ($trailing > 0 && self::hasContinuationBytes($text, $i + 1, $trailing, $length)) {
                // Looks like UTF-8 already: copy the WHOLE sequence and skip
                // all of its trailing bytes, so none of them is converted again.
                $buf .= substr($text, $i, $trailing + 1);
                $i += $trailing;
            } else {
                // Not valid UTF-8; treat the byte as ISO 8859-1.
                $buf .= self::latin1ByteToUtf8($ord);
            }
        }

        return $buf;
    }

    /**
     * Converts text to Windows-1252 (after normalising it with toUTF8()).
     *
     * Characters that have no Windows-1252 / ISO 8859-1 equivalent become "?".
     *
     * @param mixed $text A string, or an array whose elements are converted
     *                    recursively. Any other value is returned unchanged.
     * @return mixed The same value, with strings Windows-1252 encoded.
     */
    public static function toWin1252($text)
    {
        if (is_array($text)) {
            foreach ($text as $k => $v) {
                $text[$k] = self::toWin1252($v);
            }
            return $text;
        }

        if (is_string($text)) {
            return self::utf8ToWin1252Bytes(self::toUTF8($text));
        }

        return $text;
    }

    /**
     * Alias of toWin1252().
     *
     * @param mixed $text See toWin1252().
     * @return mixed See toWin1252().
     */
    public static function toISO8859($text)
    {
        return self::toWin1252($text);
    }

    /**
     * Alias of toWin1252().
     *
     * @param mixed $text See toWin1252().
     * @return mixed See toWin1252().
     */
    public static function toLatin1($text)
    {
        return self::toWin1252($text);
    }

    /**
     * Repairs text that was UTF-8 encoded several times (e.g. "Ã©" for "é").
     *
     * The text is decoded to Windows-1252 and re-encoded with toUTF8() until
     * the result no longer changes. Characters outside Windows-1252 do not
     * survive the decoding step and come back as "?".
     *
     * @param mixed $text A string, or an array whose elements are repaired
     *                    recursively.
     * @return mixed The repaired value.
     */
    public static function fixUTF8($text)
    {
        if (is_array($text)) {
            foreach ($text as $k => $v) {
                $text[$k] = self::fixUTF8($v);
            }
            return $text;
        }

        // Strict comparison: loose comparison would treat numeric-looking
        // strings such as "1e3" and "1000" as equal.
        do {
            $last = $text;
            $text = self::toUTF8(self::utf8ToWin1252Bytes($text));
        } while ($last !== $text);

        return $text;
    }

    /**
     * Fixes a UTF-8 string that was converted from Windows-1252 as if it were
     * ISO 8859-1 (i.e. ignoring the Windows-1252 characters 0x80 to 0x9F).
     *
     * See: http://en.wikipedia.org/wiki/Windows-1252
     *
     * @param mixed $text Text to fix; passed to str_replace() as the subject.
     * @return mixed The text with the broken sequences replaced.
     */
    public static function UTF8FixWin1252Chars($text)
    {
        return str_replace(
            array_keys(self::$brokenUtf8ToUtf8),
            array_values(self::$brokenUtf8ToUtf8),
            $text
        );
    }

    /**
     * Strips a leading UTF-8 byte order mark (EF BB BF), if present.
     *
     * @param mixed $str Text to inspect.
     * @return mixed The text without the BOM; non-strings are returned as is.
     */
    public static function removeBOM($str = "")
    {
        if (is_string($str) && strncmp($str, "\xef\xbb\xbf", 3) === 0) {
            $str = substr($str, 3);
        }
        return $str;
    }

    /**
     * Number of continuation bytes a UTF-8 lead byte announces.
     *
     * @param int $ord Ordinal of a byte in the range 0xC0-0xFF.
     * @return int 1, 2 or 3; 0 when the byte (0xF8-0xFF) cannot start a sequence.
     */
    protected static function trailingByteCount($ord)
    {
        if ($ord <= 0xdf) {
            return 1;
        }
        if ($ord <= 0xef) {
            return 2;
        }
        if ($ord <= 0xf7) {
            return 3;
        }
        return 0;
    }

    /**
     * Tells whether $count bytes starting at $offset are all UTF-8
     * continuation bytes (0x80-0xBF) and lie inside the string.
     *
     * @param string $text   Text being scanned.
     * @param int    $offset Index of the first byte to check.
     * @param int    $count  Number of bytes to check.
     * @param int    $length strlen($text), passed in to avoid recomputing it.
     * @return bool
     */
    protected static function hasContinuationBytes($text, $offset, $count, $length)
    {
        if ($offset + $count > $length) {
            return false;
        }
        for ($j = $offset; $j < $offset + $count; $j++) {
            $ord = ord($text[$j]);
            if ($ord < 0x80 || $ord > 0xbf) {
                return false;
            }
        }
        return true;
    }

    /**
     * Encodes one ISO 8859-1 byte (0x80-0xFF) as its two-byte UTF-8 sequence.
     *
     * @param int $ord Ordinal of the byte.
     * @return string Two-byte UTF-8 sequence.
     */
    protected static function latin1ByteToUtf8($ord)
    {
        // Integer shift instead of "/ 64": chr() must not receive a float.
        return chr(0xc0 | ($ord >> 6)) . chr(0x80 | ($ord & 0x3f));
    }

    /**
     * Decodes UTF-8 text to Windows-1252 bytes.
     *
     * The Windows-1252 specific characters are first swapped for their raw
     * 0x80-0x9F bytes; the remaining text is then decoded byte by byte.
     * Bytes that are not part of a well-formed sequence (including the raw
     * bytes just inserted) are copied unchanged; code points above U+00FF
     * become "?".
     *
     * This replaces utf8_decode(), which is deprecated since PHP 8.2 and
     * turns the raw 0x80-0x9F bytes into "?".
     *
     * @param mixed $text Text to decode; cast to string by str_replace().
     * @return string Windows-1252 encoded text.
     */
    protected static function utf8ToWin1252Bytes($text)
    {
        $text = (string) str_replace(
            array_keys(self::$utf8ToWin1252),
            array_values(self::$utf8ToWin1252),
            $text
        );

        $length = strlen($text);
        $out = '';

        for ($i = 0; $i < $length; $i++) {
            $ord = ord($text[$i]);

            if ($ord < 0xc0) {
                // ASCII, or a raw byte that already is Windows-1252.
                $out .= $text[$i];
                continue;
            }

            $trailing = self::trailingByteCount($ord);

            if ($trailing === 0 || !self::hasContinuationBytes($text, $i + 1, $trailing, $length)) {
                // Stray high byte: keep it so a later toUTF8() can convert it.
                $out .= $text[$i];
                continue;
            }

            if ($trailing === 1) {
                $codePoint = (($ord & 0x1f) << 6) | (ord($text[$i + 1]) & 0x3f);
                $out .= $codePoint <= 0xff ? chr($codePoint) : '?';
            } else {
                // 3- and 4-byte sequences always encode code points above U+00FF.
                $out .= '?';
            }

            $i += $trailing;
        }

        return $out;
    }
}
?>
To use it, include the script containing this class and call it like this: Encoding::toUTF8('Bankdrücken'); 这篇关于php glob目录下的utf8的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持! 上岸,阿里云!
08-12 09:26