data
是从网站下载的,
NSString * html = [[NSString alloc] initWithData:data encoding:NSUTF8StringEncoding];
html
是 nil,
但是 NSString * html = [[NSString alloc] initWithData:data encoding:NSASCIIStringEncoding];
则会有内容。
由于网站包含汉字,如果使用 ASCII 编码,就无法正确显示中文。我猜网站内容中含有一些无效的 UTF-8 字节序列,导致第一段代码返回 nil。
有什么方法可以继续使用 UTF-8,同时忽略这些无效的字节吗?
最佳答案
我想我找到了解决方案。
参考 Vincent Guerci 的回答(Vincent Guerci's answer):
将 libiconv 添加到您的项目中,用它清除无效的 UTF-8 字节;清除之后,NSData 就可以安全地传递给 [[NSString alloc] initWithData:data encoding:NSUTF8StringEncoding];
详细实现是:
#include "iconv.h"
objective-c :
/// Returns a copy of `data` with invalid UTF-8 byte sequences discarded, so
/// that the result can be decoded with NSUTF8StringEncoding.
///
/// The cleaning is done by running a UTF-8 -> UTF-8 iconv conversion with
/// ICONV_SET_DISCARD_ILSEQ enabled.
///
/// @param data The raw bytes to clean. nil or empty input yields empty data.
/// @return The cleaned bytes, or nil if the converter could not be opened,
///         the output buffer could not be allocated, or the conversion failed.
- (NSData *)cleanUTF8:(NSData *)data {
    // Adapted from
    // https://stackoverflow.com/questions/3485190/nsstring-initwithdata-returns-null

    // Nothing to convert; returning early also avoids a zero-sized malloc and
    // handing a possibly-NULL input pointer to iconv.
    if (data.length == 0) {
        return [NSData data];
    }

    iconv_t cd = iconv_open("UTF-8", "UTF-8"); // convert to UTF-8 from UTF-8
    if (cd == (iconv_t)-1) {
        NSLog(@"iconv_open(\"UTF-8\", \"UTF-8\") failed");
        return nil;
    }

    int one = 1;
    iconvctl(cd, ICONV_SET_DISCARD_ILSEQ, &one); // discard invalid characters

    size_t inbytesleft = data.length;
    size_t outbytesleft = data.length;
    char *inbuf = (char *)data.bytes;

    // The output buffer is sized to the input length; a pass that only drops
    // bytes is not expected to need more room than that.
    char *outbuf = malloc(sizeof(char) * data.length);
    if (outbuf == NULL) {
        iconv_close(cd);
        return nil;
    }
    char *outptr = outbuf;

    NSData *result = nil;
    if (iconv(cd, &inbuf, &inbytesleft, &outptr, &outbytesleft) == (size_t)-1) {
        NSLog(@"this should not happen, seriously");
    } else {
        // Bytes written = initial capacity minus what iconv left unused.
        result = [NSData dataWithBytes:outbuf length:data.length - outbytesleft];
    }

    // Single cleanup path: release the descriptor and buffer on success AND
    // on failure (the failure path previously leaked both).
    iconv_close(cd);
    free(outbuf);
    return result;
}