rust 读取非uft-8文件line-by-line

rust std::io::BufRead提供了lines方法。但本身只支持UTF_8编码。遇到非utf-8编码时直接报错。本文通过使用第三方cratesencoding增加对编码的支持。 cargo add encoding

code

use std::io::prelude::*;
// `Encoding` must be in scope for the `decode_to` method call to resolve.
use encoding::{all::GB18030, DecoderTrap, Encoding};

// GB18030-encoded bytes of "这是一个字符串\n将会被读取"
// ("This is a string\nthat will be read").
// The binding is `mut` because `read_until` takes the reader by `&mut self`
// and advances the slice as it consumes bytes.
let mut bytes: &[u8] = &[
    213, 226, 202, 199, 210, 187, 184, 246, 215, 214, 183, 251, 180, 174, 10, 189, 171,
    187, 225, 177, 187, 182, 193, 200, 161,
];
// Raw (still encoded) bytes of the current line; reused across iterations.
let mut buf: Vec<u8> = Vec::new();
loop {
    // `read_until` appends to `buf`, so drop the previous line first;
    // otherwise every decoded line would also contain all earlier lines.
    buf.clear();
    match bytes.read_until(b'\n', &mut buf) {
        // Zero bytes read: end of input.
        Ok(0) => break,
        Ok(_) => {
            let mut line = String::new();
            GB18030
                .decode_to(&buf, DecoderTrap::Replace, &mut line)
                .expect("decode fail");
            // Decoded line; it still carries the trailing '\n' delimiter
            // when the input had one.
            println!("{}", line);
        }
        // `panic!` needs a format string; a bare non-literal argument is
        // rejected in the 2021 edition.
        Err(e) => panic!("{}", e),
    }
}

封装

私基于上文原理对std::io::BufReader进行了封装,重载了read_linelines方法。crates

usage

cargo add encodingbufreader
use encodingbufreader::{BufReaderEncoding};
use encoding::all::,GB18030;
let bytes: &[u8] = &[
            213, 226, 202, 199, 210, 187, 184, 246, 215, 214, 183, 251, 180, 174, 10, 189, 171,
            187, 225, 177, 187, 182, 193, 200, 161,
        ];
for line in BufReaderEncoding::new(bytes, GB18030)
	.lines()
	.map(|l| l.unwrap()){
    	println!("{}",line);
}

读取文件

use encodingbufreader::{BufReaderEncoding};
use encoding::all::,GB18030;
use std::fs::File;
let file = File::open("test.txt")?;
for line in BufReaderEncoding::new(file, GB18030)
	.lines(){
    	println!("{}",line?);
}
03-31 00:34