前言
早就看到很多描述NIO相比傳統IO性能上的各種優勢,于是帶著求證心理前來測試一番,場景如下:
- 寫場景:將字符串內容
lu ben wei niu bi, stand up all! play game must be Laughing,play nm!
作為一行內容,重復的寫入到一個文件中大概十萬行,最后比較兩種IO模型下的耗時
- 讀場景:將一個五十萬行的文本(包含漢字)按行讀取出來,統計兩種IO模型下的耗時
寫測試
傳統IO都使用帶緩存(buffer)的IO去操作,代碼如下:
/** Text written on every iteration of the write benchmark; it ends with CRLF. */
private static final String CONTENT = "lu ben wei niu bi, stand up all! play game must be Laughing,play nm!\r\n";
/** Number of times the write benchmark writes its content. */
private final static int WRITE_COUNT = 100000;
/** Path of the file the write benchmark opens for output. */
private final static String FILE_PATH = "E:\\IOTest\\lbw.txt";
/**
 * Writes {@code content} to {@code FILE_PATH} {@code WRITE_COUNT} times through a
 * GBK-encoding {@link BufferedWriter} and prints the elapsed milliseconds to stderr.
 *
 * <p>Any exception is printed and swallowed; the streams are closed in all cases.
 *
 * @param content text written on every iteration
 */
public static void testWrite_Buffer(String content) {
    FileOutputStream outputStream = null;
    OutputStreamWriter writer = null;
    BufferedWriter out = null;
    try {
        long start = System.currentTimeMillis();
        outputStream = new FileOutputStream(FILE_PATH);
        writer = new OutputStreamWriter(outputStream, "GBK");
        out = new BufferedWriter(writer);
        for (int i = 0; i < WRITE_COUNT; i++) {
            out.write(content);
        }
        // Flush before stopping the clock: otherwise the tail still held in the writer's
        // buffers is only written by close(), outside the measured interval.
        out.flush();
        System.err.println("buffer->time:" + (System.currentTimeMillis() - start));
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        close(out, writer, outputStream);
    }
}
運行結果大致在57-60ms左右
然后是nio寫操作:
/** Text written on every iteration of the write benchmark; it ends with CRLF. */
private static final String CONTENT = "lu ben wei niu bi, stand up all! play game must be Laughing,play nm!\r\n";
/** Number of times the write benchmark writes its content. */
private final static int WRITE_COUNT = 100000;
/** Path of the file the write benchmark opens for output. */
private final static String FILE_PATH = "E:\\IOTest\\lbw.txt";
/**
 * Fills one direct {@link ByteBuffer} with {@code WRITE_COUNT} copies of {@code content}
 * (GBK-encoded), writes it to {@code FILE_PATH} through a {@link FileChannel} and prints
 * the elapsed milliseconds to stderr.
 *
 * <p>Any exception is printed and swallowed; the channel and stream are closed in all cases.
 *
 * @param content text appended on every iteration
 */
public static void testNio_write(String content) {
    FileChannel channel = null;
    FileOutputStream outputStream = null;
    try {
        long start = System.currentTimeMillis();
        outputStream = new FileOutputStream(FILE_PATH);
        channel = outputStream.getChannel();
        // Encode once, and as GBK like the buffered-writer benchmark, instead of
        // re-encoding with the platform default charset on every iteration.
        byte[] bytes = content.getBytes("GBK");
        // multiplyExact fails loudly instead of silently wrapping to a bogus capacity.
        ByteBuffer buffer = ByteBuffer.allocateDirect(Math.multiplyExact(bytes.length, WRITE_COUNT));
        for (int i = 0; i < WRITE_COUNT; i++) {
            buffer.put(bytes);
        }
        buffer.flip();
        // A single write() call is not guaranteed to drain the buffer.
        while (buffer.hasRemaining()) {
            channel.write(buffer);
        }
        System.err.println("channel->time:" + (System.currentTimeMillis() - start));
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        close(channel, outputStream);
    }
}
運行結果大致在45-49ms左右。
最后用mmap(內存映射機制)測試:
/** Text written on every iteration of the write benchmark; it ends with CRLF. */
private static final String CONTENT = "lu ben wei niu bi, stand up all! play game must be Laughing,play nm!\r\n";
/** Number of times the write benchmark writes its content. */
private final static int WRITE_COUNT = 100000;
/** Path of the file the write benchmark opens for output. */
private final static String FILE_PATH = "E:\\IOTest\\lbw.txt";
/**
 * Maps {@code FILE_PATH} read-write, copies {@code content} (GBK-encoded) into the mapping
 * {@code WRITE_COUNT} times and prints the elapsed milliseconds to stderr.
 *
 * <p>Any exception is printed and swallowed; the channel and file are closed in all cases.
 *
 * @param content text copied into the mapping on every iteration
 */
public static void testRandomAccessFile_write(String content) {
    RandomAccessFile accessFile = null;
    FileChannel channel = null;
    try {
        long start = System.currentTimeMillis();
        accessFile = new RandomAccessFile(FILE_PATH, "rw");
        channel = accessFile.getChannel();
        // GBK keeps the bytes identical to what the buffered-writer benchmark produces.
        byte[] bytes = content.getBytes("GBK");
        // Widen before multiplying so a large content/count cannot overflow int.
        long size = (long) bytes.length * WRITE_COUNT;
        MappedByteBuffer map = channel.map(FileChannel.MapMode.READ_WRITE, 0, size);
        for (int i = 0; i < WRITE_COUNT; i++) {
            map.put(bytes);
        }
        System.err.println("mmap->time:" + (System.currentTimeMillis() - start));
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        close(channel, accessFile);
    }
}
運行結果大致是15-19ms Oh!!!
mmap比傳統IO快了一倍多!NIO提升不明顯,快了20%左右
完整代碼:
package rpf.study.excel.nio;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.Closeable;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CoderResult;
import java.util.Objects;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.function.BiConsumer;
/**
 * Write benchmark (project "java-excel"): writes the same line {@code WRITE_COUNT} times to
 * one file using a buffered character stream, a {@link FileChannel} fed from a direct
 * {@link ByteBuffer}, and a memory-mapped file, printing the elapsed time of each to stderr.
 *
 * <p>Created 2019-11-15 11:21.
 *
 * @author ranpengfeng
 */
public class Test {
    /** Line written on every iteration; it ends with CRLF. */
    private static final String CONTENT = "lu ben wei niu bi, stand up all! play game must be Laughing,play nm!\r\n";
    /** Number of times {@link #CONTENT} is written by each benchmark. */
    private final static int WRITE_COUNT = 100000;
    /**
     * Test path: the file every write benchmark opens for output.
     */
    private final static String FILE_PATH = "E:\\IOTest\\lbw.txt";

    /**
     * Submits the benchmark tasks to a three-thread pool and shuts the pool down.
     * All three benchmarks write to the same {@link #FILE_PATH}; only the mmap one is
     * currently enabled, the other two task bodies are commented out.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        ExecutorService executorService = Executors.newFixedThreadPool(3);
        executorService.submit(() -> {
            //testNio_write(CONTENT);
        });
        executorService.submit(() -> {
            //testWrite_Buffer(CONTENT);
        });
        executorService.submit(() -> {
            testRandomAccessFile_write(CONTENT);
        });
        executorService.shutdown();
    }

    /**
     * Writes {@code content} to {@link #FILE_PATH} {@link #WRITE_COUNT} times through a
     * GBK-encoding {@link BufferedWriter} and prints the elapsed milliseconds to stderr.
     * Exceptions are printed and swallowed.
     *
     * @param content text written on every iteration
     */
    public static void testWrite_Buffer(String content) {
        FileOutputStream outputStream = null;
        OutputStreamWriter writer = null;
        BufferedWriter out = null;
        try {
            long start = System.currentTimeMillis();
            outputStream = new FileOutputStream(FILE_PATH);
            writer = new OutputStreamWriter(outputStream, "GBK");
            out = new BufferedWriter(writer);
            for (int i = 0; i < WRITE_COUNT; i++) {
                out.write(content);
            }
            // Flush before stopping the clock: otherwise the tail still held in the writer's
            // buffers is only written by close(), outside the measured interval.
            out.flush();
            System.err.println("buffer->time:" + (System.currentTimeMillis() - start));
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            close(out, writer, outputStream);
        }
    }

    /**
     * Maps {@link #FILE_PATH} read-write, copies {@code content} (GBK-encoded) into the
     * mapping {@link #WRITE_COUNT} times and prints the elapsed milliseconds to stderr.
     * Exceptions are printed and swallowed.
     *
     * @param content text copied into the mapping on every iteration
     */
    public static void testRandomAccessFile_write(String content) {
        RandomAccessFile accessFile = null;
        FileChannel channel = null;
        try {
            long start = System.currentTimeMillis();
            accessFile = new RandomAccessFile(FILE_PATH, "rw");
            channel = accessFile.getChannel();
            // GBK keeps the bytes identical to what testWrite_Buffer produces.
            byte[] bytes = content.getBytes("GBK");
            // Widen before multiplying so a large content/count cannot overflow int.
            long size = (long) bytes.length * WRITE_COUNT;
            MappedByteBuffer map = channel.map(FileChannel.MapMode.READ_WRITE, 0, size);
            for (int i = 0; i < WRITE_COUNT; i++) {
                map.put(bytes);
            }
            System.err.println("mmap->time:" + (System.currentTimeMillis() - start));
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            close(channel, accessFile);
        }
    }

    /**
     * Fills one direct {@link ByteBuffer} with {@link #WRITE_COUNT} copies of
     * {@code content} (GBK-encoded), writes it to {@link #FILE_PATH} through a
     * {@link FileChannel} and prints the elapsed milliseconds to stderr.
     * Exceptions are printed and swallowed.
     *
     * @param content text appended on every iteration
     */
    public static void testNio_write(String content) {
        FileChannel channel = null;
        FileOutputStream outputStream = null;
        try {
            long start = System.currentTimeMillis();
            outputStream = new FileOutputStream(FILE_PATH);
            channel = outputStream.getChannel();
            // Encode once, and as GBK like testWrite_Buffer, instead of re-encoding with the
            // platform default charset on every iteration.
            byte[] bytes = content.getBytes("GBK");
            // multiplyExact fails loudly instead of silently wrapping to a bogus capacity.
            ByteBuffer buffer = ByteBuffer.allocateDirect(Math.multiplyExact(bytes.length, WRITE_COUNT));
            for (int i = 0; i < WRITE_COUNT; i++) {
                buffer.put(bytes);
            }
            buffer.flip();
            // A single write() call is not guaranteed to drain the buffer.
            while (buffer.hasRemaining()) {
                channel.write(buffer);
            }
            System.err.println("channel->time:" + (System.currentTimeMillis() - start));
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            close(channel, outputStream);
        }
    }

    /**
     * Closes every non-null argument, in the order given.
     *
     * <p>Each resource is closed in its own try block, so a failure on one is reported to
     * stderr and the remaining resources are still closed.
     *
     * @param readers resources to close; {@code null} elements (and a {@code null} array)
     *                are ignored
     */
    private static void close(Closeable... readers) {
        if (readers == null) {
            return;
        }
        for (Closeable reader : readers) {
            if (Objects.nonNull(reader)) {
                try {
                    reader.close();
                } catch (Exception e) {
                    e.printStackTrace();
                    System.err.println("流關閉失敗");
                }
            }
        }
    }
}
讀取測試
讀取還是和IOBuffer(BufferedReader)一樣,一行一行讀取,最終統計總共耗時。BufferedReader有自己的解碼器(字節碼轉字符),這里需要創建一個NIO的解碼器,以及按行讀取的function。
思路:首先是以換行符結尾(\r,\n)作為完整一行的標志。解析時,以8K大小作為一個解析塊,因為考慮到文件大小、內存占用關系,以一個固定大小的ByteBuffer去循環的獲取文件內容。由于無法每次都保證8K解析塊的末尾剛好是某一行的結束位置,因此需要將上一次不完整行的內容保存到StringBuffer中。圖形化來說就是:
這個時候需要把ByteBuffer中剩余的半行內容解碼成字符串放到StringBuffer,再解析下一塊內容的時候,就在StringBuffer后面追加,即是完整一行。
其次是漢字問題,由于一個漢字在GBK編碼下占用兩個字節,所以有可能出現ByteBuffer末尾包含一個漢字的其中一個字節,而下一次讀取的時候ByteBuffer頭部則是漢字的另一個字節,這樣會導致兩行內容解碼失敗,圖形解釋:
解決辦法就是,在解碼失敗后,將失敗的漢字字節放到leftBuffer中去暫存起來,下一次直接將兩部分字節碼連接在一起解碼
首先是傳統IO代碼:
/** Path of the text file the read benchmark opens. */
private final static String FILE_PATH = "E:\\IOTest\\sx.txt";
/**
 * Reads {@code FILE_PATH} line by line through a GBK {@link BufferedReader}, passing each
 * line and its 1-based number to {@code consumer}, then prints the line count to stdout
 * and the elapsed milliseconds to stderr.
 *
 * <p>Any exception is printed and swallowed; the streams are closed in all cases.
 *
 * @param consumer callback receiving (line number, line text) for every line read
 */
public static void testBuffer(BiConsumer<Integer, String> consumer) {
    // Kept in its own variable so it is still closed if wrapping it in a reader fails.
    FileInputStream fileInputStream = null;
    InputStreamReader reader = null;
    BufferedReader in = null;
    try {
        long start = System.currentTimeMillis();
        fileInputStream = new FileInputStream(FILE_PATH);
        reader = new InputStreamReader(fileInputStream, "GBK");
        in = new BufferedReader(reader);
        String line;
        int lineCount = 0;
        while ((line = in.readLine()) != null) {
            lineCount++;
            consumer.accept(lineCount, line);
        }
        System.out.println("buffer->lineCount: " + lineCount);
        System.err.println("buffer->time:" + (System.currentTimeMillis() - start));
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // Outermost wrapper first, underlying stream last.
        close(in, reader, fileInputStream);
    }
}
由于讀相互不影響,因此可以一起運行。
NIO代碼:
/**
 * Decoder configuration: the GBK charset and a decoder created from it.
 */
private static Charset cs = Charset.forName("GBK");
private static CharsetDecoder charsetDecoder = cs.newDecoder();
/**
 * Test path.
 */
private final static String FILE_PATH = "E:\\IOTest\\sx.txt";
/**
 * Reads {@code FILE_PATH} through a {@link FileChannel} in 8 KiB chunks, decodes each chunk
 * with {@code decodeBytes} and splits the characters into lines, passing each line and its
 * 1-based number to {@code consumer}. Prints the line count to stdout and the elapsed
 * milliseconds to stderr.
 *
 * <p>Lines are terminated by {@code "\n"}, {@code "\r"} or {@code "\r\n"}; the terminator is
 * not part of the reported line, empty lines are reported, and a final line without a
 * terminator is reported as well.
 *
 * <p>Any exception is printed and swallowed; the channel and stream are closed in all cases.
 *
 * @param consumer callback receiving (line number, line text) for every line read
 */
public static void testNio(BiConsumer<Integer, String> consumer) {
    FileChannel channel = null;
    FileInputStream fileInputStream = null;
    try {
        long start = System.currentTimeMillis();
        fileInputStream = new FileInputStream(FILE_PATH);
        channel = fileInputStream.getChannel();
        int capacity = 1024 * 8;
        // Receives the raw bytes of each chunk read from the channel.
        ByteBuffer buffer = ByteBuffer.allocateDirect(capacity);
        // Scratch space decodeBytes uses for bytes it could not decode within one chunk.
        ByteBuffer leftBuffer = ByteBuffer.allocateDirect(capacity * 2);
        CharBuffer cb = CharBuffer.allocate(buffer.capacity());
        // Holds the unterminated tail of a chunk until the rest of the line arrives.
        StringBuilder attach = new StringBuilder();
        int lineCount = 0;
        // True right after a '\r': a directly following '\n' belongs to the same terminator,
        // even when the pair is split across two chunks.
        boolean skipLf = false;
        while (channel.read(buffer) != -1) {
            buffer.flip();
            decodeBytes(buffer, leftBuffer, cb);
            char[] chars = cb.array();
            int decoded = cb.position();
            int lineStart = 0;
            for (int i = 0; i < decoded; i++) {
                char c = chars[i];
                if (skipLf) {
                    skipLf = false;
                    if (c == '\n') {
                        lineStart = i + 1;
                        continue;
                    }
                }
                if (c != '\n' && c != '\r') {
                    continue;
                }
                String line;
                if (attach.length() == 0) {
                    line = new String(chars, lineStart, i - lineStart);
                } else {
                    attach.append(chars, lineStart, i - lineStart);
                    line = attach.toString();
                    attach.setLength(0);
                }
                lineCount++;
                consumer.accept(lineCount, line);
                lineStart = i + 1;
                skipLf = c == '\r';
            }
            // Keep the incomplete last line of this chunk for the next round.
            if (lineStart < decoded) {
                attach.append(chars, lineStart, decoded - lineStart);
            }
            buffer.clear();
        }
        // The file may end without a line terminator; report that last line too.
        if (attach.length() > 0) {
            lineCount++;
            consumer.accept(lineCount, attach.toString());
        }
        System.out.println("channel->lineCount:" + lineCount);
        System.err.println("channel->time:" + (System.currentTimeMillis() - start));
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        close(channel, fileInputStream);
    }
}
/**
 * Decodes bytes to characters: clears {@code cb} and fills it with the characters decoded
 * from {@code buffer}, preceded by any bytes a previous call left in {@code leftBuffer}.
 *
 * <p>Bytes that do not yet form a complete character (for example the first byte of a
 * two-byte character cut off at the end of a chunk) are not treated as an error; they are
 * stored in {@code leftBuffer} and decoded together with the next chunk. Byte sequences the
 * decoder reports as invalid are skipped and replaced by U+FFFD.
 *
 * <p>On return {@code cb.position()} is the number of decoded characters.
 *
 * @param buffer     bytes to decode, from its position to its limit; consumed by this call
 * @param leftBuffer carry-over storage owned by the caller; between calls its position is
 *                   the number of pending bytes. It must have room for the pending bytes
 *                   plus one full {@code buffer}, otherwise {@code put} throws
 * @param cb         destination for the decoded characters; cleared first
 */
private static void decodeBytes(ByteBuffer buffer, ByteBuffer leftBuffer, CharBuffer cb) {
    cb.clear();
    // Use a private decoder per call: the benchmarks run on several pool threads and a
    // CharsetDecoder instance must not be shared between concurrent users. No decoder state
    // needs to survive between calls because undecoded bytes are carried in leftBuffer.
    CharsetDecoder decoder = charsetDecoder.charset().newDecoder();
    ByteBuffer source = buffer;
    if (leftBuffer.position() > 0) {
        // Always join pending bytes with the new chunk, even if the chunk would decode
        // cleanly on its own: a trailing byte in the ASCII range would otherwise be
        // mis-read as a separate character.
        leftBuffer.put(buffer);
        leftBuffer.flip();
        source = leftBuffer;
    }
    // endOfInput=false: an incomplete trailing character is left unread, not reported.
    CoderResult result = decoder.decode(source, cb, false);
    while (result.isError()) {
        source.position(source.position() + result.length());
        if (cb.hasRemaining()) {
            cb.put('\uFFFD');
        }
        result = decoder.decode(source, cb, false);
    }
    // Whatever is still unread goes to the front of leftBuffer for the next call.
    if (source == leftBuffer) {
        leftBuffer.compact();
    } else {
        leftBuffer.put(source);
    }
}
最后是mmap的代碼:
/**
 * Decoder configuration: the GBK charset and a decoder created from it.
 */
private static Charset cs = Charset.forName("GBK");
private static CharsetDecoder charsetDecoder = cs.newDecoder();
/**
 * Test path.
 */
private final static String FILE_PATH = "E:\\IOTest\\sx.txt";
/**
 * Reads {@code FILE_PATH} by memory-mapping it in chunks of at most 1,024,000 bytes,
 * decodes each chunk with {@code decodeBytes} and splits the characters into lines, passing
 * each line and its 1-based number to {@code consumer}. Prints the line count to stdout and
 * the elapsed milliseconds to stderr.
 *
 * <p>Lines are terminated by {@code "\n"}, {@code "\r"} or {@code "\r\n"}; the terminator is
 * not part of the reported line, empty lines are reported, and a final line without a
 * terminator is reported as well.
 *
 * <p>Any exception is printed and swallowed; the channel and file are closed in all cases.
 *
 * @param consumer callback receiving (line number, line text) for every line read
 */
public static void testRandomAccessFile(BiConsumer<Integer, String> consumer) {
    RandomAccessFile accessFile = null;
    FileChannel channel = null;
    try {
        long start = System.currentTimeMillis();
        accessFile = new RandomAccessFile(FILE_PATH, "r");
        channel = accessFile.getChannel();
        // Maximum number of bytes mapped per chunk.
        int max = 1024 * 1000;
        // Total size of the file in bytes.
        long len = channel.size();
        // Scratch space decodeBytes uses for bytes it could not decode within one chunk.
        ByteBuffer leftBuffer = ByteBuffer.allocateDirect(max * 2);
        // One character buffer, large enough for a full chunk, reused for every chunk.
        CharBuffer cb = CharBuffer.allocate(max);
        // Holds the unterminated tail of a chunk until the rest of the line arrives.
        StringBuilder attach = new StringBuilder();
        int lineCount = 0;
        // True right after a '\r': a directly following '\n' belongs to the same terminator,
        // even when the pair is split across two chunks.
        boolean skipLf = false;
        for (long offset = 0; offset < len; offset += max) {
            long size = Math.min(max, len - offset);
            MappedByteBuffer map = channel.map(FileChannel.MapMode.READ_ONLY, offset, size);
            decodeBytes(map, leftBuffer, cb);
            char[] chars = cb.array();
            int decoded = cb.position();
            int lineStart = 0;
            for (int i = 0; i < decoded; i++) {
                char c = chars[i];
                if (skipLf) {
                    skipLf = false;
                    if (c == '\n') {
                        lineStart = i + 1;
                        continue;
                    }
                }
                if (c != '\n' && c != '\r') {
                    continue;
                }
                String line;
                if (attach.length() == 0) {
                    line = new String(chars, lineStart, i - lineStart);
                } else {
                    attach.append(chars, lineStart, i - lineStart);
                    line = attach.toString();
                    attach.setLength(0);
                }
                lineCount++;
                consumer.accept(lineCount, line);
                lineStart = i + 1;
                skipLf = c == '\r';
            }
            // Keep the incomplete last line of this chunk for the next round.
            if (lineStart < decoded) {
                attach.append(chars, lineStart, decoded - lineStart);
            }
        }
        // The file may end without a line terminator; report that last line too.
        if (attach.length() > 0) {
            lineCount++;
            consumer.accept(lineCount, attach.toString());
        }
        System.out.println("mmap->lineCount:" + lineCount);
        System.err.println("mmap->time:" + (System.currentTimeMillis() - start));
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        close(channel, accessFile);
    }
}
/**
* 解碼字節(jié)->字符
* @param buffer
* @param leftBuffer
* @param cb
*/
private static void decodeBytes(ByteBuffer buffer, ByteBuffer leftBuffer, CharBuffer cb) {
int capacity = buffer.capacity();
cb.clear();
//假定剛好不存在漢字字節(jié)被斷開的問題,則直接解析戈泼,大多數(shù)情況
CoderResult decode = charsetDecoder.decode(buffer, cb, true);
//處理遇到漢字字節(jié)被斷開的解析格式問題
while (decode.isMalformed()) {
//查詢leftBuffer是否有上次未處理的字節(jié)碼
if (leftBuffer.position() == 0) {
int position = buffer.position();
//將本次出現(xiàn)解碼錯誤位置的字節(jié)放到leftBuffer中
for (; position < capacity; position++) {
leftBuffer.put(buffer.get(position));
}
//將limit移動到解碼出錯的位置
buffer.limit(buffer.position());
//重新解碼
decode = charsetDecoder.decode(buffer, cb, true);
} else {
//將解碼錯誤的字節(jié)append到leftBuffer的后面婿禽,構造成完整的漢字解碼
buffer.position(0);
cb.position(0);
leftBuffer.put(buffer);
//寫模式切換讀模式
leftBuffer.flip();
decode = charsetDecoder.decode(leftBuffer, cb, true);
int position = leftBuffer.position();
int limit = leftBuffer.limit();
leftBuffer.clear();
//如果末尾再次遇到漢字解碼問題,則將出現(xiàn)問題的字節(jié)同理移動到leftBuffer開頭位置大猛,并結(jié)束循環(huán)扭倾,下次read(buffer)的時候再處理
if (decode.isMalformed()) {
for (; position < limit; position++) {
leftBuffer.put(leftBuffer.get(position));
}
break;
}
}
}
最后測試入口:
/**
 * Test entry point: submits the NIO, buffered and mmap read benchmarks to a three-thread
 * pool and then shuts the pool down. Lines numbered 1000 to 2000 (inclusive) are echoed
 * to stdout by the shared callback.
 *
 * @param args unused
 */
public static void main(String[] args) {
    BiConsumer<Integer, String> consumer = (lineNo, text) -> {
        if (lineNo < 1000 || lineNo > 2000) {
            return;
        }
        System.out.println("[" + lineNo + "]:" + text);
    };
    ExecutorService pool = Executors.newFixedThreadPool(3);
    pool.submit(() -> testNio(consumer));
    pool.submit(() -> testBuffer(consumer));
    pool.submit(() -> testRandomAccessFile(consumer));
    pool.shutdown();
}
結果相當的意外:
第一次:
buffer->lineCount: 572585
buffer->time:513
channel->lineCount:572585
channel->time:902
mmap->time:1069
mmap->lineCount:572585
第二次:
buffer->lineCount: 572585
buffer->time:589
mmap->lineCount:572585
mmap->time:926
channel->lineCount:572585
channel->time:1025
第三次:
buffer->lineCount: 572585
buffer->time:505
mmap->lineCount:572585
channel->lineCount:572585
mmap->time:773
channel->time:776
第四次:
buffer->lineCount: 572585
buffer->time:651
channel->time:1073
channel->lineCount:572585
mmap->lineCount:572585
mmap->time:1177
結果:完敗!讀測試的代碼我修改了很多次,最終還是沒辦法優化的和傳統IO相當,BufferedReader牛逼!
BufferedReader獲取字節數組的方法還是一個native的原生方法,并沒有看到有什么獨特的優化的地方。