// [image.png] - stray attachment reference, kept as a comment so the file compiles
package com.zt.haide.util.easyWord;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.*;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.xwpf.usermodel.*;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
/**
 * Reads tabular data out of Word documents ({@code .doc} and {@code .docx}) with Apache POI.
 *
 * <p>Only the first table of a document is read. Row 0 of that table is skipped (presumably
 * a header row) and only the first {@value #COLUMN_COUNT} cells of every remaining row are
 * returned. A cell whose text is empty takes the most recent non-empty value seen in the same
 * column, which flattens vertically merged cells into fully populated rows.
 */
public class ReadWordTable {

    /** Number of leading cells read from every table row. */
    private static final int COLUMN_COUNT = 4;

    /** Zero-based index of the paragraph that {@link #getNotes(List)} reads. */
    private static final int NOTES_PARAGRAPH_INDEX = 3;

    /** Document read by {@link #main(String[])} when no path is passed on the command line. */
    private static final String DEFAULT_PATH = "C:\\Users\\Administrator\\Desktop\\lzq1.doc";

    /**
     * Prints every row returned by {@link #readWord(String)}, one value per line, with a
     * separator line in front of each row.
     *
     * @param args optional; {@code args[0]} is the document path, otherwise the built-in
     *             default path is used
     */
    public static void main(String[] args) {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PATH;
        for (List<String> row : readWord(path)) {
            System.out.println("-------");
            for (String value : row) {
                System.out.println(value);
            }
        }
    }

    /**
     * Reads the first table of a Word document.
     *
     * <p>A path ending in {@code docx} (case-insensitive) is parsed as OOXML; anything else is
     * parsed as a binary Word 97-2003 document.
     *
     * <p>Failures are not propagated: the stack trace is printed and the rows collected up to
     * that point are returned. In particular, a row with fewer than {@value #COLUMN_COUNT}
     * cells stops the read at that row.
     *
     * @param path location of the document on disk
     * @return one list per data row, each holding {@value #COLUMN_COUNT} cell texts; empty when
     *         the document has no table or could not be read
     */
    public static List<List<String>> readWord(String path) {
        List<List<String>> rows = new ArrayList<>();
        // try-with-resources releases the file handle on every exit path.
        try (InputStream is = new FileInputStream(new File(path))) {
            if (path.toLowerCase().endsWith("docx")) {
                readDocxTable(is, rows);
            } else {
                readDocTable(is, rows);
            }
        } catch (Exception e) {
            // Best-effort contract: report the problem and hand back what was read so far.
            e.printStackTrace();
        }
        return rows;
    }

    /**
     * Reads the text of the paragraph at index {@value #NOTES_PARAGRAPH_INDEX} and echoes it
     * to standard output.
     *
     * @param paragraphs paragraphs of a document body
     * @return concatenated text of the paragraph's runs; empty when the list is {@code null},
     *         has too few paragraphs, or the paragraph has no text
     */
    public static String getNotes(List<XWPFParagraph> paragraphs) {
        StringBuilder notes = new StringBuilder();
        if (paragraphs != null && paragraphs.size() > NOTES_PARAGRAPH_INDEX) {
            appendRunText(notes, paragraphs.get(NOTES_PARAGRAPH_INDEX).getRuns());
        }
        String result = notes.toString();
        System.out.println("備注*:" + result);
        return result;
    }

    /** Appends the data rows of the first table of a .docx stream to {@code rows}. */
    private static void readDocxTable(InputStream is, List<List<String>> rows)
            throws java.io.IOException {
        try (XWPFDocument docx = new XWPFDocument(is)) {
            List<XWPFTable> tables = docx.getTables();
            if (tables.isEmpty()) {
                return;
            }
            XWPFTable table = tables.get(0);
            String[] lastValues = newLastValues();
            // Row 0 is skipped.
            for (int r = 1; r < table.getNumberOfRows(); r++) {
                List<XWPFTableCell> cells = table.getRow(r).getTableCells();
                List<String> values = new ArrayList<>(COLUMN_COUNT);
                for (int c = 0; c < COLUMN_COUNT; c++) {
                    values.add(carryOver(lastValues, c, docxCellText(cells.get(c))));
                }
                rows.add(values);
            }
        }
    }

    /** Appends the data rows of the first table of a binary .doc stream to {@code rows}. */
    private static void readDocTable(InputStream is, List<List<String>> rows)
            throws java.io.IOException {
        try (HWPFDocument hwpf = new HWPFDocument(new POIFSFileSystem(is))) {
            TableIterator tables = new TableIterator(hwpf.getRange());
            // Only the first table is wanted; later tables are never touched, so the number
            // of tables in the document does not matter.
            if (!tables.hasNext()) {
                return;
            }
            Table table = tables.next();
            String[] lastValues = newLastValues();
            // Row 0 is skipped.
            for (int r = 1; r < table.numRows(); r++) {
                TableRow row = table.getRow(r);
                List<String> values = new ArrayList<>(COLUMN_COUNT);
                for (int c = 0; c < COLUMN_COUNT; c++) {
                    values.add(carryOver(lastValues, c, docCellText(row.getCell(c))));
                }
                rows.add(values);
            }
        }
    }

    /** Returns the text of a .docx cell; multi-paragraph cells are joined without separator. */
    private static String docxCellText(XWPFTableCell cell) {
        List<XWPFParagraph> paragraphs = cell.getParagraphs();
        if (paragraphs.size() <= 1) {
            return cell.getText();
        }
        StringBuilder text = new StringBuilder();
        for (XWPFParagraph paragraph : paragraphs) {
            appendRunText(text, paragraph.getRuns());
        }
        return text.toString();
    }

    /** Returns the text of a .doc cell with each paragraph's final character removed. */
    private static String docCellText(TableCell cell) {
        StringBuilder text = new StringBuilder();
        for (int p = 0; p < cell.numParagraphs(); p++) {
            String raw = cell.getParagraph(p).text();
            if (raw != null && !raw.isEmpty()) {
                // The last character is a control character terminating the paragraph
                // (presumably the paragraph/cell mark), not part of the visible text.
                text.append(raw, 0, raw.length() - 1);
            }
        }
        return text.toString();
    }

    /** Appends the first text node of every run, skipping runs that carry no text. */
    private static void appendRunText(StringBuilder out, List<XWPFRun> runs) {
        for (XWPFRun run : runs) {
            String piece = run.getText(0);
            if (piece != null) {
                out.append(piece);
            }
        }
    }

    /** Creates the per-column "last non-empty value" store, initialised to empty strings. */
    private static String[] newLastValues() {
        String[] lastValues = new String[COLUMN_COUNT];
        for (int c = 0; c < COLUMN_COUNT; c++) {
            lastValues[c] = "";
        }
        return lastValues;
    }

    /**
     * Returns {@code text}, or the column's last non-empty value when {@code text} is empty,
     * and remembers non-empty text for the rows that follow.
     */
    private static String carryOver(String[] lastValues, int column, String text) {
        if (text == null || text.isEmpty()) {
            return lastValues[column];
        }
        lastValues[column] = text;
        return text;
    }
}