文章概覽
- 引入POI類庫及注意事項
- 多個Word文檔合并
- 替換文檔中的占位符,包含段落占位符、表格占位符
- 富文本插入到Word及注意事項
- 給Word生成水印
- 傳送門
- 鳴謝
引入POI類庫及注意事項
Java操作Word用到的工具類庫是基于POI 4.1.0版本的,poi官方API可以使用Google自帶的全文翻譯,很方便。注意文章中操作的Word都是docx后綴的,即Word2007版本,如果需要操作Word2003版本還需自行轉換。
后續將更新從Excel讀取表格數據寫入到Word,從另一個Word讀取模板表格到當前Word,項目代碼中每一個功能都提供了test類,你只需要拉下代碼、修改文件目錄即可執行,一步到位。
下面開始進入主題,文章中只貼關鍵代碼,全部代碼請通過傳送門去GitHub拉取,如果感覺對你有幫助請在GitHub上點亮你尊貴的小星星,碼磚不易,轉載請說明出處,謝謝。
pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.corey</groupId>
<artifactId>wordtools</artifactId>
<version>1.0-SNAPSHOT</version>
<dependencies>
<!-- !! POI dependencies (begin) -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>4.1.0</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>4.1.0</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>4.1.0</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml-schemas</artifactId>
<version>4.1.0</version>
</dependency>
<!-- POI dependencies (end) !!-->
<!--out net begin -->
<dependency>
<groupId>org.docx4j</groupId>
<artifactId>docx4j</artifactId>
<version>3.3.6</version>
</dependency>
<dependency>
<groupId>org.docx4j</groupId>
<artifactId>docx4j-ImportXHTML</artifactId>
<version>3.3.6</version>
</dependency>
<dependency>
<groupId>org.docx4j</groupId>
<artifactId>docx4j-export-fo</artifactId>
<version>3.3.6</version>
</dependency>
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.11.2</version>
</dependency>
<!--out net end -->
<!-- https://mvnrepository.com/artifact/org.springframework/spring-core -->
<!-- Only Spring's utility classes are used -->
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-core</artifactId>
<version>5.2.1.RELEASE</version>
</dependency>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.5</version>
</dependency>
<!-- https://mvnrepository.com/artifact/javax.servlet/javax.servlet-api -->
<dependency>
<groupId>javax.servlet</groupId>
<artifactId>javax.servlet-api</artifactId>
<version>4.0.1</version>
<scope>provided</scope>
</dependency>
</dependencies>
</project>
多個Word文檔合并
POI合并文檔的基本思路:Word文檔的正文本身是一個xml文件,通過把不同xml的Xmlns去重合并,添加固定的格式標簽,然后把不同xml里面的元素都拼接到一起,組成一個新的xml文件,輸出成為一個新的Word。更多代碼請查看項目的magerword目錄。
package magerword;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.xmlbeans.XmlOptions;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBody;
import org.springframework.util.ObjectUtils;
import java.io.*;
import java.util.*;
/**
* @program: 合并多份word文件
* @description:
* @author: corey
* @create: 2020-04-29 19:04
**/
public class MagerUtil {
/**
 * Merges several .docx files into one document and writes it to a fixed output file.
 *
 * <p>Every source file is opened, its body is collected, all bodies are spliced into the
 * body of the first document by {@code appendBody}, and the first document is written out.
 * Nothing is written when {@code filepaths} is null or empty.
 *
 * @param filepaths paths of the .docx files to merge, in the order they should appear
 * @throws Exception if a source file cannot be opened or parsed, or the result cannot be written
 */
public static void mergeDoc(String... filepaths) throws Exception {
    if (filepaths == null || filepaths.length == 0) {
        // Nothing to merge; do not create an empty output file.
        return;
    }
    List<CTBody> ctBodyList = new ArrayList<>();
    List<XWPFDocument> srcDocuments = new ArrayList<>();
    try {
        for (String filepath : filepaths) {
            // The input stream can be closed as soon as the package has been opened from it.
            // A failure to open a source is propagated instead of being swallowed, because a
            // missing source would otherwise surface later as an unrelated NullPointerException.
            try (InputStream in = new FileInputStream(filepath)) {
                XWPFDocument srcDocument = new XWPFDocument(OPCPackage.open(in));
                srcDocuments.add(srcDocument);
                ctBodyList.add(srcDocument.getDocument().getBody());
            }
        }
        appendBody(ctBodyList);
        // Output path is hard-coded: replace it with a path that exists on your machine.
        try (OutputStream dest = new FileOutputStream("/Users/corey/Desktop/temp/wordtools/合并文檔3.docx")) {
            srcDocuments.get(0).write(dest);
        }
    } finally {
        // Release the source documents whether or not the merge succeeded.
        for (XWPFDocument srcDocument : srcDocuments) {
            try {
                srcDocument.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
/**
 * Splices the content of all given bodies into the first body of the list.
 *
 * <p>Each body is serialised to XML text. The namespace declarations found in the opening
 * tag are collected and de-duplicated by {@code distinctXmlns}, the inner XML of every body
 * is concatenated, and the combined fragment is parsed and set on the first body.
 *
 * @param ctBodyList bodies to merge; the first entry receives the merged content
 * @throws Exception if the combined XML cannot be parsed
 */
private static void appendBody(List<CTBody> ctBodyList) throws Exception {
    if (ctBodyList == null || ctBodyList.isEmpty()) {
        return;
    }
    // All namespace declarations of all documents
    StringBuilder allXmlns = new StringBuilder();
    // Inner elements of all documents
    StringBuilder allElements = new StringBuilder();
    for (CTBody body : ctBodyList) {
        // Complete XML text of this body
        String xml = body.xmlText();
        int openTagEnd = xml.indexOf('>');
        if (openTagEnd < 0) {
            continue;
        }
        // An empty body may be serialised as a self-closing tag, which has no inner XML
        // and whose attribute list ends before the '/'.
        boolean selfClosing = openTagEnd > 0 && xml.charAt(openTagEnd - 1) == '/';
        int attributesEnd = selfClosing ? openTagEnd - 1 : openTagEnd;
        int xmlnsStart = xml.indexOf("xmlns");
        if (xmlnsStart >= 0 && xmlnsStart < attributesEnd) {
            // The trailing space keeps declarations of consecutive documents apart.
            allXmlns.append(xml, xmlnsStart, attributesEnd).append(' ');
        }
        int closeTagStart = xml.lastIndexOf("</");
        if (!selfClosing && closeTagStart > openTagEnd) {
            allElements.append(xml, openTagEnd + 1, closeTagStart);
        }
    }
    // Opening tag carrying the de-duplicated namespace declarations
    String openTag = distinctXmlns(allXmlns.toString());
    // Build the merged body and install it on the first document's body
    CTBody mergedBody = CTBody.Factory.parse(openTag + allElements + "</xml-fragment>");
    ctBodyList.get(0).set(mergedBody);
}
/**
 * Builds an {@code <xml-fragment ...>} opening tag from a run of namespace declarations,
 * keeping each declared name only once.
 *
 * <p>The input is split at every occurrence of {@code xmlns}; every piece, including the
 * final one, is treated as one declaration.
 *
 * @param prefix concatenated {@code xmlns...="..."} declarations; null is treated as empty
 * @return the opening tag containing every distinct declaration, in first-seen order
 */
public static String distinctXmlns(String prefix) {
    String source = prefix == null ? "" : prefix;
    Set<String> declarations = new LinkedHashSet<>();
    int start = source.indexOf("xmlns");
    while (start >= 0) {
        int next = source.indexOf("xmlns", start + 1);
        // The last declaration has no following "xmlns" marker and runs to the end of the
        // input; it must be kept as well.
        String declaration = next < 0 ? source.substring(start) : source.substring(start, next);
        declarations.add(declaration.trim());
        start = next;
    }
    StringBuilder sb = new StringBuilder("<xml-fragment ");
    for (Map.Entry<String, String> entry : distinctXmlns(declarations).entrySet()) {
        sb.append(" ");
        sb.append(entry.getKey());
        sb.append("=");
        sb.append(entry.getValue());
    }
    sb.append(">");
    return sb.toString();
}
/**
 * Indexes namespace declarations by their declared name.
 *
 * <p>The same name may be declared with different URIs; in that case the declaration
 * iterated last wins. Entries that are not strings or contain no {@code =} are skipped.
 *
 * @param set declarations of the form {@code xmlns:prefix="uri"}; may be null
 * @return a mutable map from declared name to the quoted URI, in iteration order
 */
public static Map<String, String> distinctXmlns(Set<?> set) {
    Map<String, String> byName = new LinkedHashMap<>();
    if (set == null) {
        return byName;
    }
    for (Object item : set) {
        if (!(item instanceof String)) {
            continue;
        }
        String declaration = ((String) item).trim();
        int equalsAt = declaration.indexOf('=');
        if (equalsAt <= 0) {
            continue;
        }
        byName.put(declaration.substring(0, equalsAt), declaration.substring(equalsAt + 1));
    }
    return byName;
}
/**
 * Closes every given input stream, skipping null entries.
 *
 * <p>An {@link IOException} raised while closing one stream is printed and does not stop
 * the remaining streams from being closed. This helper could live in a shared utility
 * class and accept {@code Closeable} so that it also covers output streams.
 *
 * @param inputStream the streams to close; individual entries may be null
 */
public static void closeStream(InputStream... inputStream) {
    for (int index = 0; index < inputStream.length; index++) {
        InputStream current = inputStream[index];
        if (current == null) {
            continue;
        }
        try {
            current.close();
        } catch (IOException closeFailure) {
            closeFailure.printStackTrace();
        }
    }
}
}
替換文檔中的占位符,包含段落占位符、表格占位符
替換占位符的思路,首先需要遍歷文檔中所有的段落和表格,再去一個個匹配占位符與你需要替換的參數,Word中段落是XWPFParagraph對象,表格是XWPFTable對象。更多代碼請查看項目的replacemark目錄。
package replacemark;
import org.apache.poi.xwpf.usermodel.*;
import org.springframework.util.StringUtils;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* 替換文檔中的段落和表格占位符
* @author corey
* @version 1.0
* @date 2020/5/9 9:14 上午
*/
public class ReplaceUtil {
/**
 * Replaces placeholders in every non-empty paragraph of the document.
 *
 * @param doc    the document whose paragraphs are processed
 * @param params replacement parameters: key = placeholder, value = actual value
 */
public static void replaceInPara(XWPFDocument doc, Map<String,Object> params) {
    for (Iterator<XWPFParagraph> paragraphs = doc.getParagraphsIterator(); paragraphs.hasNext();) {
        XWPFParagraph current = paragraphs.next();
        // Paragraphs without text cannot contain a placeholder.
        if (StringUtils.isEmpty(current.getParagraphText())) {
            continue;
        }
        replaceInPara(current, params);
    }
}
/**
 * Replaces placeholders in a single paragraph.
 *
 * <p>Only entries whose value is a {@code String} are applied; other entries are ignored.
 * When at least one placeholder was replaced, all runs of the paragraph are removed and a
 * single new run holding the replaced text is created, so the formatting of the original
 * runs is lost (known limitation).
 *
 * @param para   the paragraph to process; a null paragraph is ignored
 * @param params replacement parameters: key = placeholder, value = actual value;
 *               null or empty maps are ignored
 */
public static void replaceInPara(XWPFParagraph para, Map<String,Object> params) {
    if (para == null || params == null || params.isEmpty()) {
        return;
    }
    // Text of the whole paragraph
    String sourceText = para.getParagraphText();
    if (sourceText == null || sourceText.isEmpty()) {
        return;
    }
    boolean replaced = false;
    for (Map.Entry<String, Object> entry : params.entrySet()) {
        String key = entry.getKey();
        Object value = entry.getValue();
        // An empty key would match everywhere and inject the value between all characters.
        if (key == null || key.isEmpty() || !(value instanceof String)) {
            continue;
        }
        if (sourceText.contains(key)) {
            sourceText = sourceText.replace(key, (String) value);
            replaced = true;
        }
    }
    if (!replaced) {
        return;
    }
    // Remove the existing runs from last to first; valid indices are 0 .. size - 1.
    for (int i = para.getRuns().size() - 1; i >= 0; i--) {
        para.removeRun(i);
    }
    // Write the replaced text into one new run.
    para.createRun().setText(sourceText);
}
/**
 * Replaces placeholders inside the tables of the document.
 *
 * <p>A table is processed only when it has more than one row and its text contains a
 * {@code ${...}} placeholder; every paragraph of every cell is then handed to
 * {@code replaceInPara}.
 *
 * @param doc    the document whose tables are processed
 * @param params replacement parameters: key = placeholder, value = actual value
 */
public static void replaceTable(XWPFDocument doc,Map<String,Object> params){
    for (Iterator<XWPFTable> tables = doc.getTablesIterator(); tables.hasNext();) {
        XWPFTable current = tables.next();
        if (current.getRows().size() <= 1) {
            continue;
        }
        // A table containing ${ is a replacement target; other tables are left alone.
        if (!matcher(current.getText()).find()) {
            continue;
        }
        for (XWPFTableRow tableRow : current.getRows()) {
            for (XWPFTableCell tableCell : tableRow.getTableCells()) {
                for (XWPFParagraph cellParagraph : tableCell.getParagraphs()) {
                    replaceInPara(cellParagraph, params);
                }
            }
        }
    }
}
/**
 * Pattern for {@code ${name}} placeholders, compiled once and shared by all calls.
 */
private static final Pattern PLACEHOLDER_PATTERN =
        Pattern.compile("\\$\\{(.+?)\\}", Pattern.CASE_INSENSITIVE);
/**
 * Creates a matcher that finds {@code ${...}} placeholders in the given text.
 *
 * @param str the text to scan; null is treated as an empty string
 * @return a matcher over the text; group 1 holds the placeholder name
 */
private static Matcher matcher(String str) {
    return PLACEHOLDER_PATTERN.matcher(str == null ? "" : str);
}
}
富文本轉Word及注意事項
富文本轉成Word的思路,富文本本身就是一段HTML字符串,可以直接把這段字符串當做一個段落寫入到Word中,但這樣會丟失HTML樣式,所以需要將識別到的HTML標簽替換成Word標簽,這也是難點所在,所以需要設計一個大而全的樣式替換工具,目前筆者的項目中只做H1\H2\H3\段落\表格\img的src是url的圖片轉換(base64流放在富文本中太大了,不易識別),再提一句這些替換的工具可以設計為責任鏈模式,筆者也還沒有這樣做。更多代碼在項目的insertword目錄。
package insertword;
import org.apache.poi.util.Units;
import org.apache.poi.xwpf.usermodel.*;
import org.apache.xmlbeans.XmlCursor;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.util.ObjectUtils;
import org.springframework.util.StringUtils;
import java.io.*;
/**
* Html工具類
* @author corey
* @version 1.0
* @date 2020/5/5 9:36 下午
*/
public class HtmlUtil {
/**
 * Tags every recognised element of the document with the common marker attribute.
 *
 * <p>For each element, the tag name is looked up through {@code ElementEnum.getValueByCode};
 * when the lookup yields a non-empty value, that value is stored on the element under
 * {@code CommonConStant.COMMONATTR}.
 *
 * @param document the parsed HTML document to annotate
 * @throws NullPointerException if {@code document} is empty according to {@code ObjectUtils.isEmpty}
 */
public static void addElement(Document document){
    if(ObjectUtils.isEmpty(document)){
        throw new NullPointerException("不允許為空的對象添加元素");
    }
    for (Element element : document.getAllElements()) {
        String markerValue = ElementEnum.getValueByCode(element.tag().getName());
        if (StringUtils.isEmpty(markerValue)) {
            continue;
        }
        element.attr(CommonConStant.COMMONATTR, markerValue);
    }
}
/**
 * Writes rich-text (HTML) content into a Word document in front of an anchor paragraph.
 *
 * <p>The HTML is parsed with jsoup, recognised elements are tagged by {@link #addElement},
 * and every tagged element is converted in document order. Converted kinds are h1, h2, h3,
 * paragraph, table and image (the {@code src} attribute is opened as a local file). The
 * kinds {@code title}, {@code subtitle} and {@code imgbase64} are recognised but produce
 * no output. Conversion is best effort: the first failure is printed and stops the
 * conversion of the remaining elements.
 *
 * @param ritchText the rich-text HTML; null or empty input is ignored
 * @param doc       the Word document receiving the content (needed for images and tables)
 * @param paragraph the anchor paragraph; new content is inserted at a cursor created on it
 */
public static void resolveHtml(String ritchText, XWPFDocument doc, XWPFParagraph paragraph){
    if (StringUtils.isEmpty(ritchText)) {
        return;
    }
    // The two-argument overload takes a base URI, not a charset, so the one-argument form is used.
    Document document = Jsoup.parseBodyFragment(ritchText);
    try {
        // Tag the recognised elements
        HtmlUtil.addElement(document);
        Elements elements = document.select("["+CommonConStant.COMMONATTR+"]");
        for (Element em : elements) {
            XmlCursor xmlCursor = paragraph.getCTP().newCursor();
            try {
                switch (em.attr(CommonConStant.COMMONATTR)) {
                    case "imgurl":
                        insertImage(doc, xmlCursor, em.attr("src"));
                        break;
                    case "table":
                        XWPFTable xwpfTable = doc.insertNewTbl(xmlCursor);
                        addTable(xwpfTable,em);
                        // Centre the table
                        ParagraphStyleUtil.setTableLocation(xwpfTable,"center");
                        // Centre the cell content
                        ParagraphStyleUtil.setCellLocation(xwpfTable,"CENTER","center");
                        break;
                    case "h1":
                        insertHeading(doc, xmlCursor, em.text(), TitleFontEnum.H1);
                        break;
                    case "h2":
                        insertHeading(doc, xmlCursor, em.text(), TitleFontEnum.H2);
                        break;
                    case "h3":
                        insertHeading(doc, xmlCursor, em.text(), TitleFontEnum.H3);
                        break;
                    case "paragraph":
                        // Leading whitespace indents the paragraph.
                        // NOTE(review): the original comment says 4 spaces but the literal
                        // holds one - confirm the intended indent.
                        doc.insertNewParagraph(xmlCursor).createRun().setText(" "+em.text());
                        break;
                    case "title":
                    case "subtitle":
                    case "imgbase64":
                    default:
                        // Recognised or unknown kinds without a conversion
                        break;
                }
            } finally {
                // Cursors hold resources of the underlying XML store until disposed.
                xmlCursor.dispose();
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
/**
 * Inserts a centred paragraph holding the picture read from the given file path.
 * The stream is closed even when adding the picture fails.
 */
private static void insertImage(XWPFDocument doc, XmlCursor cursor, String src) throws Exception {
    try (InputStream inputStream = new FileInputStream(src)) {
        XWPFParagraph imageParagraph = doc.insertNewParagraph(cursor);
        // Centre the picture
        ParagraphStyleUtil.setImageCenter(imageParagraph);
        imageParagraph.createRun().addPicture(inputStream,XWPFDocument.PICTURE_TYPE_PNG,"圖片.jpeg", Units.toEMU(200),Units.toEMU(200));
    }
}
/**
 * Inserts a paragraph with one run holding the heading text and applies the title font
 * of the given heading level to that run.
 */
private static void insertHeading(XWPFDocument doc, XmlCursor cursor, String text, TitleFontEnum level) {
    XWPFRun headingRun = doc.insertNewParagraph(cursor).createRun();
    headingRun.setText(text);
    // Apply the font of this heading level
    ParagraphStyleUtil.setTitle(headingRun, level.getTitle());
}
/**
 * Reads the content of a text file.
 *
 * <p>The file is decoded as UTF-8 and read line by line; lines are joined with the
 * platform line separator, with no separator before the first line or after the last.
 * Reading is best effort: on failure the exception is printed and whatever was read so
 * far (possibly an empty string) is returned.
 *
 * @param file the file to read
 * @return the file content, or the part read before a failure occurred
 */
public static String txt2String(File file) {
    StringBuilder result = new StringBuilder();
    // try-with-resources closes the reader even when reading fails part-way.
    try (BufferedReader br = new BufferedReader(new InputStreamReader(
            new FileInputStream(file), java.nio.charset.StandardCharsets.UTF_8))) {
        String line;
        boolean firstLine = true;
        while ((line = br.readLine()) != null) {
            // Separators go between lines only, so the result does not start with one.
            if (!firstLine) {
                result.append(System.lineSeparator());
            }
            result.append(line);
            firstLine = false;
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return result.toString();
}
/**
 * Copies the rows of an HTML table element into a Word table.
 *
 * <p>Every {@code tr} element found under {@code table} is passed to {@code addTableTr}
 * together with its zero-based position; position 0 is handled specially there.
 *
 * @param xwpfTable the Word table receiving the rows
 * @param table     the HTML table element
 */
private static void addTable(XWPFTable xwpfTable,Element table) {
    Elements htmlRows = table.getElementsByTag("tr");
    for (int rowIndex = 0; rowIndex < htmlRows.size(); rowIndex++) {
        addTableTr(xwpfTable, htmlRows.get(rowIndex), rowIndex);
    }
}
/**
 * Copies the cells of one HTML row into the Word table.
 *
 * <p>The row's {@code th} cells are used when it has any, otherwise its {@code td} cells.
 * Row 0 is written into the table's existing first row; every later row is written into a
 * newly created row. Cells that do not exist yet in the target row are appended, so an
 * HTML row may be wider than the first row. A row without cells adds nothing.
 *
 * @param xwpfTable the Word table receiving the row
 * @param tr        the HTML {@code tr} element
 * @param rownum    zero-based position of the row within the HTML table
 */
private static void addTableTr(XWPFTable xwpfTable,Element tr,int rownum) {
    Elements headerCells = tr.getElementsByTag("th");
    Elements htmlCells = headerCells.isEmpty() ? tr.getElementsByTag("td") : headerCells;
    if (htmlCells.isEmpty()) {
        return;
    }
    // Reuse the first row for row 0; later rows get a new row each.
    XWPFTableRow targetRow = rownum == 0 ? xwpfTable.getRow(0) : null;
    if (targetRow == null) {
        targetRow = xwpfTable.createRow();
    }
    for (int i = 0, j = htmlCells.size(); i < j; i++) {
        XWPFTableCell targetCell = targetRow.getCell(i);
        if (targetCell == null) {
            // The target row has fewer cells than this HTML row; extend it.
            targetCell = targetRow.addNewTableCell();
        }
        targetCell.setText(htmlCells.get(i).text());
    }
}
/**
 * Closes every given resource, skipping null entries.
 *
 * <p>An {@link IOException} raised while closing one resource is printed and does not
 * stop the remaining resources from being closed.
 *
 * @param closeables the resources to close; individual entries may be null
 */
public static void closeStream(Closeable... closeables) {
    for (Closeable resource : closeables) {
        if (resource == null) {
            continue;
        }
        try {
            resource.close();
        } catch (IOException closeFailure) {
            closeFailure.printStackTrace();
        }
    }
}
}
給Word生成水印
Word添加水印的思路,利用XWPFHeader對象創建頁眉,給頁眉添加文字,設置字體、大小、顏色、旋轉角度即可。代碼在項目的insertword目錄。
package insertword;
import com.microsoft.schemas.office.office.CTLock;
import com.microsoft.schemas.vml.*;
import org.apache.poi.wp.usermodel.HeaderFooterType;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFHeader;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.*;
import java.util.stream.Stream;
/**
* @desc 添加水印
* @author corey
* @version 1.0
* @date 2020/5/5 10:07 下午
*/
public class WatermarkUtil {
// Font family of the watermark text
private static final String fontName = "宋體";
// Font size of the watermark text
private static final String fontSize = "0.2pt";
// Fill colour of the watermark text
private static final String fontColor = "#d0d0d0";
// Average width of one character in pt; used to estimate the text width (character count * width per character)
private static final Integer widthPerWord = 10;
// Top margin of the watermark line; reassigned by waterMarkDocXDocument before each line is drawn
private static Integer styleTop = 0;
// Rotation value of the text, written to the "rotation" property of the shape style
private static final String styleRotation = "45";
/**
 * Covers the whole page with watermark lines.
 *
 * <p>Draws one watermark line for every line index from -5 up to 19, placing each line at
 * a top margin of 100 times its index. This method can be used on its own.
 *
 * @param doc        the docx document to watermark
 * @param customText the watermark text
 */
public static void waterMarkDocXDocument(XWPFDocument doc,String customText){
    int lineIndex = -5;
    while (lineIndex < 20) {
        // The top margin is handed to the drawing code through the static field.
        styleTop = 100 * lineIndex;
        waterMarkDocXDocument_0(doc, customText);
        lineIndex++;
    }
}
/**
 * Adds one line of watermark text to the default header of the document.
 *
 * <p>The text is followed by 8 spaces and repeated 10 times, then drawn as a VML text-path
 * shape inside a new run of the header's first paragraph. The shape's position comes from
 * {@code getShapeStyle}, which reads the current {@code styleTop}. The method may be
 * called repeatedly on the same document; each call adds another shape to the same header
 * paragraph.
 *
 * @param doc        the docx document to process
 * @param customText the watermark text
 */
public static void waterMarkDocXDocument_0(XWPFDocument doc,String customText) {
    // Separate repetitions of the text with 8 spaces
    customText = customText + repeatString(" ", 8);
    // Repeat the text 10 times within one watermark line
    customText = repeatString(customText, 10);
    // An already existing DEFAULT header is reused
    XWPFHeader header = doc.createHeader(HeaderFooterType.DEFAULT);
    int size = header.getParagraphs().size();
    if (size == 0) {
        header.createParagraph();
    }
    CTP ctp = header.getParagraphArray(0).getCTP();
    // Copy the revision ids of the first body paragraph when the body has one and the
    // ids are present; a document without body paragraphs is still watermarked.
    CTBody body = doc.getDocument().getBody();
    if (body.sizeOfPArray() > 0) {
        CTP firstBodyParagraph = body.getPArray(0);
        byte[] rsidr = firstBodyParagraph.getRsidR();
        byte[] rsidrdefault = firstBodyParagraph.getRsidRDefault();
        if (rsidr != null) {
            ctp.setRsidP(rsidr);
        }
        if (rsidrdefault != null) {
            ctp.setRsidRDefault(rsidrdefault);
        }
    }
    // This method runs many times on the same header paragraph; reuse the paragraph
    // properties instead of appending a duplicate pPr/pStyle on every call.
    CTPPr ppr = ctp.isSetPPr() ? ctp.getPPr() : ctp.addNewPPr();
    if (ppr.isSetPStyle()) {
        ppr.getPStyle().setVal("Header");
    } else {
        ppr.addNewPStyle().setVal("Header");
    }
    // Start building the watermark
    CTR ctr = ctp.addNewR();
    CTRPr ctrpr = ctr.addNewRPr();
    ctrpr.addNewNoProof();
    CTGroup group = CTGroup.Factory.newInstance();
    CTShapetype shapetype = group.addNewShapetype();
    CTTextPath shapeTypeTextPath = shapetype.addNewTextpath();
    shapeTypeTextPath.setOn(STTrueFalse.T);
    shapeTypeTextPath.setFitshape(STTrueFalse.T);
    CTLock lock = shapetype.addNewLock();
    lock.setExt(STExt.VIEW);
    CTShape shape = group.addNewShape();
    shape.setId("PowerPlusWaterMarkObject");
    shape.setSpid("_x0000_s102");
    shape.setType("#_x0000_t136");
    // Shape style: rotation, position, relative anchoring and similar parameters
    shape.setStyle(getShapeStyle(customText));
    shape.setFillcolor(fontColor);
    // No outline, so the glyphs are drawn filled
    shape.setStroked(STTrueFalse.FALSE);
    // Path along which the text is drawn
    CTTextPath shapeTextPath = shape.addNewTextpath();
    // Font family and size of the text
    shapeTextPath.setStyle("font-family:" + fontName + ";font-size:" + fontSize);
    shapeTextPath.setString(customText);
    CTPicture pict = ctr.addNewPict();
    pict.set(group);
}
/**
 * Builds the style string of the watermark shape.
 *
 * <p>The width is estimated as the number of characters times {@code widthPerWord}; the
 * top margin and rotation are taken from {@code styleTop} and {@code styleRotation}.
 *
 * @param customText the text drawn by the shape
 * @return the style properties joined with {@code ;}
 */
private static String getShapeStyle(String customText) {
    // Estimated text width: character count * width per character
    int estimatedWidth = customText.length() * widthPerWord;
    return String.join(";",
            // Positioning mode of the text path
            "position: " + "absolute",
            "width: " + estimatedWidth + "pt",
            // Height of the text
            "height: " + "20pt",
            "z-index: " + "-251654144",
            "mso-wrap-edited: " + "f",
            // Vertical spacing between watermark lines: margin-top must be used here, top does not work
            "margin-top: " + styleTop,
            "mso-position-horizontal-relative: " + "page",
            "mso-position-vertical-relative: " + "page",
            "mso-position-vertical: " + "left",
            "mso-position-horizontal: " + "center",
            "rotation: " + styleRotation);
}
/**
 * Concatenates {@code repeats} copies of the given string.
 *
 * @param pattern the string to repeat
 * @param repeats how many copies to concatenate; must not be negative
 * @return the concatenation of the copies
 */
private static String repeatString(String pattern, int repeats) {
    StringBuilder joined = new StringBuilder(pattern.length() * repeats);
    if (repeats < 0) {
        // A negative count is rejected, as a negative stream limit would be.
        throw new IllegalArgumentException(Long.toString(repeats));
    }
    for (int copy = 0; copy < repeats; copy++) {
        joined.append(pattern);
    }
    return joined.toString();
}
}
傳送門
鳴謝
感謝項目中同事對Word操作提出的改善意見,讓本代碼得以順利交付運行。感謝所有提供了源代碼的博主。
感謝各位猿佬百忙之中抽空閱讀、點贊、收藏,記得幫忙在GitHub上點亮你尊貴的小星星哦。