doc转htmljava,doc转excel
java读取word,转换成html,然后再转换成txt,怎么实现
思路是用一些Java能操作word文件的库,例如Apache的POI库,还有jacob库等,
读出word文件的格式和内容,然后根据格式转换为html(该加粗的加粗,该分段的分段),
这个过程中可以把文字内容提取成txt纯文本文件。
java中的jacob将word文档转化为HTML文件问题
配置:
(1)将解压包中的jacob.dll(x86常用,x64)拷到jdk安装目录下的jre\bin文件夹或windows安装路径下的WINDOWS\system32文件夹下
(2)将jacob.jar文件拷到classpath下即可
常见问题解决:
对于“java.lang.UnsatisfiedLinkError: C:\WINDOWS\system32\jacob-1.14.3-x86.dll: 由于应用程序配置不正确,应用程序未能启动。重新安装应用程序可能会纠正这个问题”这一错误,可以通过
重新下载Jacob的jar及dll文件(最好版本比现在的低,如1.11)解决。
实例制作(主要功能:标题制作,表格制作,合并表格,替换文本,页眉页脚,书签处理):
import com.jacob.activeX.ActiveXComponent;
import com.jacob.com.Dispatch;
import com.jacob.com.Variant;
public?class?WordOperate?{
?public?static?void?main(String?args[])?{
??ActiveXComponent?wordApp?=?new?ActiveXComponent("Word.Application");?//?启动word
??//?Set?the?visible?property?as?required.
??Dispatch.put(wordApp,?"Visible",?new?Variant(true));//?//设置word可见
??Dispatch?docs?=?wordApp.getProperty("Documents").toDispatch();
??//?String?inFile?=?"d:\\test.doc";
??//?Dispatch?doc?=?Dispatch.invoke(docs,?"Open",?Dispatch.Method,
??//?new?Object[]?{?inFile,?new?Variant(false),?new?Variant(false)},//参数3,false:可写,true:只读
??//?new?int[1]).toDispatch();//打开文档
??Dispatch?document?=?Dispatch.call(docs,?"Add").toDispatch();//?create?new?document
??String?userName?=?wordApp.getPropertyAsString("Username");//?显示用户信息
??System.out.println("用户名:"?+?userName);
??//?文档对齐,字体设置////////////////////////
??Dispatch?selection?=?Dispatch.get(wordApp,?"Selection").toDispatch();
??Dispatch?align?=?Dispatch.get(selection,?"ParagraphFormat")
????.toDispatch();?//?行列格式化需要的对象
??Dispatch?font?=?Dispatch.get(selection,?"Font").toDispatch();?//?字型格式化需要的对象
??//?标题处理////////////////////////
??Dispatch.put(align,?"Alignment",?"1");?//?1:置中?2:靠右?3:靠左
??Dispatch.put(font,?"Bold",?"1");?//?字型租体
??Dispatch.put(font,?"Color",?"1,0,0,0");?//?字型颜色红色
??Dispatch.call(selection,?"TypeText",?"Word文档处理");?//?写入标题内容
??Dispatch.call(selection,?"TypeParagraph");?//?空一行段落
??Dispatch.put(align,?"Alignment",?"3");?//?1:置中?2:靠右?3:靠左
??Dispatch.put(selection,?"Text",?"????????");
??Dispatch.call(selection,?"MoveDown");?//?光标标往下一行
??//表格处理////////////////////////
??Dispatch?tables?=?Dispatch.get(document,?"Tables").toDispatch();
??Dispatch?range?=?Dispatch.get(selection,?"Range").toDispatch();
??Dispatch?table1?=?Dispatch.call(tables,?"Add",?range,?new?Variant(3),
????new?Variant(2),?new?Variant(1)).toDispatch();?//?设置行数,列数,表格外框宽度
??//?所有表格
??Variant?tableAmount?=?Dispatch.get(tables,?"count");
??System.out.println(tableAmount);
??//?要填充的表格
??Dispatch?t1?=?Dispatch.call(tables,?"Item",?new?Variant(1))
????.toDispatch();
??Dispatch?t1_row?=?Dispatch.get(t1,?"rows").toDispatch();//?所有行
??int?t1_rowNum?=?Dispatch.get(t1_row,?"count").getInt();
??Dispatch.call(Dispatch.get(t1,?"columns").toDispatch(),?"AutoFit");//?自动调整
??int?t1_colNum?=?Dispatch.get(Dispatch.get(t1,?"columns").toDispatch(),
????"count").getInt();
??System.out.println(t1_rowNum?+?"?"?+?t1_colNum);
??for?(int?i?=?1;?i?=?t1_rowNum;?i++)?{
???for?(int?j?=?1;?j?=?t1_colNum;?j++)?{
????Dispatch?cell?=?Dispatch.call(t1,?"Cell",?new?Variant(i),
??????new?Variant(j)).toDispatch();//?行,列
????Dispatch.call(cell,?"Select");
????Dispatch.put(selection,?"Text",?"cell"?+?i?+?j);?//?写入word的内容
????Dispatch.put(font,?"Bold",?"0");?//?字型租体(1:租体?0:取消租体)
????Dispatch.put(font,?"Color",?"1,1,1,0");?//?字型颜色
????Dispatch.put(font,?"Italic",?"1");?//?斜体?1:斜体?0:取消斜体
????Dispatch.put(font,?"Underline",?"1");?//?下划线
????Dispatch?Range?=?Dispatch.get(cell,?"Range").toDispatch();
????String?cellContent?=?Dispatch.get(Range,?"Text").toString();
????System.out.println((cellContent.substring(0,?cellContent
??????.length()?-?1)).trim());
???}
???Dispatch.call(selection,?"MoveDown");?//?光标往下一行(才不会输入盖过上一输入位置)
??}
??//合并单元格////////////////////////
??Dispatch.put(selection,?"Text",?"????????");
??Dispatch.call(selection,?"MoveDown");?//?光标标往下一行
??Dispatch?range2?=?Dispatch.get(selection,?"Range").toDispatch();
??Dispatch?table2?=?Dispatch.call(tables,?"Add",?range2,?new?Variant(8),
????new?Variant(4),?new?Variant(1)).toDispatch();?//?设置行数,列数,表格外框宽度
??Dispatch?t2?=?Dispatch.call(tables,?"Item",?new?Variant(2))
????.toDispatch();
??Dispatch?beginCell?=?Dispatch.call(t2,?"Cell",?new?Variant(1),
????new?Variant(1)).toDispatch();
??Dispatch?endCell?=?Dispatch.call(t2,?"Cell",?new?Variant(4),
????new?Variant(4)).toDispatch();
??Dispatch.call(beginCell,?"Merge",?endCell);
??for?(int?row?=?1;?row?=?Dispatch.get(
????Dispatch.get(t2,?"rows").toDispatch(),?"count").getInt();?row++)?{
???for?(int?col?=?1;?col?=?Dispatch.get(
?????Dispatch.get(t2,?"columns").toDispatch(),?"count").getInt();?col++)?{
????if?(row?==?1)?{
?????Dispatch?cell?=?Dispatch.call(t2,?"Cell",?new?Variant(1),
???????new?Variant(1)).toDispatch();//?行,列
?????Dispatch.call(cell,?"Select");
?????Dispatch.put(font,?"Color",?"1,1,1,0");?//?字型颜色
?????Dispatch.put(selection,?"Text",?"merge?Cell!");
????}?else?{
?????Dispatch?cell?=?Dispatch.call(t2,?"Cell",?new?Variant(row),
???????new?Variant(col)).toDispatch();//?行,列
?????Dispatch.call(cell,?"Select");
?????Dispatch.put(font,?"Color",?"1,1,1,0");?//?字型颜色
?????Dispatch.put(selection,?"Text",?"cell"?+?row?+?col);
????}
???}
???Dispatch.call(selection,?"MoveDown");
??}
??//Dispatch.call(selection,?"MoveRight",?new?Variant(1),?new?Variant(1));//?取消选择
??//?Object?content?=?Dispatch.get(doc,"Content").toDispatch();
??//?Word文档内容查找及替换////////////////////////
??Dispatch.call(selection,?"TypeParagraph");?//?空一行段落
??Dispatch.put(align,?"Alignment",?"3");?//?1:置中?2:靠右?3:靠左
??Dispatch.put(font,?"Color",?0);
??Dispatch.put(selection,?"Text",?"欢迎,Hello,world!");
??Dispatch.call(selection,?"HomeKey",?new?Variant(6));//?移到开头
??Dispatch?find?=?Dispatch.call(selection,?"Find").toDispatch();//?获得Find组件
??Dispatch.put(find,?"Text",?"hello");?//?查找字符串"hello"
??Dispatch.put(find,?"Forward",?"True");//?向前查找
??//?Dispatch.put(find,?"Format",?"True");//?设置格式
??Dispatch.put(find,?"MatchCase",?"false");//?大小写匹配
??Dispatch.put(find,?"MatchWholeWord",?"True");?//?全字匹配
??Dispatch.call(find,?"Execute");?//?执行查询
??Dispatch.put(selection,?"Text",?"你好");//?替换为"你好"
??//使用方法传入的参数parameter调用word文档中的MyWordMacro宏//
??//Dispatch.call(document,macroName,parameter);
??//Dispatch.invoke(document,macroName,Dispatch.Method,parameter,new?int[1]);
??//页眉,页脚处理////////////////////////
??Dispatch?ActiveWindow?=?wordApp.getProperty("ActiveWindow")
????.toDispatch();
??Dispatch?ActivePane?=?Dispatch.get(ActiveWindow,?"ActivePane")
????.toDispatch();
??Dispatch?View?=?Dispatch.get(ActivePane,?"View").toDispatch();
??Dispatch.put(View,?"SeekView",?"9");?//9是设置页眉
??Dispatch.put(align,?"Alignment",?"1");?//?置中
??Dispatch.put(selection,?"Text",?"这里是页眉");?//?初始化时间
??Dispatch.put(View,?"SeekView",?"10");?//?10是设置页脚
??Dispatch.put(align,?"Alignment",?"2");?//?靠右
??Dispatch.put(selection,?"Text",?"这里是页脚");?//?初始化从1开始
??//书签处理(打开文档时处理)////////////////////////
??//Dispatch?activeDocument?=?wordApp.getProperty("ActiveDocument").toDispatch();
??Dispatch?bookMarks?=?Dispatch.call(document,?"Bookmarks").toDispatch();
??boolean?isExist?=?Dispatch.call(bookMarks,?"Exists",?"bookMark1")
????.getBoolean();
??if?(isExist?==?true)?{
???Dispatch?rangeItem1?=?Dispatch.call(bookMarks,?"Item",?"bookMark1")
?????.toDispatch();
???Dispatch?range1?=?Dispatch.call(rangeItem1,?"Range").toDispatch();
???Dispatch.put(range1,?"Text",?new?Variant("当前是书签1的文本信息!"));
???String?bookMark1Value?=?Dispatch.get(range1,?"Text").toString();
???System.out.println(bookMark1Value);
??}?else?{
???System.out.println("当前书签不存在,重新建立!");
???Dispatch.call(bookMarks,?"Add",?"bookMark1",?selection);
???Dispatch?rangeItem1?=?Dispatch.call(bookMarks,?"Item",?"bookMark1")
???.toDispatch();
???Dispatch?range1?=?Dispatch.call(rangeItem1,?"Range").toDispatch();
???Dispatch.put(range1,?"Text",?new?Variant("当前是书签1的文本信息!"));
???String?bookMark1Value?=?Dispatch.get(range1,?"Text").toString();
???System.out.println(bookMark1Value);?
??}
??//保存操作////////////////////////
??Dispatch.call(document,?"SaveAs",?"D:/wordOperate.doc");
??//Dispatch.invoke((Dispatch)?doc,?"SaveAs",?Dispatch.Method,?new?Object[]{htmlPath,?new?Variant(8)},?new?int[1]);???//生成html文件
??//?0?=?wdDoNotSaveChanges
??//?-1?=?wdSaveChanges
??//?-2?=?wdPromptToSaveChanges
??//Dispatch.call(document,?"Close",?new?Variant(0));
??//?//?worddoc.olefunction("protect",2,true,"");
??//?//?Dispatch?bookMarks?=?wordApp.call(docs,"Bookmarks").toDispatch();
??//?//?System.out.println("bookmarks"+bookMarks.getProgramId());
??//?//Dispatch.call(doc,?"Save");?//保存
??//?//?Dispatch.call(doc,?"Close",?new?Variant(true));
??//?//wordApp.invoke("Quit",new?Variant[]{});
??//?wordApp.safeRelease();//Finalizers?call?this?method
?}
}
docx转html转pad。java
加载带有DOC或DOCX扩展名的源Word文件将文件另存为输出HTML。
MicrosoftWord文件格式DOC/DOCX很著名,因为文字处理器支持多种功能来组织和解释信息。同样,HTML文件格式有助于在Web应用程序中显示信息。
如果想使用Java将DOCX转换为HTML5,可以按以下步骤进行:加载输入的DOCX文件;创建HtmlSaveOptions并指定SaveFormat;将HtmlVersion设置为HTML5对应的枚举值;最后保存输出文件。
java使用jacob将word转换为html,如何设置转换后html的编码格式。我想要utf-8的,不要gb2312。
强制转码~~
line是你要转的内容
line=new String(line.getBytes("gb2312"),"utf-8");代码是我凭记忆写的,应该没问题(注意:这种写法是把GB2312字节按UTF-8重新解码,通常会得到乱码;更可靠的做法是按文件原来的编码读取内容,再以UTF-8编码写出。)
或者你在写之前。先写一个HTML页面编码的代码 。把页面的格式设置成utf-8
Java程序调用 openoffice,将doc文件转Html文件,但转换完格式都变成居左边?
1、到官网下载Jacob,
2、将压缩包解压后,Jacob.jar添加到Libraries中(先复制到项目目录中,右键单击jar包选择Build Path—Add to Build Path);
3、将Jacob.dll放至当前项目所用到的“jre\bin”下面(比如Eclipse正在用的Jre路径是C:\Java\jdk1.7.0_17\jre\bin)。
Ps:按照上面的步骤配置的,基本没有问题,但是有些电脑可能还会报错,比如:java.lang.UnsatisfiedLinkError: no jacob in java.library.path,这是系统没有加载到jacob.dll,网上解决方法是将Jacob.dll放至“WINDOWS\SYSTEM32”下面。
Java代码:
public?class?JacobUtil?{?
//?8?代表word保存成html?
public?static?final?int?WORD_HTML?=?8;?
public?static?void?main(String[]?args)?{?
String?docfile?=?"C:\\Users\\无名\\Desktop\\xxx.doc";?
String?htmlfile?=?"C:\\Users\\无名\\Desktop\\xxx.html";?
JacobUtil.wordToHtml(docfile,?htmlfile);?
}?
/**?
*?WORD转HTML?
*?@param?docfile?WORD文件全路径?
*?@param?htmlfile?转换后HTML存放路径?
*/?
public?static?void?wordToHtml(String?docfile,?String?htmlfile)?
{?
//?启动word应用程序(Microsoft?Office?Word?2003)?
ActiveXComponent?app?=?new?ActiveXComponent("Word.Application");?
System.out.println("*****正在转换...*****");?
try?
{?
//?设置word应用程序不可见?
app.setProperty("Visible",?new?Variant(false));?
//?documents表示word程序的所有文档窗口,(word是多文档应用程序)?
Dispatch?docs?=?app.getProperty("Documents").toDispatch();?
//?打开要转换的word文件?
Dispatch?doc?=?Dispatch.invoke(?
docs,?
"Open",?
Dispatch.Method,?
new?Object[]?{?docfile,?new?Variant(false),?
new?Variant(true)?},?new?int[1]).toDispatch();?
//?作为html格式保存到临时文件?
Dispatch.invoke(doc,?"SaveAs",?Dispatch.Method,?new?Object[]?{?
htmlfile,?new?Variant(WORD_HTML)?},?new?int[1]);?
//?关闭word文件?
Dispatch.call(doc,?"Close",?new?Variant(false));?
}catch?(Exception?e){?
e.printStackTrace();?
}finally{?
//关闭word应用程序?
app.invoke("Quit",?new?Variant[]?{});?
}?
System.out.println("*****转换完毕********");?
}?
}
java excel怎么读取到html
一般都是用jacob的,这里不能发地址,自己搜一下,要下1.9的版本
1、我们解开下载的jacob_1.9.zip,在文件夹中找到jacob.dll和jacob.jar两个文件
2、将压缩包解压后,Jacob.jar添加到Libraries中;
3、将Jacob.dll放至“WINDOWS\SYSTEM32”下面。
需要注意的是:
【使用IDE启动Web服务器时,系统读取不到Jacob.dll,例如用MyEclipse启动Tomcat,就需要将dll文件copy到MyEclipse安装目录的“jre\bin”下面。
一般系统没有加载到Jacob.dll文件时,报错信息为:“java.lang.UnsatisfiedLinkError: no jacob in java.library.path”】
使用Jacob转换Word,Excel为HTML
JAVA代码:
Java代码
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import com.jacob.activeX.ActiveXComponent;
import com.jacob.com.Dispatch;
import com.jacob.com.Variant;
public class TransformFiletoHtml
{
int WORD_HTML = 8;
int WORD_TXT = 7;
int EXCEL_HTML = 44;
/**
* WORD转HTML
* @param docfile WORD文件全路径
* @param htmlfile 转换后HTML存放路径
*/
public void wordToHtml(String docfile, String htmlfile)
{
ActiveXComponent app = new ActiveXComponent("Word.Application"); // 启动word
try
{
app.setProperty("Visible", new Variant(false));
Dispatch docs = app.getProperty("Documents").toDispatch();
Dispatch doc = Dispatch.invoke(docs,"Open",Dispatch.Method,new Object[] { docfile, new Variant(false),new Variant(true) }, new int[1]).toDispatch();
Dispatch.invoke(doc, "SaveAs", Dispatch.Method, new Object[] {htmlfile, new Variant(WORD_HTML) }, new int[1]);
Variant f = new Variant(false);
Dispatch.call(doc, "Close", f);
}
catch (Exception e)
{
e.printStackTrace();
}
finally
{
app.invoke("Quit", new Variant[] {});
}
}
/**
* EXCEL转HTML
* @param xlsfile EXCEL文件全路径
* @param htmlfile 转换后HTML存放路径
*/
public void excelToHtml(String xlsfile, String htmlfile)
{
ActiveXComponent app = new ActiveXComponent("Excel.Application"); // 启动excel
try
{
app.setProperty("Visible", new Variant(false));
Dispatch excels = app.getProperty("Workbooks").toDispatch();
Dispatch excel = Dispatch.invoke(excels,"Open",Dispatch.Method,new Object[] { xlsfile, new Variant(false),new Variant(true) }, new int[1]).toDispatch();
Dispatch.invoke(excel, "SaveAs", Dispatch.Method, new Object[] {htmlfile, new Variant(EXCEL_HTML) }, new int[1]);
Variant f = new Variant(false);
Dispatch.call(excel, "Close", f);
}
catch (Exception e)
{
e.printStackTrace();
}
finally
{
app.invoke("Quit", new Variant[] {});
}
}
/**
* /删除指定文件夹
* @param folderPath 文件夹全路径
* @param htmlfile 转换后HTML存放路径
*/
public void delFolder(String folderPath)
{
try
{
delAllFile(folderPath); //删除完里面所有内容
String filePath = folderPath;
filePath = filePath.toString();
java.io.File myFilePath = new java.io.File(filePath);
myFilePath.delete(); //删除空文件夹
} catch (Exception e) {e.printStackTrace();}
}
/**
* /删除指定文件夹下所有文件
* @param path 文件全路径
*/
public boolean delAllFile(String path)
{
boolean flag = false;
File file = new File(path);
if (!file.exists())
{
return flag;
}
if (!file.isDirectory())
{
return flag;
}
String[] tempList = file.list();
File temp = null;
for (int i = 0; i tempList.length; i++)
{
if (path.endsWith(File.separator))
{
temp = new File(path + tempList[i]);
}
else
{
temp = new File(path + File.separator + tempList[i]);
}
if (temp.isFile())
{
temp.delete();
}
if (temp.isDirectory())
{
delAllFile(path + "/" + tempList[i]);//先删除文件夹里面的文件
delFolder(path + "/" + tempList[i]);//再删除空文件夹
flag = true;
}
}
return flag;
}
}
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import com.jacob.activeX.ActiveXComponent;
import com.jacob.com.Dispatch;
import com.jacob.com.Variant;
public class TransformFiletoHtml
{
int WORD_HTML = 8;
int WORD_TXT = 7;
int EXCEL_HTML = 44;
/**
* WORD转HTML
* @param docfile WORD文件全路径
* @param htmlfile 转换后HTML存放路径
*/
public void wordToHtml(String docfile, String htmlfile)
{
ActiveXComponent app = new ActiveXComponent("Word.Application"); // 启动word
try
{
app.setProperty("Visible", new Variant(false));
Dispatch docs = app.getProperty("Documents").toDispatch();
Dispatch doc = Dispatch.invoke(docs,"Open",Dispatch.Method,new Object[] { docfile, new Variant(false),new Variant(true) }, new int[1]).toDispatch();
Dispatch.invoke(doc, "SaveAs", Dispatch.Method, new Object[] {htmlfile, new Variant(WORD_HTML) }, new int[1]);
Variant f = new Variant(false);
Dispatch.call(doc, "Close", f);
}
catch (Exception e)
{
e.printStackTrace();
}
finally
{
app.invoke("Quit", new Variant[] {});
}
}
/**
* EXCEL转HTML
* @param xlsfile EXCEL文件全路径
* @param htmlfile 转换后HTML存放路径
*/
public void excelToHtml(String xlsfile, String htmlfile)
{
ActiveXComponent app = new ActiveXComponent("Excel.Application"); // 启动excel
try
{
app.setProperty("Visible", new Variant(false));
Dispatch excels = app.getProperty("Workbooks").toDispatch();
Dispatch excel = Dispatch.invoke(excels,"Open",Dispatch.Method,new Object[] { xlsfile, new Variant(false),new Variant(true) }, new int[1]).toDispatch();
Dispatch.invoke(excel, "SaveAs", Dispatch.Method, new Object[] {htmlfile, new Variant(EXCEL_HTML) }, new int[1]);
Variant f = new Variant(false);
Dispatch.call(excel, "Close", f);
}
catch (Exception e)
{
e.printStackTrace();
}
finally
{
app.invoke("Quit", new Variant[] {});
}
}
/**
* /删除指定文件夹
* @param folderPath 文件夹全路径
* @param htmlfile 转换后HTML存放路径
*/
public void delFolder(String folderPath)
{
try
{
delAllFile(folderPath); //删除完里面所有内容
String filePath = folderPath;
filePath = filePath.toString();
java.io.File myFilePath = new java.io.File(filePath);
myFilePath.delete(); //删除空文件夹
} catch (Exception e) {e.printStackTrace();}
}
/**
* /删除指定文件夹下所有文件
* @param path 文件全路径
*/
public boolean delAllFile(String path)
{
boolean flag = false;
File file = new File(path);
if (!file.exists())
{
return flag;
}
if (!file.isDirectory())
{
return flag;
}
String[] tempList = file.list();
File temp = null;
for (int i = 0; i tempList.length; i++)
{
if (path.endsWith(File.separator))
{
temp = new File(path + tempList[i]);
}
else
{
temp = new File(path + File.separator + tempList[i]);
}
if (temp.isFile())
{
temp.delete();
}
if (temp.isDirectory())
{
delAllFile(path + "/" + tempList[i]);//先删除文件夹里面的文件
delFolder(path + "/" + tempList[i]);//再删除空文件夹
flag = true;
}
}
return flag;
}
}调用JAVA代码:
Java代码
public class Test1 {
public static void main(String[] args) {
// TODO Auto-generated method stub
TransformFiletoHtml trans = new TransformFiletoHtml();
trans.wordToHtml("D:\\sinye.doc", "D:\\sinye.html");
}
}
/**
 * Minimal driver that converts one Word document to HTML using
 * {@code TransformFiletoHtml}.
 */
public class Test1 {

    /**
     * Converts {@code D:\sinye.doc} to {@code D:\sinye.html}.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        TransformFiletoHtml trans = new TransformFiletoHtml();
        trans.wordToHtml("D:\\sinye.doc", "D:\\sinye.html");
    }
}
只写了一个测试word转html的,excel转html的同理,在TransformFiletoHtml类中,写了两个方法,一个是删除文件夹的方法(delFolder()),一个是删除文件夹下所有文件的方法(delAllFile())。