doc转htmljava,doc转excel

http://www.itjxue.com  2023-01-12 22:06  来源:未知  点击次数: 

java读取word 转换html然后再转换txt怎么实现

思路是用一些Java能操作word文件的库,例如Apache的POI库,还有jacob库等,

读出word文件的格式和内容,然后根据格式转换为html(该加粗的加粗,该分段的分段),

这个过程中可以把文字内容提取成txt纯文本文件。

java中的jacob将word文档转化为HTML文件问题

配置:

(1)将解压包中的jacob.dll(x86常用,x64)拷到jdk安装目录下的jre\bin文件夹或windows安装路径下的WINDOWS\system32文件夹下

(2)将jacob.jar文件拷到classpath下即可

常见问题解决:

对于“java.lang.UnsatisfiedLinkError: C:\WINDOWS\system32\jacob-1.14.3-x86.dll: 由于应用程序配置不正确,应用程序未能启动。重新安装应用程序可能会纠正”这个问题,可以通过

重新下载Jacob的jar及dll文件(最好版本比现在的低,如1.11)解决

实例制作(主要功能:标题制作,表格制作,合并表格,替换文本,页眉页脚,书签处理):

import?com.jacob.activeX.ActiveXComponent;

import?com.jacob.com.Dispatch;

import?com.jacob.com.Variant;

public?class?WordOperate?{

?public?static?void?main(String?args[])?{

??ActiveXComponent?wordApp?=?new?ActiveXComponent("Word.Application");?//?启动word

??//?Set?the?visible?property?as?required.

??Dispatch.put(wordApp,?"Visible",?new?Variant(true));//?//设置word可见

??Dispatch?docs?=?wordApp.getProperty("Documents").toDispatch();

??//?String?inFile?=?"d:\\test.doc";

??//?Dispatch?doc?=?Dispatch.invoke(docs,?"Open",?Dispatch.Method,

??//?new?Object[]?{?inFile,?new?Variant(false),?new?Variant(false)},//参数3,false:可写,true:只读

??//?new?int[1]).toDispatch();//打开文档

??Dispatch?document?=?Dispatch.call(docs,?"Add").toDispatch();//?create?new?document

??String?userName?=?wordApp.getPropertyAsString("Username");//?显示用户信息

??System.out.println("用户名:"?+?userName);

??//?文档对齐,字体设置////////////////////////

??Dispatch?selection?=?Dispatch.get(wordApp,?"Selection").toDispatch();

??Dispatch?align?=?Dispatch.get(selection,?"ParagraphFormat")

????.toDispatch();?//?行列格式化需要的对象

??Dispatch?font?=?Dispatch.get(selection,?"Font").toDispatch();?//?字型格式化需要的对象

??//?标题处理////////////////////////

??Dispatch.put(align,?"Alignment",?"1");?//?1:置中?2:靠右?3:靠左

??Dispatch.put(font,?"Bold",?"1");?//?字型租体

??Dispatch.put(font,?"Color",?"1,0,0,0");?//?字型颜色红色

??Dispatch.call(selection,?"TypeText",?"Word文档处理");?//?写入标题内容

??Dispatch.call(selection,?"TypeParagraph");?//?空一行段落

??Dispatch.put(align,?"Alignment",?"3");?//?1:置中?2:靠右?3:靠左

??Dispatch.put(selection,?"Text",?"????????");

??Dispatch.call(selection,?"MoveDown");?//?光标标往下一行

??//表格处理////////////////////////

??Dispatch?tables?=?Dispatch.get(document,?"Tables").toDispatch();

??Dispatch?range?=?Dispatch.get(selection,?"Range").toDispatch();

??Dispatch?table1?=?Dispatch.call(tables,?"Add",?range,?new?Variant(3),

????new?Variant(2),?new?Variant(1)).toDispatch();?//?设置行数,列数,表格外框宽度

??//?所有表格

??Variant?tableAmount?=?Dispatch.get(tables,?"count");

??System.out.println(tableAmount);

??//?要填充的表格

??Dispatch?t1?=?Dispatch.call(tables,?"Item",?new?Variant(1))

????.toDispatch();

??Dispatch?t1_row?=?Dispatch.get(t1,?"rows").toDispatch();//?所有行

??int?t1_rowNum?=?Dispatch.get(t1_row,?"count").getInt();

??Dispatch.call(Dispatch.get(t1,?"columns").toDispatch(),?"AutoFit");//?自动调整

??int?t1_colNum?=?Dispatch.get(Dispatch.get(t1,?"columns").toDispatch(),

????"count").getInt();

??System.out.println(t1_rowNum?+?"?"?+?t1_colNum);

??for?(int?i?=?1;?i?=?t1_rowNum;?i++)?{

???for?(int?j?=?1;?j?=?t1_colNum;?j++)?{

????Dispatch?cell?=?Dispatch.call(t1,?"Cell",?new?Variant(i),

??????new?Variant(j)).toDispatch();//?行,列

????Dispatch.call(cell,?"Select");

????Dispatch.put(selection,?"Text",?"cell"?+?i?+?j);?//?写入word的内容

????Dispatch.put(font,?"Bold",?"0");?//?字型租体(1:租体?0:取消租体)

????Dispatch.put(font,?"Color",?"1,1,1,0");?//?字型颜色

????Dispatch.put(font,?"Italic",?"1");?//?斜体?1:斜体?0:取消斜体

????Dispatch.put(font,?"Underline",?"1");?//?下划线

????Dispatch?Range?=?Dispatch.get(cell,?"Range").toDispatch();

????String?cellContent?=?Dispatch.get(Range,?"Text").toString();

????System.out.println((cellContent.substring(0,?cellContent

??????.length()?-?1)).trim());

???}

???Dispatch.call(selection,?"MoveDown");?//?光标往下一行(才不会输入盖过上一输入位置)

??}

??//合并单元格////////////////////////

??Dispatch.put(selection,?"Text",?"????????");

??Dispatch.call(selection,?"MoveDown");?//?光标标往下一行

??Dispatch?range2?=?Dispatch.get(selection,?"Range").toDispatch();

??Dispatch?table2?=?Dispatch.call(tables,?"Add",?range2,?new?Variant(8),

????new?Variant(4),?new?Variant(1)).toDispatch();?//?设置行数,列数,表格外框宽度

??Dispatch?t2?=?Dispatch.call(tables,?"Item",?new?Variant(2))

????.toDispatch();

??Dispatch?beginCell?=?Dispatch.call(t2,?"Cell",?new?Variant(1),

????new?Variant(1)).toDispatch();

??Dispatch?endCell?=?Dispatch.call(t2,?"Cell",?new?Variant(4),

????new?Variant(4)).toDispatch();

??Dispatch.call(beginCell,?"Merge",?endCell);

??for?(int?row?=?1;?row?=?Dispatch.get(

????Dispatch.get(t2,?"rows").toDispatch(),?"count").getInt();?row++)?{

???for?(int?col?=?1;?col?=?Dispatch.get(

?????Dispatch.get(t2,?"columns").toDispatch(),?"count").getInt();?col++)?{

????if?(row?==?1)?{

?????Dispatch?cell?=?Dispatch.call(t2,?"Cell",?new?Variant(1),

???????new?Variant(1)).toDispatch();//?行,列

?????Dispatch.call(cell,?"Select");

?????Dispatch.put(font,?"Color",?"1,1,1,0");?//?字型颜色

?????Dispatch.put(selection,?"Text",?"merge?Cell!");

????}?else?{

?????Dispatch?cell?=?Dispatch.call(t2,?"Cell",?new?Variant(row),

???????new?Variant(col)).toDispatch();//?行,列

?????Dispatch.call(cell,?"Select");

?????Dispatch.put(font,?"Color",?"1,1,1,0");?//?字型颜色

?????Dispatch.put(selection,?"Text",?"cell"?+?row?+?col);

????}

???}

???Dispatch.call(selection,?"MoveDown");

??}

??//Dispatch.call(selection,?"MoveRight",?new?Variant(1),?new?Variant(1));//?取消选择

??//?Object?content?=?Dispatch.get(doc,"Content").toDispatch();

??//?Word文档内容查找及替换////////////////////////

??Dispatch.call(selection,?"TypeParagraph");?//?空一行段落

??Dispatch.put(align,?"Alignment",?"3");?//?1:置中?2:靠右?3:靠左

??Dispatch.put(font,?"Color",?0);

??Dispatch.put(selection,?"Text",?"欢迎,Hello,world!");

??Dispatch.call(selection,?"HomeKey",?new?Variant(6));//?移到开头

??Dispatch?find?=?Dispatch.call(selection,?"Find").toDispatch();//?获得Find组件

??Dispatch.put(find,?"Text",?"hello");?//?查找字符串"hello"

??Dispatch.put(find,?"Forward",?"True");//?向前查找

??//?Dispatch.put(find,?"Format",?"True");//?设置格式

??Dispatch.put(find,?"MatchCase",?"false");//?大小写匹配

??Dispatch.put(find,?"MatchWholeWord",?"True");?//?全字匹配

??Dispatch.call(find,?"Execute");?//?执行查询

??Dispatch.put(selection,?"Text",?"你好");//?替换为"你好"

??//使用方法传入的参数parameter调用word文档中的MyWordMacro宏//

??//Dispatch.call(document,macroName,parameter);

??//Dispatch.invoke(document,macroName,Dispatch.Method,parameter,new?int[1]);

??//页眉,页脚处理////////////////////////

??Dispatch?ActiveWindow?=?wordApp.getProperty("ActiveWindow")

????.toDispatch();

??Dispatch?ActivePane?=?Dispatch.get(ActiveWindow,?"ActivePane")

????.toDispatch();

??Dispatch?View?=?Dispatch.get(ActivePane,?"View").toDispatch();

??Dispatch.put(View,?"SeekView",?"9");?//9是设置页眉

??Dispatch.put(align,?"Alignment",?"1");?//?置中

??Dispatch.put(selection,?"Text",?"这里是页眉");?//?初始化时间

??Dispatch.put(View,?"SeekView",?"10");?//?10是设置页脚

??Dispatch.put(align,?"Alignment",?"2");?//?靠右

??Dispatch.put(selection,?"Text",?"这里是页脚");?//?初始化从1开始

??//书签处理(打开文档时处理)////////////////////////

??//Dispatch?activeDocument?=?wordApp.getProperty("ActiveDocument").toDispatch();

??Dispatch?bookMarks?=?Dispatch.call(document,?"Bookmarks").toDispatch();

??boolean?isExist?=?Dispatch.call(bookMarks,?"Exists",?"bookMark1")

????.getBoolean();

??if?(isExist?==?true)?{

???Dispatch?rangeItem1?=?Dispatch.call(bookMarks,?"Item",?"bookMark1")

?????.toDispatch();

???Dispatch?range1?=?Dispatch.call(rangeItem1,?"Range").toDispatch();

???Dispatch.put(range1,?"Text",?new?Variant("当前是书签1的文本信息!"));

???String?bookMark1Value?=?Dispatch.get(range1,?"Text").toString();

???System.out.println(bookMark1Value);

??}?else?{

???System.out.println("当前书签不存在,重新建立!");

???Dispatch.call(bookMarks,?"Add",?"bookMark1",?selection);

???Dispatch?rangeItem1?=?Dispatch.call(bookMarks,?"Item",?"bookMark1")

???.toDispatch();

???Dispatch?range1?=?Dispatch.call(rangeItem1,?"Range").toDispatch();

???Dispatch.put(range1,?"Text",?new?Variant("当前是书签1的文本信息!"));

???String?bookMark1Value?=?Dispatch.get(range1,?"Text").toString();

???System.out.println(bookMark1Value);?

??}

??//保存操作////////////////////////

??Dispatch.call(document,?"SaveAs",?"D:/wordOperate.doc");

??//Dispatch.invoke((Dispatch)?doc,?"SaveAs",?Dispatch.Method,?new?Object[]{htmlPath,?new?Variant(8)},?new?int[1]);???//生成html文件

??//?0?=?wdDoNotSaveChanges

??//?-1?=?wdSaveChanges

??//?-2?=?wdPromptToSaveChanges

??//Dispatch.call(document,?"Close",?new?Variant(0));

??//?//?worddoc.olefunction("protect",2,true,"");

??//?//?Dispatch?bookMarks?=?wordApp.call(docs,"Bookmarks").toDispatch();

??//?//?System.out.println("bookmarks"+bookMarks.getProgramId());

??//?//Dispatch.call(doc,?"Save");?//保存

??//?//?Dispatch.call(doc,?"Close",?new?Variant(true));

??//?//wordApp.invoke("Quit",new?Variant[]{});

??//?wordApp.safeRelease();//Finalizers?call?this?method

?}

}

docx转html转pad。java

加载带有DOC或DOCX扩展名的源Word文件将文件另存为输出HTML。

MicrosoftWord文件格式DOC/DOCX很著名,因为文字处理器支持多种功能来组织和解释信息。同样,HTML文件格式有助于在Web应用程序中显示信息。

如果想使用Java将DOCX转换为HTML5,可以先加载输入的DOCX文件,在指定SaveFormat时使用HtmlSaveOptions,并将HtmlVersion设置为枚举值HTML5,最后保存输出文件。

java使用jacob将word转换为html,如何设置转换后html的编码格式。我想要utf-8的,不要gb2312。

强制转码~~

line你要转的内容

line=new String(line.getBytes("gb2312"),"utf-8");代码是我凭记忆写的,应该没问题

或者你在写之前。先写一个HTML页面编码的代码 。把页面的格式设置成utf-8

Java程序调用 openoffice,将doc文件转Html文件,但转换完格式都变成居左边?

1、到官网下载Jacob,

2、将压缩包解压后,Jacob.jar添加到Libraries中(先复制到项目目录中,右键单击jar包选择Build Path—Add to Build Path);

3、将Jacob.dll放至当前项目所用到的“jre\bin”下面(比如Eclipse正在用的Jre路径是C:\Java\jdk1.7.0_17\jre\bin)。

Ps:按照上面的步骤配置的,基本没有问题,但是有些电脑可能还会报错,比如:java.lang.UnsatisfiedLinkError: no jacob in java.library.path,这是系统没有加载到jacob.dll,网上解决方法是将Jacob.dll放至“WINDOWS\SYSTEM32”下面。

Java代码:

public?class?JacobUtil?{?

//?8?代表word保存成html?

public?static?final?int?WORD_HTML?=?8;?

public?static?void?main(String[]?args)?{?

String?docfile?=?"C:\\Users\\无名\\Desktop\\xxx.doc";?

String?htmlfile?=?"C:\\Users\\无名\\Desktop\\xxx.html";?

JacobUtil.wordToHtml(docfile,?htmlfile);?

}?

/**?

*?WORD转HTML?

*?@param?docfile?WORD文件全路径?

*?@param?htmlfile?转换后HTML存放路径?

*/?

public?static?void?wordToHtml(String?docfile,?String?htmlfile)?

{?

//?启动word应用程序(Microsoft?Office?Word?2003)?

ActiveXComponent?app?=?new?ActiveXComponent("Word.Application");?

System.out.println("*****正在转换...*****");?

try?

{?

//?设置word应用程序不可见?

app.setProperty("Visible",?new?Variant(false));?

//?documents表示word程序的所有文档窗口,(word是多文档应用程序)?

Dispatch?docs?=?app.getProperty("Documents").toDispatch();?

//?打开要转换的word文件?

Dispatch?doc?=?Dispatch.invoke(?

docs,?

"Open",?

Dispatch.Method,?

new?Object[]?{?docfile,?new?Variant(false),?

new?Variant(true)?},?new?int[1]).toDispatch();?

//?作为html格式保存到临时文件?

Dispatch.invoke(doc,?"SaveAs",?Dispatch.Method,?new?Object[]?{?

htmlfile,?new?Variant(WORD_HTML)?},?new?int[1]);?

//?关闭word文件?

Dispatch.call(doc,?"Close",?new?Variant(false));?

}catch?(Exception?e){?

e.printStackTrace();?

}finally{?

//关闭word应用程序?

app.invoke("Quit",?new?Variant[]?{});?

}?

System.out.println("*****转换完毕********");?

}?

}

java excel怎么读取到html

一般都是用jacob的,这里不能发地址,自己搜一下,要下1.9的版本

1、我们解开下载的jacob_1.9.zip,在文件夹中找到jacob.dll和jacob.jar两个文件

2、将压缩包解压后,Jacob.jar添加到Libraries中;

3、将Jacob.dll放至“WINDOWS\SYSTEM32”下面。

需要注意的是:

【使用IDE启动Web服务器时,系统读取不到Jacob.dll,例如用MyEclipse启动Tomcat,就需要将dll文件copy到MyEclipse安装目录的“jre\bin”下面。

一般系统没有加载到Jacob.dll文件时,报错信息为:“java.lang.UnsatisfiedLinkError: no jacob in java.library.path”】

使用Jacob转换Word,Excel为HTML

JAVA代码:

Java代码

import java.io.BufferedReader;

import java.io.BufferedWriter;

import java.io.File;

import java.io.FileInputStream;

import java.io.FileNotFoundException;

import java.io.FileWriter;

import java.io.IOException;

import java.io.InputStreamReader;

import com.jacob.activeX.ActiveXComponent;

import com.jacob.com.Dispatch;

import com.jacob.com.Variant;

public class TransformFiletoHtml

{

int WORD_HTML = 8;

int WORD_TXT = 7;

int EXCEL_HTML = 44;

/**

* WORD转HTML

* @param docfile WORD文件全路径

* @param htmlfile 转换后HTML存放路径

*/

public void wordToHtml(String docfile, String htmlfile)

{

ActiveXComponent app = new ActiveXComponent("Word.Application"); // 启动word

try

{

app.setProperty("Visible", new Variant(false));

Dispatch docs = app.getProperty("Documents").toDispatch();

Dispatch doc = Dispatch.invoke(docs,"Open",Dispatch.Method,new Object[] { docfile, new Variant(false),new Variant(true) }, new int[1]).toDispatch();

Dispatch.invoke(doc, "SaveAs", Dispatch.Method, new Object[] {htmlfile, new Variant(WORD_HTML) }, new int[1]);

Variant f = new Variant(false);

Dispatch.call(doc, "Close", f);

}

catch (Exception e)

{

e.printStackTrace();

}

finally

{

app.invoke("Quit", new Variant[] {});

}

}

/**

* EXCEL转HTML

* @param xlsfile EXCEL文件全路径

* @param htmlfile 转换后HTML存放路径

*/

public void excelToHtml(String xlsfile, String htmlfile)

{

ActiveXComponent app = new ActiveXComponent("Excel.Application"); // 启动excel

try

{

app.setProperty("Visible", new Variant(false));

Dispatch excels = app.getProperty("Workbooks").toDispatch();

Dispatch excel = Dispatch.invoke(excels,"Open",Dispatch.Method,new Object[] { xlsfile, new Variant(false),new Variant(true) }, new int[1]).toDispatch();

Dispatch.invoke(excel, "SaveAs", Dispatch.Method, new Object[] {htmlfile, new Variant(EXCEL_HTML) }, new int[1]);

Variant f = new Variant(false);

Dispatch.call(excel, "Close", f);

}

catch (Exception e)

{

e.printStackTrace();

}

finally

{

app.invoke("Quit", new Variant[] {});

}

}

/**

* /删除指定文件夹

* @param folderPath 文件夹全路径

* @param htmlfile 转换后HTML存放路径

*/

public void delFolder(String folderPath)

{

try

{

delAllFile(folderPath); //删除完里面所有内容

String filePath = folderPath;

filePath = filePath.toString();

java.io.File myFilePath = new java.io.File(filePath);

myFilePath.delete(); //删除空文件夹

} catch (Exception e) {e.printStackTrace();}

}

/**

* /删除指定文件夹下所有文件

* @param path 文件全路径

*/

public boolean delAllFile(String path)

{

boolean flag = false;

File file = new File(path);

if (!file.exists())

{

return flag;

}

if (!file.isDirectory())

{

return flag;

}

String[] tempList = file.list();

File temp = null;

for (int i = 0; i tempList.length; i++)

{

if (path.endsWith(File.separator))

{

temp = new File(path + tempList[i]);

}

else

{

temp = new File(path + File.separator + tempList[i]);

}

if (temp.isFile())

{

temp.delete();

}

if (temp.isDirectory())

{

delAllFile(path + "/" + tempList[i]);//先删除文件夹里面的文件

delFolder(path + "/" + tempList[i]);//再删除空文件夹

flag = true;

}

}

return flag;

}

}

import java.io.BufferedReader;

import java.io.BufferedWriter;

import java.io.File;

import java.io.FileInputStream;

import java.io.FileNotFoundException;

import java.io.FileWriter;

import java.io.IOException;

import java.io.InputStreamReader;

import com.jacob.activeX.ActiveXComponent;

import com.jacob.com.Dispatch;

import com.jacob.com.Variant;

public class TransformFiletoHtml

{

int WORD_HTML = 8;

int WORD_TXT = 7;

int EXCEL_HTML = 44;

/**
 * Converts a Word file to HTML.
 *
 * <p>Failures are printed to standard error and not rethrown.
 *
 * @param docfile  full path of the Word file
 * @param htmlfile full path where the HTML output is stored
 */
public void wordToHtml(String docfile, String htmlfile)
{
    ActiveXComponent app = new ActiveXComponent("Word.Application"); // start Word
    try
    {
        app.setProperty("Visible", new Variant(false));
        Dispatch docs = app.getProperty("Documents").toDispatch();
        // The third Open argument (true) opens the document read-only.
        Dispatch doc = Dispatch.invoke(docs, "Open", Dispatch.Method,
                new Object[] { docfile, new Variant(false), new Variant(true) },
                new int[1]).toDispatch();
        try
        {
            Dispatch.invoke(doc, "SaveAs", Dispatch.Method,
                    new Object[] { htmlfile, new Variant(WORD_HTML) }, new int[1]);
        }
        finally
        {
            // Close even when SaveAs failed, so Quit is not issued with the
            // document still open.
            Dispatch.call(doc, "Close", new Variant(false));
        }
    }
    catch (Exception e)
    {
        e.printStackTrace();
    }
    finally
    {
        app.invoke("Quit", new Variant[] {});
    }
}

/**
 * Converts an Excel file to HTML.
 *
 * <p>Failures are printed to standard error and not rethrown.
 *
 * @param xlsfile  full path of the Excel file
 * @param htmlfile full path where the HTML output is stored
 */
public void excelToHtml(String xlsfile, String htmlfile)
{
    ActiveXComponent app = new ActiveXComponent("Excel.Application"); // start Excel
    try
    {
        app.setProperty("Visible", new Variant(false));
        Dispatch workbooks = app.getProperty("Workbooks").toDispatch();
        // The third Open argument (true) opens the workbook read-only.
        Dispatch workbook = Dispatch.invoke(workbooks, "Open", Dispatch.Method,
                new Object[] { xlsfile, new Variant(false), new Variant(true) },
                new int[1]).toDispatch();
        try
        {
            Dispatch.invoke(workbook, "SaveAs", Dispatch.Method,
                    new Object[] { htmlfile, new Variant(EXCEL_HTML) }, new int[1]);
        }
        finally
        {
            // Close even when SaveAs failed, so Quit is not issued with the
            // workbook still open.
            Dispatch.call(workbook, "Close", new Variant(false));
        }
    }
    catch (Exception e)
    {
        e.printStackTrace();
    }
    finally
    {
        app.invoke("Quit", new Variant[] {});
    }
}

/**
 * Deletes a folder together with everything inside it.
 *
 * <p>Exceptions are printed to standard error and not rethrown.
 *
 * @param folderPath full path of the folder
 */
public void delFolder(String folderPath)
{
    try
    {
        delAllFile(folderPath); // empty the folder first
        new File(folderPath).delete(); // then remove the now-empty folder
    }
    catch (Exception e)
    {
        e.printStackTrace();
    }
}

/**

* /删除指定文件夹下所有文件

* @param path 文件全路径

*/

public boolean delAllFile(String path)

{

boolean flag = false;

File file = new File(path);

if (!file.exists())

{

return flag;

}

if (!file.isDirectory())

{

return flag;

}

String[] tempList = file.list();

File temp = null;

for (int i = 0; i tempList.length; i++)

{

if (path.endsWith(File.separator))

{

temp = new File(path + tempList[i]);

}

else

{

temp = new File(path + File.separator + tempList[i]);

}

if (temp.isFile())

{

temp.delete();

}

if (temp.isDirectory())

{

delAllFile(path + "/" + tempList[i]);//先删除文件夹里面的文件

delFolder(path + "/" + tempList[i]);//再删除空文件夹

flag = true;

}

}

return flag;

}

}

调用JAVA代码:

Java代码

public class Test1 {

public static void main(String[] args) {

// TODO Auto-generated method stub

TransformFiletoHtml trans = new TransformFiletoHtml();

trans.wordToHtml("D:\\sinye.doc", "D:\\sinye.html");

}

}

public class Test1 {

/**
 * Converts the sample document on drive D to an HTML file next to it.
 *
 * @param args unused
 */
public static void main(String[] args) {
    String source = "D:\\sinye.doc";
    String target = "D:\\sinye.html";
    TransformFiletoHtml converter = new TransformFiletoHtml();
    converter.wordToHtml(source, target);
}

}

只写了一个测试word转html的,excel转html的同理,在TransformFiletoHtml类中,写了两个方法,一个是删除文件夹的方法(delFolder()),一个是删除文件夹下所有文件的方法(delAllFile())。

(责任编辑:IT教学网)

更多

推荐SQL Server文章