Recipes » How to read MS Word from Java

How to read MS Word from Java

Last modified by Administrator on 2011/06/06 17:26

How to read MS Word from Java

Java en

You need the Apache POI distribution on the classpath (files such as poi-3.7-20101029.jar and poi-scratchpad-3.7-20101029.jar).

import java.io.File;
import java.io.FileInputStream;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.extractor.WordExtractor;
/**
 * Minimal example that reads a legacy MS Word (.doc) file with Apache POI's
 * HWPF module and prints every paragraph of it to standard output.
 */
public class DocReader {
	/**
	 * Opens {@code resources/Rhythm-for-february.doc} (resolved against the
	 * current working directory), extracts its paragraph texts and prints
	 * each one to standard output, prefixed with its zero-based index.
	 *
	 * <p>If the file cannot be opened or parsed, a diagnostic naming the
	 * file is written to standard error and the method returns without
	 * printing any paragraphs.
	 */
	public void readDocFile() {
		File docFile = new File("resources/Rhythm-for-february.doc");
		String[] docArray;
		// try-with-resources guarantees the stream is closed on every path,
		// including when POI rejects the file.
		try (FileInputStream fis = new FileInputStream(docFile.getAbsolutePath())) {
			// HWPFDocument parses the Word document from the input stream.
			HWPFDocument doc = new HWPFDocument(fis);
			WordExtractor docExtractor = new WordExtractor(doc);
			// Extract inside the try block so the extractor is never
			// dereferenced after a failed open or parse.
			docArray = docExtractor.getParagraphText();
		} catch (Exception exep) {
			// Broad catch is deliberate: this is the top-level boundary of a
			// command-line example. Printing the exception itself (not just
			// getMessage(), which may be null) keeps the exception type visible.
			System.err.println("Could not read " + docFile.getAbsolutePath() + ": " + exep);
			return;
		}
		// One entry per paragraph, as returned by WordExtractor.getParagraphText().
		for (int i = 0; i < docArray.length; i++) {
			if (docArray[i] != null) {
				System.out.println("Line " + i + " : " + docArray[i]);
			}
		}
	}

	/**
	 * Command-line entry point; runs the example once.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args) {
		DocReader reader = new DocReader();
		reader.readDocFile();
	}
}


Tagi:  

Tags:
Created by Kalvis Apsītis on 2011/02/23 21:28

This wiki is licensed under a Creative Commons 2.0 license
XWiki Enterprise 3.0.36132 - Documentation