How to read MS Word from Java
You will need the Apache POI distribution on the classpath (JAR files such as poi-3.7-20101029.jar and poi-scratchpad-3.7-20101029.jar).
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.extractor.WordExtractor;
/**
 * Reads a binary MS Word ({@code .doc}) file with Apache POI's HWPF classes
 * and prints its paragraphs to standard output.
 */
public class DocReader {

    /** Location of the document to read, relative to the working directory. */
    private static final String DOC_PATH = "resources/Rhythm-for-february.doc";

    /**
     * Loads the document at {@link #DOC_PATH} and prints every non-null
     * paragraph as {@code "Line <index> : <text>"}.
     *
     * <p>If the file cannot be opened or read, the failure is reported on
     * standard error and the method returns without printing any paragraphs.
     * Unchecked exceptions raised by the parser are deliberately not caught,
     * so the caller sees the real cause.
     */
    public void readDocFile() {
        File docFile = new File(DOC_PATH);
        String[] paragraphs;

        // try-with-resources guarantees the stream is closed on every path.
        try (FileInputStream fis = new FileInputStream(docFile)) {
            HWPFDocument doc = new HWPFDocument(fis);
            WordExtractor docExtractor = new WordExtractor(doc);
            paragraphs = docExtractor.getParagraphText();
        } catch (IOException e) {
            // Stop here: without a successfully parsed document there is
            // nothing to print, and continuing would only fail later.
            System.err.println("Could not read " + docFile.getAbsolutePath() + ": " + e);
            return;
        }

        for (int i = 0; i < paragraphs.length; i++) {
            if (paragraphs[i] != null) {
                System.out.println("Line " + i + " : " + paragraphs[i]);
            }
        }
    }

    /**
     * Command-line entry point; reads and prints the sample document.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        DocReader reader = new DocReader();
        reader.readDocFile();
    }
}
Tagi: