I want to extract the content of a mail that is attached to another mail in a PST file, using Apache Tika. When parsing the PST, the X-TIKA:content metadata key of the attached mail does not contain the mail body. How can I extract the body content of the attached mail? Here is the code:
public void parsepstfile(file file) throws ioexception, tikaexception, org.xml.sax.saxexception { parser p = new autodetectparser(); recursiveparserwrapper wrapper = new recursiveparserwrapper(p, new basiccontenthandlerfactory( basiccontenthandlerfactory.handler_type.xml, -1)); inputstream stream = tikainputstream.get(file); parsecontext context = new parsecontext(); wrapper.parse(stream, new defaulthandler(), new metadata(), context); (metadata metadata : wrapper.getmetadata()) { (string name : metadata.names()) { (string value : metadata.getvalues(name)) { if (name.equalsignorecase("message-from")) //set value in required object if ("x-tika:content".equalsignorecase(name)) { //here extracting message body autodetectparser parser = new autodetectparser(); contenthandler texthandler = new bodycontenthandler(); metadata xmetadata = new metadata(); inputstream inputstream = new bytearrayinputstream(value.getbytes("utf-8")); new htmlparser().parse(inputstream,texthandler,xmetadata,new parsecontext()); //set value of texthandler.tostring()in required object } } } } }
Comments
Post a Comment