/*
 * Decompiled with CFR 0.152.
 */
package org.cdlib.xtf.textIndexer;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.util.StringTokenizer;
import javax.xml.transform.Templates;
import org.apache.lucene.util.StringUtil;
import org.cdlib.xtf.textIndexer.XMLIndexSource;
import org.cdlib.xtf.util.StructuredStore;
import org.textmining.text.extraction.WordExtractor;
import org.xml.sax.InputSource;

/**
 * Index source backed by a Microsoft Word file.
 *
 * <p>The text of the Word file is extracted with {@link WordExtractor} and
 * re-packaged as a small XML document (a {@code <rippedMSWordText>} root
 * holding one {@code <p>} element per non-empty text segment), which is then
 * handed on as the input for the rest of the XML indexing pipeline.
 */
public class MSWordIndexSource
extends XMLIndexSource {
    /**
     * Extra characters reserved in the output buffer for the root element
     * tags, on top of the length of the extracted text. This is only a
     * sizing hint; the buffer still grows on demand.
     */
    private static final int WRAPPER_OVERHEAD = 64;

    /** The Word file whose text is extracted by {@link #filterInput()}. */
    private final File msWordFile;

    /**
     * Creates an index source for the given Word file.
     *
     * <p>All arguments except the leading {@code null} are passed straight
     * through to the {@link XMLIndexSource} constructor.
     * NOTE(review): the meaning of the first superclass argument is not
     * visible from this file; {@code null} is kept exactly as before.
     *
     * @param msWordFile   the Word file to extract text from
     * @param key          forwarded to the superclass
     * @param preFilters   forwarded to the superclass
     * @param displayStyle forwarded to the superclass
     * @param lazyStore    forwarded to the superclass
     */
    public MSWordIndexSource(File msWordFile, String key, Templates[] preFilters, Templates displayStyle, StructuredStore lazyStore) {
        super(null, msWordFile, key, preFilters, displayStyle, lazyStore);
        this.msWordFile = msWordFile;
    }

    /**
     * Extracts the text of the Word file and wraps it in a simple XML
     * document.
     *
     * <p>The extracted text is split on carriage returns and tabs. Each
     * segment is trimmed and HTML-escaped; segments that are empty after
     * that are dropped, and the rest are emitted as {@code <p>} elements.
     * The system ID of the returned source is the URL of the Word file.
     *
     * @return an input source reading the generated XML text
     * @throws IOException      if opening, reading or closing the file fails
     * @throws RuntimeException wrapping any other exception raised while
     *                          extracting or converting the text
     */
    protected InputSource filterInput() throws IOException {
        InputStream inStream = new FileInputStream(this.msWordFile);
        try {
            String text = new WordExtractor().extractText(inStream);

            // Size the buffer from the extracted text rather than from the
            // binary file length: the latter needs a narrowing long-to-int
            // cast (negative for files of 2 GiB or more) and also counts
            // formatting and embedded data that never reach the output.
            StringBuffer outBuf = new StringBuffer(text.length() + WRAPPER_OVERHEAD);
            outBuf.append("<rippedMSWordText>\n");

            // NOTE(review): the delimiters are carriage return and tab;
            // line feed is not a delimiter here - confirm that is intended.
            StringTokenizer st = new StringTokenizer(text, "\r\t", false);
            while (st.hasMoreTokens()) {
                String para = StringUtil.escapeHTMLChars(st.nextToken().trim());
                if (para.length() > 0) {
                    outBuf.append("  <p>").append(para).append("</p>\n");
                }
            }
            outBuf.append("</rippedMSWordText>\n");

            InputSource finalSrc = new InputSource(new StringReader(outBuf.toString()));
            // NOTE(review): File.toURL() is deprecated because it does not
            // escape characters that are illegal in URLs. It is kept so the
            // system ID string stays exactly what it was before.
            finalSrc.setSystemId(this.msWordFile.toURL().toString());
            return finalSrc;
        }
        catch (IOException e) {
            // Rethrown as-is so it is not wrapped by the generic handler below.
            throw e;
        }
        catch (Exception e) {
            throw new RuntimeException(e);
        }
        finally {
            inStream.close();
        }
    }
}

