diff --git a/poi-ooxml/src/main/java/org/apache/poi/xwpf/extractor/XWPFEventBasedWordExtractor.java b/poi-ooxml/src/main/java/org/apache/poi/xwpf/extractor/XWPFEventBasedWordExtractor.java
new file mode 100644
index 00000000000..a7b48d2f40b
--- /dev/null
+++ b/poi-ooxml/src/main/java/org/apache/poi/xwpf/extractor/XWPFEventBasedWordExtractor.java
@@ -0,0 +1,368 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.xwpf.extractor;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.commons.io.input.CloseShieldInputStream;
+import org.apache.logging.log4j.Logger;
+import org.apache.poi.logging.PoiLogManager;
+import org.apache.poi.ooxml.POIXMLDocument;
+import org.apache.poi.ooxml.POIXMLProperties;
+import org.apache.poi.ooxml.extractor.POIXMLTextExtractor;
+import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
+import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
+import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.openxml4j.opc.PackagePart;
+import org.apache.poi.openxml4j.opc.PackageRelationship;
+import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
+import org.apache.poi.xwpf.extractor.internal.OOXMLWordAndPowerPointTextHandler;
+import org.apache.poi.xwpf.extractor.internal.ParagraphProperties;
+import org.apache.poi.xwpf.extractor.internal.RunProperties;
+import org.apache.poi.xwpf.extractor.internal.XMLReaderUtils;
+import org.apache.poi.xwpf.extractor.internal.XWPFListManager;
+import org.apache.poi.xwpf.usermodel.XWPFNumbering;
+import org.apache.poi.xwpf.usermodel.XWPFRelation;
+import org.apache.xmlbeans.XmlException;
+import org.xml.sax.SAXException;
+
+/**
+ * Experimental class that is based on POI's XSSFEventBasedExcelExtractor
+ * <p>
+ * This is copied from Apache Tika.
+ * </p>
+ *
+ * @since POI 5.4.2
+ */
+public class XWPFEventBasedWordExtractor implements POIXMLTextExtractor {
+
+    private static final Logger LOG = PoiLogManager.getLogger(XWPFEventBasedWordExtractor.class);
+
+    private final OPCPackage container;
+    private final POIXMLProperties properties;
+
+    public XWPFEventBasedWordExtractor(OPCPackage container)
+            throws XmlException, OpenXML4JException, IOException {
+        this.container = container;
+        this.properties = new POIXMLProperties(container);
+    }
+
+    public OPCPackage getPackage() {
+        return this.container;
+    }
+
+    public POIXMLProperties.CoreProperties getCoreProperties() {
+        return this.properties.getCoreProperties();
+    }
+
+    public POIXMLProperties.ExtendedProperties getExtendedProperties() {
+        return this.properties.getExtendedProperties();
+    }
+
+    public POIXMLProperties.CustomProperties getCustomProperties() {
+        return this.properties.getCustomProperties();
+    }
+
+    @Override
+    public POIXMLDocument getDocument() {
+        return null;
+    }
+
+    @Override
+    public String getText() {
+        StringBuilder sb = new StringBuilder();
+        //handle main document
+        List<PackagePart> pps =
+                container.getPartsByContentType(XWPFRelation.DOCUMENT.getContentType());
+        if (pps != null) {
+            for (PackagePart pp : pps) {
+                //likely only one, but why not...
+                try {
+                    handleDocumentPart(pp, sb);
+                } catch (IOException e) {
+                    LOG.warn("IOException handling document part", e);
+                } catch (SAXException e) {
+                    //swallow this because we don't actually call it
+                    LOG.warn("SAXException handling document part", e);
+                }
+            }
+        }
+        //handle glossary document
+        pps = container.getPartsByContentType(XWPFRelation.GLOSSARY_DOCUMENT.getContentType());
+
+        if (pps != null) {
+            for (PackagePart pp : pps) {
+                //likely only one, but why not...
+                try {
+                    handleDocumentPart(pp, sb);
+                } catch (IOException e) {
+                    LOG.warn("IOException handling glossary document part", e);
+                } catch (SAXException e) {
+                    //swallow this because we don't actually call it
+                    LOG.warn("SAXException handling glossary document part", e);
+                }
+            }
+        }
+
+        return sb.toString();
+    }
+
+    @Override
+    public void setCloseFilesystem(boolean b) {
+
+    }
+
+    @Override
+    public boolean isCloseFilesystem() {
+        return false;
+    }
+
+    @Override
+    public Closeable getFilesystem() {
+        return null;
+    }
+
+
+    private void handleDocumentPart(PackagePart documentPart, StringBuilder sb)
+            throws IOException, SAXException {
+        //load the numbering/list manager and styles from the main document part
+        XWPFNumbering numbering = loadNumbering(documentPart);
+        XWPFListManager xwpfListManager = new XWPFListManager(numbering);
+        //TODO: XWPFStyles styles = loadStyles(documentPart);
+
+        //headers
+        try {
+            PackageRelationshipCollection headersPRC =
+                    documentPart.getRelationshipsByType(XWPFRelation.HEADER.getRelation());
+            if (headersPRC != null) {
+                for (int i = 0; i < headersPRC.size(); i++) {
+                    PackagePart header = documentPart.getRelatedPart(headersPRC.getRelationship(i));
+                    handlePart(header, xwpfListManager, sb);
+                }
+            }
+        } catch (InvalidFormatException e) {
+            LOG.warn("Invalid format", e);
+        }
+
+        //main document
+        handlePart(documentPart, xwpfListManager, sb);
+
+        //for now, just dump other components at end
+        for (XWPFRelation rel : new XWPFRelation[]{XWPFRelation.FOOTNOTE, XWPFRelation.COMMENT,
+                XWPFRelation.FOOTER, XWPFRelation.ENDNOTE}) {
+            try {
+                PackageRelationshipCollection prc =
+                        documentPart.getRelationshipsByType(rel.getRelation());
+                if (prc != null) {
+                    for (int i = 0; i < prc.size(); i++) {
+                        PackagePart packagePart =
+                                documentPart.getRelatedPart(prc.getRelationship(i));
+                        handlePart(packagePart, xwpfListManager, sb);
+                    }
+                }
+            } catch (InvalidFormatException e) {
+                LOG.warn("Invalid format", e);
+            }
+        }
+    }
+
+    private void handlePart(PackagePart packagePart, XWPFListManager xwpfListManager,
+                            StringBuilder buffer) throws IOException, SAXException {
+
+        Map<String, String> hyperlinks = loadHyperlinkRelationships(packagePart);
+        try (InputStream stream = packagePart.getInputStream()) {
+            XMLReaderUtils.parseSAX(CloseShieldInputStream.wrap(stream),
+                    new OOXMLWordAndPowerPointTextHandler(new XWPFToTextContentHandler(buffer),
+                            hyperlinks));
+        }
+
+    }
+
+    private Map<String, String> loadHyperlinkRelationships(PackagePart bodyPart) {
+        Map<String, String> hyperlinks = new HashMap<>();
+        try {
+            PackageRelationshipCollection prc =
+                    bodyPart.getRelationshipsByType(XWPFRelation.HYPERLINK.getRelation());
+            for (int i = 0; i < prc.size(); i++) {
+                PackageRelationship pr = prc.getRelationship(i);
+                if (pr == null) {
+                    continue;
+                }
+                String id = pr.getId();
+                String url = (pr.getTargetURI() == null) ? null : pr.getTargetURI().toString();
+                if (id != null && url != null) {
+                    hyperlinks.put(id, url);
+                }
+            }
+        } catch (InvalidFormatException e) {
+            LOG.warn("Invalid format", e);
+        }
+        return hyperlinks;
+    }
+
+    private XWPFNumbering loadNumbering(PackagePart packagePart) {
+        try {
+            PackageRelationshipCollection numberingParts =
+                    packagePart.getRelationshipsByType(XWPFRelation.NUMBERING.getRelation());
+            if (!numberingParts.isEmpty()) {
+                PackageRelationship numberingRelationShip = numberingParts.getRelationship(0);
+                if (numberingRelationShip == null) {
+                    return null;
+                }
+                PackagePart numberingPart = container.getPart(numberingRelationShip);
+                if (numberingPart == null) {
+                    return null;
+                }
+                return new XWPFNumbering(numberingPart);
+            }
+        } catch (OpenXML4JException e) {
+            LOG.warn("Couldn't load numbering", e);
+        }
+        return null;
+    }
+
+    private static class XWPFToTextContentHandler
+            implements OOXMLWordAndPowerPointTextHandler.XWPFBodyContentsHandler {
+        private final StringBuilder buffer;
+
+        public XWPFToTextContentHandler(StringBuilder buffer) {
+            this.buffer = buffer;
+        }
+
+        @Override
+        public void run(RunProperties runProperties, String contents) {
+            buffer.append(contents);
+        }
+
+        @Override
+        public void hyperlinkStart(String link) {
+            //no-op
+        }
+
+        @Override
+        public void hyperlinkEnd() {
+            //no-op
+        }
+
+        @Override
+        public void startParagraph(ParagraphProperties paragraphProperties) {
+            //no-op
+        }
+
+        @Override
+        public void endParagraph() {
+            buffer.append("\n");
+        }
+
+        @Override
+        public void startTable() {
+
+        }
+
+        @Override
+        public void endTable() {
+
+        }
+
+        @Override
+        public void startTableRow() {
+
+        }
+
+        @Override
+        public void endTableRow() {
+            buffer.append("\n");
+        }
+
+        @Override
+        public void startTableCell() {
+
+        }
+
+        @Override
+        public void endTableCell() {
+            buffer.append("\t");
+        }
+
+        @Override
+        public void startSDT() {
+
+        }
+
+        @Override
+        public void endSDT() {
+            buffer.append("\n");
+        }
+
+        @Override
+        public void startEditedSection(String editor, Date date,
+                                       OOXMLWordAndPowerPointTextHandler.EditType editType) {
+
+        }
+
+        @Override
+        public void endEditedSection() {
+
+        }
+
+        @Override
+        public boolean isIncludeDeletedText() {
+            return true;
+        }
+
+        @Override
+        public void footnoteReference(String id) {
+
+        }
+
+        @Override
+        public void endnoteReference(String id) {
+
+        }
+
+        @Override
+        public boolean isIncludeMoveFromText() {
+            return false;
+        }
+
+        @Override
+        public void embeddedOLERef(String refId) {
+            //no-op
+        }
+
+        @Override
+        public void embeddedPicRef(String picFileName, String picDescription) {
+            //no-op
+        }
+
+        @Override
+        public void startBookmark(String id, String name) {
+            //no-op
+        }
+
+        @Override
+        public void endBookmark(String id) {
+            //no-op
+        }
+    }
+}
diff --git a/poi-ooxml/src/main/java/org/apache/poi/xwpf/extractor/internal/AbstractListManager.java b/poi-ooxml/src/main/java/org/apache/poi/xwpf/extractor/internal/AbstractListManager.java
new file mode 100644
index 00000000000..7b629940af8
--- /dev/null
+++ b/poi-ooxml/src/main/java/org/apache/poi/xwpf/extractor/internal/AbstractListManager.java
@@ -0,0 +1,281 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.xwpf.extractor.internal;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.poi.util.NumberFormatter;
+
+/**
+ * <p>
+ * This is copied from Apache Tika.
+ * </p>
+ *
+ * @since POI 5.4.2
+ */
+abstract class AbstractListManager {
+    private final static String BULLET = "\u00b7";
+
+    protected Map<Integer, ParagraphLevelCounter> listLevelMap =
+            new HashMap<>();
+    protected Map<Integer, LevelTuple[]> overrideTupleMap = new HashMap<>();
+
+    //helper class that is docx/doc format agnostic
+    protected static class ParagraphLevelCounter {
+
+        //counts can == 0 if the format is decimal, make sure
+        //that flag values are < 0
+        private final Integer NOT_SEEN_YET = -1;
+        private final Integer FIRST_SKIPPED = -2;
+        private final LevelTuple[] levelTuples;
+        Pattern LEVEL_INTERPOLATOR = Pattern.compile("%(\\d+)");
+        private List<Integer> counts = new ArrayList<>();
+        private int lastLevel = -1;
+
+        public ParagraphLevelCounter(LevelTuple[] levelTuples) {
+            this.levelTuples = levelTuples;
+        }
+
+        public int getNumberOfLevels() {
+            return levelTuples.length;
+        }
+
+        /**
+         * Apply this to every numbered paragraph in order.
+         *
+         * @param levelNumber level number that is being incremented
+         * @return the new formatted number string for this level
+         */
+        public String incrementLevel(int levelNumber, LevelTuple[] overrideLevelTuples) {
+
+            for (int i = lastLevel + 1; i < levelNumber; i++) {
+                if (i >= counts.size()) {
+                    int val = getStart(i, overrideLevelTuples);
+                    counts.add(i, val);
+                } else {
+                    int count = counts.get(i);
+                    if (count == NOT_SEEN_YET) {
+                        count = getStart(i, overrideLevelTuples);
+                        counts.set(i, count);
+                    }
+                }
+            }
+
+            if (levelNumber < counts.size()) {
+                resetAfter(levelNumber, overrideLevelTuples);
+                int count = counts.get(levelNumber);
+                if (count == NOT_SEEN_YET) {
+                    count = getStart(levelNumber, overrideLevelTuples);
+                } else {
+                    count++;
+                }
+                counts.set(levelNumber, count);
+                lastLevel = levelNumber;
+                return format(levelNumber, overrideLevelTuples);
+            }
+
+            counts.add(levelNumber, getStart(levelNumber, overrideLevelTuples));
+            lastLevel = levelNumber;
+            return format(levelNumber, overrideLevelTuples);
+        }
+
+        /**
+         * @param level which level to format
+         * @return the string that represents the number and the surrounding text for this paragraph
+         */
+        private String format(int level, LevelTuple[] overrideLevelTuples) {
+            if (level < 0 || level >= levelTuples.length) {
+                //log?
+                return "";
+            }
+            boolean isLegal = (overrideLevelTuples != null) ? overrideLevelTuples[level].isLegal :
+                    levelTuples[level].isLegal;
+            //short circuit bullet
+            String numFmt = getNumFormat(level, isLegal, overrideLevelTuples);
+            if ("bullet".equals(numFmt)) {
+                return BULLET + " ";
+            }
+
+            String lvlText =
+                    (overrideLevelTuples == null || overrideLevelTuples[level].lvlText == null) ?
+                            levelTuples[level].lvlText : overrideLevelTuples[level].lvlText;
+            StringBuilder sb = new StringBuilder();
+            Matcher m = LEVEL_INTERPOLATOR.matcher(lvlText);
+            int last = 0;
+            while (m.find()) {
+                sb.append(lvlText, last, m.start());
+                String lvlString = m.group(1);
+                int lvlNum = -1;
+                try {
+                    lvlNum = Integer.parseInt(lvlString);
+                } catch (NumberFormatException e) {
+                    //swallow
+                }
+                String numString = "";
+                //need to subtract 1 because, e.g. %1 is the format
+                //for the number at array offset 0
+                numString = formatNum(lvlNum - 1, isLegal, overrideLevelTuples);
+
+                sb.append(numString);
+                last = m.end();
+            }
+            sb.append(lvlText.substring(last));
+            if (sb.length() > 0) {
+                //TODO: add in character after number
+                sb.append(" ");
+            }
+            return sb.toString();
+        }
+
+        //actual level number; can return empty string if numberformatter fails
+        private String formatNum(int lvlNum, boolean isLegal, LevelTuple[] overrideLevelTuples) {
+
+            int numFmtStyle = 0;
+            String numFmt = getNumFormat(lvlNum, isLegal, overrideLevelTuples);
+
+            int count = getCount(lvlNum);
+            if (count < 0) {
+                count = 1;
+            }
+            if ("lowerLetter".equals(numFmt)) {
+                numFmtStyle = 4;
+            } else if ("lowerRoman".equals(numFmt)) {
+                numFmtStyle = 2;
+            } else if ("decimal".equals(numFmt)) {
+                numFmtStyle = 0;
+            } else if ("upperLetter".equals(numFmt)) {
+                numFmtStyle = 3;
+            } else if ("upperRoman".equals(numFmt)) {
+                numFmtStyle = 1;
+            } else if ("bullet".equals(numFmt)) {
+                return "";
+                //not yet handled by NumberFormatter...TODO: add to NumberFormatter?
+            } else if ("ordinal".equals(numFmt)) {
+                return ordinalize(count);
+            } else if ("decimalZero".equals(numFmt)) {
+                return "0" + NumberFormatter.getNumber(count, 0);
+            } else if ("none".equals(numFmt)) {
+                return "";
+            }
+            try {
+                return NumberFormatter.getNumber(count, numFmtStyle);
+            } catch (IllegalArgumentException e) {
+                return "";
+            }
+        }
+
+        private String ordinalize(int count) {
+            //this is only good for locale == English
+            String countString = Integer.toString(count);
+            if (countString.endsWith("1")) {
+                return countString + "st";
+            } else if (countString.endsWith("2")) {
+                return countString + "nd";
+            } else if (countString.endsWith("3")) {
+                return countString + "rd";
+            }
+            return countString + "th";
+        }
+
+        private String getNumFormat(int lvlNum, boolean isLegal, LevelTuple[] overrideLevelTuples) {
+            if (lvlNum < 0 || lvlNum >= levelTuples.length) {
+                //log?
+                return "decimal";
+            }
+            if (isLegal) {
+                //return decimal no matter the level if isLegal is true
+                return "decimal";
+            }
+            return (overrideLevelTuples == null || overrideLevelTuples[lvlNum].numFmt == null) ?
+                    levelTuples[lvlNum].numFmt : overrideLevelTuples[lvlNum].numFmt;
+        }
+
+        private int getCount(int lvlNum) {
+            if (lvlNum < 0 || lvlNum >= counts.size()) {
+                //log?
+                return 1;
+            }
+            return counts.get(lvlNum);
+        }
+
+        private void resetAfter(int startlevelNumber, LevelTuple[] overrideLevelTuples) {
+            for (int levelNumber = startlevelNumber + 1; levelNumber < counts.size();
+                 levelNumber++) {
+                int cnt = counts.get(levelNumber);
+                if (cnt == NOT_SEEN_YET) {
+                    //do nothing
+                } else if (cnt == FIRST_SKIPPED) {
+                    //do nothing
+                } else if (levelTuples.length > levelNumber) {
+                    //never reset if restarts == 0
+                    int restart = (overrideLevelTuples == null ||
+                            overrideLevelTuples[levelNumber].restart < 0) ?
+                            levelTuples[levelNumber].restart :
+                            overrideLevelTuples[levelNumber].restart;
+                    if (restart == 0) {
+                        return;
+                    } else if (restart == -1 || startlevelNumber <= restart - 1) {
+                        counts.set(levelNumber, NOT_SEEN_YET);
+                    } else {
+                        //do nothing/don't reset
+                    }
+                } else {
+                    //reset!
+                    counts.set(levelNumber, NOT_SEEN_YET);
+                }
+            }
+        }
+
+        private int getStart(int levelNumber, LevelTuple[] overrideLevelTuples) {
+            if (levelNumber >= levelTuples.length) {
+                return 1;
+            } else {
+                return (overrideLevelTuples == null || overrideLevelTuples[levelNumber].start < 0) ?
+                        levelTuples[levelNumber].start : overrideLevelTuples[levelNumber].start;
+            }
+        }
+    }
+
+    protected static class LevelTuple {
+        private final int start;
+        private final int restart;
+        private final String lvlText;
+        private final String numFmt;
+        private final boolean isLegal;
+
+        public LevelTuple(String lvlText) {
+            this.lvlText = lvlText;
+            start = 1;
+            restart = -1;
+            numFmt = "decimal";
+            isLegal = false;
+        }
+
+        public LevelTuple(int start, int restart, String lvlText, String numFmt, boolean isLegal) {
+            this.start = start;
+            this.restart = restart;
+            this.lvlText = lvlText;
+            this.numFmt = numFmt;
+            this.isLegal = isLegal;
+        }
+    }
+}
diff --git a/poi-ooxml/src/main/java/org/apache/poi/xwpf/extractor/internal/ContentHandlerDecorator.java b/poi-ooxml/src/main/java/org/apache/poi/xwpf/extractor/internal/ContentHandlerDecorator.java
new file mode 100644
index 00000000000..3a1c9eb72d1
--- /dev/null
+++ b/poi-ooxml/src/main/java/org/apache/poi/xwpf/extractor/internal/ContentHandlerDecorator.java
@@ -0,0 +1,224 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.xwpf.extractor.internal;
+
+import org.xml.sax.Attributes;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.ErrorHandler;
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+import org.xml.sax.SAXParseException;
+import org.xml.sax.helpers.DefaultHandler;
+
+/**
+ * Base class for decorating a {@link ContentHandler}. Every SAX event that
+ * reaches this class is passed on unchanged to the wrapped handler, so a
+ * subclass only needs to override the event methods it wants to adjust.
+ *
+ * <p>
+ * This is copied from Apache Tika.
+ * </p>
+ *
+ * @since POI 5.4.2
+ */
+public class ContentHandlerDecorator extends DefaultHandler {
+
+    /**
+     * The handler that receives the forwarded SAX events.
+     */
+    private ContentHandler handler;
+
+    /**
+     * Wraps the given SAX event handler.
+     *
+     * @param handler the handler that all events are forwarded to
+     */
+    public ContentHandlerDecorator(ContentHandler handler) {
+        assert handler != null;
+        this.handler = handler;
+    }
+
+    /**
+     * Wraps a plain {@link DefaultHandler} as the initial target of the
+     * forwarded events. A subclass that uses this constructor can install
+     * another target afterwards by calling
+     * {@link #setContentHandler(ContentHandler)}.
+     */
+    protected ContentHandlerDecorator() {
+        this(new DefaultHandler());
+    }
+
+    /**
+     * Replaces the wrapped handler. Events that arrive after this call are
+     * forwarded to the new handler; the previous one is no longer used.
+     *
+     * @param handler the new target of the forwarded events
+     */
+    protected void setContentHandler(ContentHandler handler) {
+        assert handler != null;
+        this.handler = handler;
+    }
+
+    @Override
+    public void startPrefixMapping(String prefix, String uri) throws SAXException {
+        try {
+            handler.startPrefixMapping(prefix, uri);
+        } catch (SAXException failure) {
+            handleException(failure);
+        }
+    }
+
+    @Override
+    public void endPrefixMapping(String prefix) throws SAXException {
+        try {
+            handler.endPrefixMapping(prefix);
+        } catch (SAXException failure) {
+            handleException(failure);
+        }
+    }
+
+    @Override
+    public void processingInstruction(String target, String data) throws SAXException {
+        try {
+            handler.processingInstruction(target, data);
+        } catch (SAXException failure) {
+            handleException(failure);
+        }
+    }
+
+    @Override
+    public void setDocumentLocator(Locator locator) {
+        handler.setDocumentLocator(locator);
+    }
+
+    @Override
+    public void startDocument() throws SAXException {
+        try {
+            handler.startDocument();
+        } catch (SAXException failure) {
+            handleException(failure);
+        }
+    }
+
+    @Override
+    public void endDocument() throws SAXException {
+        try {
+            handler.endDocument();
+        } catch (SAXException failure) {
+            handleException(failure);
+        }
+    }
+
+    @Override
+    public void startElement(String uri, String localName, String name, Attributes atts)
+            throws SAXException {
+        try {
+            handler.startElement(uri, localName, name, atts);
+        } catch (SAXException failure) {
+            handleException(failure);
+        }
+    }
+
+    @Override
+    public void endElement(String uri, String localName, String name) throws SAXException {
+        try {
+            handler.endElement(uri, localName, name);
+        } catch (SAXException failure) {
+            handleException(failure);
+        }
+    }
+
+    @Override
+    public void characters(char[] ch, int start, int length) throws SAXException {
+        try {
+            handler.characters(ch, start, length);
+        } catch (SAXException failure) {
+            handleException(failure);
+        }
+    }
+
+    @Override
+    public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
+        try {
+            handler.ignorableWhitespace(ch, start, length);
+        } catch (SAXException failure) {
+            handleException(failure);
+        }
+    }
+
+    @Override
+    public void skippedEntity(String name) throws SAXException {
+        try {
+            handler.skippedEntity(name);
+        } catch (SAXException failure) {
+            handleException(failure);
+        }
+    }
+
+    @Override
+    public String toString() {
+        return handler.toString();
+    }
+
+    /**
+     * Central hook for the SAXExceptions caught by the event methods of this
+     * class. When the wrapped handler is itself a ContentHandlerDecorator the
+     * exception is handed to that handler's
+     * {@link ContentHandlerDecorator#handleException(SAXException)};
+     *
+     * otherwise the exception is re-thrown as is. Subclasses can override
+     * this method to deal with the exception in another way.
+     *
+     * @param exception the exception that was caught
+     * @throws SAXException the exception (if any) passed on to the caller
+     */
+    protected void handleException(SAXException exception) throws SAXException {
+        if (!(handler instanceof ContentHandlerDecorator)) {
+            throw exception;
+        }
+        //the wrapped decorator decides what happens to the exception
+        ((ContentHandlerDecorator) handler).handleException(exception);
+    }
+
+    @Override
+    public void warning(SAXParseException exception) throws SAXException {
+        if (!(handler instanceof ErrorHandler)) {
+            super.warning(exception);
+            return;
+        }
+        ((ErrorHandler) handler).warning(exception);
+    }
+
+    @Override
+    public void error(SAXParseException exception) throws SAXException {
+        if (!(handler instanceof ErrorHandler)) {
+            super.error(exception);
+            return;
+        }
+        ((ErrorHandler) handler).error(exception);
+    }
+
+    @Override
+    public void fatalError(SAXParseException exception)
+            throws SAXException {
+        if (!(handler instanceof ErrorHandler)) {
+            super.fatalError(exception);
+            return;
+        }
+        ((ErrorHandler) handler).fatalError(exception);
+    }
+}
diff --git a/poi-ooxml/src/main/java/org/apache/poi/xwpf/extractor/internal/OOXMLWordAndPowerPointTextHandler.java b/poi-ooxml/src/main/java/org/apache/poi/xwpf/extractor/internal/OOXMLWordAndPowerPointTextHandler.java
new file mode 100644
index 00000000000..3b3e21beaee
--- /dev/null
+++ b/poi-ooxml/src/main/java/org/apache/poi/xwpf/extractor/internal/OOXMLWordAndPowerPointTextHandler.java
@@ -0,0 +1,634 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.xwpf.extractor.internal;
+
+import java.text.DateFormat;
+import java.text.DateFormatSymbols;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Date;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.TimeZone;
+
+import org.apache.poi.xwpf.usermodel.UnderlinePatterns;
+import org.xml.sax.Attributes;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.DefaultHandler;
+
+/**
+ * This class is intended to handle anything that might contain IBodyElements:
+ * main document, headers, footers, notes, slides, etc.
+ *
+ * <p>
+ * This class does not generally check for namespaces, and it can be applied
+ * to PPTX and DOCX for text extraction.
+ * </p>
+ * <p>
+ * This can be used to scrape content from charts.  It currently ignores
+ * formula (&lt;c:f/&gt;) elements.
+ * </p>
+ * <p>
+ * This does not work with .xlsx or .vsdx.
+ * </p>
+ *
+ * <p>
+ * This is copied from Apache Tika.
+ * </p>
+ *
+ * @since POI 5.4.2
+ */
+
+public class OOXMLWordAndPowerPointTextHandler extends DefaultHandler {
+
+    public final static String W_NS =
+            "http://schemas.openxmlformats.org/wordprocessingml/2006/main";
+    private final static String R = "r";
+    private final static String FLD = "fld"; //only matched on end tags; treated like the end of a run
+    private final static String RPR = "rPr";
+    private final static String P = "p";
+    private final static String P_STYLE = "pStyle";
+    private final static String PPR = "pPr";
+    private final static String T = "t";
+    private final static String TAB = "tab";
+    private final static String B = "b";
+    private final static String ILVL = "ilvl";
+    private final static String NUM_ID = "numId";
+    private final static String TC = "tc";
+    private final static String TR = "tr";
+    private final static String I = "i";
+    private final static String U = "u";
+    private final static String STRIKE = "strike";
+    private final static String NUM_PR = "numPr";
+    private final static String BR = "br";
+    private final static String HYPERLINK = "hyperlink";
+    private final static String HLINK_CLICK = "hlinkClick"; //pptx hlink
+    private final static String TBL = "tbl";
+    private final static String PIC = "pic";
+    private final static String PICT = "pict";
+    private final static String IMAGEDATA = "imagedata";
+    private final static String BLIP = "blip";
+    private final static String CHOICE = "Choice";
+    private final static String FALLBACK = "Fallback";
+    private final static String OLE_OBJECT = "OLEObject";
+    private final static String CR = "cr";
+    private final static String V = "v";
+    private final static String RUBY = "ruby"; //phonetic section
+    private final static String RT = "rt"; //phonetic run
+    private static final String VAL = "val";
+    private final static String MC_NS =
+            "http://schemas.openxmlformats.org/markup-compatibility/2006";
+    private final static String O_NS = "urn:schemas-microsoft-com:office:office";
+    private final static String PIC_NS = "http://schemas.openxmlformats.org/drawingml/2006/picture"; //not referenced in this class
+    private final static String DRAWING_MAIN_NS =
+            "http://schemas.openxmlformats.org/drawingml/2006/main"; //not referenced in this class
+    private final static String V_NS = "urn:schemas-microsoft-com:vml"; //not referenced in this class
+    private final static String C_NS = "http://schemas.openxmlformats.org/drawingml/2006/chart";
+    private final static String OFFICE_DOC_RELATIONSHIP_NS =
+            "http://schemas.openxmlformats.org/officeDocument/2006/relationships";
+    private final static char[] TAB_CHAR = new char[]{'\t'}; //appended for tab elements and after each chart value chunk
+    private final static char NEWLINE = '\n'; //appended for br and cr elements
+    private final static String BOOKMARK_START = "bookmarkStart";
+    private final static String BOOKMARK_END = "bookmarkEnd";
+    private final static String FOOTNOTE_REFERENCE = "footnoteReference";
+    private final static String INS = "ins";
+    private final static String DEL = "del";
+    private final static String DEL_TEXT = "delText";
+    private final static String MOVE_FROM = "moveFrom";
+    private final static String MOVE_TO = "moveTo";
+    private final static String ENDNOTE_REFERENCE = "endnoteReference";
+    private static final String TEXTBOX = "textbox";
+    private static final TimeZone UTC = TimeZone.getTimeZone("UTC");
+    private static final TimeZone MIDDAY = TimeZone.getTimeZone("GMT-12:00"); //midnight in this zone is 12:00 UTC; used for date-only formats
+
+    private final XWPFBodyContentsHandler bodyContentsHandler;
+    private final Map<String, String> linkedRelationships; //queried by relationship id for hyperlinks and pictures
+    private final RunProperties currRunProperties = new RunProperties(); //single instance, reused for every run
+    private final ParagraphProperties currPProperties = new ParagraphProperties(); //single instance, reset at the end of pPr
+    private final boolean includeTextBox;
+    private final boolean concatenatePhoneticRuns;
+    private final StringBuilder runBuffer = new StringBuilder();
+    private final StringBuilder rubyBuffer = new StringBuilder();
+    private boolean inR = false;
+    //inR (above): in run or in field. TODO: convert inR to an integer because you can have a run within a run
+    private boolean inT = false;
+    private boolean inRPr = false;
+    private boolean inNumPr = false;
+    private boolean inRt = false;
+    private boolean inPic = false;
+    private boolean inPict = false; //declared but not otherwise referenced in this class
+    private String picDescription = null;
+    private String picRId = null;
+    private String picFilename = null; //declared but not otherwise referenced; handlePict() uses a local variable
+    //mechanism used to determine when to
+    //signal the start of the p, and still
+    //handle p with pPr and those without
+    private boolean lastStartElementWasP = false;
+    //have we signaled the start of a p?
+    //pPr can happen multiple times within a p
+    //<p><pPr/><r><t>text</t></r><pPr></p>
+    private boolean pStarted = false;
+    //alternate content can be embedded in itself.
+    //need to track depth.
+    //if in alternate, choose fallback, maybe make this configurable?
+    private int inACChoiceDepth = 0;
+    private int inACFallbackDepth = 0; //counted, but never read by the code in this class
+    private boolean inDelText = false;
+    //rubyBuffer (declared above) buffers rt in ruby sections (see 17.3.3.25)
+    private boolean inHlinkClick = false;
+    private boolean inTextBox = false;
+    private boolean inV = false; //in c:v in chart file
+    private OOXMLWordAndPowerPointTextHandler.EditType editType =
+            OOXMLWordAndPowerPointTextHandler.EditType.NONE;
+    private final List<DateFormat> dateFormats; //SimpleDateFormat instances built per handler; see tryToParseDate
+
+    public OOXMLWordAndPowerPointTextHandler(XWPFBodyContentsHandler bodyContentsHandler,
+                                             Map<String, String> hyperlinks) {
+        this(bodyContentsHandler, hyperlinks, true, true); // includeTextBox = true, concatenatePhoneticRuns = true
+    }
+
+    public OOXMLWordAndPowerPointTextHandler(XWPFBodyContentsHandler bodyContentsHandler,
+                                             Map<String, String> hyperlinks, boolean includeTextBox,
+                                             boolean concatenatePhoneticRuns) {
+        this.dateFormats = loadDateFormats(); // a fresh list of formats for this handler
+        this.concatenatePhoneticRuns = concatenatePhoneticRuns;
+        this.includeTextBox = includeTextBox;
+        this.linkedRelationships = hyperlinks; // kept by reference and queried by relationship id
+        this.bodyContentsHandler = bodyContentsHandler;
+    }
+
+    @Override
+    public void startDocument() throws SAXException { // no-op: nothing is done at document start
+    }
+
+    @Override
+    public void endDocument() throws SAXException { // no-op: nothing is done at document end
+    }
+
+    @Override
+    public void startPrefixMapping(String prefix, String uri) throws SAXException { // no-op: prefix mappings are not tracked
+    }
+
+    @Override
+    public void endPrefixMapping(String prefix) throws SAXException { // no-op: prefix mappings are not tracked
+    }
+
+    @Override
+    public void startElement(String uri, String localName, String qName, Attributes atts)
+            throws SAXException {
+        //TODO: checkBox, textBox, sym, headerReference, footerReference, commentRangeEnd
+        // Dispatches mostly on the local name alone; the few namespace checks are visible in the branches below.
+        if (lastStartElementWasP && !PPR.equals(localName)) {
+            bodyContentsHandler.startParagraph(currPProperties); // deferred until the first start tag after p that is not pPr
+        }
+        // NOTE(review): pStarted is not set above, so a later pPr end tag in the same p calls startParagraph again -- confirm
+        lastStartElementWasP = false;
+
+        if (uri != null && uri.equals(MC_NS)) {
+            if (CHOICE.equals(localName)) {
+                inACChoiceDepth++;
+            } else if (FALLBACK.equals(localName)) {
+                inACFallbackDepth++;
+            }
+        }
+
+        if (inACChoiceDepth > 0) {
+            return; // everything inside mc:Choice is ignored
+        }
+
+        if (!includeTextBox && localName.equals(TEXTBOX)) {
+            inTextBox = true; // characters() drops text while this is set
+            return;
+        }
+        //these are sorted descending by frequency within docx files
+        //in our regression corpus.
+        //yes, I know, likely premature optimization...
+        if (RPR.equals(localName)) {
+            inRPr = true;
+        } else if (R.equals(localName)) {
+            inR = true;
+        } else if (T.equals(localName)) {
+            inT = true;
+        } else if (TAB.equals(localName)) {
+            runBuffer.append(TAB_CHAR);
+        } else if (P.equals(localName)) {
+            lastStartElementWasP = true; // startParagraph is signalled on the next start tag or at the end of pPr
+        } else if (B.equals(localName)) { //TODO: add bCs
+            if (inR && inRPr) {
+                currRunProperties.setBold(true);
+            }
+        } else if (TC.equals(localName)) {
+            bodyContentsHandler.startTableCell();
+        } else if (P_STYLE.equals(localName)) {
+            String styleId = atts.getValue(W_NS, "val");
+            currPProperties.setStyleID(styleId);
+        } else if (I.equals(localName)) { //TODO: add iCs
+            //rprs don't have to be inR; ignore those that aren't
+            if (inR && inRPr) {
+                currRunProperties.setItalics(true);
+            }
+        } else if (STRIKE.equals(localName)) {
+            if (inR && inRPr) {
+                currRunProperties.setStrike(true);
+            }
+        } else if (U.equals(localName)) {
+            if (inR && inRPr) {
+                currRunProperties.setUnderline(getStringVal(atts));
+            }
+        } else if (TR.equals(localName)) {
+            bodyContentsHandler.startTableRow();
+        } else if (NUM_PR.equals(localName)) {
+            inNumPr = true;
+        } else if (ILVL.equals(localName)) {
+            if (inNumPr) {
+                currPProperties.setIlvl(getIntVal(atts));
+            }
+        } else if (NUM_ID.equals(localName)) {
+            if (inNumPr) {
+                currPProperties.setNumId(getIntVal(atts));
+            }
+        } else if (BR.equals(localName)) {
+            runBuffer.append(NEWLINE);
+        } else if (BOOKMARK_START.equals(localName)) {
+            String name = atts.getValue(W_NS, "name");
+            String id = atts.getValue(W_NS, "id");
+            bodyContentsHandler.startBookmark(id, name);
+        } else if (BOOKMARK_END.equals(localName)) {
+            String id = atts.getValue(W_NS, "id");
+            bodyContentsHandler.endBookmark(id);
+        } else if (HYPERLINK.equals(localName)) { //docx hyperlink
+            String hyperlinkId = atts.getValue(OFFICE_DOC_RELATIONSHIP_NS, "id");
+            String hyperlink = null;
+            if (hyperlinkId != null) {
+                hyperlink = linkedRelationships.get(hyperlinkId);
+                bodyContentsHandler.hyperlinkStart(hyperlink); // null when the id has no entry in linkedRelationships
+            } else {
+                String anchor = atts.getValue(W_NS, "anchor");
+                if (anchor != null) {
+                    anchor = "#" + anchor;
+                }
+                bodyContentsHandler.hyperlinkStart(anchor); // still called with null when there is no anchor either
+            }
+        } else if (HLINK_CLICK.equals(localName)) { //pptx hyperlink
+            String hyperlinkId = atts.getValue(OFFICE_DOC_RELATIONSHIP_NS, "id");
+            String hyperlink = null;
+            if (hyperlinkId != null) {
+                hyperlink = linkedRelationships.get(hyperlinkId);
+                bodyContentsHandler.hyperlinkStart(hyperlink);
+                inHlinkClick = true; // the matching hyperlinkEnd is sent by handleEndOfRun()
+            }
+        } else if (TBL.equals(localName)) {
+            bodyContentsHandler.startTable();
+        } else if (BLIP.equals(localName)) { //check for DRAWING_NS
+            picRId = atts.getValue(OFFICE_DOC_RELATIONSHIP_NS, "embed");
+        } else if ("cNvPr".equals(localName)) { //check for PIC_NS?
+            picDescription = atts.getValue("", "descr");
+        } else if (PIC.equals(localName)) {
+            inPic = true; //check for PIC_NS?
+        } //TODO: add sdt, sdtPr, sdtContent goes here statistically
+        else if (FOOTNOTE_REFERENCE.equals(localName)) {
+            String id = atts.getValue(W_NS, "id");
+            bodyContentsHandler.footnoteReference(id);
+        } else if (IMAGEDATA.equals(localName)) {
+            picRId = atts.getValue(OFFICE_DOC_RELATIONSHIP_NS, "id");
+            picDescription = atts.getValue(O_NS, "title");
+        } else if (INS.equals(localName)) {
+            startEditedSection(editType.INSERT, atts); // enum constant reached through the instance field; same as EditType.INSERT
+        } else if (DEL_TEXT.equals(localName)) {
+            inDelText = true;
+        } else if (DEL.equals(localName)) {
+            startEditedSection(editType.DELETE, atts);
+        } else if (MOVE_TO.equals(localName)) {
+            startEditedSection(EditType.MOVE_TO, atts);
+        } else if (MOVE_FROM.equals(localName)) {
+            startEditedSection(editType.MOVE_FROM, atts);
+        } else if (OLE_OBJECT.equals(localName)) { //check for O_NS?
+            String type = null;
+            String refId = null;
+            //TODO: clean this up and ...want to get ProgID?
+            for (int i = 0; i < atts.getLength(); i++) {
+                String attLocalName = atts.getLocalName(i);
+                String attValue = atts.getValue(i);
+                if (attLocalName.equals("Type")) {
+                    type = attValue;
+                } else if (OFFICE_DOC_RELATIONSHIP_NS.equals(atts.getURI(i)) &&
+                        attLocalName.equals("id")) {
+                    refId = attValue;
+                }
+            }
+            if ("Embed".equals(type)) {
+                bodyContentsHandler.embeddedOLERef(refId); // refId is null when no r:id attribute was present
+            }
+        } else if (CR.equals(localName)) {
+            runBuffer.append(NEWLINE);
+        } else if (ENDNOTE_REFERENCE.equals(localName)) {
+            String id = atts.getValue(W_NS, "id");
+            bodyContentsHandler.endnoteReference(id);
+        } else if (V.equals(localName) && C_NS.equals(uri)) { // in value in a chart
+            inV = true;
+        } else if (RT.equals(localName)) {
+            inRt = true;
+        }
+
+    }
+
+    private void startEditedSection(EditType editType, Attributes atts) throws SAXException {
+        String editAuthor = atts.getValue(W_NS, "author");
+        String editDateString = atts.getValue(W_NS, "date");
+        Date editDate = null;
+        if (editDateString != null) {
+            editDate = tryToParseDate(editDateString);
+        }
+        bodyContentsHandler.startEditedSection(editAuthor, editDate, editType);
+        this.editType = editType;
+    }
+
+    private String getStringVal(Attributes atts) {
+        String valString = atts.getValue(W_NS, VAL);
+        if (valString != null) {
+            return valString;
+        }
+        return "";
+    }
+
+    private int getIntVal(Attributes atts) {
+        String valString = atts.getValue(W_NS, VAL);
+        if (valString != null) {
+            try {
+                return Integer.parseInt(valString);
+            } catch (NumberFormatException e) {
+                //swallow
+            }
+        }
+        return -1;
+    }
+
+    @Override
+    public void endElement(String uri, String localName, String qName) throws SAXException {
+        // Mirrors startElement: only markup-compatibility Choice/Fallback elements change the depth counters.
+        if (MC_NS.equals(uri) && CHOICE.equals(localName)) {
+            inACChoiceDepth--;
+        } else if (MC_NS.equals(uri) && FALLBACK.equals(localName)) {
+            inACFallbackDepth--;
+        }
+        if (inACChoiceDepth > 0) {
+            return; // still inside mc:Choice: its content is skipped entirely
+        }
+        if (!includeTextBox && localName.equals(TEXTBOX)) {
+            inTextBox = false;
+            return;
+        }
+        if (PIC.equals(localName)) { //PIC_NS
+            handlePict();
+            inPic = false;
+        } else if (RPR.equals(localName)) {
+            inRPr = false;
+        } else if (R.equals(localName)) {
+            handleEndOfRun();
+        } else if (T.equals(localName)) {
+            inT = false;
+        } else if (NUM_PR.equals(localName)) {
+            inNumPr = false; // without this, ilvl/numId seen outside numPr would be applied later on
+        } else if (PPR.equals(localName)) {
+            if (!pStarted) {
+                bodyContentsHandler.startParagraph(currPProperties);
+                pStarted = true;
+            }
+            currPProperties.reset();
+        } else if (P.equals(localName)) {
+            if (runBuffer.length() > 0) {
+                //<p><tab></p>...this will treat that as if it were
+                //a run...TODO: should we swallow whitespace that doesn't occur in a run?
+                bodyContentsHandler.run(currRunProperties, runBuffer.toString());
+                runBuffer.setLength(0);
+            }
+            pStarted = false;
+            bodyContentsHandler.endParagraph();
+        } else if (TC.equals(localName)) {
+            bodyContentsHandler.endTableCell();
+        } else if (TR.equals(localName)) {
+            bodyContentsHandler.endTableRow();
+        } else if (TBL.equals(localName)) {
+            bodyContentsHandler.endTable();
+        } else if (FLD.equals(localName)) {
+            handleEndOfRun(); // a field is flushed exactly like a run
+        } else if (DEL_TEXT.equals(localName)) {
+            inDelText = false;
+        } else if (INS.equals(localName) || DEL.equals(localName) || MOVE_TO.equals(localName) ||
+                MOVE_FROM.equals(localName)) {
+            editType = EditType.NONE;
+        } else if (HYPERLINK.equals(localName)) {
+            bodyContentsHandler.hyperlinkEnd();
+        } else if (PICT.equals(localName)) {
+            handlePict();
+        } else if (V.equals(localName) && C_NS.equals(uri)) { // in value in a chart
+            inV = false;
+            handleEndOfRun();
+        } else if (RT.equals(localName)) {
+            inRt = false;
+        } else if (RUBY.equals(localName)) {
+            handleEndOfRuby();
+        }
+    }
+
+    private void handleEndOfRuby() throws SAXException {
+        if (rubyBuffer.length() > 0) {
+            if (concatenatePhoneticRuns) {
+                bodyContentsHandler.run(currRunProperties, " (" + rubyBuffer.toString() + ")");
+            }
+            rubyBuffer.setLength(0);
+        }
+    }
+
+    private void handleEndOfRun() throws SAXException {
+        bodyContentsHandler.run(currRunProperties, runBuffer.toString()); // sent even when the text is empty
+        if (inHlinkClick) { // closes the link opened for a pptx hlinkClick element
+            bodyContentsHandler.hyperlinkEnd();
+            inHlinkClick = false;
+        }
+        runBuffer.setLength(0);
+        inR = false;
+        currRunProperties.setUnderline(UnderlinePatterns.NONE.name());
+        currRunProperties.setStrike(false);
+        currRunProperties.setItalics(false);
+        currRunProperties.setBold(false);
+    }
+
+    private void handlePict() throws SAXException {
+        String picFileName = null;
+        if (picRId != null) {
+            picFileName = linkedRelationships.get(picRId);
+        }
+        bodyContentsHandler.embeddedPicRef(picFileName, picDescription);
+        picDescription = null;
+        picRId = null;
+        inPic = false;
+    }
+
+    @Override
+    public void characters(char[] ch, int start, int length) throws SAXException {
+        // text inside mc:Choice, or inside a text box when text boxes are excluded, is dropped
+        final boolean suppressed = inACChoiceDepth > 0 || (!includeTextBox && inTextBox);
+        if (suppressed) {
+            return;
+        }
+        if (inT) {
+            // moved-from text is kept only when the handler asks for it
+            final boolean movedAway = editType == EditType.MOVE_FROM;
+            if (!movedAway || bodyContentsHandler.isIncludeMoveFromText()) {
+                appendToBuffer(ch, start, length);
+            }
+            return;
+        }
+        if (bodyContentsHandler.isIncludeDeletedText() && editType == EditType.DELETE) {
+            appendToBuffer(ch, start, length);
+        } else if (inV) { // each chunk of a chart value (c:v) is followed by a tab
+            appendToBuffer(ch, start, length);
+            appendToBuffer(TAB_CHAR, 0, 1);
+        }
+    }
+
+    @Override
+    public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
+        // skipped while inside mc:Choice, and inside a text box when text boxes are excluded
+        final boolean suppressed = inACChoiceDepth > 0 || (!includeTextBox && inTextBox);
+        if (suppressed) {
+            return;
+        }
+        // kept inside t; inside delText only when the handler wants deleted text
+        // (the handler is not consulted at all while inT is set)
+        final boolean keep = inT || (bodyContentsHandler.isIncludeDeletedText() && inDelText);
+        if (keep) {
+            appendToBuffer(ch, start, length);
+        }
+    }
+
+    private void appendToBuffer(char[] ch, int start, int length) throws SAXException {
+        if (inRt) {
+            rubyBuffer.append(ch, start, length);
+        } else {
+            runBuffer.append(ch, start, length);
+        }
+    }
+
+    /**
+     * Tries to parse the date string; returns null if no parse was possible.
+     * <p>
+     * This is not thread safe!
+     */
+    private Date tryToParseDate(String dateString) {
+        // Java doesn't like timezones in the form ss+hh:mm
+        // It only likes the hhmm form, without the colon
+        int n = dateString.length();
+        if (dateString.charAt(n - 3) == ':' &&
+                (dateString.charAt(n - 6) == '+' || dateString.charAt(n - 6) == '-')) {
+            dateString = dateString.substring(0, n - 3) + dateString.substring(n - 2);
+        }
+
+        for (DateFormat df : dateFormats) {
+            try {
+                return df.parse(dateString);
+            } catch (java.text.ParseException e) {
+                //swallow
+            }
+        }
+        return null;
+    }
+
+    private static List<DateFormat> loadDateFormats() {
+        List<DateFormat> dateFormats = new ArrayList<>();
+        // yyyy-mm-ddThh...
+        dateFormats.add(createDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'", UTC));   // UTC/Zulu
+        dateFormats.add(createDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", null));    // With timezone
+        dateFormats.add(createDateFormat("yyyy-MM-dd'T'HH:mm:ss", null));     // Without timezone
+        // yyyy-mm-dd hh...
+        dateFormats.add(createDateFormat("yyyy-MM-dd' 'HH:mm:ss'Z'", UTC));   // UTC/Zulu
+        dateFormats.add(createDateFormat("yyyy-MM-dd' 'HH:mm:ssZ", null));    // With timezone
+        dateFormats.add(createDateFormat("yyyy-MM-dd' 'HH:mm:ss", null));     // Without timezone
+        // Date without time, set to Midday UTC
+        dateFormats.add(createDateFormat("yyyy-MM-dd", MIDDAY));       // Normal date format
+        dateFormats.add(createDateFormat("yyyy:MM:dd",
+                MIDDAY));              // Image (IPTC/EXIF) format
+
+        return dateFormats;
+    }
+
+    private static DateFormat createDateFormat(String format, TimeZone timezone) {
+        // US date symbols are used whatever the default locale happens to be
+        final DateFormatSymbols usSymbols = new DateFormatSymbols(Locale.US);
+        final SimpleDateFormat result = new SimpleDateFormat(format, usSymbols);
+        if (timezone != null) { result.setTimeZone(timezone); } // null leaves the format's own zone untouched
+        return result;
+    }
+
+    public enum EditType { // the kind of edited section (ins, del, moveTo, moveFrom) the handler is currently inside
+        NONE, INSERT, DELETE, MOVE_TO, MOVE_FROM // NONE is the initial value and is restored when such a section ends
+    }
+
+    public interface XWPFBodyContentsHandler {
+        /** Receives the text buffered for a run, field or chart value; {@code runProperties} is one reused, mutable object. */
+        void run(RunProperties runProperties, String contents) throws SAXException;
+
+        /**
+         * @param link the link; can be null (no matching relationship, or no anchor)
+         */
+        void hyperlinkStart(String link) throws SAXException;
+        /** Sent at the end of a hyperlink element, and at the end of a run that followed an hlinkClick. */
+        void hyperlinkEnd() throws SAXException;
+        /** Sent once the start of a paragraph is signalled; {@code paragraphProperties} is one reused, mutable object. */
+        void startParagraph(ParagraphProperties paragraphProperties) throws SAXException;
+        /** Sent at the end tag of a p element. */
+        void endParagraph() throws SAXException;
+        /** Sent at the start tag of a tbl element. */
+        void startTable() throws SAXException;
+        /** Sent at the end tag of a tbl element. */
+        void endTable() throws SAXException;
+        /** Sent at the start tag of a tr element. */
+        void startTableRow() throws SAXException;
+        /** Sent at the end tag of a tr element. */
+        void endTableRow() throws SAXException;
+        /** Sent at the start tag of a tc element. */
+        void startTableCell() throws SAXException;
+        /** Sent at the end tag of a tc element. */
+        void endTableCell() throws SAXException;
+        /** Not invoked by the enclosing handler at present (sdt support is still a TODO there). */
+        void startSDT() throws SAXException;
+        /** Not invoked by the enclosing handler at present (sdt support is still a TODO there). */
+        void endSDT() throws SAXException;
+        /** Sent for ins, del, moveTo and moveFrom; {@code editor} and {@code date} are null when absent or unparseable. */
+        void startEditedSection(String editor, Date date, EditType editType) throws SAXException;
+        /** NOTE(review): no call to this method is visible in the enclosing handler -- confirm whether one is intended. */
+        void endEditedSection() throws SAXException;
+        /** Consulted per text chunk to decide whether deleted text is kept. */
+        boolean isIncludeDeletedText() throws SAXException;
+        /** @param id the w:id attribute of the footnoteReference element; null when absent */
+        void footnoteReference(String id) throws SAXException;
+        /** @param id the w:id attribute of the endnoteReference element; null when absent */
+        void endnoteReference(String id) throws SAXException;
+        /** Consulted per text chunk to decide whether text inside a moveFrom section is kept. */
+        boolean isIncludeMoveFromText() throws SAXException;
+        /** @param refId the r:id of an OLEObject whose Type is "Embed"; null when the element has no r:id */
+        void embeddedOLERef(String refId) throws SAXException;
+        /** Both arguments may be null: the file name is looked up by relationship id, the description is optional. */
+        void embeddedPicRef(String picFileName, String picDescription) throws SAXException;
+        /** @param id the w:id attribute, @param name the w:name attribute of the bookmarkStart element */
+        void startBookmark(String id, String name) throws SAXException;
+        /** @param id the w:id attribute of the bookmarkEnd element */
+        void endBookmark(String id) throws SAXException;
+    }
+}
diff --git a/poi-ooxml/src/main/java/org/apache/poi/xwpf/extractor/internal/OfflineContentHandler.java b/poi-ooxml/src/main/java/org/apache/poi/xwpf/extractor/internal/OfflineContentHandler.java
new file mode 100644
index 00000000000..47edc2455ec
--- /dev/null
+++ b/poi-ooxml/src/main/java/org/apache/poi/xwpf/extractor/internal/OfflineContentHandler.java
@@ -0,0 +1,50 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.xwpf.extractor.internal;
+
+import org.apache.commons.io.input.ClosedInputStream;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.InputSource;
+
+/**
+ * Content handler decorator that always returns an empty stream from the
+ * {@link #resolveEntity(String, String)} method to prevent potential
+ * network or other external resources from being accessed by an XML parser.
+ * <p>
+ * This is copied from Apache Tika.
+ * </p>
+ *
+ * @see <a href="https://issues.apache.org/jira/browse/TIKA-185">TIKA-185</a>
+ * @since POI 5.4.2
+ */
+final class OfflineContentHandler extends ContentHandlerDecorator {
+    /** @param handler the content handler handed to the {@code ContentHandlerDecorator} constructor */
+    public OfflineContentHandler(ContentHandler handler) {
+        super(handler);
+    }
+
+    /**
+     * Answers every entity lookup with an {@link InputSource} over an already-closed stream,
+     * so that the parser has nothing to read for external entities; both arguments are ignored.
+     */
+    @Override
+    public InputSource resolveEntity(String publicId, String systemId) {
+        final ClosedInputStream nothingToRead = new ClosedInputStream();
+        return new InputSource(nothingToRead);
+    }
+}
+
diff --git a/poi-ooxml/src/main/java/org/apache/poi/xwpf/extractor/internal/ParagraphProperties.java b/poi-ooxml/src/main/java/org/apache/poi/xwpf/extractor/internal/ParagraphProperties.java
new file mode 100644
index 00000000000..d77e7d1f79e
--- /dev/null
+++ b/poi-ooxml/src/main/java/org/apache/poi/xwpf/extractor/internal/ParagraphProperties.java
@@ -0,0 +1,61 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.xwpf.extractor.internal;
+
+/**
+ * <p>
+ * This is copied from Apache Tika.
+ * </p>
+ *
+ * @since POI 5.4.2
+ */
+public class ParagraphProperties {
+    private static final int UNSET = -1; // value of ilvl and numId while nothing has been set
+    private String styleId;
+    private int ilvl = UNSET;
+    private int numId = UNSET;
+    /** Restores the freshly-constructed state: no style id, {@code ilvl} and {@code numId} at -1. */
+    public void reset() {
+        this.styleId = null;
+        this.numId = UNSET;
+        this.ilvl = UNSET;
+    }
+    /** @return the stored style id, or {@code null} when none has been set */
+    public String getStyleID() {
+        return this.styleId;
+    }
+    /** @param styleId the new style id; {@code null} is stored as-is */
+    public void setStyleID(String styleId) {
+        this.styleId = styleId;
+    }
+    /** @return the stored ilvl value, or -1 when none has been set */
+    public int getIlvl() {
+        return this.ilvl;
+    }
+    /** @param ilvl the new ilvl value, stored without validation */
+    public void setIlvl(int ilvl) {
+        this.ilvl = ilvl;
+    }
+    /** @return the stored numId value, or -1 when none has been set */
+    public int getNumId() {
+        return this.numId;
+    }
+    /** @param numId the new numId value, stored without validation */
+    public void setNumId(int numId) {
+        this.numId = numId;
+    }
+}
diff --git a/poi-ooxml/src/main/java/org/apache/poi/xwpf/extractor/internal/RunProperties.java b/poi-ooxml/src/main/java/org/apache/poi/xwpf/extractor/internal/RunProperties.java
new file mode 100644
index 00000000000..2feb7646cec
--- /dev/null
+++ b/poi-ooxml/src/main/java/org/apache/poi/xwpf/extractor/internal/RunProperties.java
@@ -0,0 +1,76 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.xwpf.extractor.internal;
+
+import org.apache.poi.xwpf.usermodel.UnderlinePatterns;
+
+/**
+ * WARNING: This class is mutable.  Make a copy of it
+ * if you want persistence!
+ * <p>
+ * This is copied from Apache Tika.
+ * </p>
+ *
+ * @since POI 5.4.2
+ */
+public final class RunProperties {
+    boolean italics = false;
+    boolean bold = false;
+    boolean strikeThrough = false;
+    UnderlinePatterns underline = UnderlinePatterns.NONE;
+    /** @return whether the italics flag is currently set */
+    public boolean isItalics() {
+        return italics;
+    }
+    /** @param italics the new value of the italics flag */
+    public void setItalics(boolean italics) {
+        this.italics = italics;
+    }
+    /** @return whether the bold flag is currently set */
+    public boolean isBold() {
+        return bold;
+    }
+    /** @param bold the new value of the bold flag */
+    public void setBold(boolean bold) {
+        this.bold = bold;
+    }
+    /** @return whether the strike-through flag is currently set */
+    public boolean isStrikeThrough() {
+        return strikeThrough;
+    }
+    /** @param strikeThrough the new value of the strike-through flag */
+    public void setStrike(boolean strikeThrough) {
+        this.strikeThrough = strikeThrough;
+    }
+    /** @return the current underline pattern; {@link UnderlinePatterns#NONE} until one is set */
+    public UnderlinePatterns getUnderline() {
+        return underline;
+    }
+    /**
+     * Sets the underline from its attribute text: {@code "none"} (any letter case) clears it,
+     * anything else, including null or the empty string, counts as {@link UnderlinePatterns#SINGLE}.
+     */
+    public void setUnderline(String underlineString) {
+        if (UnderlinePatterns.NONE.name().equalsIgnoreCase(underlineString)) { // documents write "none", the enum name is NONE
+            underline = UnderlinePatterns.NONE;
+        } else {
+            //TODO -- fill out rest
+            underline = UnderlinePatterns.SINGLE;
+        }
+    }
+}
+
diff --git a/poi-ooxml/src/main/java/org/apache/poi/xwpf/extractor/internal/XMLReaderUtils.java b/poi-ooxml/src/main/java/org/apache/poi/xwpf/extractor/internal/XMLReaderUtils.java
new file mode 100644
index 00000000000..d5abe5f9b83
--- /dev/null
+++ b/poi-ooxml/src/main/java/org/apache/poi/xwpf/extractor/internal/XMLReaderUtils.java
@@ -0,0 +1,50 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.xwpf.extractor.internal;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.Serializable;
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.parsers.SAXParser;
+
+import org.apache.poi.util.XMLHelper;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+/**
+ * <p>
+ * This is copied from Apache Tika.
+ * </p>
+ *
+ * @since POI 5.4.2
+ */
+public final class XMLReaderUtils implements Serializable {
+    /**
+     * Parses the stream with a new {@link SAXParser} obtained from the {@link XMLHelper}
+     * factory; events reach {@code contentHandler} through an OfflineContentHandler wrapper.
+     */
+    public static void parseSAX(InputStream is, ContentHandler contentHandler)
+            throws IOException, SAXException {
+        try {
+            SAXParser parser = XMLHelper.getSaxParserFactory().newSAXParser();
+            parser.parse(is, new OfflineContentHandler(contentHandler));
+        } catch (ParserConfigurationException e) {
+            throw new SAXException(e);
+        }
+    }
+}
diff --git a/poi-ooxml/src/main/java/org/apache/poi/xwpf/extractor/internal/XWPFListManager.java b/poi-ooxml/src/main/java/org/apache/poi/xwpf/extractor/internal/XWPFListManager.java
new file mode 100644
index 00000000000..6e458d8f70b
--- /dev/null
+++ b/poi-ooxml/src/main/java/org/apache/poi/xwpf/extractor/internal/XWPFListManager.java
@@ -0,0 +1,193 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.xwpf.extractor.internal;
+
+import java.math.BigInteger;
+
+import org.apache.poi.xwpf.usermodel.XWPFAbstractNum;
+import org.apache.poi.xwpf.usermodel.XWPFNum;
+import org.apache.poi.xwpf.usermodel.XWPFNumbering;
+import org.apache.poi.xwpf.usermodel.XWPFParagraph;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTAbstractNum;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTDecimalNumber;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTLvl;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTNum;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTNumLvl;
+
+/**
+ * <p>
+ * This is copied from Apache Tika.
+ * </p>
+ *
+ * @since POI 5.4.2
+ */
+public class XWPFListManager extends AbstractListManager {
+
+    /**
+     * Empty singleton to be used when there is no list manager.
+     * Always returns empty string.
+     */
+    public final static XWPFListManager EMPTY_LIST = new EmptyListManager();
+    //if this shows up as the lvlText, don't show a number (not referenced in this class yet)
+    private final static String SKIP_FORMAT = Character.toString((char) 61623);
+
+    private final XWPFNumbering numbering;
+
+    /** @param numbering numbering definitions; with {@code null} every formatted number is "" */
+    public XWPFListManager(XWPFNumbering numbering) {
+        this.numbering = numbering;
+    }
+
+    /**
+     * @param paragraph paragraph
+     * @return the formatted number or an empty string if something went wrong
+     */
+    public String getFormattedNumber(final XWPFParagraph paragraph) {
+        BigInteger iLvl = paragraph.getNumIlvl();
+        return getFormattedNumber(paragraph.getNumID(), iLvl == null ? -1 : iLvl.intValue());
+    }
+
+    /**
+     * Increments the counter of list {@code numId} at level {@code iLvl} and formats it.
+     *
+     * @param numId id of the numbering instance, may be {@code null}
+     * @param iLvl  list level, negative if there is none
+     * @return the formatted number or an empty string if the list cannot be resolved
+     */
+    public String getFormattedNumber(BigInteger numId, int iLvl) {
+        if (numbering == null || iLvl < 0 || numId == null) {
+            return "";
+        }
+        XWPFNum xwpfNum = numbering.getNum(numId);
+        if (xwpfNum == null) {
+            return "";
+        }
+        CTNum ctNum = xwpfNum.getCTNum();
+        CTDecimalNumber abNum = ctNum.getAbstractNumId();
+        if (abNum == null || abNum.getVal() == null) {
+            //the instance does not reference an abstract numbering
+            return "";
+        }
+        int currNumId = numId.intValue();
+        int currAbNumId = abNum.getVal().intValue();
+
+        ParagraphLevelCounter lc = listLevelMap.get(currAbNumId);
+        if (lc == null) {
+            lc = loadLevelTuples(abNum);
+            if (lc == null) {
+                //the referenced abstract numbering is not defined
+                return "";
+            }
+        }
+        LevelTuple[] overrideTuples = overrideTupleMap.get(currNumId);
+        if (overrideTuples == null) {
+            overrideTuples = loadOverrideTuples(ctNum, lc.getNumberOfLevels());
+        }
+        String formattedString = lc.incrementLevel(iLvl, overrideTuples);
+        listLevelMap.put(currAbNumId, lc);
+        overrideTupleMap.put(currNumId, overrideTuples);
+        return formattedString;
+    }
+
+    /** @return one tuple per level, or {@code null} if the instance defines no overrides */
+    private LevelTuple[] loadOverrideTuples(CTNum ctNum, int length) {
+        int overrideLength = ctNum.sizeOfLvlOverrideArray();
+        if (overrideLength == 0) {
+            return null;
+        }
+        LevelTuple[] levelTuples = new LevelTuple[length];
+        for (int i = 0; i < length; i++) {
+            //NOTE(review): overrides are looked up by array position, not by their ilvl -- confirm
+            CTNumLvl ctNumLvl = i < overrideLength ? ctNum.getLvlOverrideArray(i) : null;
+            levelTuples[i] = ctNumLvl == null
+                    ? new LevelTuple("%" + i + ".") : buildTuple(i, ctNumLvl.getLvl());
+        }
+        return levelTuples;
+    }
+
+    /** @return the level counter of the abstract numbering, or {@code null} if it is undefined */
+    private ParagraphLevelCounter loadLevelTuples(CTDecimalNumber abNum) {
+        //Unfortunately, we need to go this far into the underlying structure
+        //to get the abstract num information for the edge case where
+        //someone skips a level and the format is not context-free, e.g. "1.B.i".
+        XWPFAbstractNum abstractNum = numbering.getAbstractNum(abNum.getVal());
+        if (abstractNum == null) {
+            return null;
+        }
+        CTAbstractNum ctAbstractNum = abstractNum.getCTAbstractNum();
+
+        LevelTuple[] levels = new LevelTuple[ctAbstractNum.sizeOfLvlArray()];
+        for (int i = 0; i < levels.length; i++) {
+            levels[i] = buildTuple(i, ctAbstractNum.getLvlArray(i));
+        }
+        return new ParagraphLevelCounter(levels);
+    }
+
+    /**
+     * Converts a level definition into a tuple; values that are not specified get defaults.
+     */
+    private LevelTuple buildTuple(int level, CTLvl ctLvl) {
+        String lvlText = "%" + level + ".";
+        if (ctLvl == null) {
+            //no definition at all: decimal numbering with the decimal fallback start
+            return new LevelTuple(0, -1, lvlText, "decimal", false);
+        }
+        //NOTE(review): the mere presence of isLgl counts as true, its value is not read -- confirm
+        boolean isLegal = ctLvl.getIsLgl() != null;
+        String numFmt = "decimal";
+        if (ctLvl.getNumFmt() != null && ctLvl.getNumFmt().getVal() != null) {
+            numFmt = ctLvl.getNumFmt().getVal().toString();
+        }
+        int restart = -1;
+        if (ctLvl.getLvlRestart() != null && ctLvl.getLvlRestart().getVal() != null) {
+            restart = ctLvl.getLvlRestart().getVal().intValue();
+        }
+        int start;
+        if (ctLvl.getStart() != null && ctLvl.getStart().getVal() != null) {
+            start = ctLvl.getStart().getVal().intValue();
+        } else {
+            //this is a hack. Currently, this gets the lowest possible
+            //start for a given numFmt.  We should probably try to grab the
+            //restartNumberingAfterBreak value in
+            //e.g. <w:abstractNum w:abstractNumId="12" w15:restartNumberingAfterBreak="0">???
+            boolean zeroBased = "decimal".equals(numFmt) || "ordinal".equals(numFmt)
+                    || "decimalZero".equals(numFmt);
+            start = zeroBased ? 0 : 1;
+        }
+        if (ctLvl.getLvlText() != null && ctLvl.getLvlText().getVal() != null) {
+            lvlText = ctLvl.getLvlText().getVal();
+        }
+        return new LevelTuple(start, restart, lvlText, numFmt, isLegal);
+    }
+
+    /** List manager without numbering definitions: every formatted number is "". */
+    private static class EmptyListManager extends XWPFListManager {
+        EmptyListManager() {
+            super(null);
+        }
+
+        @Override
+        public String getFormattedNumber(XWPFParagraph paragraph) {
+            return "";
+        }
+
+        @Override
+        public String getFormattedNumber(BigInteger numId, int iLvl) {
+            return "";
+        }
+    }
+}
diff --git a/poi-ooxml/src/test/java/org/apache/poi/xwpf/XWPFTestDataSamples.java b/poi-ooxml/src/test/java/org/apache/poi/xwpf/XWPFTestDataSamples.java
index 510765d318e..3701336bb18 100644
--- a/poi-ooxml/src/test/java/org/apache/poi/xwpf/XWPFTestDataSamples.java
+++ b/poi-ooxml/src/test/java/org/apache/poi/xwpf/XWPFTestDataSamples.java
@@ -21,6 +21,8 @@ Licensed to the Apache Software Foundation (ASF) under one or more
 
 import org.apache.commons.io.output.UnsynchronizedByteArrayOutputStream;
 import org.apache.poi.POIDataSamples;
+import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
+import org.apache.poi.openxml4j.opc.OPCPackage;
 import org.apache.poi.util.IOUtils;
 import org.apache.poi.xwpf.usermodel.XWPFDocument;
 
@@ -31,6 +33,11 @@ public static XWPFDocument openSampleDocument(String sampleName) throws IOExcept
         return new XWPFDocument(is);
     }
 
+    /** Opens the named sample from {@code POIDataSamples.getDocumentInstance()} as an OPC package. */
+    public static OPCPackage openSampleOPCPackage(String sampleName) throws IOException, InvalidFormatException {
+        return OPCPackage.open(POIDataSamples.getDocumentInstance().openResourceAsStream(sampleName));
+    }
+
     public static XWPFDocument writeOutAndReadBack(XWPFDocument doc) throws IOException {
         UnsynchronizedByteArrayOutputStream baos = UnsynchronizedByteArrayOutputStream.builder().setBufferSize(4096).get();
         doc.write(baos);
diff --git a/poi-ooxml/src/test/java/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java b/poi-ooxml/src/test/java/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java
index 90f4c817a51..c44e4df4d4c 100644
--- a/poi-ooxml/src/test/java/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java
+++ b/poi-ooxml/src/test/java/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java
@@ -33,6 +33,7 @@ Licensed to the Apache Software Foundation (ASF) under one or more
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
+import org.apache.poi.openxml4j.opc.OPCPackage;
 import org.apache.poi.util.StringUtil;
 import org.apache.poi.xssf.usermodel.XSSFRelation;
 import org.apache.poi.xwpf.XWPFTestDataSamples;
@@ -40,6 +41,7 @@ Licensed to the Apache Software Foundation (ASF) under one or more
 import org.apache.poi.xwpf.usermodel.XWPFSDT;
 import org.apache.xmlbeans.XmlCursor;
 import org.apache.xmlbeans.XmlObject;
+import org.junit.jupiter.api.Disabled;
 import org.junit.jupiter.api.Test;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtBlock;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtRow;
@@ -57,11 +59,12 @@ class TestXWPFWordExtractor {
      */
     @Test
     void testGetSimpleText() throws IOException {
-        try (XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("sample.docx");
-            XWPFWordExtractor extractor = new XWPFWordExtractor(doc)) {
-
+        try (
+                XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("sample.docx");
+                XWPFWordExtractor extractor = new XWPFWordExtractor(doc)
+        ) {
             String text = extractor.getText();
-            assertTrue(text.length() > 0);
+            assertFalse(text.isEmpty());
 
             // Check contents
             assertStartsWith(text,
@@ -77,6 +80,27 @@ void testGetSimpleText() throws IOException {
         }
     }
 
+    @Test
+    void testGetSimpleTextEventBased() throws Exception {
+        try (
+                OPCPackage pkg = XWPFTestDataSamples.openSampleOPCPackage("sample.docx");
+                XWPFEventBasedWordExtractor extractor = new XWPFEventBasedWordExtractor(pkg)
+        ) {
+            String text = extractor.getText();
+            assertFalse(text.isEmpty());
+
+            // result is a bit different from the one in testGetSimpleText (extra whitespace)
+
+            // Check contents
+            assertContains(text,
+                    "Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Nunc at risus vel erat tempus posuere. Aenean non ante. Suspendisse vehicula dolor sit amet odio."
+            );
+            assertContains(text,
+                    "Phasellus ultricies mi nec leo. Sed tempus. In sit amet lorem at velit faucibus vestibulum.\n"
+            );
+        }
+    }
+
     /**
      * Tests getting the text out of a complex file
      */
@@ -86,7 +110,7 @@ void testGetComplexText() throws IOException {
             XWPFWordExtractor extractor = new XWPFWordExtractor(doc)) {
 
             String text = extractor.getText();
-            assertTrue(text.length() > 0);
+            assertFalse(text.isEmpty());
 
             char euro = '\u20ac';
 
@@ -107,6 +131,31 @@ void testGetComplexText() throws IOException {
         }
     }
 
+    @Test
+    void testGetComplexTextEventBased() throws Exception {
+        try (OPCPackage pkg = XWPFTestDataSamples.openSampleOPCPackage("IllustrativeCases.docx");
+             XWPFEventBasedWordExtractor extractor = new XWPFEventBasedWordExtractor(pkg)) {
+
+            String text = extractor.getText();
+            assertFalse(text.isEmpty());
+
+            char euro = '\u20ac';
+
+            // Check contents
+            assertStartsWith(text,
+                    "  \n(V) ILLUSTRATIVE CASES\n\n"
+            );
+            assertContains(text,
+                    "As well as gaining " + euro + "90 from child benefit increases, he will also receive the early childhood supplement of " + euro + "250 per quarter for Vincent for the full four quarters of the year.\n\n\n\n"// \n\n\n"
+            );
+
+            // TODO find out why this fails
+            //assertEndsWith(text,
+            //        "11.4%\t\t90\t\t\t\t\t250\t\t1,310\t\n\n \n\n\n"
+            //);
+        }
+    }
+
     @Test
     void testGetWithHyperlinks() throws IOException {
         try (XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("TestDocument.docx");
@@ -234,6 +283,16 @@ void testInsertedDeletedText() throws IOException {
         }
     }
 
+    @Test
+    void testInsertedDeletedTextEventBased() throws Exception {
+        try (OPCPackage pkg = XWPFTestDataSamples.openSampleOPCPackage("delins.docx");
+             XWPFEventBasedWordExtractor extractor = new XWPFEventBasedWordExtractor(pkg)) {
+            String text = extractor.getText(); // extracted once, both checks use the same text
+            assertContains(text, "pendant worn");
+            assertContains(text, "extremely well");
+        }
+    }
+
     @Test
     void testParagraphHeader() throws IOException {
         try (XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("Headers.docx");
@@ -245,6 +304,17 @@ void testParagraphHeader() throws IOException {
         }
     }
 
+    @Test
+    void testParagraphHeaderEventBased() throws Exception {
+        try (OPCPackage pkg = XWPFTestDataSamples.openSampleOPCPackage("Headers.docx");
+             XWPFEventBasedWordExtractor extractor = new XWPFEventBasedWordExtractor(pkg)) {
+            String text = extractor.getText(); // extracted once, all checks use the same text
+            assertContains(text, "Section 1");
+            assertContains(text, "Section 2");
+            assertContains(text, "Section 3");
+        }
+    }
+
     /**
      * Test that we can open and process .docm
      * (macro enabled) docx files (bug #45690)
@@ -260,6 +330,18 @@ void testDOCMFiles() throws IOException {
         }
     }
 
+    @Disabled // NOTE(review): the reason for disabling is not recorded -- confirm and document
+    @Test
+    void testDOCMFilesEventBased() throws Exception {
+        try (OPCPackage pkg = XWPFTestDataSamples.openSampleOPCPackage("45690.docm");
+             XWPFEventBasedWordExtractor extractor = new XWPFEventBasedWordExtractor(pkg)) {
+            String text = extractor.getText(); // extracted once, all checks use the same text
+            assertContains(text, "2004");
+            assertContains(text, "2008");
+            assertContains(text, "(120 ");
+        }
+    }
+
     /**
      * Test that we handle things like tabs and
      * carriage returns properly in the text that
@@ -289,7 +371,18 @@ void testNoFieldCodes() throws IOException {
         try (XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("FieldCodes.docx");
             XWPFWordExtractor extractor = new XWPFWordExtractor(doc)) {
             String text = extractor.getText();
-            assertTrue(text.length() > 0);
+            assertFalse(text.isEmpty());
+            assertFalse(text.contains("AUTHOR"));
+            assertFalse(text.contains("CREATEDATE"));
+        }
+    }
+
+    @Test
+    void testNoFieldCodesEventBased() throws Exception {
+        try (OPCPackage pkg = XWPFTestDataSamples.openSampleOPCPackage("FieldCodes.docx");
+             XWPFEventBasedWordExtractor extractor = new XWPFEventBasedWordExtractor(pkg)) {
+            String text = extractor.getText();
+            assertFalse(text.isEmpty());
             assertFalse(text.contains("AUTHOR"));
             assertFalse(text.contains("CREATEDATE"));
         }
@@ -304,7 +397,7 @@ void testFldSimpleContent() throws IOException {
         try (XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("FldSimple.docx");
             XWPFWordExtractor extractor = new XWPFWordExtractor(doc)) {
             String text = extractor.getText();
-            assertTrue(text.length() > 0);
+            assertFalse(text.isEmpty());
             assertContains(text, "FldSimple.docx");
         }
     }
@@ -318,7 +411,7 @@ void testDrawings() throws IOException {
         try (XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("drawing.docx");
             XWPFWordExtractor extractor = new XWPFWordExtractor(doc)) {
             String text = extractor.getText();
-            assertTrue(text.length() > 0);
+            assertFalse(text.isEmpty());
         }
     }
 
@@ -465,8 +558,23 @@ void testGlossary() throws IOException {
 
     @Test
     void testPartsInTemplate() throws IOException {
-        try (XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("60316b.dotx")) {
-            XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
+        try (
+                XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("60316b.dotx");
+                XWPFWordExtractor extractor = new XWPFWordExtractor(doc)
+        ) {
+            String txt = extractor.getText();
+            assertContains(txt, "header 2");
+            assertContains(txt, "footer 1");
+        }
+    }
+
+    @Disabled // parts in template not supported in event based
+    @Test
+    void testPartsInTemplateEventBased() throws Exception {
+        try (
+                OPCPackage pkg = XWPFTestDataSamples.openSampleOPCPackage("60316b.dotx");
+                XWPFEventBasedWordExtractor extractor = new XWPFEventBasedWordExtractor(pkg)
+        ) {
             String txt = extractor.getText();
             assertContains(txt, "header 2");
             assertContains(txt, "footer 1");
@@ -475,17 +583,33 @@ void testPartsInTemplate() throws IOException {
 
     @Test
     void bug55966() throws IOException  {
-        try (XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("55966.docx")) {
+        try (
+                XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("55966.docx");
+                XWPFWordExtractor extractedDoc = new XWPFWordExtractor(doc)
+        ) {
             String expected = "Content control within a paragraph is here text content from within a paragraph second control with a new\n" +
                     "line\n" +
                     "\n" +
                     "Content control that is the entire paragraph\n";
 
-            XWPFWordExtractor extractedDoc = new XWPFWordExtractor(doc);
-
             String actual = extractedDoc.getText();
+            assertEquals(expected, actual);
+        }
+    }
 
-            extractedDoc.close();
+    @Disabled // extra text found in the event based extractor
+    @Test
+    void bug55966EventBased() throws Exception  {
+        try (
+                OPCPackage pkg = XWPFTestDataSamples.openSampleOPCPackage("55966.docx");
+                XWPFEventBasedWordExtractor extractor = new XWPFEventBasedWordExtractor(pkg)
+        ) {
+            String expected = "Content control within a paragraph is here text content from within a paragraph second control with a new\n" +
+                    "line\n" +
+                    "\n" +
+                    "Content control that is the entire paragraph\n";
+
+            String actual = extractor.getText();
             assertEquals(expected, actual);
         }
     }
@@ -499,6 +623,16 @@ void testCapitalizedFlag() throws IOException {
         }
     }
 
+    @Disabled // capitalized flag not supported in event based
+    @Test
+    void testCapitalizedFlagEventBased() throws Exception {
+        try (OPCPackage pkg = XWPFTestDataSamples.openSampleOPCPackage("capitalized.docx");
+             XWPFEventBasedWordExtractor extractor = new XWPFEventBasedWordExtractor(pkg)) {
+            String txt = extractor.getText();
+            assertEquals( "The following word is: CAPITALIZED.", txt.trim());
+        }
+    }
+
     @Test
     void testTika2163() throws IOException {
         try (XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("ChronologicalResume.dotx");
@@ -508,6 +642,18 @@ void testTika2163() throws IOException {
         }
     }
 
+    /**
+     * Event based counterpart of {@link #testTika2163()}.
+     */
+    @Test
+    void testTika2163EventBased() throws Exception {
+        try (OPCPackage pkg = XWPFTestDataSamples.openSampleOPCPackage("ChronologicalResume.dotx");
+             XWPFEventBasedWordExtractor extractor = new XWPFEventBasedWordExtractor(pkg)) {
+            final String text = extractor.getText();
+            assertContains(text, "but a great-looking résumé doesn’t have to be!");
+        }
+    }
+
     @Test
     void testTika3816() throws IOException {
         try (XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("tika-3816.docx");
@@ -519,6 +665,19 @@ void testTika3816() throws IOException {
         }
     }
 
+    /**
+     * Event based counterpart of {@link #testTika3816()}.
+     */
+    @Test
+    @Disabled // whitespace issue in text
+    void testTika3816EventBased() throws Exception {
+        try (OPCPackage pkg = XWPFTestDataSamples.openSampleOPCPackage("tika-3816.docx");
+             XWPFEventBasedWordExtractor extractor = new XWPFEventBasedWordExtractor(pkg)) {
+            final String text = extractor.getText();
+            assertContains(text, "Note\tDetails");
+        }
+    }
+
     private static List<XWPFSDT> extractSDTsFromBody(XWPFDocument document) {
         XWPFSDT sdt;
         XmlCursor xmlcursor = document.getDocument().getBody().newCursor();
diff --git a/poi-scratchpad/src/main/java/org/apache/poi/hwpf/converter/NumberFormatter.java b/poi-scratchpad/src/main/java/org/apache/poi/hwpf/converter/NumberFormatter.java
index 483c227c23b..a84d851cb6d 100644
--- a/poi-scratchpad/src/main/java/org/apache/poi/hwpf/converter/NumberFormatter.java
+++ b/poi-scratchpad/src/main/java/org/apache/poi/hwpf/converter/NumberFormatter.java
@@ -19,9 +19,6 @@
 
 package org.apache.poi.hwpf.converter;
 
-import java.util.Arrays;
-import java.util.Locale;
-
 import org.apache.poi.util.Beta;
 
 /**
@@ -29,75 +26,9 @@
  */
 @Beta
 public final class NumberFormatter {
-    // use char[] instead of String to speed up StringBuilder.append(), especially in JDK 11+
-    // where StringBuilder internally switched from char[] to byte[]
-    private static final char[][] ROMAN_LETTERS = Arrays.stream(
-            new String[] { "m", "cm", "d", "cd", "c", "xc", "l", "xl", "x", "ix", "v", "iv", "i" }).
-                map(String::toCharArray).
-                toArray(char[][]::new);
-
-    private static final int[] ROMAN_VALUES = { 1000, 900, 500, 400, 100, 90,
-            50, 40, 10, 9, 5, 4, 1 };
-
-    private static final int T_ARABIC = 0;
-    private static final int T_LOWER_LETTER = 4;
-    private static final int T_LOWER_ROMAN = 2;
-    private static final int T_ORDINAL = 5;
-    private static final int T_UPPER_LETTER = 3;
-    private static final int T_UPPER_ROMAN = 1;
 
+    // code was moved to org.apache.poi.util.NumberFormatter
     public static String getNumber( int num, int style ) {
-        switch ( style ) {
-            case T_UPPER_ROMAN:
-                return toRoman( num ).toUpperCase(Locale.ROOT);
-            case T_LOWER_ROMAN:
-                return toRoman( num );
-            case T_UPPER_LETTER:
-                return toLetters( num ).toUpperCase(Locale.ROOT);
-            case T_LOWER_LETTER:
-                return toLetters( num );
-            case T_ARABIC:
-            case T_ORDINAL:
-            default:
-                return String.valueOf( num );
-        }
-    }
-
-    private static String toLetters(int number) {
-        if ( number <= 0 ) {
-            throw new IllegalArgumentException( "Unsupported number: " + number );
-        }
-
-        int num = number;
-        final int radix = 26;
-
-        char[] buf = new char[33];
-        int charPos = buf.length;
-
-        while (num > 0) {
-            num--; // 1 => a, not 0 => a
-            int remainder = num % radix;
-            buf[--charPos] = (char)('a'+remainder);
-            num = (num - remainder) / radix;
-        }
-
-        return new String(buf, charPos, (buf.length - charPos));
-    }
-
-    private static String toRoman( int number ) {
-        if ( number <= 0 )
-            throw new IllegalArgumentException( "Unsupported number: " + number );
-
-        StringBuilder result = new StringBuilder();
-
-        for ( int i = 0; i < ROMAN_LETTERS.length; i++ ) {
-            char[] letter = ROMAN_LETTERS[i];
-            int value = ROMAN_VALUES[i];
-            while ( number >= value ) {
-                number -= value;
-                result.append( letter );
-            }
-        }
-        return result.toString();
+        return org.apache.poi.util.NumberFormatter.getNumber(num, style);
     }
 }
diff --git a/poi/src/main/java/org/apache/poi/util/NumberFormatter.java b/poi/src/main/java/org/apache/poi/util/NumberFormatter.java
new file mode 100644
index 00000000000..f88103d5412
--- /dev/null
+++ b/poi/src/main/java/org/apache/poi/util/NumberFormatter.java
@@ -0,0 +1,101 @@
+/*
+ *  ====================================================================
+ *    Licensed to the Apache Software Foundation (ASF) under one or more
+ *    contributor license agreements.  See the NOTICE file distributed with
+ *    this work for additional information regarding copyright ownership.
+ *    The ASF licenses this file to You under the Apache License, Version 2.0
+ *    (the "License"); you may not use this file except in compliance with
+ *    the License.  You may obtain a copy of the License at
+ *
+ *        http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *    Unless required by applicable law or agreed to in writing, software
+ *    distributed under the License is distributed on an "AS IS" BASIS,
+ *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *    See the License for the specific language governing permissions and
+ *    limitations under the License.
+ * ====================================================================
+ */
+
+package org.apache.poi.util;
+
+import java.util.Arrays;
+import java.util.Locale;
+
+/**
+ * Utility class to translate numbers into letters or roman numerals, usually for lists.
+ */
+@Beta
+public class NumberFormatter {
+    private static final int T_ARABIC = 0;
+    private static final int T_UPPER_ROMAN = 1;
+    private static final int T_LOWER_ROMAN = 2;
+    private static final int T_UPPER_LETTER = 3;
+    private static final int T_LOWER_LETTER = 4;
+    private static final int T_ORDINAL = 5;
+
+    private static final int[] ROMAN_VALUES = { 1000, 900, 500, 400, 100, 90, 50, 40, 10, 9, 5, 4, 1 };
+
+    // one entry per ROMAN_VALUES element; char[] instead of String speeds up StringBuilder.append(),
+    // especially in JDK 11+ where StringBuilder internally switched from char[] to byte[]
+    private static final char[][] ROMAN_LETTERS = Arrays.stream(
+            new String[] { "m", "cm", "d", "cd", "c", "xc", "l", "xl", "x", "ix", "v", "iv", "i" }).
+                map(String::toCharArray).
+                toArray(char[][]::new);
+
+    /**
+     * Formats a number in the given numbering style.
+     * @param num the number to format
+     * @param style 1 = upper roman, 2 = lower roman, 3 = upper letter, 4 = lower letter;
+     *              every other value (including 0 = arabic and 5 = ordinal) gives plain digits
+     * @return the formatted number
+     * @throws IllegalArgumentException if {@code num <= 0} and a roman or letter style is used
+     */
+    public static String getNumber( int num, int style ) {
+        final String lowerCase;
+        switch ( style ) {
+            case T_UPPER_ROMAN:
+            case T_LOWER_ROMAN:
+                lowerCase = toRoman( num );
+                break;
+            case T_UPPER_LETTER:
+            case T_LOWER_LETTER:
+                lowerCase = toLetters( num );
+                break;
+            case T_ARABIC:
+            case T_ORDINAL:
+            default:
+                return String.valueOf( num );
+        }
+        final boolean upperCase = style == T_UPPER_ROMAN || style == T_UPPER_LETTER;
+        return upperCase ? lowerCase.toUpperCase(Locale.ROOT) : lowerCase;
+    }
+
+    /** Bijective base 26 numbering with the letters a to z: 1 => a, 26 => z, 27 => aa. */
+    private static String toLetters(int number) {
+        if ( number <= 0 ) {
+            throw new IllegalArgumentException( "Unsupported number: " + number );
+        }
+        final StringBuilder letters = new StringBuilder();
+        for ( int rest = number; rest > 0; rest /= 26 ) {
+            rest--; // 1 => a, not 0 => a
+            letters.append( (char) ('a' + rest % 26) );
+        }
+        return letters.reverse().toString(); // the lowest digit was appended first
+    }
+
+    /** Lower case roman numeral, e.g. 4 => iv and 1994 => mcmxciv. */
+    private static String toRoman( int number ) {
+        if ( number <= 0 ) {
+            throw new IllegalArgumentException( "Unsupported number: " + number );
+        }
+        final StringBuilder roman = new StringBuilder();
+        int rest = number;
+        for ( int i = 0; i < ROMAN_VALUES.length; i++ ) {
+            for ( ; rest >= ROMAN_VALUES[i]; rest -= ROMAN_VALUES[i] ) {
+                roman.append( ROMAN_LETTERS[i] );
+            }
+        }
+        return roman.toString();
+    }
+}