Bases: FileTypeHandler
Handler for extracting text from DOCX files.
Methods: extract, extract_async
Source code in textxtract/handlers/docx.py
class DOCXHandler(FileTypeHandler):
    """Handler for extracting text from DOCX files."""

    def extract(self, file_path: Path, config: Optional[dict] = None) -> str:
        """Return the document's paragraph texts joined by newlines.

        Args:
            file_path: Location of the DOCX file to read.
            config: Optional settings mapping; not read by this implementation.

        Returns:
            The ``text`` of every entry in ``doc.paragraphs``, one per line.

        Raises:
            ExtractionError: If importing ``docx``, opening the file, or
                reading the paragraphs raises any ``Exception``. The
                original exception is attached as ``__cause__``.
        """
        try:
            # Imported inside the try block so that a failure to import
            # ``docx`` is reported as ExtractionError like any other failure.
            from docx import Document

            doc = Document(file_path)
            # NOTE(review): only ``doc.paragraphs`` is read; text held in
            # tables, headers, or footers is presumably not included - confirm.
            return "\n".join(paragraph.text for paragraph in doc.paragraphs)
        except Exception as e:
            # Chain explicitly so the root cause survives in the traceback.
            raise ExtractionError(f"DOCX extraction failed: {e}") from e

    async def extract_async(
        self, file_path: Path, config: Optional[dict] = None
    ) -> str:
        """Run :meth:`extract` in a worker thread and await its result.

        Args:
            file_path: Location of the DOCX file to read.
            config: Optional settings mapping, forwarded to ``extract``.

        Returns:
            Whatever ``extract`` returns for the same arguments.
        """
        import asyncio

        return await asyncio.to_thread(self.extract, file_path, config)
|
Functions
extract
extract(file_path, config=None)
Source code in textxtract/handlers/docx.py
def extract(self, file_path: Path, config: Optional[dict] = None) -> str:
    """Return the document's paragraph texts joined by newlines.

    Args:
        file_path: Location of the DOCX file to read.
        config: Optional settings mapping; not read by this implementation.

    Returns:
        The ``text`` of every entry in ``doc.paragraphs``, one per line.

    Raises:
        ExtractionError: If importing ``docx``, opening the file, or reading
            the paragraphs raises any ``Exception``. The original exception
            is attached as ``__cause__``.
    """
    try:
        # Imported inside the try block so that a failure to import ``docx``
        # is reported as ExtractionError like any other failure.
        from docx import Document

        doc = Document(file_path)
        return "\n".join(paragraph.text for paragraph in doc.paragraphs)
    except Exception as e:
        # Chain explicitly so the root cause survives in the traceback.
        raise ExtractionError(f"DOCX extraction failed: {e}") from e
|
extract_async
async
extract_async(file_path, config=None)
Source code in textxtract/handlers/docx.py
async def extract_async(
    self, file_path: Path, config: Optional[dict] = None
) -> str:
    """Await the result of ``self.extract`` executed in a separate thread.

    Args:
        file_path: Location of the DOCX file to read.
        config: Optional settings mapping, forwarded to ``extract``.

    Returns:
        Whatever ``self.extract(file_path, config)`` returns.
    """
    from asyncio import to_thread

    # Hand the blocking call to a worker thread so the event loop stays free.
    extracted = await to_thread(self.extract, file_path, config)
    return extracted
|