Skip to content

Base Module

Abstract base classes for text extraction.

Classes:

Name Description
FileTypeHandler

Abstract base class for file type-specific handlers.

TextExtractor

Abstract base class for text extractors.

Classes

FileTypeHandler

Bases: ABC

Abstract base class for file type-specific handlers.

Methods:

Name Description
extract

Extract text synchronously from a file.

extract_async

Extract text asynchronously from a file.

Source code in textxtract/core/base.py
class FileTypeHandler(ABC):
    """Interface for handlers dedicated to a single file type.

    Both ``extract`` and ``extract_async`` are abstract, so a subclass
    has to override each of them before it can be instantiated.
    """

    @abstractmethod
    def extract(
        self,
        file_path: Path,
        config: Optional[dict] = None,
    ) -> str:
        """Return the text contained in ``file_path`` (blocking variant).

        Args:
            file_path: Path of the file to read.
            config: Optional settings mapping; defaults to ``None``. The
                keys it may carry are not defined by this interface.

        Returns:
            The extracted text.
        """
        ...

    @abstractmethod
    async def extract_async(
        self,
        file_path: Path,
        config: Optional[dict] = None,
    ) -> str:
        """Return the text contained in ``file_path`` (awaitable variant).

        Args:
            file_path: Path of the file to read.
            config: Optional settings mapping; defaults to ``None``. The
                keys it may carry are not defined by this interface.

        Returns:
            The extracted text.
        """
        ...

Functions

extract abstractmethod
extract(file_path, config=None)

Extract text synchronously from a file.

Source code in textxtract/core/base.py
@abstractmethod
def extract(
    self,
    file_path: Path,
    config: Optional[dict] = None,
) -> str:
    """Return the text contained in ``file_path`` (blocking variant).

    Args:
        file_path: Path of the file to read.
        config: Optional settings mapping; defaults to ``None``. The keys
            it may carry are not defined by this interface.

    Returns:
        The extracted text.
    """
    ...
extract_async abstractmethod async
extract_async(file_path, config=None)

Extract text asynchronously from a file.

Source code in textxtract/core/base.py
@abstractmethod
async def extract_async(
    self,
    file_path: Path,
    config: Optional[dict] = None,
) -> str:
    """Return the text contained in ``file_path`` (awaitable variant).

    Args:
        file_path: Path of the file to read.
        config: Optional settings mapping; defaults to ``None``. The keys
            it may carry are not defined by this interface.

    Returns:
        The extracted text.
    """
    ...

TextExtractor

Bases: ABC

Abstract base class for text extractors.

Methods:

Name Description
extract

Extract text synchronously from file path or bytes.

Source code in textxtract/core/base.py
class TextExtractor(ABC):
    """Interface for objects that turn a file source into text.

    ``extract`` is abstract, so a subclass has to override it before it
    can be instantiated.
    """

    @abstractmethod
    def extract(
        self, source: Union[Path, str, bytes], filename: Optional[str] = None, config: Optional[dict] = None
    ) -> str:
        """Return the text extracted from ``source`` (blocking call).

        Args:
            source: A file path (``Path`` or ``str``) or raw ``bytes``.
            filename: Optional name; defaults to ``None``.
                NOTE(review): presumably identifies the content when
                ``source`` is bytes -- confirm against implementations.
            config: Optional settings mapping; defaults to ``None``. The
                keys it may carry are not defined by this interface.

        Returns:
            The extracted text.
        """
        ...

Functions

extract abstractmethod
extract(source, filename=None, config=None)

Extract text synchronously from file path or bytes.

Source code in textxtract/core/base.py
@abstractmethod
def extract(
    self, source: Union[Path, str, bytes], filename: Optional[str] = None, config: Optional[dict] = None
) -> str:
    """Return the text extracted from ``source`` (blocking call).

    Args:
        source: A file path (``Path`` or ``str``) or raw ``bytes``.
        filename: Optional name; defaults to ``None``.
            NOTE(review): presumably identifies the content when
            ``source`` is bytes -- confirm against implementations.
        config: Optional settings mapping; defaults to ``None``. The keys
            it may carry are not defined by this interface.

    Returns:
        The extracted text.
    """
    ...