Source code for remarking.highlight_extractor.highlight_extractor
from abc import ABCMeta, abstractmethod
from dataclasses import dataclass
from typing import List
from remarking import models
@dataclass
class ExtractorData:
    """Represents an extractor mapping entry.

    This is used by remarking to generate extractor choices for the end user.

    :param extractor_name: The name of the extractor on the command line.
    :param instance: An instance of the extractor.
    :param description: A description for the extractor. This is shown when
        running ``remarking list extractors``.
    """

    extractor_name: str
    # Forward reference: HighlightExtractor is defined later in this module.
    instance: 'HighlightExtractor'
    description: str
class HighlightExtractor(metaclass=ABCMeta):  # pylint: disable=too-few-public-methods
    """Base class for highlight extractors.

    Extractors are run after documents are downloaded. remarking calls
    :meth:`HighlightExtractor.get_highlights` for each document downloaded.

    For example, :class:`RemarkableHighlightExtractor` will extract the
    highlights from the built-in reMarkable highlighting functionality.
    """

    @classmethod
    @abstractmethod
    def get_extractor_instance_data(cls) -> List[ExtractorData]:
        """Return a list of :class:`ExtractorData` instances representing
        different run options for the extractor.
        """

    @abstractmethod
    def get_highlights(self, working_path: str, document: models.Document) -> List[models.Highlight]:
        """Retrieve all highlights for document.

        :param working_path: The path on the operating system where all
            documents were downloaded. Documents are downloaded from the cloud
            and unzipped into this directory. For more information on the
            layout check out
            `<https://remarkablewiki.com/tech/filesystem#user_data_directory_structure>`_.
        :param document: The document to extract highlights for.
        :return: A list of highlights for the document.
        """
# Typographic ("curly") quotes mapped to their plain ASCII equivalents.
# Built once at import time so each call is a single translate pass.
_QUOTE_TRANSLATION = str.maketrans({
    "\u201c": "\"",  # left double quotation mark
    "\u2018": "'",   # left single quotation mark
    "\u2019": "'",   # right single quotation mark
    "\u201d": "\"",  # right double quotation mark
})


def clean_highlight_text(text: str) -> str:
    """Return a cleaned version of the passed text.

    Curly single and double quotation marks are replaced with their straight
    ASCII counterparts; every other character is left untouched.

    :param text: The raw highlight text.
    :return: The text with typographic quotes normalised to ``'`` and ``"``.
    """
    return text.translate(_QUOTE_TRANSLATION)