class SourceABC(metaclass=ABCMeta):
    """
    Abstract base class for source types.

    :param work_dir: A common local working directory that can be used for caching files during
        preprocessing.
    :param label: An optional label for this source, useful for debugging and visualizing.

    :raises OLMoConfigurationError: If ``work_dir`` is a URL rather than a local path.
    """

    DISPLAY_ICON: ClassVar[str] = ""  # Nerd Font icon for visualizations

    def __init__(self, *, work_dir: PathOrStr, label: Optional[str] = None):
        if io.is_url(work_dir):
            raise OLMoConfigurationError(
                f"'work_dir' should be a local path, not a URL ('{work_dir}')."
            )

        work_dir = Path(io.normalize_path(work_dir))
        # If the caller handed us the class-specific directory itself, step up to its parent
        # so that the ``work_dir`` property does not append the class name a second time.
        if work_dir.name == self.__class__.__name__:
            work_dir = work_dir.parent

        self._common_work_dir = work_dir
        # Rank information is looked up once at construction time and then served from the
        # read-only properties below.
        self._fs_local_rank = dist_utils.get_fs_local_rank()
        self._rank = dist_utils.get_rank()
        self._label = label

    def __repr__(self) -> str:
        # Only the first 7 characters of the fingerprint are shown to keep the repr short.
        return f"{self.__class__.__name__}({self.fingerprint[:7]})"

    @property
    def common_work_dir(self) -> Path:
        """
        The common working directory, usually the parent of :data:`work_dir`.
        """
        return self._common_work_dir

    @property
    def work_dir(self) -> Path:
        """
        The class-specific local working directory that can be used by the source for caching
        files during preprocessing.
        """
        return self.common_work_dir / self.__class__.__name__

    @property
    def fs_local_rank(self) -> int:
        """
        The local rank of the current process with respect to filesystem access of the working
        directory.
        """
        return self._fs_local_rank

    @property
    def rank(self) -> int:
        """The global rank of the current process across the entire distributed job."""
        return self._rank

    @property
    def label(self) -> Optional[str]:
        """The label assigned to this source."""
        return self._label

    @property
    @abstractmethod
    def fingerprint(self) -> str:
        """A unique, deterministic string representing the ordered contents of the source."""
        raise NotImplementedError

    @property
    @abstractmethod
    def num_tokens(self) -> int:
        """The number of tokens available from this source."""
        raise NotImplementedError

    @abstractmethod
    def children(self) -> Iterable["SourceABC"]:
        """Get the child sources that make up this source, if any."""
        raise NotImplementedError

    @property
    def is_leaf(self) -> bool:
        """Check if this source is a leaf node (i.e. has no children)."""
        # Stop at the first child instead of materializing the iterable, so this works for
        # generators and never consumes more than one item.
        for _ in self.children():
            return False
        return True