class SlicedTokenSource(TokenSource):
    """
    A token source that provides a slice of another token source.

    Only contiguous slices are supported, i.e. the slice step must be ``None`` or ``1``.
    A negative ``start`` or ``stop`` is resolved relative to the end of the wrapped source.

    :param source: The token source to take the slice from.
    :param source_slice: The slice of ``source`` that this token source exposes.
    :param work_dir: Passed through to the base class initializer.
    :param label: Passed through to the base class initializer.

    :raises OLMoConfigurationError: If the slice has a step other than 1, if a negative
        start reaches back past the beginning of ``source``, or if the slice selects
        no tokens at all.
    """

    def __init__(
        self,
        source: TokenSource,
        source_slice: slice,
        *,
        work_dir: PathOrStr,
        label: Optional[str] = None,
    ):
        # Reject unsupported slices before any state is set up.
        requested_step = source_slice.step
        if requested_step is not None and requested_step != 1:
            raise OLMoConfigurationError(
                f"'{self.__class__.__name__}' does not support slices with a step other than 1."
            )

        requested_start = source_slice.start
        if requested_start is not None and requested_start < -source.num_tokens:
            raise OLMoConfigurationError(
                f"Slice start {source_slice.start} is out of bounds for source with "
                f"{source.num_tokens} tokens."
            )

        super().__init__(work_dir=work_dir, label=label)
        self._source = source
        self._slice = source_slice

        # 'num_tokens' reads the attributes assigned above, so this check must come last.
        if self.num_tokens == 0:
            raise OLMoConfigurationError(
                f"{self.__class__.__name__} created with an empty slice ({source_slice}) from source "
                f"with {source.num_tokens:,d} tokens."
            )

    @property
    def source(self) -> TokenSource:
        """The wrapped token source that the slice is taken from."""
        return self._source

    @property
    def source_slice(self) -> slice:
        """The slice object this token source was created with."""
        return self._slice

    @property
    def slice_start(self) -> int:
        """
        The non-negative offset into the source at which the slice begins.

        A missing start maps to ``0`` and a negative start is counted back from the end
        of the source.
        """
        start = self.source_slice.start
        if start is None:
            return 0
        if start >= 0:
            return start
        # Negative start: the constructor already rejects values below '-num_tokens'.
        assert self.source_slice.start >= -self.source.num_tokens
        return self.source.num_tokens + start

    @property
    def slice_stop(self) -> int:
        """
        The non-negative offset into the source at which the slice ends (exclusive).

        A missing stop maps to the source's token count. A negative stop is counted back
        from the end of the source and floored at ``0``. A non-negative stop is returned
        as given, so it may exceed the number of tokens in the source.
        """
        stop = self.source_slice.stop
        if stop is None:
            return self.source.num_tokens
        if stop >= 0:
            return stop
        return max(0, self.source.num_tokens + stop)

    @property
    def slice_step(self) -> int:
        """The slice step, with a missing step reported as ``1``."""
        step = self.source_slice.step
        return 1 if step is None else step

    @ft.cached_property
    def fingerprint(self) -> str:
        """
        A SHA-256 hex digest computed from the class name, the resolved slice bounds and
        step, and the fingerprint of the wrapped source.
        """
        payload = (
            f"class={self.__class__.__name__},"
            f"slice_start={self.slice_start},"
            f"slice_stop={self.slice_stop},"
            f"slice_step={self.slice_step},"
            f"source={self.source.fingerprint},"
        )
        return hashlib.sha256(payload.encode()).hexdigest()

    @property
    def num_tokens(self) -> int:
        """
        The number of tokens covered by the slice.

        The stop is capped at the source's token count, and the result is ``0`` when the
        slice starts at or beyond the end of the source.

        :raises NotImplementedError: If the slice step is not 1.
        """
        if self.slice_step != 1:
            raise NotImplementedError(
                f"'{self.__class__.__name__}' does not support slices with a step other than 1."
            )

        first = self.slice_start
        available = self.source.num_tokens
        if first >= available:
            return 0

        last = min(self.slice_stop, available)
        return max(0, last - first)

    def get_token_range(self, start_idx: int, end_idx: int) -> TokenRange:
        """
        Get a range of tokens, with indices given relative to the start of this slice.

        The indices are passed through ``validate_indices()`` and then shifted by
        :data:`slice_start` before the request is delegated to the wrapped source.

        :param start_idx: Start of the requested range, relative to this slice.
        :param end_idx: End of the requested range, relative to this slice.

        :returns: Whatever the wrapped source returns for the shifted range.

        :raises NotImplementedError: If the slice step is not 1.
        """
        if self.slice_step != 1:
            raise NotImplementedError(
                f"'{self.__class__.__name__}' does not support slices with a step other than 1."
            )

        start_idx, end_idx = self.validate_indices(start_idx, end_idx)
        offset = self.slice_start
        return self.source.get_token_range(start_idx + offset, end_idx + offset)