[docs]classSlackNotificationSetting(StrEnum):""" Defines the notifications settings for the Slack notifier callback. """all="all"""" Send all types notifications. """end_only="end_only"""" Only send a notification when the experiment ends (successfully or with a failure). """failure_only="failure_only"""" Only send a notification when the experiment fails. """none="none"""" Don't send any notifications. """
@dataclass
class SlackNotifierCallback(Callback):
    """
    Posts run status messages to a Slack webhook.

    Every hook is a no-op unless :data:`enabled` is true and ``get_rank()`` is 0.
    Which events produce a message is controlled by :data:`notifications`.
    """

    name: Optional[str] = None
    """
    A name to give the run.
    """

    notifications: SlackNotificationSetting = SlackNotificationSetting.end_only
    """
    The notification settings.
    """

    enabled: bool = True
    """
    Set to false to disable this callback.
    """

    webhook_url: Optional[str] = None
    """
    The webhook URL to post. If not set, will check the environment variable ``SLACK_WEBHOOK_URL``.
    """

    def _should_run(self) -> bool:
        """Return ``True`` when this callback is enabled and running on rank 0."""
        # ``and`` short-circuits, so ``get_rank()`` is not called when disabled.
        return self.enabled and get_rank() == 0

    def post_attach(self):
        """
        Check up front that a webhook URL is available.

        :raises OLMoEnvironmentError: If neither :data:`webhook_url` nor the
            webhook environment variable is set.
        """
        if not self._should_run():
            return

        if self.webhook_url is None and SLACK_WEBHOOK_URL_ENV_VAR not in os.environ:
            raise OLMoEnvironmentError(f"missing env var '{SLACK_WEBHOOK_URL_ENV_VAR}'")

    def pre_train(self):
        """Report that the run started (only with the ``all`` setting)."""
        if not self._should_run():
            return

        if self.notifications == SlackNotificationSetting.all:
            self._post_message("started")

    def post_checkpoint_saved(self, path: PathOrStr):
        """
        Report a saved checkpoint (only with the ``all`` setting).

        :param path: Location of the checkpoint, included verbatim in the message.
        """
        if not self._should_run():
            return

        if self.notifications == SlackNotificationSetting.all:
            self._post_message(f"saved a checkpoint to {path}")

    def post_epoch(self):
        """Report a completed epoch (only with the ``all`` setting)."""
        if not self._should_run():
            return

        if self.notifications == SlackNotificationSetting.all:
            self._post_message("completed an epoch")

    def post_train(self):
        """Report that training finished or was canceled (``all`` and ``end_only``)."""
        if not self._should_run():
            return

        if self.notifications in (
            SlackNotificationSetting.all,
            SlackNotificationSetting.end_only,
        ):
            if self.trainer.is_canceled:
                self._post_message("canceled")
            else:
                self._post_message("completed successfully")

    def on_error(self, exc: BaseException):
        """
        Report a failure together with a truncated copy of the error text.

        Sent for every setting except ``none``.

        :param exc: The exception that ended the run.
        """
        if not self._should_run():
            return

        if self.notifications in (
            SlackNotificationSetting.all,
            SlackNotificationSetting.end_only,
            SlackNotificationSetting.failure_only,
        ):
            exc_lines = str(exc).rstrip("\n").split("\n")
            # Cap the number of lines quoted in the message and mark the cut.
            if len(exc_lines) > EXC_LINE_LIMIT:
                exc_lines = exc_lines[:EXC_LINE_LIMIT]
                exc_lines.append("...")
            exc_str = "\n".join(exc_lines)
            self._post_message(f"failed with error:\n```\n{exc_str}\n```")

    def _post_message(self, msg: str):
        """
        Format ``msg`` with progress details and post it to the webhook.

        Delivery is best-effort: any failure while sending is logged and swallowed.

        :param msg: Status text placed after the ``Run`` prefix of the message.

        :raises OLMoEnvironmentError: If no webhook URL is configured.
        """
        webhook_url = self.webhook_url or os.environ.get(SLACK_WEBHOOK_URL_ENV_VAR)
        if webhook_url is None:
            raise OLMoEnvironmentError(f"missing env var '{SLACK_WEBHOOK_URL_ENV_VAR}'")

        progress = (
            f"*Progress:*\n"
            f"- step: {self.step:,d}\n"
            f"- epoch: {self.trainer.epoch}\n"
            f"- tokens: {self.trainer.global_train_tokens_seen:,d}"
        )
        if self.name is not None:
            msg = f"Run `{self.name}` {msg}\n{progress}"
        else:
            msg = f"Run {msg}\n{progress}"

        if BEAKER_JOB_ID_ENV_VAR in os.environ:
            msg = f"{msg}\n*Beaker job:* https://beaker.org/job/{os.environ[BEAKER_JOB_ID_ENV_VAR]}"

        try:
            # requests waits indefinitely without an explicit timeout; bound the
            # call so an unresponsive endpoint cannot hang the caller.
            response = requests.post(webhook_url, json={"text": msg}, timeout=10)
            # Turn an HTTP error status into an exception so it gets logged below
            # instead of being ignored.
            response.raise_for_status()
        except Exception as e:
            log.exception(f"Failed to send Slack notification: {e}")