@dataclass
class MonkeyPatcherCallback(Callback):
    """
    Cache the sub-meshes returned by ``DeviceMesh.__getitem__()``.

    While looking into performance issues with OLMo3 training, we discovered
    that ``DeviceMesh.__getitem__()`` can become a bottleneck because it gets
    called very often by FSDP and creates a new sub-mesh object each time.
    So this callback patches that method to cache the sub-meshes.

    The patch replaces the attribute on the ``DeviceMesh`` class itself, so it
    applies to every mesh in the process and is not undone by this callback.
    Because the cache is a :func:`functools.lru_cache`, both the mesh and the
    key passed to ``__getitem__`` must be hashable.
    """

    def pre_train(self):
        """Install the cached ``DeviceMesh.__getitem__`` if not already installed."""
        current_getitem = DeviceMesh.__getitem__

        # ``lru_cache`` wrappers expose ``cache_info``. If it is present the
        # method has already been patched (e.g. by an earlier call to this
        # hook), and wrapping it again would only stack a redundant cache
        # layer on top of the existing one.
        if hasattr(current_getitem, "cache_info"):
            return

        # Cache DeviceMesh.__getitem__
        DeviceMesh.__getitem__ = functools.lru_cache(maxsize=None)(current_getitem)