"""
Integrated data processing and augmentation module for BGE fine-tuning.

Now with a unified dataset pipeline supporting both embedding and reranker
formats.

Public names are resolved lazily through the module-level ``__getattr__``
below, so a submodule (and whatever it imports) is only loaded when one of
its names is first requested.
"""

import importlib

# Maps every lazily exported name to ``(submodule, attribute)``. The submodule
# is resolved relative to this module's package, exactly like
# ``from .<submodule> import <attribute>``.
_LAZY_EXPORTS = {
    # Core dataset classes - from integrated dataset.py
    "BGEDataset": ("dataset", "BGEDataset"),
    "BGEM3Dataset": ("dataset", "BGEM3Dataset"),
    "BGERerankerDataset": ("dataset", "BGERerankerDataset"),
    "JointDataset": ("dataset", "JointDataset"),
    # Factory functions for dataset creation
    "create_embedding_dataset": ("dataset", "create_embedding_dataset"),
    "create_reranker_dataset": ("dataset", "create_reranker_dataset"),
    # Data migration utilities
    "migrate_nested_to_flat": ("dataset", "migrate_nested_to_flat"),
    # Collate functions
    "collate_embedding_batch": ("dataset", "collate_embedding_batch"),
    "collate_reranker_batch": ("dataset", "collate_reranker_batch"),
    # Data preprocessing and augmentation
    "DataPreprocessor": ("preprocessing", "DataPreprocessor"),
    "DataAugmenter": ("preprocessing", "DataAugmenter"),
    "QueryAugmenter": ("augmentation", "QueryAugmenter"),
    "PassageAugmenter": ("augmentation", "PassageAugmenter"),
    "HardNegativeAugmenter": ("augmentation", "HardNegativeAugmenter"),
    "CrossLingualAugmenter": ("augmentation", "CrossLingualAugmenter"),
    # Hard negative mining
    "ANCEHardNegativeMiner": ("hard_negative_mining", "ANCEHardNegativeMiner"),
    "HardNegativeDataAugmenter": (
        "hard_negative_mining",
        "HardNegativeDataAugmenter",
    ),
    # Backward compatibility - remove after migration
    "M3Dataset": ("dataset", "BGEM3Dataset"),
    "RerankerDataset": ("dataset", "BGERerankerDataset"),
    # Deprecated - use BGEDataset with legacy_support=True
    "NestedDataset": ("dataset", "BGEDataset"),
}


def __getattr__(name: str) -> object:
    """Lazy loading to avoid import errors for missing dependencies.

    Called by Python only when normal module attribute lookup fails
    (PEP 562). The requested name is looked up in ``_LAZY_EXPORTS`` and the
    owning submodule is imported on demand.

    Args:
        name: Attribute requested on this module.

    Returns:
        The object the name maps to in its submodule.

    Raises:
        AttributeError: If ``name`` is not one of the lazily exported names.
        ImportError: If the submodule cannot be imported or does not define
            the mapped attribute.
    """
    try:
        submodule, attribute = _LAZY_EXPORTS[name]
    except KeyError:
        raise AttributeError(
            f"module '{__name__}' has no attribute '{name}'"
        ) from None

    # Relative import anchored at this module's package, equivalent to
    # ``from .<submodule> import <attribute>``.
    module = importlib.import_module(f".{submodule}", __spec__.parent)
    try:
        return getattr(module, attribute)
    except AttributeError as exc:
        # Keep the ImportError contract of a ``from ... import`` statement so
        # a broken submodule is not mistaken for an unknown attribute.
        raise ImportError(
            f"cannot import name {attribute!r} from {module.__name__!r}"
        ) from exc


def __dir__() -> list[str]:
    """Return the module's names, including the lazily loaded exports."""
    return sorted(set(globals()) | set(_LAZY_EXPORTS))