2025-07-23 15:06:30 +08:00

24 lines
492 B
JSON

{
"input_dir": "data/datasets/三国演义",
"output_dir": "data/datasets/三国演义/splits",
"split_ratios": {
"train": 0.8,
"validation": 0.1,
"test": 0.1
},
"seed": 42,
"datasets": {
"bge_m3": {
"total_samples": 31370,
"train_samples": 25096,
"val_samples": 3137,
"test_samples": 3137
},
"reranker": {
"total_samples": 26238,
"train_samples": 20990,
"val_samples": 2623,
"test_samples": 2625
}
}
}