Added split dataset

This commit is contained in:
ldy
2025-07-23 15:06:30 +08:00
parent 229f6bb027
commit 8a7a011cb1
7 changed files with 57632 additions and 0 deletions

View File

@@ -0,0 +1,24 @@
{
"input_dir": "data/datasets/三国演义",
"output_dir": "data/datasets/三国演义/splits",
"split_ratios": {
"train": 0.8,
"validation": 0.1,
"test": 0.1
},
"seed": 42,
"datasets": {
"bge_m3": {
"total_samples": 31370,
"train_samples": 25096,
"val_samples": 3137,
"test_samples": 3137
},
"reranker": {
"total_samples": 26238,
"train_samples": 20990,
"val_samples": 2623,
"test_samples": 2625
}
}
}