Added split dataset

This commit is contained in:
ldy 2025-07-23 15:06:30 +08:00
parent 229f6bb027
commit 8a7a011cb1
7 changed files with 57632 additions and 0 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,24 @@
{
"input_dir": "data/datasets/三国演义",
"output_dir": "data/datasets/三国演义/splits",
"split_ratios": {
"train": 0.8,
"validation": 0.1,
"test": 0.1
},
"seed": 42,
"datasets": {
"bge_m3": {
"total_samples": 31370,
"train_samples": 25096,
"val_samples": 3137,
"test_samples": 3137
},
"reranker": {
"total_samples": 26238,
"train_samples": 20990,
"val_samples": 2623,
"test_samples": 2625
}
}
}