Added split dataset
This commit is contained in:
parent
229f6bb027
commit
8a7a011cb1
3137
data/datasets/三国演义/splits/m3_test.jsonl
Normal file
3137
data/datasets/三国演义/splits/m3_test.jsonl
Normal file
File diff suppressed because it is too large
Load Diff
25096
data/datasets/三国演义/splits/m3_train.jsonl
Normal file
25096
data/datasets/三国演义/splits/m3_train.jsonl
Normal file
File diff suppressed because it is too large
Load Diff
3137
data/datasets/三国演义/splits/m3_val.jsonl
Normal file
3137
data/datasets/三国演义/splits/m3_val.jsonl
Normal file
File diff suppressed because it is too large
Load Diff
2625
data/datasets/三国演义/splits/reranker_test.jsonl
Normal file
2625
data/datasets/三国演义/splits/reranker_test.jsonl
Normal file
File diff suppressed because it is too large
Load Diff
20990
data/datasets/三国演义/splits/reranker_train.jsonl
Normal file
20990
data/datasets/三国演义/splits/reranker_train.jsonl
Normal file
File diff suppressed because one or more lines are too long
2623
data/datasets/三国演义/splits/reranker_val.jsonl
Normal file
2623
data/datasets/三国演义/splits/reranker_val.jsonl
Normal file
File diff suppressed because it is too large
Load Diff
24
data/datasets/三国演义/splits/split_summary.json
Normal file
24
data/datasets/三国演义/splits/split_summary.json
Normal file
@@ -0,0 +1,24 @@
{
  "input_dir": "data/datasets/三国演义",
  "output_dir": "data/datasets/三国演义/splits",
  "split_ratios": {
    "train": 0.8,
    "validation": 0.1,
    "test": 0.1
  },
  "seed": 42,
  "datasets": {
    "bge_m3": {
      "total_samples": 31370,
      "train_samples": 25096,
      "val_samples": 3137,
      "test_samples": 3137
    },
    "reranker": {
      "total_samples": 26238,
      "train_samples": 20990,
      "val_samples": 2623,
      "test_samples": 2625
    }
  }
}
Loading…
x
Reference in New Issue
Block a user