99 lines
4.1 KiB
Python
99 lines
4.1 KiB
Python
import os
|
||
import json
|
||
|
||
|
||
# Temporarily store the data in a JSON file (small files, 50 articles each)
|
||
def save_data(dataset, filetype, filename):
    """Write *dataset* as JSON into the buffer folder for *filetype*.

    The file is stored at ``./SpringerOpen_buffer/<filetype>/<filename>``;
    the folder is created on demand. A falsy *dataset* (empty list, ``None``,
    ...) is ignored entirely: no folder is created, no file is written and
    nothing is printed.

    Args:
        dataset: JSON-serialisable data to store.
        filetype: Name of the sub-folder inside ``./SpringerOpen_buffer/``.
        filename: Name of the JSON file to write inside that sub-folder.
    """
    if not dataset:
        return

    target_dir = "".join(("./SpringerOpen_buffer/", filetype, "/"))
    os.makedirs(target_dir, exist_ok=True)

    filepath = os.path.join(target_dir, filename)
    with open(filepath, "w", encoding='utf-8') as handle:
        json.dump(dataset, handle, indent=4)

    # Report where the data ended up.
    print(filetype + " data have been added to", filepath)
|
||
|
||
|
||
# Final filtering and aggregation of the buffered files
|
||
def Transf():
    """Sort the staged author and article records into year-range files.

    Every ``.json`` file in ``./SpringerOpen_buffer/Author_TS`` and
    ``./SpringerOpen_buffer/Article_TS`` is read, its records are grouped by
    year (<= 2009, 2010-2014, 2015-2020, >= 2021) and each group is written
    to its own JSON file under ``./SpringerOpen_buffer/Author_output/`` or
    ``./SpringerOpen_buffer/Article_output/``.

    A staging folder that does not exist is treated as empty, so the four
    output files for it are still written (each containing ``[]``). Records
    for which no integer year can be determined are left out of the output.

    Raises:
        json.JSONDecodeError: If a staged ``.json`` file is not valid JSON.
        OSError: If a staged file cannot be read or an output file cannot be
            written.
    """
    # Create the output folders.
    os.makedirs('./SpringerOpen_buffer/Article_output/', exist_ok=True)
    os.makedirs('./SpringerOpen_buffer/Author_output/', exist_ok=True)

    # Input (staging) folders.
    author_folder_path = './SpringerOpen_buffer/Author_TS'
    article_folder_path = './SpringerOpen_buffer/Article_TS'

    # Output files, ordered oldest -> newest to match the bucket order.
    author_output_file = [
        './SpringerOpen_buffer/Author_output/SpringerOpen_Author_output_file(oldest).json',
        './SpringerOpen_buffer/Author_output/SpringerOpen_Author_output_file(2010-2014).json',
        './SpringerOpen_buffer/Author_output/SpringerOpen_Author_output_file(2015-2020).json',
        './SpringerOpen_buffer/Author_output/SpringerOpen_Author_output_file(newest).json',
    ]
    article_output_file = [
        './SpringerOpen_buffer/Article_output/SpringerOpen_Article_output_file(oldest).json',
        './SpringerOpen_buffer/Article_output/SpringerOpen_Article_output_file(2010-2014).json',
        './SpringerOpen_buffer/Article_output/SpringerOpen_Article_output_file(2015-2020).json',
        './SpringerOpen_buffer/Article_output/SpringerOpen_Article_output_file(newest).json',
    ]

    # Read the staged files and write the grouped output.
    _bucket_folder(author_folder_path, author_output_file)
    _bucket_folder(article_folder_path, article_output_file)

    print("\nData has been written into files.")


def _record_year(record):
    """Return the integer year used to group *record*, or None to skip it.

    The year is ``record['volume']`` when that value is truthy, otherwise the
    ``'year'`` of the first ``'affiliation'`` entry (0 when there is no such
    entry or it has no ``'year'`` key).
    """
    if not isinstance(record, dict):
        return None

    volume = record.get('volume')

    # A missing, empty or malformed affiliation list behaves like the
    # default ``[{}]``: the year falls back to 0.
    affiliation = record.get('affiliation', [{}])
    first = affiliation[0] if isinstance(affiliation, list) and affiliation else {}
    if not isinstance(first, dict):
        first = {}
    year = first.get('year', 0)

    if volume is None and year is None:
        return None
    try:
        return int(volume or year)
    except (TypeError, ValueError, OverflowError):
        # The value is not an integer year (e.g. "n/a" or None).
        return None


def _bucket_folder(folder_path, output_files):
    """Group the records of every ``.json`` file in *folder_path* by year.

    The four groups (<= 2009, 2010-2014, 2015-2020, >= 2021) are written, in
    that order, to the paths given in *output_files*.
    """
    buckets = [[], [], [], []]

    # The staging folder only exists if something was saved into it.
    # Sorting makes the record order in the output deterministic.
    filenames = sorted(os.listdir(folder_path)) if os.path.isdir(folder_path) else []

    for filename in filenames:
        if not filename.endswith('.json'):
            continue
        file_path = os.path.join(folder_path, filename)
        with open(file_path, 'r', encoding='utf-8') as file:
            data = json.load(file)
        if not isinstance(data, list):
            continue

        for record in data:
            year = _record_year(record)
            if year is None:
                continue
            if year <= 2009:
                buckets[0].append(record)
            elif year <= 2014:
                buckets[1].append(record)
            elif year <= 2020:
                buckets[2].append(record)
            else:
                buckets[3].append(record)

    # Write each group to its output file.
    for output_file, bucket in zip(output_files, buckets):
        with open(output_file, 'w', encoding='utf-8') as file:
            json.dump(bucket, file, indent=4)
|
||
|
||
|
||
# Delete the files in the temporary staging area
|
||
def delete():
    """Delete the temporary staging folders and the files inside them.

    Removes every regular file in ``./SpringerOpen_buffer/Author_TS`` and
    ``./SpringerOpen_buffer/Article_TS`` and then the folders themselves.
    A folder that does not exist is skipped, so the function can be called
    when nothing was staged or when cleanup has already run.

    Raises:
        OSError: If a folder still contains entries that are not regular
            files (for example sub-folders); ``os.rmdir`` only removes empty
            folders.
    """
    folder_paths = ['./SpringerOpen_buffer/Author_TS', './SpringerOpen_buffer/Article_TS']
    for folder_path in folder_paths:
        # Nothing to clean if the folder was never created or is already gone.
        if not os.path.isdir(folder_path):
            continue

        for file_name in os.listdir(folder_path):
            file_path = os.path.join(folder_path, file_name)
            if os.path.isfile(file_path):
                os.remove(file_path)
        os.rmdir(folder_path)

    print('\nAttention: The temporary storage files have been deleted!')
|