2023-09-12 09:05:55 +08:00

95 lines
3.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import os
import json
# Stage a small batch of records (about 50 articles) in a JSON file.
def save_data(dataset, filetype, filename):
    """Dump *dataset* as indented JSON under the SpringerOpen buffer folder.

    The file is written to ``./SpringerOpen_buffer/<filetype>/<filename>``;
    the directory is created when it does not exist yet.  A falsy *dataset*
    (empty list, ``None``, ...) is ignored: nothing is created or printed.

    Args:
        dataset: JSON-serialisable data to store.
        filetype: Name of the sub-folder inside ``./SpringerOpen_buffer/``.
        filename: Name of the JSON file to (over)write.
    """
    if not dataset:
        return

    target_dir = "".join(("./SpringerOpen_buffer/", filetype, "/"))
    os.makedirs(target_dir, exist_ok=True)
    target_path = os.path.join(target_dir, filename)

    with open(target_path, mode="w", encoding="utf-8") as handle:
        json.dump(dataset, handle, indent=4)

    print(filetype + " data have been added to", target_path)
# Final filtering and aggregation of the staged files.
def Transf():
    """Sort the staged author/article records into four year-range files.

    Every ``*.json`` file in ``./SpringerOpen_buffer/Author_TS`` and
    ``./SpringerOpen_buffer/Article_TS`` is loaded (each file is assumed to
    hold a JSON array of objects) and its records are distributed into four
    buckets: year <= 2009, 2010-2014, 2015-2020 and anything later.  Each
    bucket is written as indented JSON to the matching file under
    ``Author_output`` / ``Article_output``.

    The year of a record is its ``volume`` value or, when that is missing or
    falsy, the ``year`` of its first ``affiliation`` entry (0 when there is
    no such entry, which places the record in the oldest bucket).  Records
    whose year is ``None`` are skipped.  Records whose year cannot be
    converted to ``int`` are skipped as well and reported in a printed
    notice instead of aborting the whole run.

    A staging folder that does not exist is treated as empty, and files are
    processed in sorted name order so the output order is reproducible.
    """
    # Inclusive upper bound of every bucket except the last ("newest").
    year_limits = (2009, 2014, 2020)

    def raw_year(record):
        """Return the unconverted year value of *record* (may be ``None``)."""
        affiliation = record.get('affiliation')
        # A missing, null or empty affiliation list behaves like "no entry".
        first = affiliation[0] if isinstance(affiliation, list) and affiliation else {}
        if not isinstance(first, dict):
            first = {}
        return record.get('volume') or first.get('year', 0)

    def Read(folder_path, output_files):
        """Bucket all records found in *folder_path* into *output_files*."""
        # Create the output folders.
        os.makedirs('./SpringerOpen_buffer/Article_output/', exist_ok=True)
        os.makedirs('./SpringerOpen_buffer/Author_output/', exist_ok=True)

        buckets = [[] for _ in range(len(year_limits) + 1)]
        unparsable = 0

        filenames = sorted(os.listdir(folder_path)) if os.path.isdir(folder_path) else []
        for filename in filenames:
            if not filename.endswith('.json'):
                continue
            file_path = os.path.join(folder_path, filename)
            with open(file_path, 'r', encoding='utf-8') as file:
                data = json.load(file)
            for record in data:
                value = raw_year(record)
                if value is None:
                    continue
                try:
                    year = int(value)
                except (TypeError, ValueError):
                    unparsable += 1
                    continue
                # Index of the first bucket whose limit covers the year;
                # falls through to the last bucket when none does.
                position = next(
                    (i for i, limit in enumerate(year_limits) if year <= limit),
                    len(year_limits),
                )
                buckets[position].append(record)

        if unparsable:
            print(f"Skipped {unparsable} record(s) with a non-numeric year in {folder_path}")

        # Write each bucket to its output file.
        for output_path, bucket in zip(output_files, buckets):
            with open(output_path, 'w', encoding='utf-8') as file:
                json.dump(bucket, file, indent=4)

    # Input folders.
    author_folder_path = './SpringerOpen_buffer/Author_TS'
    article_folder_path = './SpringerOpen_buffer/Article_TS'
    # Output files, ordered like the buckets (oldest first).
    author_output_file = [
        './SpringerOpen_buffer/Author_output/SpringerOpen_Author_output_file(oldest).json',
        './SpringerOpen_buffer/Author_output/SpringerOpen_Author_output_file(2010-2014).json',
        './SpringerOpen_buffer/Author_output/SpringerOpen_Author_output_file(2015-2020).json',
        './SpringerOpen_buffer/Author_output/SpringerOpen_Author_output_file(newest).json'
    ]
    article_output_file = [
        './SpringerOpen_buffer/Article_output/SpringerOpen_Article_output_file(oldest).json',
        './SpringerOpen_buffer/Article_output/SpringerOpen_Article_output_file(2010-2014).json',
        './SpringerOpen_buffer/Article_output/SpringerOpen_Article_output_file(2015-2020).json',
        './SpringerOpen_buffer/Article_output/SpringerOpen_Article_output_file(newest).json'
    ]
    # Read the staged files and write the bucketed output.
    Read(author_folder_path, author_output_file)
    Read(article_folder_path, article_output_file)
    # End
    print("\nData has been written into files.")
# Delete the files in the temporary staging area.
def delete():
    """Remove the staging folders ``Author_TS`` and ``Article_TS``.

    For each folder under ``./SpringerOpen_buffer/`` every regular file
    directly inside it is deleted and the folder itself is then removed.
    A folder that does not exist is skipped, so the function can be called
    repeatedly or after a run that never created one of the folders.

    Raises:
        OSError: If a folder still contains entries that are not regular
            files (e.g. a sub-directory), because ``os.rmdir`` only removes
            empty directories.
    """
    folder_paths = ['./SpringerOpen_buffer/Author_TS', './SpringerOpen_buffer/Article_TS']
    for folder_path in folder_paths:
        if not os.path.isdir(folder_path):
            # Nothing staged here; avoid FileNotFoundError from os.listdir.
            continue
        for file_name in os.listdir(folder_path):
            file_path = os.path.join(folder_path, file_name)
            if os.path.isfile(file_path):
                os.remove(file_path)
        os.rmdir(folder_path)
    print('\nAttention: The temporary storage files have been deleted!')