2023-07-27 10:26:02 +08:00

93 lines
3.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import os
import json
# Buffer the data in small JSON files (50 articles each)
def save_data(dataset, filetype, filename):
    """Dump *dataset* to ``./SpringerOpen_buffer/<filetype>/<filename>`` as JSON.

    Nothing is written (and nothing is printed) when *dataset* is falsy.
    Otherwise the target directory is created if needed, the data is written
    with 4-space indentation, and a confirmation line is printed.

    Args:
        dataset: JSON-serialisable object to store.
        filetype: Name of the sub-folder below ``./SpringerOpen_buffer/``.
        filename: Name of the JSON file to create inside that sub-folder.
    """
    # Guard clause: an empty dataset produces no file and no message.
    if not dataset:
        return

    target_dir = "./SpringerOpen_buffer/" + filetype + "/"
    os.makedirs(target_dir, exist_ok=True)
    target_path = os.path.join(target_dir, filename)

    with open(target_path, "w", encoding='utf-8') as out:
        json.dump(dataset, out, indent=4)

    print(filetype + " data have been added to", target_path)
# Final filtering and aggregation of the buffered files
def Transf():
    """Aggregate the buffered JSON files into four year-range output files.

    Every ``*.json`` file under ``./SpringerOpen_buffer/Author`` and
    ``./SpringerOpen_buffer/Article`` is loaded, and each record is placed in
    exactly one of four buckets by year (<= 2009, 2010-2014, 2015-2020,
    >= 2021). Each bucket is written to the matching file under
    ``./SpringerOpen_buffer/Author_output/`` or
    ``./SpringerOpen_buffer/Article_output/``. A confirmation line is printed
    at the end.
    """

    def record_year(record):
        """Return the integer year used to bucket *record*, or None to skip it."""
        if not isinstance(record, dict):
            return None

        # Tolerate a missing, empty or malformed affiliation list instead of
        # raising IndexError/TypeError on ``[0]``.
        affiliations = record.get('affiliation', [{}])
        if isinstance(affiliations, list) and affiliations and isinstance(affiliations[0], dict):
            affiliation_year = affiliations[0].get('year', 0)
        else:
            affiliation_year = 0

        # Same gate as before: a truthy 'volume' AND a first-affiliation year
        # that is not None.
        # NOTE(review): because a truthy 'volume' is required here, the
        # affiliation year is never used as the bucketing value; confirm
        # whether `or` was intended for records that carry no 'volume'.
        volume = record.get('volume')
        if not volume or affiliation_year is None:
            return None

        try:
            return int(volume)
        except (TypeError, ValueError, OverflowError):
            # A volume that is not a plain number cannot be bucketed.
            return None

    def bucket_index(year):
        """Map a year to its bucket position (0 = oldest ... 3 = newest)."""
        if year <= 2009:
            return 0
        if year <= 2014:
            return 1
        if year <= 2020:
            return 2
        return 3

    def Read(folder_path, output_files):
        """Bucket all records found in *folder_path* and dump them to *output_files*."""
        # Create the output folders
        os.makedirs('./SpringerOpen_buffer/Article_output/', exist_ok=True)
        os.makedirs('./SpringerOpen_buffer/Author_output/', exist_ok=True)

        # Positions: oldest, 2010-2014, 2015-2020, newest
        buckets = [[], [], [], []]

        # save_data() creates no folder for an empty dataset, so a missing
        # input folder is treated as "no buffered files".
        filenames = sorted(os.listdir(folder_path)) if os.path.isdir(folder_path) else []

        for filename in filenames:
            if not filename.endswith('.json'):
                continue
            with open(os.path.join(folder_path, filename), 'r', encoding='utf-8') as file:
                data = json.load(file)

            # Filter the records: each one is classified once. The previous
            # code re-filtered the whole file for every qualifying record,
            # which duplicated every entry.
            for record in data:
                year = record_year(record)
                if year is not None:
                    buckets[bucket_index(year)].append(record)

        # Dump each bucket to its output file
        for output_file, bucket in zip(output_files, buckets):
            with open(output_file, 'w', encoding='utf-8') as file:
                json.dump(bucket, file, indent=4)

    # Input folders
    author_folder_path = './SpringerOpen_buffer/Author'
    article_folder_path = './SpringerOpen_buffer/Article'

    # Output files, ordered like the buckets
    author_output_file = [
        './SpringerOpen_buffer/Author_output/Author_output_file(oldest).json',
        './SpringerOpen_buffer/Author_output/Author_output_file(2010-2014).json',
        './SpringerOpen_buffer/Author_output/Author_output_file(2015-2020).json',
        './SpringerOpen_buffer/Author_output/Author_output_file(newest).json',
    ]
    article_output_file = [
        './SpringerOpen_buffer/Article_output/Article_output_file(oldest).json',
        './SpringerOpen_buffer/Article_output/Article_output_file(2010-2014).json',
        './SpringerOpen_buffer/Article_output/Article_output_file(2015-2020).json',
        './SpringerOpen_buffer/Article_output/Article_output_file(newest).json',
    ]

    # Read the buffers and write the aggregated files
    Read(author_folder_path, author_output_file)
    Read(article_folder_path, article_output_file)

    # End
    print("\nData has been written into files.")
# Delete the files in the temporary buffer folder
def delete(folder_path):
    """Remove every regular file located directly inside *folder_path*.

    Entries for which ``os.path.isfile`` is false (e.g. sub-directories) are
    left in place. A notice is printed once the pass over the folder is done.

    Args:
        folder_path: Directory whose files are removed.
    """
    # Snapshot the directory listing first, then test and remove one by one.
    candidates = [os.path.join(folder_path, entry) for entry in os.listdir(folder_path)]
    for candidate in filter(os.path.isfile, candidates):
        os.remove(candidate)
    print('\nAttention: The temporary storage files have been deleted!')