import os
import json

# Inclusive upper bounds (years) of every bucket except the last, open-ended
# one.  Together they define: <= 2009, 2010-2014, 2015-2020, >= 2021.
_YEAR_UPPER_BOUNDS = (2009, 2014, 2020)


def save_data(dataset, filetype, filename):
    """Stage *dataset* as a JSON file under ./SpringerOpen_buffer/<filetype>/.

    Nothing is written (and no directory is created) when *dataset* is empty
    or otherwise falsy.

    Args:
        dataset: JSON-serialisable data to store.
        filetype: Name of the sub-folder of the buffer directory.
        filename: Name of the JSON file to create inside that sub-folder.
    """
    if not dataset:
        return

    directory = "./SpringerOpen_buffer/" + filetype + "/"
    os.makedirs(directory, exist_ok=True)
    filepath = os.path.join(directory, filename)
    with open(filepath, "w", encoding='utf-8') as json_file:
        json.dump(dataset, json_file, indent=4)
    print(filetype + " data have been added to", filepath)


def _record_year(record):
    """Return the year of *record* as an int, or None when it has none.

    The year is read from 'volume' first and, when that is missing or empty,
    from the 'year' entry of the first element of 'affiliation'.  Non-dict
    records, empty/malformed affiliation lists and values that ``int()``
    cannot convert all yield None instead of raising.
    """
    if not isinstance(record, dict):
        return None

    raw_year = record.get('volume')
    if not raw_year:
        affiliations = record.get('affiliation')
        if isinstance(affiliations, list) and affiliations:
            first = affiliations[0]
            raw_year = first.get('year') if isinstance(first, dict) else None

    # A missing year used to default to 0 and land in the "oldest" bucket;
    # records without any year information are skipped instead.
    if not raw_year:
        return None
    try:
        return int(raw_year)
    except (TypeError, ValueError):
        return None


def _bucket_index(year):
    """Return the index of the year bucket (0 = oldest ... 3 = newest)."""
    for index, upper_bound in enumerate(_YEAR_UPPER_BOUNDS):
        if year <= upper_bound:
            return index
    return len(_YEAR_UPPER_BOUNDS)


def _split_folder_by_year(folder_path, output_files):
    """Bucket every record of the JSON files in *folder_path* by year.

    Args:
        folder_path: Folder holding the staged ``*.json`` files.  A missing
            folder is treated as empty, because ``save_data`` only creates
            it when there was something to store.
        output_files: One output path per bucket, ordered oldest to newest.
    """
    buckets = [[] for _ in range(len(_YEAR_UPPER_BOUNDS) + 1)]

    # Sorted so the order of records in the output does not depend on the
    # arbitrary order returned by os.listdir().
    names = sorted(os.listdir(folder_path)) if os.path.isdir(folder_path) else []
    for name in names:
        if not name.endswith('.json'):
            continue
        with open(os.path.join(folder_path, name), 'r', encoding='utf-8') as file:
            records = json.load(file)
        if not isinstance(records, list):
            continue
        # Each record is examined exactly once, so it ends up in at most one
        # bucket and is never duplicated.
        for record in records:
            year = _record_year(record)
            if year is not None:
                buckets[_bucket_index(year)].append(record)

    # Dump every bucket to its output file.
    for output_path, bucket in zip(output_files, buckets):
        parent = os.path.dirname(output_path)
        if parent:
            os.makedirs(parent, exist_ok=True)
        with open(output_path, 'w', encoding='utf-8') as file:
            json.dump(bucket, file, indent=4)


def Transf():
    """Filter the staged Author/Article files and merge them by year range.

    Reads every ``*.json`` file in ./SpringerOpen_buffer/Author and
    ./SpringerOpen_buffer/Article and writes, for each of the two kinds, four
    JSON files (oldest, 2010-2014, 2015-2020, newest) into the matching
    ``*_output`` folder.
    """
    # Input folders.
    author_folder_path = './SpringerOpen_buffer/Author'
    article_folder_path = './SpringerOpen_buffer/Article'

    # Output files, ordered from the oldest bucket to the newest one.
    author_output_file = [
        './SpringerOpen_buffer/Author_output/Author_output_file(oldest).json',
        './SpringerOpen_buffer/Author_output/Author_output_file(2010-2014).json',
        './SpringerOpen_buffer/Author_output/Author_output_file(2015-2020).json',
        './SpringerOpen_buffer/Author_output/Author_output_file(newest).json'
    ]
    article_output_file = [
        './SpringerOpen_buffer/Article_output/Article_output_file(oldest).json',
        './SpringerOpen_buffer/Article_output/Article_output_file(2010-2014).json',
        './SpringerOpen_buffer/Article_output/Article_output_file(2015-2020).json',
        './SpringerOpen_buffer/Article_output/Article_output_file(newest).json'
    ]

    # Read the staged files and write the bucketed results.
    _split_folder_by_year(author_folder_path, author_output_file)
    _split_folder_by_year(article_folder_path, article_output_file)

    print("\nData has been written into files.")


def delete(folder_path):
    """Delete every regular file directly inside *folder_path*.

    Sub-directories and their contents are left untouched.

    Raises:
        FileNotFoundError: If *folder_path* does not exist (from os.listdir).
    """
    for file_name in os.listdir(folder_path):
        file_path = os.path.join(folder_path, file_name)
        if os.path.isfile(file_path):
            os.remove(file_path)
    print('\nAttention: The temporary storage files have been deleted!')