import os
import json

# Labels of the four year buckets, in the order the output files are written.
_BUCKET_LABELS = ('oldest', '2010-2014', '2015-2020', 'newest')

# Path pattern of the final output files; `kind` is 'Author' or 'Article'.
_OUTPUT_TEMPLATE = (
    './SpringerOpen_buffer/{kind}_output/'
    'SpringerOpen_{kind}_output_file({label}).json'
)


# Stage data in a JSON file (small file, 50 articles).
def save_data(dataset, filetype, filename):
    """Write ``dataset`` as JSON to ``./SpringerOpen_buffer/<filetype>/<filename>``.

    Args:
        dataset: JSON-serialisable data. When it is empty/falsy nothing is
            written and the target directory is not created.
        filetype: Name of the sub-directory below ``./SpringerOpen_buffer/``;
            also used as the prefix of the printed confirmation.
        filename: Name of the JSON file to create or overwrite.
    """
    if not dataset:
        return

    directory = "./SpringerOpen_buffer/" + filetype + "/"
    os.makedirs(directory, exist_ok=True)
    filepath = os.path.join(directory, filename)
    with open(filepath, "w", encoding='utf-8') as json_file:
        json.dump(dataset, json_file, indent=4)
    print(filetype + " data have been added to", filepath)


def _record_year(record):
    """Return the integer used to bucket ``record``, or None to skip it.

    The value is ``record['volume']`` when that is truthy, otherwise the
    ``'year'`` of the first ``'affiliation'`` entry. A missing, empty or
    malformed affiliation falls back to 0, which is the default the lookup
    has always used for a missing ``'affiliation'`` key. Non-dict records and
    values that ``int()`` cannot convert (including an explicit ``None``
    year) yield None.
    """
    if not isinstance(record, dict):
        return None

    raw = record.get('volume')
    if not raw:
        affiliations = record.get('affiliation')
        # An empty list used to raise IndexError on `[0]`; treat it exactly
        # like a missing key instead.
        if isinstance(affiliations, list) and affiliations:
            first = affiliations[0]
        else:
            first = {}
        raw = first.get('year', 0) if isinstance(first, dict) else 0

    try:
        return int(raw)
    except (TypeError, ValueError):
        # One unparsable record must not abort the whole consolidation.
        return None


def _bucket_index(year):
    """Map an integer year to its position in ``_BUCKET_LABELS``."""
    if year <= 2009:
        return 0
    if year <= 2014:
        return 1
    if year <= 2020:
        return 2
    return 3


def _split_by_year(folder_path):
    """Load every ``*.json`` file in ``folder_path`` and bucket its records.

    Returns a list of four lists ordered like ``_BUCKET_LABELS``. A folder
    that does not exist is treated as empty, because ``save_data`` only
    creates a staging folder once it has something to write.
    """
    buckets = [[] for _ in _BUCKET_LABELS]
    if not os.path.isdir(folder_path):
        return buckets

    # Sorted so the order of records in the output is reproducible.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.endswith('.json'):
            continue
        file_path = os.path.join(folder_path, filename)
        with open(file_path, 'r', encoding='utf-8') as file:
            data = json.load(file)
        if not isinstance(data, list):
            continue
        for record in data:
            year = _record_year(record)
            if year is not None:
                buckets[_bucket_index(year)].append(record)
    return buckets


# Final filtering and consolidation of the staged files.
def Transf():
    """Merge the staged Author/Article JSON files into four files per kind.

    Reads ``./SpringerOpen_buffer/Author_TS`` and
    ``./SpringerOpen_buffer/Article_TS``, splits the records into the buckets
    <=2009, 2010-2014, 2015-2020 and >=2021, and writes each bucket to
    ``./SpringerOpen_buffer/<kind>_output/``. All eight output files are
    always written; a bucket without records becomes an empty JSON list.
    """
    # Create the output folders.
    os.makedirs('./SpringerOpen_buffer/Article_output/', exist_ok=True)
    os.makedirs('./SpringerOpen_buffer/Author_output/', exist_ok=True)

    # Read each staging folder and write its buckets out.
    for kind in ('Author', 'Article'):
        buckets = _split_by_year('./SpringerOpen_buffer/' + kind + '_TS')
        for label, records in zip(_BUCKET_LABELS, buckets):
            output_path = _OUTPUT_TEMPLATE.format(kind=kind, label=label)
            with open(output_path, 'w', encoding='utf-8') as file:
                json.dump(records, file, indent=4)

    # End
    print("\nData has been written into files.")


# Delete the files in the staging area.
def delete():
    """Remove the staging folders ``Author_TS`` and ``Article_TS``.

    Regular files inside each folder are deleted, then the folder itself is
    removed. A folder that does not exist is skipped.

    Raises:
        OSError: from ``os.rmdir`` if a folder still contains entries that
            are not regular files (for example a sub-directory).
    """
    folder_paths = ['./SpringerOpen_buffer/Author_TS', './SpringerOpen_buffer/Article_TS']
    for folder_path in folder_paths:
        if not os.path.isdir(folder_path):
            continue
        for file_name in os.listdir(folder_path):
            file_path = os.path.join(folder_path, file_name)
            if os.path.isfile(file_path):
                os.remove(file_path)
        os.rmdir(folder_path)
    print('\nAttention: The temporary storage files have been deleted!')