"""
========== fileReader ==========
1. Read the author records and look up the title of the article each author
   came from, storing that title together with the author information.
2. For every author record, use its ``from_article`` reference to search the
   article data and copy the matching article's ``title`` into the record.
3. Save the enriched records to a JSON file.
"""

import os
import json
from pprint import pprint

# Default destination of the enriched author data (kept for backward
# compatibility with callers that pass only the two input paths).
_DEFAULT_OUTPUT_PATH = './Author_data(merged)/Author_data(info_supplementary).json'

# (inclusive upper bound of the affiliation year, index into the directory
# listing of the article folder). Years above the last bound use
# _FALLBACK_FILE_INDEX.
_YEAR_BUCKETS = ((2009, 3), (2010, 0), (2020, 1))
_FALLBACK_FILE_INDEX = 2


def _load_json(path):
    """Return the parsed content of the UTF-8 encoded JSON file at *path*."""
    with open(path, 'r', encoding='utf-8') as file:
        return json.load(file)


def _affiliation_year(au_data):
    """Return the year of the first affiliation of *au_data* as an int.

    A missing or empty ``affiliation`` list and a missing or empty ``year``
    are all treated as year 0, which matches the default used when the keys
    are absent. A non-numeric year still raises ``ValueError``.
    """
    affiliation = au_data.get('affiliation') or [{}]
    return int(affiliation[0].get('year') or 0)


def _file_index_for_year(year):
    """Return the directory-listing index of the article file for *year*."""
    for upper_bound, index in _YEAR_BUCKETS:
        if year <= upper_bound:
            return index
    return _FALLBACK_FILE_INDEX


def Read(author_path, article_path, output_path=_DEFAULT_OUTPUT_PATH):
    """Add the source-article title to every author record and save the result.

    The author file must contain a JSON list whose items are themselves lists
    of author dictionaries. The outer list is processed (and written back) in
    reverse order. For each author, the first element of ``from_article`` is
    compared with the ``article_id`` of every entry in one article file; on a
    match the entry's ``title`` is stored under ``from_article_title`` (the
    last matching entry wins). Records without a usable ``from_article`` are
    left unchanged.

    Args:
        author_path: Path of the JSON file holding the author records.
        article_path: Directory holding the article JSON files.
        output_path: Where the enriched records are written as JSON. The
            parent directory is created when it does not exist.

    Raises:
        IndexError: If the article directory has fewer entries than the
            index selected for an author's year.
        ValueError: If an affiliation year cannot be converted to ``int``.
    """
    # NOTE(review): the article file is chosen by its position in the
    # os.listdir() result, whose order is not guaranteed by Python. Confirm
    # the intended file names and select them explicitly if possible.
    ar_names = os.listdir(article_path)

    au_list = list(reversed(_load_json(author_path)))

    # Parsed article files, keyed by file name, so each file is read from
    # disk at most once instead of once per author record.
    articles_by_file = {}
    processed = 0  # Number of author records handled so far

    for au_group in au_list:
        for au_data in au_group:
            if processed and processed % 100 == 0:
                print(str(processed) + " copies of data have been done.")
            processed += 1

            ar_id = au_data.get('from_article')
            if not ar_id:
                # Nothing to look up for this record.
                continue

            file_name = ar_names[_file_index_for_year(_affiliation_year(au_data))]
            if file_name not in articles_by_file:
                articles_by_file[file_name] = _load_json(
                    os.path.join(article_path, file_name)
                )

            wanted_id = ar_id[0]
            for article in articles_by_file[file_name]:
                if article.get('article_id') == wanted_id:
                    au_data['from_article_title'] = article.get('title')

    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    with open(output_path, 'w', encoding='utf-8') as file:
        json.dump(au_list, file, indent=4)
    print('All data have been stored into ' + output_path)


# ========== Test code ==========
if __name__ == '__main__':
    Read('./Author_data(merged)/Author_data(merged).json', './test_buffer/Article_output')