import os
import json
from pprint import pprint
'''
========== fileReader =========
1. This program reads the author records, looks up the title of each record's source article,
   and stores the result in a new dictionary.
2. For each author record it retrieves author_id and from_article, searches the article data
   for the matching title, and stores the author information together with the title in the
   new dictionary.
3. The new dictionaries are written out to a JSON file.
'''
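# A minimal sketch (an assumption, not the real data) of the record shapes this script relies on,
# inferred from the field accesses below; the ids "A0001"/"P0001" are hypothetical placeholders:
#
#   one author record inside a nested author list:
#       {"author_id": "A0001",
#        "from_article": ["P0001"],               # article_id of the source article
#        "affiliation": [{"year": "2008", ...}]}  # first affiliation year selects the article file
#
#   one article record:
#       {"article_id": "P0001", "title": "Example title", ...}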
# Look up source-article titles for the author records (those without "email" information)
def Read(author_path, article_path):
    # Read the author data list (appended in reverse order of the source file)
    def au_read(path, au_list):
        with open(path, 'r', encoding='utf-8') as file:
            data = json.load(file)
        for i in range(len(data) - 1, -1, -1):
            au_list.append(data[i])
        return au_list

    # Find the article whose article_id matches ar_id[0] and copy its title into the author record
    def ar_read(path, file_name, ar_id, au_data, num):
        file_path = os.path.join(path, file_name)
        with open(file_path, 'r', encoding='utf-8') as file:
            data = json.load(file)
        for article in data:
            if article.get('article_id') == ar_id[0]:
                # Store the matched title in the author record (the "new dictionary")
                au_data['from_article_title'] = article.get('title')
                num[0] += 1

    # ========== Main code ==========
    au_list = []  # List for author data
    num = [0]     # Data counter (wrapped in a list so the helper can update it)
    # List the article data files
    ar_names = os.listdir(article_path)
    # Load the author data which has no "email" information
    au_list = au_read(author_path, au_list)
    # Search for the articles the authors come from
    for au_MergeList in au_list:
        for au_data in au_MergeList:
            if num[0] % 100 == 0 and num[0] != 0:
                print(str(num[0]) + " records have been processed.")
            # Choose the article file by the year of the author's first affiliation
            if int(au_data.get('affiliation', [{}])[0].get('year', 0)) <= 2009:
                ar_read(article_path, ar_names[3], au_data.get('from_article'), au_data, num)
            elif int(au_data.get('affiliation', [{}])[0].get('year', 0)) <= 2010:
                ar_read(article_path, ar_names[0], au_data.get('from_article'), au_data, num)
            elif int(au_data.get('affiliation', [{}])[0].get('year', 0)) <= 2020:
                ar_read(article_path, ar_names[1], au_data.get('from_article'), au_data, num)
            else:
                ar_read(article_path, ar_names[2], au_data.get('from_article'), au_data, num)
    with open('./Author_data(merged)/Author_data(info_supplementary).json', 'w', encoding='utf-8') as file:
        json.dump(au_list, file, indent=4)
    print('All data has been stored in ./Author_data(merged)/Author_data(info_supplementary).json')

# ========== Test code ==========
Read('./Author_data(merged)/Author_data(merged).json', './test_buffer/Article_output')