diff --git a/00_File_merge/Merge.py b/00_File_merge/Merge.py index 4557919..5d8c635 100644 --- a/00_File_merge/Merge.py +++ b/00_File_merge/Merge.py @@ -1,8 +1,7 @@ import json import os -from pprint import pprint - import unicodedata +from pprint import pprint ''' ========== SameWeb_merge(folder_path) 运行顺序 ========== @@ -53,13 +52,16 @@ def SameWeb_merge(folder_path): # Uniform characters in English fa = unicodedata.normalize('NFKD', a['affiliation'][-1]["affiliation"]).encode('ascii', 'ignore') - faa = unicodedata.normalize('NFKD', aa['affiliation'][-1]["affiliation"]).encode('ascii', 'ignore') + faa = unicodedata.normalize('NFKD', aa['affiliation'][0]["affiliation"]).encode('ascii', 'ignore') if fa != faa: a['affiliation'].extend(aa['affiliation']) - elif a['affiliation'][-1]['year'] != aa['affiliation'][0]['year']: + elif fa == faa and a['affiliation'][-1]['year'] != aa['affiliation'][0]['year']: a['affiliation'].extend(aa['affiliation']) + if len(a['affiliation']) > 1 and a['affiliation'][0] == a['affiliation'][1]: + a['affiliation'].remove(a['affiliation'][0]) + # Delete extra elements Data.remove(aa) @@ -82,7 +84,7 @@ def SameWeb_merge(folder_path): if len(data) > 0: Data.extend(data) - Database = len(Data) + Database = len(Data) # The length of the original data Data = sorted(Data, key=lambda x: x['affiliation'][0]['year']) # Same website data merge @@ -96,7 +98,7 @@ def SameWeb_merge(folder_path): print(str(len(Data)) + ' copies of data in total, now.') # Save into file - path = os.path.dirname(folder_path) + path = os.path.dirname(folder_path) # parent path path = os.path.join(path, "Author_data(merged)") os.makedirs(path, exist_ok=True) path = os.path.join(path, "Author_data(merged).json") @@ -106,10 +108,10 @@ def SameWeb_merge(folder_path): print('\nData has been added to ' + path + '\Author_data(merged).json') - # =========== input the file path here ========== # SameWeb_merge('.\EJQTDE\EJQTDE_buffer\Author_output') # SameWeb_merge('.\SpringerOpen\SpringerOpen_buffer\Author_output') +# SameWeb_merge('.\ejde\ejde_buffer\Author_output')