Fix the bug that caused duplicate data to be added
This commit is contained in:
parent
7726650eaa
commit
8ea31d08f4
@@ -1,8 +1,7 @@
|
||||
import json
|
||||
import os
|
||||
from pprint import pprint
|
||||
|
||||
import unicodedata
|
||||
from pprint import pprint
|
||||
|
||||
'''
|
||||
========== SameWeb_merge(folder_path) 运行顺序 ==========
|
||||
@@ -53,13 +52,16 @@ def SameWeb_merge(folder_path):
|
||||
|
||||
# Uniform characters in English
|
||||
fa = unicodedata.normalize('NFKD', a['affiliation'][-1]["affiliation"]).encode('ascii', 'ignore')
|
||||
faa = unicodedata.normalize('NFKD', aa['affiliation'][-1]["affiliation"]).encode('ascii', 'ignore')
|
||||
faa = unicodedata.normalize('NFKD', aa['affiliation'][0]["affiliation"]).encode('ascii', 'ignore')
|
||||
|
||||
if fa != faa:
|
||||
a['affiliation'].extend(aa['affiliation'])
|
||||
elif a['affiliation'][-1]['year'] != aa['affiliation'][0]['year']:
|
||||
elif fa == faa and a['affiliation'][-1]['year'] != aa['affiliation'][0]['year']:
|
||||
a['affiliation'].extend(aa['affiliation'])
|
||||
|
||||
if len(a['affiliation']) > 1 and a['affiliation'][0] == a['affiliation'][1]:
|
||||
a['affiliation'].remove(a['affiliation'][0])
|
||||
|
||||
# Delete extra elements
|
||||
Data.remove(aa)
|
||||
|
||||
@@ -82,7 +84,7 @@ def SameWeb_merge(folder_path):
|
||||
if len(data) > 0:
|
||||
Data.extend(data)
|
||||
|
||||
Database = len(Data)
|
||||
Database = len(Data) # The length of the original data
|
||||
Data = sorted(Data, key=lambda x: x['affiliation'][0]['year'])
|
||||
|
||||
# Same website data merge
|
||||
@@ -96,7 +98,7 @@ def SameWeb_merge(folder_path):
|
||||
print(str(len(Data)) + ' copies of data in total, now.')
|
||||
|
||||
# Save into file
|
||||
path = os.path.dirname(folder_path)
|
||||
path = os.path.dirname(folder_path) # parent path
|
||||
path = os.path.join(path, "Author_data(merged)")
|
||||
os.makedirs(path, exist_ok=True)
|
||||
path = os.path.join(path, "Author_data(merged).json")
|
||||
@@ -106,10 +108,10 @@ def SameWeb_merge(folder_path):
|
||||
|
||||
print('\nData has been added to ' + path + '\Author_data(merged).json')
|
||||
|
||||
|
||||
# =========== input the file path here ==========
|
||||
# SameWeb_merge('.\EJQTDE\EJQTDE_buffer\Author_output')
|
||||
# SameWeb_merge('.\SpringerOpen\SpringerOpen_buffer\Author_output')
|
||||
# SameWeb_merge('.\ejde\ejde_buffer\Author_output')
|
||||
|
||||
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user