Fix the bug of adding duplicate data
Parent: 7726650eaa
Commit: 8ea31d08f4
@@ -1,8 +1,7 @@
 import json
 import os
-from pprint import pprint
-
 import unicodedata
+from pprint import pprint
 
 '''
 ========== SameWeb_merge(folder_path) execution order ==========
@@ -53,13 +52,16 @@ def SameWeb_merge(folder_path):
 
 # Uniform characters in English
 fa = unicodedata.normalize('NFKD', a['affiliation'][-1]["affiliation"]).encode('ascii', 'ignore')
-faa = unicodedata.normalize('NFKD', aa['affiliation'][-1]["affiliation"]).encode('ascii', 'ignore')
+faa = unicodedata.normalize('NFKD', aa['affiliation'][0]["affiliation"]).encode('ascii', 'ignore')
 
 if fa != faa:
 a['affiliation'].extend(aa['affiliation'])
-elif a['affiliation'][-1]['year'] != aa['affiliation'][0]['year']:
+elif fa == faa and a['affiliation'][-1]['year'] != aa['affiliation'][0]['year']:
 a['affiliation'].extend(aa['affiliation'])
 
+if len(a['affiliation']) > 1 and a['affiliation'][0] == a['affiliation'][1]:
+a['affiliation'].remove(a['affiliation'][0])
+
 # Delete extra elements
 Data.remove(aa)
 
@@ -82,7 +84,7 @@ def SameWeb_merge(folder_path):
 if len(data) > 0:
 Data.extend(data)
 
-Database = len(Data)
+Database = len(Data)  # The length of the original data
 Data = sorted(Data, key=lambda x: x['affiliation'][0]['year'])
 
 # Same website data merge
@@ -96,7 +98,7 @@ def SameWeb_merge(folder_path):
 print(str(len(Data)) + ' copies of data in total, now.')
 
 # Save into file
-path = os.path.dirname(folder_path)
+path = os.path.dirname(folder_path)  # parent path
 path = os.path.join(path, "Author_data(merged)")
 os.makedirs(path, exist_ok=True)
 path = os.path.join(path, "Author_data(merged).json")
@@ -106,10 +108,10 @@ def SameWeb_merge(folder_path):
 
 print('\nData has been added to ' + path + '\Author_data(merged).json')
 
 
 # =========== input the file path here ==========
 # SameWeb_merge('.\EJQTDE\EJQTDE_buffer\Author_output')
 # SameWeb_merge('.\SpringerOpen\SpringerOpen_buffer\Author_output')
+# SameWeb_merge('.\ejde\ejde_buffer\Author_output')
 
-
 
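For context, a minimal, self-contained sketch of the rule the fixed code applies: compare the last affiliation of the record being kept (a) with the first affiliation of the incoming record (aa), extend only when those boundary entries actually differ (different name, or the same name recorded for a different year), and drop a leading duplicate entry if one remains. The record shape follows what the diff shows; merge_pair and _ascii are hypothetical helper names, not functions from this repository.

import unicodedata

def _ascii(s):
    # Strip accents so affiliation strings compare consistently ("Uniform characters in English")
    return unicodedata.normalize('NFKD', s).encode('ascii', 'ignore')

def merge_pair(a, aa):
    # Compare a's LAST affiliation with aa's FIRST one (the fixed behaviour)
    fa = _ascii(a['affiliation'][-1]['affiliation'])
    faa = _ascii(aa['affiliation'][0]['affiliation'])
    # Extend only when the boundary entries differ: different name,
    # or the same name recorded for a different year.
    if fa != faa or a['affiliation'][-1]['year'] != aa['affiliation'][0]['year']:
        a['affiliation'].extend(aa['affiliation'])
    # Drop a leading duplicate entry if one slipped through
    if len(a['affiliation']) > 1 and a['affiliation'][0] == a['affiliation'][1]:
        a['affiliation'].pop(0)
    return a

# Two records whose boundary affiliations are the same entry:
a = {'affiliation': [{'affiliation': 'Université X', 'year': '2019'}]}
aa = {'affiliation': [{'affiliation': 'Universite X', 'year': '2019'}]}
merge_pair(a, aa)
print(a['affiliation'])  # one entry: the duplicate is not appended again

The single combined condition is equivalent to the two branches in the diff, since the fa == faa test in the new elif is already implied by falling through the first branch.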