From 35ea1dd424c118da1ee27aefaa6a3f5e3c4dd855 Mon Sep 17 00:00:00 2001
From: Chenxiao Xia <s230026169@mail.uic.edu.cn>
Date: Wed, 1 Nov 2023 13:12:38 +0800
Subject: [PATCH] Remove erroneous code

---
 DataTransformer/FileStructureTansfer(EJDE).py | 199 -----------------
 .../FileStructureTansfer(EJQTDE).py           | 206 ------------------
 2 files changed, 405 deletions(-)
 delete mode 100644 DataTransformer/FileStructureTansfer(EJDE).py
 delete mode 100644 DataTransformer/FileStructureTansfer(EJQTDE).py

diff --git a/DataTransformer/FileStructureTansfer(EJDE).py b/DataTransformer/FileStructureTansfer(EJDE).py
deleted file mode 100644
index bc9197b..0000000
--- a/DataTransformer/FileStructureTansfer(EJDE).py
+++ /dev/null
@@ -1,199 +0,0 @@
-import json
-import os
-import unicodedata
-
-from collections import OrderedDict
-from pprint import pprint
-
-'''
-    ========== FileStructureTransfer ==========
-    1. 本程序用于将获取的数据进行结构调整
-    2. 根据论文发表的时间年限，分别将最后的数据存储在四个 json 文件中
-        （1） newest： 发表于 2020 年之后
-        （2） oldest： 发表于 2010 年之前
-        （3） 2010-2014： 发表于 2010 年至 2014 年
-        （4） 2015-2020： 发表于 2015 年至 2020 年
-    3. 考虑到部分网站的总数据量过大，所以分成多份
-    4. 本程序运行顺序为：
-        （1） fileReader() 读取本地已爬取数据，存入待处理列表
-        （2） arDataTransform() 转换论文数据格式
-        （3） auDataTransform() 转换作者数据格式
-        （4） 存入转换后数据的存储文件夹
-'''
-
-
-# Read the data
-def fileReader(folder, dataset):
-    files = os.listdir(folder)
-    for file in files:
-        file_path = os.path.join(folder, file)
-        with open(file_path, 'r', encoding='utf-8') as json_file:
-            Data = json.load(json_file)
-            dataset.append(Data)
-
-    return dataset
-
-
-# Article data structure transfer
-def arDataTransform(au_folder, ar_dataset, num):
-    def auInfoFind(path, file_name, ar_data, num):
-        authors = ar_data.get('authors')
-        authors.append(ar_data.get('corresponding_authors'))
-
-        file_path = os.path.join(path, file_name)
-        with open(file_path, 'r', encoding='utf-8') as file:
-            Data = json.load(file)
-
-        au_ID = []      # A new list to store author_id
-
-        # Find the author_id
-        for author in authors:
-            if author is None:
-                continue
-
-            author = author.replace(" ", "")
-
-            for Dict in Data:
-                Dict_name = Dict.get('first_name') + "," + Dict.get('last_name')
-                Dict_name = ''.join(char for char in unicodedata.normalize('NFKD', Dict_name) if
-                                    unicodedata.category(char) != 'Mn')
-
-                if Dict.get('from_article')[0] == ar_data.get('article_id') and Dict_name == author:
-                    au_ID.append(Dict.get('author_id'))
-
-        # Change the structure
-        ar_data_transform = {
-            "article_id": ar_data['article_id'],
-            "title": ar_data['title'],
-            "authors": au_ID,
-            "authors_name": ar_data['authors'],
-            "submit_datetime": ar_data['submit_datetime'],
-            "publish_datetime": ar_data['publish_datetime'],
-            "keywords": ar_data['keywords'],
-            "MSC": ar_data['MSC'],
-            "URL": ar_data['URL'],
-            "DOI": ar_data['DOI'],
-            "publisher": ar_data['publisher'],
-            "journal": ar_data['journal'],
-            "volume": ar_data['volume'],
-            "issue": ar_data['issue'],
-            "page": ar_data['page']
-        }
-
-        num[0] += 1     # Update the counter
-        return ar_data_transform
-
-    # ====== Main code for function =====
-    ar_names = os.listdir(au_folder)    # Read the folder
-
-    for ar_list in ar_dataset:
-        for Dict in ar_list:
-            year = Dict.get('publish_datetime')
-            if year is None:
-                continue
-
-            year = year.split('-')
-
-            if num[0] % 100 == 0 and num[0] != 0:       # Alert for complete data
-                print(str(num[0]) + " copies of article data structure have been transformed.")
-
-            if int(year[0]) <= 2009:
-                Dict = auInfoFind(au_folder, ar_names[3], Dict, num)
-                ar_dataset_new[3].append(Dict)
-
-            elif 2010 <= int(year[0]) <= 2014:
-                Dict = auInfoFind(au_folder, ar_names[0], Dict, num)
-                ar_dataset_new[0].append(Dict)
-
-            elif 2015 <= int(year[0]) <= 2020:
-                Dict = auInfoFind(au_folder, ar_names[1], Dict, num)
-                ar_dataset_new[1].append(Dict)
-
-            else:
-                Dict = auInfoFind(au_folder, ar_names[2], Dict, num)
-                ar_dataset_new[2].append(Dict)
-
-    # Store into the new file
-    filepaths = [
-        "./EJDE_buffer_transform/Article_output/EJDE_Article_output_file(2010-2014).json",
-        "./EJDE_buffer_transform/Article_output/EJDE_Article_output_file(2015-2020).json",
-        "./EJDE_buffer_transform/Article_output/EJDE_Article_output_file(newest).json",
-        "./EJDE_buffer_transform/Article_output/EJDE_Article_output_file(oldest).json",
-    ]
-
-    for i in range(4):
-        with open(filepaths[i], 'w', encoding='utf-8') as json_file:
-            json.dump(ar_dataset_new[i], json_file, indent=4)
-
-    print("\nComplete: All of the article data structure have been transformed.")
-
-
-# Author data structure transfer
-def auDataTransform(au_dataset, num):
-    def transform(list, num):
-        new_list = []   # New list to store transformed data
-
-        for au_data in list:
-            if num[0] % 100 == 0 and num[0] != 0:       # Alert for complete data
-                print(str(num[0]) + " copies of author data structure have been transformed.\n")
-
-            if au_data['middle_name'] is not None:
-                raw_name = au_data['first_name'] + ' ' + au_data['middle_name'] + ' ' + au_data['last_name']
-            else:
-                raw_name = au_data['first_name'] + ' ' + au_data['last_name']
-
-            au_data_transform = {
-                "author_id": au_data['author_id'],
-                "from_article": au_data['from_article'][0],
-                "first_name": au_data['last_name'],
-                "last_name": au_data['first_name'],
-                "middle_name": au_data['middle_name'],
-                "raw_name": raw_name,
-                "affiliation": au_data['affiliation']
-            }
-
-            new_list.append(au_data_transform)
-            num[0] += 1         # Update the counter
-
-        return new_list
-
-    for i in range(4):
-        au_list = transform(au_dataset[i], num)
-        au_dataset_new[i].append(au_list)
-
-    # Store into the new file
-    filepaths = [
-        "./EJDE_buffer_transform/Author_output/EJDE_Author_output_file(2010-2014).json",
-        "./EJDE_buffer_transform/Author_output/EJDE_Author_output_file(2015-2020).json",
-        "./EJDE_buffer_transform/Author_output/EJDE_Author_output_file(newest).json",
-        "./EJDE_buffer_transform/Author_output/EJDE_Author_output_file(oldest).json",
-    ]
-
-    for i in range(4):
-        with open(filepaths[i], 'w', encoding='utf-8') as json_file:
-            json.dump(au_dataset_new[i], json_file, indent=4)
-
-    print("\nComplete: All of the author data structure have been transformed.")
-
-
-# ========== Main code ========== #
-# New list for storing data
-ar_dataset = []
-au_dataset = []
-
-ar_dataset_new = [[] for _ in range(4)]    # New list for transformed data
-au_dataset_new = [[] for _ in range(4)]    # New list to store transformed data
-
-num1 = [0]      # Counter for complete ar_date
-num2 = [0]      # Counter for complete au_data
-
-os.makedirs('./EJDE_buffer_transform/Article_output/', exist_ok=True)
-os.makedirs('./EJDE_buffer_transform/Author_output/', exist_ok=True)
-
-# Read the data
-ar_dataset = fileReader('./EJDE_buffer/Article_output', ar_dataset)
-au_dataset = fileReader('./EJDE_buffer/Author_output', au_dataset)
-
-# Change the structure
-arDataTransform('./EJDE_buffer/Author_output', ar_dataset, num1)
-auDataTransform(au_dataset, num2)
\ No newline at end of file
diff --git a/DataTransformer/FileStructureTansfer(EJQTDE).py b/DataTransformer/FileStructureTansfer(EJQTDE).py
deleted file mode 100644
index 69c2dd4..0000000
--- a/DataTransformer/FileStructureTansfer(EJQTDE).py
+++ /dev/null
@@ -1,206 +0,0 @@
-import json
-import os
-import unicodedata
-
-from collections import OrderedDict
-from pprint import pprint
-
-'''
-    ========== FileStructureTransfer ==========
-    1. 本程序用于将获取的数据进行结构调整
-    2. 根据论文发表的时间年限，分别将最后的数据存储在四个 json 文件中
-        （1） newest： 发表于 2020 年之后
-        （2） oldest： 发表于 2010 年之前
-        （3） 2010-2014： 发表于 2010 年至 2014 年
-        （4） 2015-2020： 发表于 2015 年至 2020 年
-    3. 考虑到部分网站的总数据量过大，所以分成多份
-    4. 本程序运行顺序为：
-        （1） fileReader() 读取本地已爬取数据，存入待处理列表
-        （2） arDataTransform() 转换论文数据格式
-        （3） auDataTransform() 转换作者数据格式
-        （4） 存入转换后数据的存储文件夹
-'''
-
-
-# Read the data
-def fileReader(folder, dataset):
-    files = os.listdir(folder)
-    for file in files:
-        file_path = os.path.join(folder, file)
-        with open(file_path, 'r', encoding='utf-8') as json_file:
-            Data = json.load(json_file)
-            dataset.append(Data)
-
-    return dataset
-
-
-# Article data structure transfer
-def arDataTransform(au_folder, ar_dataset, num):
-    def auInfoFind(path, file_name, ar_data, num):
-        authors = ar_data.get('authors')
-        authors.append(ar_data.get('corresponding_authors'))
-
-        file_path = os.path.join(path, file_name)
-        with open(file_path, 'r', encoding='utf-8') as file:
-            Data = json.load(file)
-
-        au_ID = []      # A new list to store author_id
-
-        # Find the author_id
-        for author in authors:
-            author = author.replace(" ", "")
-
-            for Dict in Data:
-                Dict_name = Dict.get('first_name') + "," + Dict.get('last_name')
-                Dict_name = ''.join(char for char in unicodedata.normalize('NFKD', Dict_name) if
-                                    unicodedata.category(char) != 'Mn')
-
-                if Dict.get('from_article')[0] == ar_data.get('article_id') and Dict_name == author:
-                    au_ID.append(Dict.get('author_id'))
-
-        author_names_new = []
-        author_names = ar_data['authors']
-
-        for author_name in author_names:
-            author_name_new = ''
-            author_name = author_name.split(", ")
-
-            for i in range(len(author_name)-1, 0, -1):
-                # print(author_name[i])
-                author_name_new += author_name[i]
-                if i != 0:
-                    author_name_new += ', '
-
-            print(author_name_new)
-            author_names_new.append(author_name_new)
-
-        # Change the structure
-        ar_data_transform = {
-            "article_id": ar_data['article_id'],
-            "title": ar_data['title'],
-            "authors": au_ID,
-            "authors_name": author_names_new,
-            "submit_datetime": ar_data['submit_datetime'],
-            "publish_datetime": ar_data['publish_datetime'],
-            "keywords": ar_data['keywords'],
-            "MSC": ar_data['MSC'],
-            "URL": ar_data['URL'],
-            "DOI": ar_data['DOI'],
-            "publisher": ar_data['publisher'],
-            "journal": ar_data['journal'],
-            "volume": ar_data['volume'],
-            "issue": ar_data['issue'],
-            "page": ar_data['page']
-        }
-
-        num[0] += 1     # Update the counter
-        return ar_data_transform
-
-    # ====== Main code for function =====
-    ar_names = os.listdir(au_folder)    # Read the folder
-
-    for ar_list in ar_dataset:
-        for Dict in ar_list:
-            if num[0] % 100 == 0 and num[0] != 0:       # Alert for complete data
-                print(str(num[0]) + " copies of article data structure have been transformed.")
-
-            if int(Dict.get('volume')) <= 2009:
-                Dict = auInfoFind(au_folder, ar_names[3], Dict, num)
-                ar_dataset_new[3].append(Dict)
-
-            elif 2010 <= int(Dict.get('volume')) <= 2014:
-                Dict = auInfoFind(au_folder, ar_names[0], Dict, num)
-                ar_dataset_new[0].append(Dict)
-
-            elif 2015 <= int(Dict.get('volume')) <= 2020:
-                Dict = auInfoFind(au_folder, ar_names[1], Dict, num)
-                ar_dataset_new[1].append(Dict)
-
-            else:
-                Dict = auInfoFind(au_folder, ar_names[2], Dict, num)
-                ar_dataset_new[2].append(Dict)
-
-    # Store into the new file
-    filepaths = [
-        "./EJQTDE_buffer_transform/Article_output/EJQTDE_Article_output_file(2010-2014).json",
-        "./EJQTDE_buffer_transform/Article_output/EJQTDE_Article_output_file(2015-2020).json",
-        "./EJQTDE_buffer_transform/Article_output/EJQTDE_Article_output_file(newest).json",
-        "./EJQTDE_buffer_transform/Article_output/EJQTDE_Article_output_file(oldest).json",
-    ]
-
-    for i in range(4):
-        with open(filepaths[i], 'w', encoding='utf-8') as json_file:
-            json.dump(ar_dataset_new[i], json_file, indent=4)
-
-    print("\nComplete: All of the article data structure have been transformed.")
-
-
-# Author data structure transfer
-def auDataTransform(au_dataset, num):
-    def transform(list, num):
-        new_list = []   # New list to store transformed data
-
-        for au_data in list:
-            if num[0] % 100 == 0 and num[0] != 0:       # Alert for complete data
-                print(str(num[0]) + " copies of author data structure have been transformed.\n")
-
-            if au_data['middle_name'] is not None:
-                raw_name = au_data['first_name'] + ' ' + au_data['middle_name'] + ' ' + au_data['last_name']
-            else:
-                raw_name = au_data['first_name'] + ' ' + au_data['last_name']
-
-            au_data_transform = {
-                "author_id": au_data['author_id'],
-                "from_article": au_data['from_article'][0],
-                "first_name": au_data['last_name'],
-                "last_name": au_data['first_name'],
-                "middle_name": au_data['middle_name'],
-                "raw_name": raw_name,
-                "affiliation": au_data['affiliation']
-            }
-
-            new_list.append(au_data_transform)
-            num[0] += 1         # Update the counter
-
-        return new_list
-
-    for i in range(4):
-        au_list = transform(au_dataset[i], num)
-        au_dataset_new[i].append(au_list)
-
-    # Store into the new file
-    filepaths = [
-        "./EJQTDE_buffer_transform/Author_output/EJQTDE_Author_output_file(2010-2014).json",
-        "./EJQTDE_buffer_transform/Author_output/EJQTDE_Author_output_file(2015-2020).json",
-        "./EJQTDE_buffer_transform/Author_output/EJQTDE_Author_output_file(newest).json",
-        "./EJQTDE_buffer_transform/Author_output/EJQTDE_Author_output_file(oldest).json",
-    ]
-
-    for i in range(4):
-        with open(filepaths[i], 'w', encoding='utf-8') as json_file:
-            json.dump(au_dataset_new[i], json_file, indent=4)
-
-    print("\nComplete: All of the author data structure have been transformed.")
-
-
-# ========== Main code ========== #
-# New list for storing data
-ar_dataset = []
-au_dataset = []
-
-ar_dataset_new = [[] for _ in range(4)]    # New list for transformed data
-au_dataset_new = [[] for _ in range(4)]    # New list to store transformed data
-
-num1 = [0]      # Counter for complete ar_date
-num2 = [0]      # Counter for complete au_data
-
-os.makedirs('./EJQTDE_buffer_transform/Article_output/', exist_ok=True)
-os.makedirs('./EJQTDE_buffer_transform/Author_output/', exist_ok=True)
-
-# Read the data
-ar_dataset = fileReader('./EJQTDE_buffer/Article_output', ar_dataset)
-au_dataset = fileReader('./EJQTDE_buffer/Author_output', au_dataset)
-
-# Change the structure
-arDataTransform('./EJQTDE_buffer/Author_output', ar_dataset, num1)
-auDataTransform(au_dataset, num2)
\ No newline at end of file