From 9b33cbabe77c0c8e011965e35d941766f8ede303 Mon Sep 17 00:00:00 2001
From: Chenxiao Xia
Date: Fri, 6 Oct 2023 13:55:50 +0800
Subject: [PATCH] full data of calling aminer API

---
 FileMerger/webSearch_merge.py | 141 ++++++++++++++++++++++++++++++++++
 1 file changed, 141 insertions(+)
 create mode 100644 FileMerger/webSearch_merge.py

diff --git a/FileMerger/webSearch_merge.py b/FileMerger/webSearch_merge.py
new file mode 100644
index 0000000..396f9df
--- /dev/null
+++ b/FileMerger/webSearch_merge.py
@@ -0,0 +1,141 @@
+import jwt
+import json
+import requests
+import time
+import re
+
+from pprint import pprint
+
+
+# ========== Aminer pre-set ==========
+# Aminer secret key
+secret_key = "81hJKrNgKkMqow=="
+user_id = "650c31aa078ed986b5d526cc"
+expire_time = int(time.time()) + 60  # Expire in 1 min
+now_time = int(time.time())
+
+# Aminer JWT token generator
+head = {
+    "alg": "HS256",
+    "sign_type": "SIGN"
+}
+payload = {
+    "user_id": user_id,
+    "exp": expire_time,
+    "timestamp": now_time
+}
+jwt_token = jwt.encode(payload, secret_key, algorithm="HS256", headers=head)
+
+
+# ========== Aminer API link ==========
+api_author_info = "https://datacenter.aminer.cn/gateway/open_platform/api/v2/person/search"
+api_article_info = "https://datacenter.aminer.cn/gateway/open_platform/api/v3/paper/list/by/publish"
+api_articleDetail_info = "https://datacenter.aminer.cn/gateway/open_platform/api/v3/paper/detail/list"
+
+
+# ========== Function ==========
+# ---------- Get the article web-ID ----------
+def aminer_article_webID(title):
+    headers = {
+        "Authorization": f"Bearer {jwt_token}"
+    }
+    params = {
+        "page": "",
+        "size": "",
+        "title": re.sub(r'[^a-zA-Z0-9\s]+', ' ', title).strip()
+    }
+
+    response = requests.post(api_article_info, headers=headers, params=params)
+
+    if response.status_code == 200:
+        data = response.json()
+        if len(data.get('data', [])) < 1:  # No matching article returned
+            print(f"\nERROR: Cannot find the web-ID of '{title}'")
+            Retry_article_title.append(title)  # Add the title into the retry list
+
+            return None
+
+        print('\n========== The web-ID of the article ==========')
+        pprint(data)
+
+        article_aminerID.append(data.get('data')[0].get('id'))  # Keep the first match's web-ID
+
+    else:
+        print(f"\nERROR: Cannot find the web-ID of '{title}'")
+        Retry_article_title.append(title)  # Add the title into the retry list
+
+
+# ---------- Get the article details ----------
+def aminer_article_Detail(article_id):
+    headers = {
+        "Content-Type": "application/json;charset=utf-8",
+        "Authorization": f"Bearer {jwt_token}"
+    }
+
+    request_data = {
+        "ids": [article_id]
+    }
+
+    response = requests.post(api_articleDetail_info, headers=headers, data=json.dumps(request_data))
+
+    if response.status_code == 200:
+        data = response.json()
+
+        print('\n========== The detail of the article ==========')
+        pprint(data)
+
+        authors_ID = data.get('data')[0].get('authors')
+        for author_ID in authors_ID:
+            author_ID = author_ID.get('id')
+            if author_ID is not None:
+                author_aminerID.append(author_ID)
+
+    else:
+        print(f"\nERROR: Cannot find the detail of the article '{article_id}'")
+        Retry_article_aminerID.append(article_id)  # Add the article ID into the retry list
+
+
+# ---------- Get the authors' information ----------
+def aminer_author_info(author_ids):
+    headers = {
+        "Content-Type": "application/json;charset=utf-8",
+        "Authorization": f"Bearer {jwt_token}"
+    }
+    request_data = {
+        "ids": author_ids
+    }
+    response = requests.post(api_author_info, headers=headers, data=json.dumps(request_data))
+
+    if response.status_code == 200:
+        data = response.json()
+
+        print("\n========== The author's information ==========")
+        pprint(data)
+
+    else:
+        print(f"\nERROR: Cannot find the author information for {author_ids}")
+        Retry_author_aminerID.extend(author_ids)  # Add the author IDs into the retry list
+
+
+# ========== Main code ==========
+# ---------- Lists holding the data to be searched via the API ----------
+article_title = ['Entire solutions of the spruce budworm model']  # Test title
+article_aminerID = []
+author_aminerID = []
+
+# ---------- Lists holding the items to be retried ----------
+Retry_article_title = []
+Retry_article_aminerID = []
+Retry_author_aminerID = []
+
+
+# ---------- Call the API ----------
+for title in article_title:  # Get the article web-ID
+    aminer_article_webID(title)
+
+if len(article_aminerID) > 0:
+    for ar_id in article_aminerID:  # Get the article details
+        aminer_article_Detail(ar_id)
+
+if len(author_aminerID) > 0:
+    aminer_author_info(author_aminerID)  # Get the authors' information
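
Note on the retry lists: the script collects failed lookups in Retry_article_title, Retry_article_aminerID and Retry_author_aminerID but never re-processes them, and the JWT token generated above expires after 60 seconds, so a delayed retry pass would also need a fresh token. A minimal single retry pass (a sketch only, not applied by this patch; it assumes the same module-level lists and functions and a still-valid token) could look like:

# Hypothetical retry pass -- a sketch, not code from webSearch_merge.py
pending_titles, Retry_article_title = Retry_article_title, []
for title in pending_titles:
    aminer_article_webID(title)          # failures re-enter Retry_article_title

pending_ids, Retry_article_aminerID = Retry_article_aminerID, []
for ar_id in pending_ids:
    aminer_article_Detail(ar_id)         # failures re-enter Retry_article_aminerID

if Retry_author_aminerID:
    pending_authors, Retry_author_aminerID = Retry_author_aminerID, []
    aminer_author_info(pending_authors)  # failures re-enter Retry_author_aminerID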