From 9b33cbabe77c0c8e011965e35d941766f8ede303 Mon Sep 17 00:00:00 2001
From: Chenxiao Xia
Date: Fri, 6 Oct 2023 13:55:50 +0800
Subject: [PATCH] full data of calling aminer API

---
 FileMerger/webSearch_merge.py | 141 ++++++++++++++++++++++++++++++++++
 1 file changed, 141 insertions(+)
 create mode 100644 FileMerger/webSearch_merge.py

diff --git a/FileMerger/webSearch_merge.py b/FileMerger/webSearch_merge.py
new file mode 100644
index 0000000..396f9df
--- /dev/null
+++ b/FileMerger/webSearch_merge.py
@@ -0,0 +1,141 @@
+import jwt
+import json
+import requests
+import time
+import re
+
+from pprint import pprint
+
+
+# ========== Aminer pre-set ==========
+# Aminer secret key
+secret_key = "81hJKrNgKkMqow=="
+user_id = "650c31aa078ed986b5d526cc"
+expire_time = int(time.time()) + 60  # Expire in 1 min
+now_time = int(time.time())
+
+# Aminer JWT token generator
+head = {
+    "alg": "HS256",
+    "sign_type": "SIGN"
+}
+payload = {
+    "user_id": user_id,
+    "exp": expire_time,
+    "timestamp": now_time
+}
+jwt_token = jwt.encode(payload, secret_key, algorithm="HS256", headers=head)
+
+
+# ========== Aminer API link ==========
+api_author_info = "https://datacenter.aminer.cn/gateway/open_platform/api/v2/person/search"
+api_article_info = "https://datacenter.aminer.cn/gateway/open_platform/api/v3/paper/list/by/publish"
+api_articleDetail_info = "https://datacenter.aminer.cn/gateway/open_platform/api/v3/paper/detail/list"
+
+
+# ========== Function ==========
+# ---------- Get the article web-ID ----------
+def aminer_article_webID(title):
+    headers = {
+        "Authorization": f"Bearer {jwt_token}"
+    }
+    params = {
+        "page": "",
+        "size": "",
+        "title": re.sub(r'[^a-zA-Z0-9\s]+', ' ', title).strip()
+    }
+
+    response = requests.post(api_article_info, headers=headers, params=params)
+
+    if response.status_code == 200:
+        data = response.json()
+        if len(data.get('data', [])) < 1:  # No matching article returned
+            print(f"\nERROR: Cannot find the web-ID of '{title}'")
+            Retry_article_title.append(title)  # Add the title into the retry list
+
+            return None
+
+        print('\n========== The web-ID of the article ==========')
+        pprint(data)
+
+        article_aminerID.append(data.get('data')[0].get('id'))  # Keep the first match's web-ID
+
+    else:
+        print(f"\nERROR: Cannot find the web-ID of '{title}'")
+        Retry_article_title.append(title)  # Add the title into the retry list
+
+
+# ---------- Get the article details ----------
+def aminer_article_Detail(article_id):
+    headers = {
+        "Content-Type": "application/json;charset=utf-8",
+        "Authorization": f"Bearer {jwt_token}"
+    }
+
+    request_data = {
+        "ids": [article_id]
+    }
+
+    response = requests.post(api_articleDetail_info, headers=headers, data=json.dumps(request_data))
+
+    if response.status_code == 200:
+        data = response.json()
+
+        print('\n========== The detail of the article ==========')
+        pprint(data)
+
+        authors_ID = data.get('data')[0].get('authors')
+        for author_ID in authors_ID:
+            author_ID = author_ID.get('id')
+            if author_ID is not None:
+                author_aminerID.append(author_ID)
+
+    else:
+        print(f"\nERROR: Cannot find the detail of the article '{article_id}'")
+        Retry_article_aminerID.append(article_id)  # Add the article ID into the retry list
+
+
+# ---------- Get the authors' information ----------
+def aminer_author_info(author_ids):
+    headers = {
+        "Content-Type": "application/json;charset=utf-8",
+        "Authorization": f"Bearer {jwt_token}"
+    }
+    request_data = {
+        "ids": author_ids
+    }
+    response = requests.post(api_author_info, headers=headers, data=json.dumps(request_data))
+
+    if response.status_code == 200:
+        data = response.json()
+
+        print("\n========== The author's information ==========")
+        pprint(data)
+
+    else:
+        print(f"\nERROR: Cannot find the author information for {author_ids}")
+        Retry_author_aminerID.extend(author_ids)  # Add the author IDs into the retry list
+
+
+# ========== Main code ==========
+# ---------- Lists holding the data to be searched via the API ----------
+article_title = ['Entire solutions of the spruce budworm model']  # Test title
+article_aminerID = []
+author_aminerID = []
+
+# ---------- Lists holding the items to be retried ----------
+Retry_article_title = []
+Retry_article_aminerID = []
+Retry_author_aminerID = []
+
+
+# ---------- Call the API ----------
+for title in article_title:  # Get the article web-ID
+    aminer_article_webID(title)
+
+if len(article_aminerID) > 0:
+    for ar_id in article_aminerID:  # Get the article details
+        aminer_article_Detail(ar_id)
+
+if len(author_aminerID) > 0:
+    aminer_author_info(author_aminerID)  # Get the authors' information
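
Note on the retry lists: the script collects failed lookups in Retry_article_title, Retry_article_aminerID and Retry_author_aminerID but never re-processes them, and the JWT token generated above expires after 60 seconds, so a delayed retry pass would also need a fresh token. A minimal single retry pass (a sketch only, not applied by this patch; it assumes the same module-level lists and functions and a still-valid token) could look like:

# Hypothetical retry pass -- a sketch, not code from webSearch_merge.py
pending_titles, Retry_article_title = Retry_article_title, []
for title in pending_titles:
    aminer_article_webID(title)          # failures re-enter Retry_article_title

pending_ids, Retry_article_aminerID = Retry_article_aminerID, []
for ar_id in pending_ids:
    aminer_article_Detail(ar_id)         # failures re-enter Retry_article_aminerID

if Retry_author_aminerID:
    pending_authors, Retry_author_aminerID = Retry_author_aminerID, []
    aminer_author_info(pending_authors)  # failures re-enter Retry_author_aminerID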