From 01c1a7d97830c11404af1e3dcd23fd39641bf31b Mon Sep 17 00:00:00 2001
From: XCX <1361986662@qq.com>
Date: Mon, 31 Jul 2023 18:19:11 +0800
Subject: [PATCH] Changed the code to unify the time format

---
 SpringerOpen_spider/SD_detail.py | 22 ++++++++++++++++------
 SpringerOpen_spider/SD_scrawl.py |  2 +-
 2 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/SpringerOpen_spider/SD_detail.py b/SpringerOpen_spider/SD_detail.py
index 73639c7..2aae1f9 100644
--- a/SpringerOpen_spider/SD_detail.py
+++ b/SpringerOpen_spider/SD_detail.py
@@ -1,4 +1,6 @@
 import uuid
+import calendar
+
 
 # ==========获取细节==========
 def Author_dict(soup, article_id, Author_list):
@@ -20,10 +22,12 @@ def Author_dict(soup, article_id, Author_list):
         Middlename = ''.join(author[1:-1]) if author[1:-1] else None
 
         # Year
-        Year = info.find('span', attrs={'data-test': 'article-publication-year'}).get_text()
+        Year = info.find('span', attrs={'data-test': 'article-publication-year'}).get_text() if info.find\
+            ('span', attrs={'data-test': 'article-publication-year'}) else None
 
         # Affiliation
-        Affiliation = author_info.find('p', class_='c-article-author-affiliation__address').get_text()
+        Affiliation = author_info.find('p', class_='c-article-author-affiliation__address').get_text() if author_info.find\
+            ('p', class_='c-article-author-affiliation__address') else None
 
         # Email
         Email = None    # Can not reach the data
@@ -72,7 +76,7 @@ def Article_dict(soup, url, article_id):
 
     corresponding_author_list = info.find('p', id='corresponding-author-list')
     corresponding_authors = corresponding_author_list.find_all('a')
-    if Corresponding_author is not None:
+    if corresponding_authors is not None:
         for corresponding_author in corresponding_authors:
             corresponding_author = corresponding_author.get_text()
             corresponding_author = [char.replace('-', '') for char in corresponding_author]
@@ -82,14 +86,20 @@ def Article_dict(soup, url, article_id):
     # Submitted_datetime & Published_datetime
     Time = []
 
+    def timeSet(time):  # 'DD Month YYYY' (e.g. '5 July 2023') -> zero-padded 'YYYY-MM-DD' ('2023-07-05')
+        parts = time.split()  # split() without a separator ignores repeated/surrounding whitespace
+        # English month name -> 1-based month number; .index raises ValueError on an unknown name.
+        month = str(list(calendar.month_name).index(parts[1])).zfill(2)
+        return '-'.join((parts[2], month, parts[0].zfill(2)))  # pad the day so every field has fixed width
+
     time_list = info.find('ul', class_='c-bibliographic-information__list')
     times = time_list.find_all('time')
     for time in times:
         time = time.get_text()
         Time.append(time)
 
-    Submit_date = Time[0]
-    Publish_date = Time[-1]
+    Submit_date = timeSet(str(Time[0]))
+    Publish_date = timeSet(str(Time[-1]))
 
     # keyword
     Keyword = []        # A new empty list
@@ -145,4 +155,4 @@ def Article_dict(soup, url, article_id):
         "page": Page,
     }
 
-    return article_data
\ No newline at end of file
+    return article_data
diff --git a/SpringerOpen_spider/SD_scrawl.py b/SpringerOpen_spider/SD_scrawl.py
index d2b8ed2..3f9099c 100644
--- a/SpringerOpen_spider/SD_scrawl.py
+++ b/SpringerOpen_spider/SD_scrawl.py
@@ -12,7 +12,7 @@ def Scrawl(Link, Article_data, Author_data):
     # 访问论文列表页
     headers = SD_link.header()
     soup = SD_link.Link(Link, headers)
-    print(Link)
+    print('Start: ', Link)
 
     # 获得所有论文详情页的链接
     Essay_Ol = soup.find('ol')          # 获取论文列表