From 34fb579f7cc990e5377a931ba3fcddc8d8e5b78d Mon Sep 17 00:00:00 2001 From: Chenxiao Xia Date: Sat, 16 Sep 2023 18:46:52 +0800 Subject: [PATCH] Fix bugs --- 04_SpringerOpen_spider/SD_detail.py | 2 +- 04_SpringerOpen_spider/SD_main.py | 8 +++----- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/04_SpringerOpen_spider/SD_detail.py b/04_SpringerOpen_spider/SD_detail.py index 76535be..2f263a1 100644 --- a/04_SpringerOpen_spider/SD_detail.py +++ b/04_SpringerOpen_spider/SD_detail.py @@ -37,7 +37,7 @@ def Author_dict(soup, article_id, Author_list): author_data = { "author_id": str(uuid.uuid4()), "from_article": article_id, - "first _name": Firstname, + "first_name": Firstname, "last_name": Lastname, "middle_name": Middlename, "affiliation": [ diff --git a/04_SpringerOpen_spider/SD_main.py b/04_SpringerOpen_spider/SD_main.py index a4b5c5d..bf2ed4d 100644 --- a/04_SpringerOpen_spider/SD_main.py +++ b/04_SpringerOpen_spider/SD_main.py @@ -1,5 +1,4 @@ import urllib - import SD_link import SD_threads import SD_save @@ -8,7 +7,7 @@ from urllib.parse import urljoin ''' 爬取网站:https://www.springeropen.com - ==========运行顺序========== + ========== 运行顺序 ========== 1、SD_main 获取SpringOpen网站下所有数学类期刊的链接 -> 获取期刊内部论文列表的链接 2、SD_threads 多线程管控 -> 调用SD_scrawl 3、SD_scrawl 获取论文详情页链接 -> 调用SD_detail @@ -61,9 +60,8 @@ SD_threads.Threads(Links) # json文件汇总 SD_save.Transf() -# # ==========删除所有暂存的小文件(可选,注意备份)=========== -# SD_save.delete('./SpringerOpen_buffer/Article_TS/') -# SD_save.delete('./SpringerOpen_buffer/Author_TS/') +# ==========删除所有暂存的小文件(可选,注意备份)=========== +SD_save.delete()