This commit is contained in:
Chenxiao Xia 2023-09-16 18:46:52 +08:00
parent a2284b7b45
commit 34fb579f7c
2 changed files with 4 additions and 6 deletions

View File

@@ -37,7 +37,7 @@ def Author_dict(soup, article_id, Author_list):
author_data = {
"author_id": str(uuid.uuid4()),
"from_article": article_id,
"first _name": Firstname,
"first_name": Firstname,
"last_name": Lastname,
"middle_name": Middlename,
"affiliation": [

View File

@@ -1,5 +1,4 @@
import urllib
import SD_link
import SD_threads
import SD_save
@@ -8,7 +7,7 @@ from urllib.parse import urljoin
'''
爬取网站https://www.springeropen.com
==========运行顺序==========
========== 运行顺序 ==========
1SD_main 获取SpringerOpen网站下所有数学类期刊的链接 -> 获取期刊内部论文列表的链接
2SD_threads 多线程管控 -> 调用SD_scrawl
3SD_scrawl 获取论文详情页链接 -> 调用SD_detail
@@ -61,9 +60,8 @@ SD_threads.Threads(Links)
# json文件汇总
SD_save.Transf()
# # ==========删除所有暂存的小文件(可选,注意备份)===========
# SD_save.delete('./SpringerOpen_buffer/Article_TS/')
# SD_save.delete('./SpringerOpen_buffer/Author_TS/')
# ==========删除所有暂存的小文件(可选,注意备份)===========
SD_save.delete()