Fix bugs
This commit is contained in:
parent
a2284b7b45
commit
34fb579f7c
@ -37,7 +37,7 @@ def Author_dict(soup, article_id, Author_list):
|
||||
author_data = {
|
||||
"author_id": str(uuid.uuid4()),
|
||||
"from_article": article_id,
|
||||
"first _name": Firstname,
|
||||
"first_name": Firstname,
|
||||
"last_name": Lastname,
|
||||
"middle_name": Middlename,
|
||||
"affiliation": [
|
||||
|
||||
@ -1,5 +1,4 @@
|
||||
import urllib
|
||||
|
||||
import SD_link
|
||||
import SD_threads
|
||||
import SD_save
|
||||
@ -8,7 +7,7 @@ from urllib.parse import urljoin
|
||||
'''
|
||||
爬取网站:https://www.springeropen.com
|
||||
|
||||
==========运行顺序==========
|
||||
========== 运行顺序 ==========
|
||||
1、SD_main 获取SpringerOpen网站下所有数学类期刊的链接 -> 获取期刊内部论文列表的链接
|
||||
2、SD_threads 多线程管控 -> 调用SD_scrawl
|
||||
3、SD_scrawl 获取论文详情页链接 -> 调用SD_detail
|
||||
@ -61,9 +60,8 @@ SD_threads.Threads(Links)
|
||||
# json文件汇总
|
||||
SD_save.Transf()
|
||||
|
||||
# # ==========删除所有暂存的小文件(可选,注意备份)===========
|
||||
# SD_save.delete('./SpringerOpen_buffer/Article_TS/')
|
||||
# SD_save.delete('./SpringerOpen_buffer/Author_TS/')
|
||||
# ==========删除所有暂存的小文件(可选,注意备份)===========
|
||||
SD_save.delete()
|
||||
|
||||
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user