Fix bugs
This commit is contained in:
parent
a2284b7b45
commit
34fb579f7c
@ -37,7 +37,7 @@ def Author_dict(soup, article_id, Author_list):
|
|||||||
author_data = {
|
author_data = {
|
||||||
"author_id": str(uuid.uuid4()),
|
"author_id": str(uuid.uuid4()),
|
||||||
"from_article": article_id,
|
"from_article": article_id,
|
||||||
"first _name": Firstname,
|
"first_name": Firstname,
|
||||||
"last_name": Lastname,
|
"last_name": Lastname,
|
||||||
"middle_name": Middlename,
|
"middle_name": Middlename,
|
||||||
"affiliation": [
|
"affiliation": [
|
||||||
|
|||||||
@ -1,5 +1,4 @@
|
|||||||
import urllib
|
import urllib
|
||||||
|
|
||||||
import SD_link
|
import SD_link
|
||||||
import SD_threads
|
import SD_threads
|
||||||
import SD_save
|
import SD_save
|
||||||
@ -8,7 +7,7 @@ from urllib.parse import urljoin
|
|||||||
'''
|
'''
|
||||||
爬取网站:https://www.springeropen.com
|
爬取网站:https://www.springeropen.com
|
||||||
|
|
||||||
==========运行顺序==========
|
========== 运行顺序 ==========
|
||||||
1、SD_main 获取SpringOpen网站下所有数学类期刊的链接 -> 获取期刊内部论文列表的链接
|
1、SD_main 获取SpringOpen网站下所有数学类期刊的链接 -> 获取期刊内部论文列表的链接
|
||||||
2、SD_threads 多线程管控 -> 调用SD_scrawl
|
2、SD_threads 多线程管控 -> 调用SD_scrawl
|
||||||
3、SD_scrawl 获取论文详情页链接 -> 调用SD_detail
|
3、SD_scrawl 获取论文详情页链接 -> 调用SD_detail
|
||||||
@ -61,9 +60,8 @@ SD_threads.Threads(Links)
|
|||||||
# json文件汇总
|
# json文件汇总
|
||||||
SD_save.Transf()
|
SD_save.Transf()
|
||||||
|
|
||||||
# # ==========删除所有暂存的小文件(可选,注意备份)===========
|
# ==========删除所有暂存的小文件(可选,注意备份)===========
|
||||||
# SD_save.delete('./SpringerOpen_buffer/Article_TS/')
|
SD_save.delete()
|
||||||
# SD_save.delete('./SpringerOpen_buffer/Author_TS/')
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user