deleted unnecessary retrying commands
This commit is contained in:
parent
e49e829682
commit
e9bdb9cdff
@ -4,9 +4,8 @@ import re
|
||||
import ejde_save
|
||||
|
||||
from datetime import datetime
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed, wait
|
||||
from retrying import retry
|
||||
from bs4 import BeautifulSoup
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
|
||||
'''
|
||||
爬取网站:'ejde.math.txstate.edu'
|
||||
@ -24,7 +23,6 @@ def datetime_transform(date):
|
||||
|
||||
|
||||
# Article and author detail
|
||||
@retry(wait_fixed=5000, stop_max_attempt_number=5)
|
||||
def process_article(url):
|
||||
response = requests.get(url)
|
||||
response.raise_for_status()
|
||||
@ -61,13 +59,19 @@ def process_article(url):
|
||||
|
||||
# Extract submission date
|
||||
match = re.search(r"Submitted ([A-Za-z]+\s+\d{1,2}, \d{4})", html)
|
||||
submitted_date = match.group(1) if match else None
|
||||
submitted_date = datetime_transform(submitted_date)
|
||||
submitted_date = match.group(1)
|
||||
if match:
|
||||
submitted_date = datetime_transform(submitted_date)
|
||||
else:
|
||||
submitted_date = None
|
||||
|
||||
# Extract publication date
|
||||
match = re.search(r"Published ([A-Za-z]+\s+\d{1,2}, \d{4})", html)
|
||||
publish_date = match.group(1) if match else None
|
||||
publish_date = datetime_transform(publish_date)
|
||||
publish_date = match.group(1)
|
||||
if match:
|
||||
publish_date = datetime_transform(publish_date)
|
||||
else:
|
||||
publish_date = None
|
||||
|
||||
# Extract MSC
|
||||
msc_match = re.search(r'Math Subject Classifications: (.*?)\n', html)
|
||||
@ -182,8 +186,6 @@ for future in as_completed(futures):
|
||||
except Exception as e:
|
||||
print("An error occurred:", str(e))
|
||||
|
||||
wait(futures)
|
||||
|
||||
# Save remaining data
|
||||
if len(articleData) > 0:
|
||||
ejde_save.save_data(articleData, "Article_TS", str(uuid.uuid4()) + ".json")
|
||||
|
||||
@ -93,5 +93,6 @@ def delete():
|
||||
file_path = os.path.join(folder_path, file_name)
|
||||
if os.path.isfile(file_path):
|
||||
os.remove(file_path)
|
||||
os.rmdir(folder_path)
|
||||
|
||||
print('\nAttention: The temporary storage files have been deleted!')
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user