Remove unnecessary retry logic (the `@retry` decorator and the redundant `wait(futures)` call)

This commit is contained in:
ldy 2023-08-03 12:06:22 +08:00
parent e49e829682
commit e9bdb9cdff
2 changed files with 12 additions and 9 deletions

View File

@ -4,9 +4,8 @@ import re
import ejde_save
from datetime import datetime
from concurrent.futures import ThreadPoolExecutor, as_completed, wait
from retrying import retry
from bs4 import BeautifulSoup
from concurrent.futures import ThreadPoolExecutor, as_completed
'''
爬取网站'ejde.math.txstate.edu'
@ -24,7 +23,6 @@ def datetime_transform(date):
# Article and author detail
@retry(wait_fixed=5000, stop_max_attempt_number=5)
def process_article(url):
response = requests.get(url)
response.raise_for_status()
@ -61,13 +59,19 @@ def process_article(url):
# Extract submission date
match = re.search(r"Submitted ([A-Za-z]+\s+\d{1,2}, \d{4})", html)
submitted_date = match.group(1) if match else None
submitted_date = match.group(1)
if match:
submitted_date = datetime_transform(submitted_date)
else:
submitted_date = None
# Extract publication date
match = re.search(r"Published ([A-Za-z]+\s+\d{1,2}, \d{4})", html)
publish_date = match.group(1) if match else None
publish_date = match.group(1)
if match:
publish_date = datetime_transform(publish_date)
else:
publish_date = None
# Extract MSC
msc_match = re.search(r'Math Subject Classifications: (.*?)\n', html)
@ -182,8 +186,6 @@ for future in as_completed(futures):
except Exception as e:
print("An error occurred:", str(e))
wait(futures)
# Save remaining data
if len(articleData) > 0:
ejde_save.save_data(articleData, "Article_TS", str(uuid.uuid4()) + ".json")

View File

@ -93,5 +93,6 @@ def delete():
file_path = os.path.join(folder_path, file_name)
if os.path.isfile(file_path):
os.remove(file_path)
os.rmdir(folder_path)
print('\nAttention: The temporary storage files have been deleted!')