From 35f5f2ac5e39a53061369a86993cf62682da1356 Mon Sep 17 00:00:00 2001 From: ldy <1913292237@qq.com> Date: Fri, 11 Aug 2023 11:42:02 +0800 Subject: [PATCH] Optimization: clustered error files into a folder --- 01_EJDE_spider/ejde_main.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/01_EJDE_spider/ejde_main.py b/01_EJDE_spider/ejde_main.py index 1876b22..86b8b68 100644 --- a/01_EJDE_spider/ejde_main.py +++ b/01_EJDE_spider/ejde_main.py @@ -155,6 +155,8 @@ def process_article(title, article_url): # Extract volume volume_match = re.search(r'Vol\. (\d+) \((\d+)\)', article_text) volume = str(volume_match.group(1)) if volume_match else None + if not volume: + volume = str(volume_match.group(2)) if volume_match else None # Extract pp pp_match = re.search(r'pp\. (\d+-\d+)', article_text) @@ -374,15 +376,15 @@ if len(authorData) > 0: # Save error record if len(totallyFailedData) > 0: - ejde_save.save_data(failedData, "", "Failed_article_record.json") + ejde_save.save_data(failedData, "Error", "Failed_article_record.json") print("Total failed processing paper:", len(totallyFailedData)) if len(failedVolData) > 0: - ejde_save.save_data(failedVolData, "", "Failed_volume_record.json") + ejde_save.save_data(failedVolData, "Error", "Failed_volume_record.json") print("Total failed fetching volume:", len(failedVolData)) if len(failedFormatData) > 0: - ejde_save.save_data(failedFormatData, "", "Failed_format_record.json") + ejde_save.save_data(failedFormatData, "Error", "Failed_format_record.json") print("Total failed searching article:", len(failedFormatData)) # Total running time