From f97195c94dab9ace48e72f7c7f2d30ef9bea6ecc Mon Sep 17 00:00:00 2001 From: ldy <1913292237@qq.com> Date: Fri, 11 Aug 2023 18:05:15 +0800 Subject: [PATCH] Bug Fix: handled exception when the volume website has no title --- 01_EJDE_spider/ejde_main.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/01_EJDE_spider/ejde_main.py b/01_EJDE_spider/ejde_main.py index 3c9c71a..53228d4 100644 --- a/01_EJDE_spider/ejde_main.py +++ b/01_EJDE_spider/ejde_main.py @@ -152,6 +152,11 @@ def process_article(title, article_url): article_soup = BeautifulSoup(html, 'html.parser') article_text = article_soup.get_text() + # Extract title if title == None + if not title: + title_match = re.search(r"
", article_text) + title = str(re.sub(r'<[^>]+>', '', title_match.group(1)).strip()) if title_match else None + # Extract volume volume_match = re.search(r'Vol\. (\d+) \((\d+)\)', article_text) volume = str(volume_match.group(1)) if volume_match else None