Handle exception when the volume website has no title
This commit is contained in:
ldy 2023-08-11 18:05:15 +08:00
parent 3e78e9f48e
commit f97195c94d

View File

@ -152,6 +152,11 @@ def process_article(title, article_url):
article_soup = BeautifulSoup(html, 'html.parser') article_soup = BeautifulSoup(html, 'html.parser')
article_text = article_soup.get_text() article_text = article_soup.get_text()
# Extract title if title == None
if not title:
title_match = re.search(r"<h3>(.*?)<p>", article_text)
title = str(re.sub(r'<[^>]+>', '', title_match.group(1)).strip()) if title_match else None
# Extract volume # Extract volume
volume_match = re.search(r'Vol\. (\d+) \((\d+)\)', article_text) volume_match = re.search(r'Vol\. (\d+) \((\d+)\)', article_text)
volume = str(volume_match.group(1)) if volume_match else None volume = str(volume_match.group(1)) if volume_match else None