diff --git a/WorldScientific b/WorldScientific
new file mode 100644
index 0000000..3b268f2
--- /dev/null
+++ b/WorldScientific
@@ -0,0 +1,174 @@
+import json
+import re
+import uuid
+
+import requests
+from bs4 import BeautifulSoup, Tag
+
+
+# Send an HTTP request for the article page; a browser-like User-Agent helps
+# avoid the site's default bot blocking (assumption: adjust as needed)
+url = 'https://www.worldscientific.com/doi/10.1142/S0219891623500017'
+headers = {'User-Agent': 'Mozilla/5.0'}
+response = requests.get(url, headers=headers)
+response.raise_for_status()  # fail fast on HTTP errors
+html_content = response.content
+
+# Parse the page with BeautifulSoup
+soup = BeautifulSoup(html_content, 'html.parser')
+
+# Extract and print the title
+title = soup.find(class_="citation__title").text.strip()
+print(title)
+
+
+# Extract every element with class "author-type"
+author_type_tags = soup.find_all(class_="author-type")
+
+# Collect and print the extracted text
+author_type_list = [tag.text.strip() for tag in author_type_tags]
+print(author_type_list)
+
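+# The loop below assumes markup of roughly this shape (hypothetical sample,
+# not copied from the page):
+#   <span class="author-type">*</span>
+#   <p>Department of Mathematics, Some University, ...</p>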
+affiliations = []
+for author_type_tag in author_type_tags:
+    # Extract the text of the following <p> tag, which holds the affiliation
+    affiliation = author_type_tag.find_next_sibling('p')
+    if affiliation:
+        content_affiliation = affiliation.get_text()
+        affiliations.append(content_affiliation)
+        print(content_affiliation)
+    else:
+        print("No <p> tag found after class=\"author-type\"")
+
+
+# Extract the author names and build the author dictionary
+author_tags = soup.find(class_="rlist--inline loa mobile-authors visible-xs").text
+
+# Split on commas and the word "and" (stripping every space, as before,
+# would also mangle the names themselves)
+author_list = [name.strip() for name in re.split(r',|\band\b', author_tags) if name.strip()]
+
+authors_dict = {}
+for i, author in enumerate(author_list, 1):
+    authors_dict[f"author_{i}"] = author
+    # Pair each author with the affiliation collected above; this assumes the
+    # affiliations appear in the same order as the authors
+    if i <= len(affiliations):
+        authors_dict[f"affiliation_{i}"] = affiliations[i - 1]
+
+print(authors_dict)
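+# Expected shape (illustrative values only):
+# {"author_1": "First Author", "affiliation_1": "Some University, City", ...}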
+
+# Extract the keywords: find the div with id "keywords"
+keywords_div = soup.find('div', id='keywords')
+
+# Collect the text of each keyword link
+keywords = []
+if keywords_div:
+    keyword_items = keywords_div.find_all('a')
+    keywords = [item.text for item in keyword_items]
+
+print(keywords)
+
+
+
+# Regex matching a div whose entire content is the AMSC line
+pattern = re.compile(r'^<div>AMSC: .*</div>$')
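+# e.g. str(tag) == '<div>AMSC: 35L65, 35L67</div>' (hypothetical sample)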
+
+# Find the div elements matching the regex
+divs = soup.find_all(lambda tag: tag.name == 'div' and re.match(pattern, str(tag)))
+
+# Extract the AMSC codes from each matching div; default to an empty list so
+# the record below never hits a NameError when nothing matches
+amsc_values = []
+for div in divs:
+    amsc_text = div.text
+    # Drop the "AMSC: " prefix and split into individual codes
+    amsc_values = amsc_text.replace('AMSC: ', '').strip().split(', ')
+    print("AMSC", amsc_values)
+
+
+# Find the received date: define the matching pattern
+pattern = re.compile('^received', re.IGNORECASE)
+
+# Find the matching div element
+received_div = soup.find('div', string=pattern)
+
+# Extract the content, falling back to an empty string if the div is missing
+received_text = received_div.text if received_div else ""
+
+print(received_text)
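+# e.g. "Received 14 June 2022" (illustrative; the exact wording depends on the page)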
+
+
+
+# Find the publication date: define the matching pattern
+pattern1 = re.compile('^Published', re.IGNORECASE)
+
+# Find the matching div element
+published_div = soup.find('div', string=pattern1)
+
+# Extract the content, falling back to an empty string if the div is missing
+published_text = published_div.text if published_div else ""
+
+print(published_text)
+
+# Find the DOI link and extract the DOI itself
+doi_element = soup.find("span", class_="epub-section__item").find("a")
+doi = doi_element['href'].split("doi.org/")[-1]  # keep only the part after doi.org/
+
+print("DOI:", doi)
+
+
+
+# Find the links with class "article__tocHeading"
+links = soup.find_all('a', class_='article__tocHeading')
+
+# Walk the links and pull out volume, issue, pages and year; default to empty
+# strings so the record below never hits a NameError
+vol_number = issue_number = page_range = publication_year = ""
+for link in links:
+    text = link.text  # the link's text content
+
+    # Extract the fields with a regular expression
+    match = re.search(r'Vol\. (\d+), No\. (\d+), pp\. (\d+-\d+) \((\d+)\)', text)
+
+    if match:
+        vol_number = match.group(1)
+        issue_number = match.group(2)
+        page_range = match.group(3)
+        publication_year = match.group(4)
+
+        # Print the extracted data
+        print("Volume Number:", vol_number)
+        print("Issue Number:", issue_number)
+        print("Page Range:", page_range)
+        print("Publication Year:", publication_year)
+    else:
+        print("No match found.")
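+# The regex above expects heading text like "Vol. 20, No. 01, pp. 1-30 (2023)"
+# (illustrative format, not taken from the page)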
+
+data = {
+    "article_id": str(uuid.uuid4()),
+    "Author": authors_dict,
+    "Title": title,
+    "received time": received_text,
+    "publish time": published_text,
+    "keywords": keywords,
+    "Volume": vol_number,
+    "issue": issue_number,
+    "page": page_range,
+    "DOI": doi,
+    "url": url,
+    "journal": "worldscientific",
+    "MSC": amsc_values,
+}
+
+
+class CustomJSONEncoder(json.JSONEncoder):
+    """Fall back to str() for bs4 Tag objects, which json cannot serialize natively."""
+    def default(self, obj):
+        if isinstance(obj, Tag):
+            return str(obj)
+        return super().default(obj)
+
+# Serialize with the custom JSON encoder
+with open("data.json", "w", encoding="utf-8") as f:
+    json.dump(data, f, indent=2, ensure_ascii=False, cls=CustomJSONEncoder)
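+
+# Optional sanity check (a minimal sketch, assuming the dump above succeeded):
+# reload the file and confirm the record round-trips
+with open("data.json", encoding="utf-8") as f:
+    print(json.load(f)["Title"])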