38 lines
1.5 KiB
Python
38 lines
1.5 KiB
Python
import random
|
|
import requests
|
|
from bs4 import BeautifulSoup
|
|
|
|
# Pool of User-Agent strings; header() picks one at random for each
# set of request headers.
uapools = [
    "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.41 Safari/535.1 QQBrowser/6.9.11079.201",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3493.3 Safari/537.36",
    "Mozilla/5.0 (Windows; U; Windows NT 6.1; ) AppleWebKit/534.12 (KHTML, like Gecko) Maxthon/3.0 Safari/534.12",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:6.0) Gecko/20100101 Firefox/6.0",
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11',
    'Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',
    'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50',
    # The closing parenthesis was missing from this entry, producing a
    # malformed User-Agent header whenever it was selected.
    'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',
]
|
|
|
|
def header():
    """Build request headers with a randomly chosen User-Agent.

    Returns:
        A new dict holding a single 'User-Agent' key whose value is drawn
        at random from the module-level ``uapools`` list.
    """
    # A fresh choice on every call, so successive calls may return
    # different User-Agent values.
    return {'User-Agent': random.choice(uapools)}
|
|
|
|
# Standard page-fetch helper
def Link(url, headers, timeout=10):
    """Fetch *url* with HTTP GET and return the parsed HTML.

    Args:
        url: Address to request.
        headers: Dict of HTTP headers passed to ``requests.get``.
        timeout: Seconds to wait for the server before giving up.
            Defaults to 10; pass ``None`` to wait indefinitely, which
            was the behaviour before this parameter existed.

    Returns:
        A ``BeautifulSoup`` object built from the response text with the
        'html.parser' backend, or ``None`` when the request fails
        (connection error, timeout, or a 4xx/5xx status). Failures are
        reported on stdout rather than raised.
    """
    try:
        # Without an explicit timeout requests may block forever on an
        # unresponsive server; a timeout surfaces as a RequestException
        # and is handled below like any other request failure.
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()  # turn 4xx/5xx statuses into exceptions
        html = response.text
        soup = BeautifulSoup(html, 'html.parser')
        return soup
    except requests.exceptions.RequestException as e:
        print("Wrong:", e)
        return None
|