报错代码
# @author tianyi
# {Time}-2022-09-11 08:40
import urllib.parse
import urllib.request
def create_request(page):
    """Build the GET request for one 20-item page of the Douban chart list.

    Args:
        page: 1-based page number; page 1 corresponds to ``start=0``.

    Returns:
        A ``urllib.request.Request`` carrying a browser-like User-Agent.

    NOTE: ``base_url`` already ends in ``start=0&limit=20`` and the encoded
    paging parameters are appended to it with no separator, so the URL ends
    in ``...&start=0&limit=20start=<offset>&limit=20``. This is the failing
    version of the script: the run shown after it gets HTTP Error 500.
    """
    base_url = 'https://movie.douban.com/j/chart/top_list?type=7&interval_id=100%3A90&action=&start=0&limit=20'
    paging = {'start': (page - 1) * 20, 'limit': 20}
    # Debug output: the raw paging dict, a separator, then the final URL.
    print(paging)
    print('---------------------------------------------')
    url = base_url + urllib.parse.urlencode(paging)
    print(url)
    return urllib.request.Request(
        url=url,
        headers={
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.0.0 Safari/537.36',
        },
    )
def get_connect(request):
    """Send *request* and return the response body as text.

    Args:
        request: the ``urllib.request.Request`` to send.

    Returns:
        The whole response body decoded as UTF-8.

    Raises:
        urllib.error.HTTPError: raised by ``urlopen`` when the server
            answers with an error status (HTTP 500 in the run shown below).
    """
    response = urllib.request.urlopen(request)
    raw_body = response.read()
    return raw_body.decode('utf-8')
def down_load(page, connect):
    """Write the text *connect* to ``douban_<page>.json``.

    The file is created in the current working directory, overwritten if
    it already exists, and encoded as UTF-8.

    Args:
        page: page number used to build the file name.
        connect: text to store in the file.
    """
    filename = f'douban_{page!s}.json'
    with open(filename, mode='w', encoding='utf-8') as out_file:
        out_file.write(connect)
if __name__ == '__main__':
    # Ask for the first and last page (both inclusive) to download.
    start_page = int(input('请输入开始的页数:'))
    end_page = int(input('请输入结束的页数:'))
    pages = range(start_page, end_page + 1)
    for page in pages:
        # Build the request and show the Request object for debugging.
        request = create_request(page)
        print(request)
        # Fetch the page body, then store it as douban_<page>.json.
        connect = get_connect(request)
        down_load(page, connect)
之后报下面的错误:
C:\Users\27964\AppData\Local\Microsoft\WindowsApps\python3.10.exe H:/pythonProject/pythonProject2/练习3/ajax_get.py
请输入开始的页数:1
请输入结束的页数:1
{'start': 0, 'limit': 20}
---------------------------------------------
https://movie.douban.com/j/chart/top_list?type=7&interval_id=100%3A90&action=&start=0&limit=20start=0&limit=20
<urllib.request.Request object at 0x000001F566883FD0>
Traceback (most recent call last):
File "H:\pythonProject\pythonProject2\练习3\ajax_get.py", line 47, in <module>
connect = get_connect(request)
File "H:\pythonProject\pythonProject2\练习3\ajax_get.py", line 30, in get_connect
response = urllib.request.urlopen(request)
File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.10_3.10.2032.0_x64__qbz5n2kfra8p0\lib\urllib\request.py", line 216, in urlopen
return opener.open(url, data, timeout)
File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.10_3.10.2032.0_x64__qbz5n2kfra8p0\lib\urllib\request.py", line 525, in open
response = meth(req, response)
File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.10_3.10.2032.0_x64__qbz5n2kfra8p0\lib\urllib\request.py", line 634, in http_response
response = self.parent.error(
File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.10_3.10.2032.0_x64__qbz5n2kfra8p0\lib\urllib\request.py", line 563, in error
return self._call_chain(*args)
File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.10_3.10.2032.0_x64__qbz5n2kfra8p0\lib\urllib\request.py", line 496, in _call_chain
result = func(*args)
File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.10_3.10.2032.0_x64__qbz5n2kfra8p0\lib\urllib\request.py", line 643, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 500: INTERNAL SERVER ERROR
问题解决:
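发现报错代码:base_url 的末尾已经写了 start=0&limit=20,而 url = base_url + data 又把 urlencode 之后的 start、limit 参数直接拼在后面(中间没有 &),最终得到 …&start=0&limit=20start=0&limit=20 这样的错误地址,所以服务器返回 HTTP Error 500。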
修改代码:
# @author tianyi
# {Time}-2022-09-11 08:40
import urllib.parse
import urllib.request
def create_request(page, *, limit=20):
    """Build the GET request for one page of the Douban chart list.

    The base URL ends in ``action=&`` so the encoded ``start``/``limit``
    pair can be appended directly to form a valid query string.

    Args:
        page: 1-based page number; page 1 corresponds to ``start=0``.
        limit: number of items per page. It drives both the ``limit``
            query parameter and the ``start`` offset, so the two always
            agree. Defaults to 20.

    Returns:
        A ``urllib.request.Request`` carrying a browser-like User-Agent.
    """
    base_url = 'https://movie.douban.com/j/chart/top_list?type=7&interval_id=100%3A90&action=&'
    data = {
        'start': (page - 1) * limit,
        'limit': limit,
    }
    # Debug output: the raw paging dict, a separator, then the final URL.
    print(data)
    print('---------------------------------------------')
    url = base_url + urllib.parse.urlencode(data)
    print(url)
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.0.0 Safari/537.36',
    }
    return urllib.request.Request(url=url, headers=header)
def get_connect(request):
    """Send *request* and return the response body as text.

    Args:
        request: the ``urllib.request.Request`` to send.

    Returns:
        The whole response body decoded as UTF-8.

    Raises:
        urllib.error.HTTPError: raised by ``urlopen`` when the server
            answers with an error status.
    """
    # The context manager closes the response (and its connection) even
    # if reading or decoding fails.
    with urllib.request.urlopen(request) as response:
        return response.read().decode('utf-8')
def down_load(page, connect):
    """Save one downloaded page to disk.

    Writes *connect* as UTF-8 text to ``douban_<page>.json`` in the current
    working directory, replacing any existing file of that name.

    Args:
        page: page number used to build the file name.
        connect: text to store in the file.
    """
    target = ''.join(('douban_', str(page), '.json'))
    with open(target, 'w', encoding='utf-8') as handle:
        handle.write(connect)
if __name__ == '__main__':
    # Read the inclusive page range from the user.
    first_page = int(input('请输入开始的页数:'))
    last_page = int(input('请输入结束的页数:'))
    for page in range(first_page, last_page + 1):
        request = create_request(page)
        # Show the Request object for debugging before sending it.
        print(request)
        # Fetch the page and store it as douban_<page>.json.
        down_load(page, get_connect(request))
运行成功:
C:\Users\27964\AppData\Local\Microsoft\WindowsApps\python3.10.exe H:/pythonProject/pythonProject2/练习3/ajax_get.py
请输入开始的页数:1
请输入结束的页数:10
{'start': 0, 'limit': 20}
---------------------------------------------
https://movie.douban.com/j/chart/top_list?type=7&interval_id=100%3A90&action=&start=0&limit=20
<urllib.request.Request object at 0x000001D9044B3FD0>
{'start': 20, 'limit': 20}
---------------------------------------------
https://movie.douban.com/j/chart/top_list?type=7&interval_id=100%3A90&action=&start=20&limit=20
<urllib.request.Request object at 0x000001D904867220>
{'start': 40, 'limit': 20}
---------------------------------------------
https://movie.douban.com/j/chart/top_list?type=7&interval_id=100%3A90&action=&start=40&limit=20
<urllib.request.Request object at 0x000001D9044B3FD0>
{'start': 60, 'limit': 20}
---------------------------------------------
https://movie.douban.com/j/chart/top_list?type=7&interval_id=100%3A90&action=&start=60&limit=20
<urllib.request.Request object at 0x000001D904866A10>
{'start': 80, 'limit': 20}
---------------------------------------------
https://movie.douban.com/j/chart/top_list?type=7&interval_id=100%3A90&action=&start=80&limit=20
<urllib.request.Request object at 0x000001D9044B3FD0>
{'start': 100, 'limit': 20}
---------------------------------------------
https://movie.douban.com/j/chart/top_list?type=7&interval_id=100%3A90&action=&start=100&limit=20
<urllib.request.Request object at 0x000001D904866A40>
{'start': 120, 'limit': 20}
---------------------------------------------
https://movie.douban.com/j/chart/top_list?type=7&interval_id=100%3A90&action=&start=120&limit=20
<urllib.request.Request object at 0x000001D9044B3FD0>
{'start': 140, 'limit': 20}
---------------------------------------------
https://movie.douban.com/j/chart/top_list?type=7&interval_id=100%3A90&action=&start=140&limit=20
<urllib.request.Request object at 0x000001D904864B20>
{'start': 160, 'limit': 20}
---------------------------------------------
https://movie.douban.com/j/chart/top_list?type=7&interval_id=100%3A90&action=&start=160&limit=20
<urllib.request.Request object at 0x000001D9044B3FD0>
{'start': 180, 'limit': 20}
---------------------------------------------
https://movie.douban.com/j/chart/top_list?type=7&interval_id=100%3A90&action=&start=180&limit=20
<urllib.request.Request object at 0x000001D904867220>
爬到的数据:
文章来源地址https://www.toymoban.com/news/detail-593027.html
到了这里,关于[Python3]爬虫HTTP Error 500错误,报错信息:urllib.error.HTTPError: HTTP Error 500: INTERNAL SERVER ERROR的文章就介绍完了。如果您还想了解更多内容,请在右上角搜索TOY模板网以前的文章或继续浏览下面的相关文章,希望大家以后多多支持TOY模板网!