新手刚开始学爬虫,不知道问题出在哪:requests 已经是最新版本,用 get 获取公司内部网页不会报错,但获取外部网页(如下面的新浪)时会报错,是否是防火墙的问题?
安装了Anaconda3后,在Jupyter notebook里写入如下代码:
import requests
res = requests.get('http://www.sina.com.cn/china')
res.encoding='utf-8'
print(res.text)
ConnectionRefusedError Traceback (most recent call last)
D:\Anaconda3\lib\site-packages\requests\packages\urllib3\connection.py in _new_conn(self)
140 conn = connection.create_connection(
--> 141 (self.host, self.port), self.timeout, **extra_kw)
142
D:\Anaconda3\lib\site-packages\requests\packages\urllib3\util\connection.py in create_connection(address, timeout, source_address, socket_options)
82 if err is not None:
---> 83 raise err
84
D:\Anaconda3\lib\site-packages\requests\packages\urllib3\util\connection.py in create_connection(address, timeout, source_address, socket_options)
72 sock.bind(source_address)
---> 73 sock.connect(sa)
74 return sock
ConnectionRefusedError: [WinError 10061] No connection could be made because the target machine actively refused it
During handling of the above exception, another exception occurred:
NewConnectionError Traceback (most recent call last)
D:\Anaconda3\lib\site-packages\requests\packages\urllib3\connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
599 body=body, headers=headers,
--> 600 chunked=chunked)
601
D:\Anaconda3\lib\site-packages\requests\packages\urllib3\connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
355 else:
--> 356 conn.request(method, url, **httplib_request_kw)
357
D:\Anaconda3\lib\http\client.py in request(self, method, url, body, headers, encode_chunked)
1238 """Send a complete request to the server."""
-> 1239 self._send_request(method, url, body, headers, encode_chunked)
1240
D:\Anaconda3\lib\http\client.py in _send_request(self, method, url, body, headers, encode_chunked)
1284 body = _encode(body, 'body')
-> 1285 self.endheaders(body, encode_chunked=encode_chunked)
1286
D:\Anaconda3\lib\http\client.py in endheaders(self, message_body, encode_chunked)
1233 raise CannotSendHeader()
-> 1234 self._send_output(message_body, encode_chunked=encode_chunked)
1235
D:\Anaconda3\lib\http\client.py in _send_output(self, message_body, encode_chunked)
1025 del self._buffer[:]
-> 1026 self.send(msg)
1027
D:\Anaconda3\lib\http\client.py in send(self, data)
963 if self.auto_open:
--> 964 self.connect()
965 else:
D:\Anaconda3\lib\site-packages\requests\packages\urllib3\connection.py in connect(self)
165 def connect(self):
--> 166 conn = self._new_conn()
167 self._prepare_conn(conn)
D:\Anaconda3\lib\site-packages\requests\packages\urllib3\connection.py in _new_conn(self)
149 raise NewConnectionError(
--> 150 self, "Failed to establish a new connection: %s" % e)
151
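NewConnectionError: <requests.packages.urllib3.connection.HTTPConnection object at 0x...>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it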
During handling of the above exception, another exception occurred:
MaxRetryError Traceback (most recent call last)
D:\Anaconda3\lib\site-packages\requests\adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
422 retries=self.max_retries,
--> 423 timeout=timeout
424 )
D:\Anaconda3\lib\site-packages\requests\packages\urllib3\connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
648 retries = retries.increment(method, url, error=e, _pool=self,
--> 649 _stacktrace=sys.exc_info()[2])
650 retries.sleep()
D:\Anaconda3\lib\site-packages\requests\packages\urllib3\util\retry.py in increment(self, method, url, response, error, _pool, _stacktrace)
375 if new_retry.is_exhausted():
--> 376 raise MaxRetryError(_pool, url, error or ResponseError(cause))
377
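MaxRetryError: HTTPConnectionPool(host='www.sina.com.cn', port=80): Max retries exceeded with url: /china (Caused by NewConnectionError('<requests.packages.urllib3.connection.HTTPConnection object at 0x...>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it',))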
During handling of the above exception, another exception occurred:
ConnectionError Traceback (most recent call last)
1 import requests
----> 2 res = requests.get('http://www.sina.com.cn/china')
3 res.encoding='utf-8'
4 print(res.text)
D:\Anaconda3\lib\site-packages\requests\api.py in get(url, params, **kwargs)
68
69 kwargs.setdefault('allow_redirects', True)
---> 70 return request('get', url, params=params, **kwargs)
71
72
D:\Anaconda3\lib\site-packages\requests\api.py in request(method, url, **kwargs)
54 # cases, and look like a memory leak in others.
55 with sessions.Session() as session:
---> 56 return session.request(method=method, url=url, **kwargs)
57
58
D:\Anaconda3\lib\site-packages\requests\sessions.py in request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
486 }
487 send_kwargs.update(settings)
--> 488 resp = self.send(prep, **send_kwargs)
489
490 return resp
D:\Anaconda3\lib\site-packages\requests\sessions.py in send(self, request, **kwargs)
607
608 # Send the request
--> 609 r = adapter.send(request, **kwargs)
610
611 # Total elapsed time of the request (approximately)
D:\Anaconda3\lib\site-packages\requests\adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
485 raise ProxyError(e, request=request)
486
--> 487 raise ConnectionError(e, request=request)
488
489 except ClosedPoolError as e:
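ConnectionError: HTTPConnectionPool(host='www.sina.com.cn', port=80): Max retries exceeded with url: /china (Caused by NewConnectionError('<requests.packages.urllib3.connection.HTTPConnection object at 0x...>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it',))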
从报错信息看,被拒绝访问了。
建议使用官方 Python 发行版,不要使用第三方安装包。
No connection could be made because the target machine actively refused it
建立连接失败,有可能是你们的 IP 被列入了黑名单,有可能是你违反了 robots.txt,也有可能就是对方不允许你的 UA 建立连接。