Skip to content

Commit

Permalink
fix ext
Browse files Browse the repository at this point in the history
  • Loading branch information
xadupre committed Sep 7, 2024
1 parent 1c03557 commit 441e09c
Show file tree
Hide file tree
Showing 3 changed files with 168 additions and 7 deletions.
4 changes: 2 additions & 2 deletions _doc/sg_execution_times.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

Computation times
=================
**00:01.098** total execution time for 1 file **from all galleries**:
**00:00.945** total execution time for 1 file **from all galleries**:

.. container::

Expand All @@ -33,5 +33,5 @@ Computation times
- Time
- Mem (MB)
* - :ref:`sphx_glr_auto_examples_plot_logistic_decision.py` (``examples/plot_logistic_decision.py``)
- 00:01.098
- 00:00.945
- 0.0
164 changes: 164 additions & 0 deletions mlstatpy/ext_test_case.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,170 @@
from numpy.testing import assert_allclose


class InternetException(RuntimeError):
    """Error raised by :func:`get_url_content_timeout` when a URL cannot be retrieved."""


def get_url_content_timeout(
    url,
    timeout=10,
    output=None,
    encoding="utf8",
    raise_exception=True,
    chunk=None,
    fLOG=None,
):
    """
    Downloads a file from internet (by default, it assumes
    it is text information, otherwise, encoding should be None).

    :param url: (str) url
    :param timeout: (int) in seconds, passed to ``urlopen``;
        -1 means no explicit timeout is given
    :param output: (str) if not None, the content is stored in that file
    :param encoding: (str) utf8 by default, but if it is None,
        the returned information is binary
    :param raise_exception: (bool) True to raise an exception when the
        download fails with a known network/URL error, False to emit a
        ``ResourceWarning`` and return None instead
    :param chunk: (int|None) save data every chunk bytes
        (requires *output* to be not None)
    :param fLOG: logging function (only applies when chunk is not None)
    :return: content of the url (``str`` if *encoding* is not None,
        ``bytes`` otherwise), or None when *chunk* is not None (the data
        is only written to *output*) or when the download failed and
        *raise_exception* is False

    If the function automatically detects that the downloaded data is in gzip
    format, it will decompress it (only when *chunk* is None).

    The function raises the exception :class:`InternetException`.
    An unexpected exception is always propagated: wrapped into
    :class:`InternetException` if *raise_exception* is True, re-raised
    unchanged after a warning otherwise.
    """
    import codecs
    import gzip
    import socket
    import warnings
    import urllib.error as urllib_error
    import urllib.request as urllib_request
    import http.client as http_client

    try:
        from http.client import InvalidURL
    except ImportError:
        InvalidURL = ValueError

    def save_content(content, append=False):
        "Writes (or appends) *content* to *output*, as text or as bytes."
        mode = "a" if append else "w"
        if encoding is not None:
            with open(output, mode, encoding=encoding) as f:
                f.write(content)
        else:
            with open(output, mode + "b") as f:
                f.write(content)

    # -1 means "no explicit timeout": let urlopen use its own default.
    open_kwargs = {} if timeout == -1 else {"timeout": timeout}
    res = None

    try:
        if chunk is not None:
            # Kept inside the try block on purpose: this ValueError has always
            # been reported through the handlers below.
            if output is None:
                raise ValueError("output cannot be None if chunk is not None")
            # An incremental decoder keeps the bytes of a character split
            # across two reads until the character is complete.
            decoder = (
                None
                if encoding is None
                else codecs.getincrementaldecoder(encoding)()
            )
            downloaded = 0
            append = False  # the first write truncates, the next ones append
            with urllib_request.urlopen(url, **open_kwargs) as ur:
                while True:
                    block = ur.read(chunk)
                    downloaded += len(block)
                    if fLOG is not None:
                        fLOG("[get_url_content_timeout] downloaded", downloaded, "bytes")
                    at_end = len(block) == 0
                    if decoder is None:
                        piece = block
                    else:
                        # final=True on the last call reports a truncated
                        # multi-byte sequence instead of dropping it silently.
                        piece = decoder.decode(block, final=at_end)
                    if len(piece) > 0:
                        save_content(piece, append)
                        append = True
                    if at_end:
                        break
        else:
            with urllib_request.urlopen(url, **open_kwargs) as ur:
                res = ur.read()
    except (
        urllib_error.HTTPError,
        urllib_error.URLError,
        ConnectionRefusedError,
        socket.timeout,
        ConnectionResetError,
        http_client.BadStatusLine,
        http_client.IncompleteRead,
        ValueError,
        InvalidURL,
    ) as e:
        if raise_exception:
            raise InternetException(f"Unable to retrieve content url='{url}'") from e
        warnings.warn(
            f"Unable to retrieve content from '{url}' because of {e}",
            ResourceWarning,
            stacklevel=2,
        )
        return None
    except Exception as e:
        if raise_exception:  # pragma: no cover
            raise InternetException(
                f"Unable to retrieve content, url='{url}', exc={e}"
            ) from e
        warnings.warn(
            f"Unable to retrieve content from '{url}' "
            f"because of unknown exception: {e}",
            ResourceWarning,
            stacklevel=2,
        )
        # Unknown failures are never swallowed, even with raise_exception=False.
        raise

    if chunk is not None:
        # The data was streamed to *output*, nothing is kept in memory.
        return None

    if len(res) >= 2 and res[:2] == b"\x1f\x8b":
        # gzip format
        res = gzip.decompress(res)

    if encoding is None:
        content = res
    else:
        try:
            content = res.decode(encoding)
        except UnicodeDecodeError as e:  # pragma: no cover
            # it tries different encoding
            errors = [e]
            fallbacks = ["iso-8859-1", "latin-1"]
            content = None
            for fallback in fallbacks:
                try:
                    content = res.decode(fallback)
                    break
                except UnicodeDecodeError as ee:
                    errors.append(ee)

            if content is None:
                mes = [f"Unable to parse text from '{url}'."]
                mes.append("tried:" + str([encoding, *fallbacks]))
                mes.append("beginning:\n" + str([res])[:50])
                mes.extend("Exception: " + str(err) for err in errors)
                raise ValueError("\n".join(mes)) from e

    if output is not None:
        save_content(content)

    return content


def unit_test_going():
"""
Enables a flag telling the script is running while testing it.
Expand Down
7 changes: 2 additions & 5 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
astroid
autopep8
black
blockdiag
coverage
Expand All @@ -17,14 +16,14 @@ jupyter_sphinx
jupyter
jupyter-black
lifelines
mako
matplotlib
memory_profiler
mlinsights
nbconvert
nbsphinx
notebook
onnx_array_api
onnx-array-api
onnx-extended
onnxruntime>=1.12
pandas
pillow
Expand All @@ -35,15 +34,13 @@ pyinstrument
pytest
ruff
seaborn
setuptools
snakeviz
scikit-learn>=1.1
sklearn-onnx
sphinx
sphinx-gallery
sphinx-issues
git+https://github.com/sdpython/sphinx-runpython.git
sphinxcontrib.imagesvg
stack_data
statsmodels
tqdm
Expand Down

0 comments on commit 441e09c

Please sign in to comment.