wg-backend-django/dell-env/lib/python3.11/site-packages/_plotly_utils/utils.py
2023-10-30 14:40:43 +07:00

453 lines
14 KiB
Python

import decimal
import json as _json
import sys
import re
from functools import reduce
from _plotly_utils.optional_imports import get_module
from _plotly_utils.basevalidators import ImageUriValidator
def cumsum(x):
"""
Custom cumsum to avoid a numpy import.
"""
def _reducer(a, x):
if len(a) == 0:
return [x]
return a + [a[-1] + x]
ret = reduce(_reducer, x, [])
return ret
class PlotlyJSONEncoder(_json.JSONEncoder):
"""
Meant to be passed as the `cls` kwarg to json.dumps(obj, cls=..)
See PlotlyJSONEncoder.default for more implementation information.
Additionally, this encoder overrides nan functionality so that 'Inf',
'NaN' and '-Inf' encode to 'null'. Which is stricter JSON than the Python
version.
"""
def coerce_to_strict(self, const):
"""
This is used to ultimately *encode* into strict JSON, see `encode`
"""
# before python 2.7, 'true', 'false', 'null', were include here.
if const in ("Infinity", "-Infinity", "NaN"):
return None
else:
return const
def encode(self, o):
"""
Load and then dump the result using parse_constant kwarg
Note that setting invalid separators will cause a failure at this step.
"""
# this will raise errors in a normal-expected way
encoded_o = super(PlotlyJSONEncoder, self).encode(o)
# Brute force guessing whether NaN or Infinity values are in the string
# We catch false positive cases (e.g. strings such as titles, labels etc.)
# but this is ok since the intention is to skip the decoding / reencoding
# step when it's completely safe
if not ("NaN" in encoded_o or "Infinity" in encoded_o):
return encoded_o
# now:
# 1. `loads` to switch Infinity, -Infinity, NaN to None
# 2. `dumps` again so you get 'null' instead of extended JSON
try:
new_o = _json.loads(encoded_o, parse_constant=self.coerce_to_strict)
except ValueError:
# invalid separators will fail here. raise a helpful exception
raise ValueError(
"Encoding into strict JSON failed. Did you set the separators "
"valid JSON separators?"
)
else:
return _json.dumps(
new_o,
sort_keys=self.sort_keys,
indent=self.indent,
separators=(self.item_separator, self.key_separator),
)
def default(self, obj):
"""
Accept an object (of unknown type) and try to encode with priority:
1. builtin: user-defined objects
2. sage: sage math cloud
3. pandas: dataframes/series
4. numpy: ndarrays
5. datetime: time/datetime objects
Each method throws a NotEncoded exception if it fails.
The default method will only get hit if the object is not a type that
is naturally encoded by json:
Normal objects:
dict object
list, tuple array
str, unicode string
int, long, float number
True true
False false
None null
Extended objects:
float('nan') 'NaN'
float('infinity') 'Infinity'
float('-infinity') '-Infinity'
Therefore, we only anticipate either unknown iterables or values here.
"""
# TODO: The ordering if these methods is *very* important. Is this OK?
encoding_methods = (
self.encode_as_plotly,
self.encode_as_sage,
self.encode_as_numpy,
self.encode_as_pandas,
self.encode_as_datetime,
self.encode_as_date,
self.encode_as_list, # because some values have `tolist` do last.
self.encode_as_decimal,
self.encode_as_pil,
)
for encoding_method in encoding_methods:
try:
return encoding_method(obj)
except NotEncodable:
pass
return _json.JSONEncoder.default(self, obj)
@staticmethod
def encode_as_plotly(obj):
"""Attempt to use a builtin `to_plotly_json` method."""
try:
return obj.to_plotly_json()
except AttributeError:
raise NotEncodable
@staticmethod
def encode_as_list(obj):
"""Attempt to use `tolist` method to convert to normal Python list."""
if hasattr(obj, "tolist"):
return obj.tolist()
else:
raise NotEncodable
@staticmethod
def encode_as_sage(obj):
"""Attempt to convert sage.all.RR to floats and sage.all.ZZ to ints"""
sage_all = get_module("sage.all")
if not sage_all:
raise NotEncodable
if obj in sage_all.RR:
return float(obj)
elif obj in sage_all.ZZ:
return int(obj)
else:
raise NotEncodable
@staticmethod
def encode_as_pandas(obj):
"""Attempt to convert pandas.NaT / pandas.NA"""
pandas = get_module("pandas", should_load=False)
if not pandas:
raise NotEncodable
if obj is pandas.NaT:
return None
# pandas.NA was introduced in pandas 1.0
if hasattr(pandas, "NA") and obj is pandas.NA:
return None
raise NotEncodable
@staticmethod
def encode_as_numpy(obj):
"""Attempt to convert numpy.ma.core.masked"""
numpy = get_module("numpy", should_load=False)
if not numpy:
raise NotEncodable
if obj is numpy.ma.core.masked:
return float("nan")
elif isinstance(obj, numpy.ndarray) and obj.dtype.kind == "M":
try:
return numpy.datetime_as_string(obj).tolist()
except TypeError:
pass
raise NotEncodable
@staticmethod
def encode_as_datetime(obj):
"""Convert datetime objects to iso-format strings"""
try:
return obj.isoformat()
except AttributeError:
raise NotEncodable
@staticmethod
def encode_as_date(obj):
"""Attempt to convert to utc-iso time string using date methods."""
try:
time_string = obj.isoformat()
except AttributeError:
raise NotEncodable
else:
return iso_to_plotly_time_string(time_string)
@staticmethod
def encode_as_decimal(obj):
"""Attempt to encode decimal by converting it to float"""
if isinstance(obj, decimal.Decimal):
return float(obj)
else:
raise NotEncodable
@staticmethod
def encode_as_pil(obj):
"""Attempt to convert PIL.Image.Image to base64 data uri"""
image = get_module("PIL.Image")
if image is not None and isinstance(obj, image.Image):
return ImageUriValidator.pil_image_to_uri(obj)
else:
raise NotEncodable
class NotEncodable(Exception):
pass
def iso_to_plotly_time_string(iso_string):
"""Remove timezone info and replace 'T' delimeter with ' ' (ws)."""
# make sure we don't send timezone info to plotly
if (iso_string.split("-")[:3] == "00:00") or (iso_string.split("+")[0] == "00:00"):
raise Exception(
"Plotly won't accept timestrings with timezone info.\n"
"All timestrings are assumed to be in UTC."
)
iso_string = iso_string.replace("-00:00", "").replace("+00:00", "")
if iso_string.endswith("T00:00:00"):
return iso_string.replace("T00:00:00", "")
else:
return iso_string.replace("T", " ")
def template_doc(**names):
def _decorator(func):
if not sys.version_info[:2] == (3, 2):
if func.__doc__ is not None:
func.__doc__ = func.__doc__.format(**names)
return func
return _decorator
def _natural_sort_strings(vals, reverse=False):
def key(v):
v_parts = re.split(r"(\d+)", v)
for i in range(len(v_parts)):
try:
v_parts[i] = int(v_parts[i])
except ValueError:
# not an int
pass
return tuple(v_parts)
return sorted(vals, key=key, reverse=reverse)
def _get_int_type():
np = get_module("numpy", should_load=False)
if np:
int_type = (int, np.integer)
else:
int_type = (int,)
return int_type
def split_multichar(ss, chars):
"""
Split all the strings in ss at any of the characters in chars.
Example:
>>> ss = ["a.string[0].with_separators"]
>>> chars = list(".[]_")
>>> split_multichar(ss, chars)
['a', 'string', '0', '', 'with', 'separators']
:param (list) ss: A list of strings.
:param (list) chars: Is a list of chars (note: not a string).
"""
if len(chars) == 0:
return ss
c = chars.pop()
ss = reduce(lambda x, y: x + y, map(lambda x: x.split(c), ss))
return split_multichar(ss, chars)
def split_string_positions(ss):
"""
Given a list of strings split using split_multichar, return a list of
integers representing the indices of the first character of every string in
the original string.
Example:
>>> ss = ["a.string[0].with_separators"]
>>> chars = list(".[]_")
>>> ss_split = split_multichar(ss, chars)
>>> ss_split
['a', 'string', '0', '', 'with', 'separators']
>>> split_string_positions(ss_split)
[0, 2, 9, 11, 12, 17]
:param (list) ss: A list of strings.
"""
return list(
map(
lambda t: t[0] + t[1],
zip(range(len(ss)), cumsum([0] + list(map(len, ss[:-1])))),
)
)
def display_string_positions(p, i=None, offset=0, length=1, char="^", trim=True):
"""
Return a string that is whitespace except at p[i] which is replaced with char.
If i is None then all the indices of the string in p are replaced with char.
Example:
>>> ss = ["a.string[0].with_separators"]
>>> chars = list(".[]_")
>>> ss_split = split_multichar(ss, chars)
>>> ss_split
['a', 'string', '0', '', 'with', 'separators']
>>> ss_pos = split_string_positions(ss_split)
>>> ss[0]
'a.string[0].with_separators'
>>> display_string_positions(ss_pos,4)
' ^'
>>> display_string_positions(ss_pos,4,offset=1,length=3,char="~",trim=False)
' ~~~ '
>>> display_string_positions(ss_pos)
'^ ^ ^ ^^ ^'
:param (list) p: A list of integers.
:param (integer|None) i: Optional index of p to display.
:param (integer) offset: Allows adding a number of spaces to the replacement.
:param (integer) length: Allows adding a replacement that is the char
repeated length times.
:param (str) char: allows customizing the replacement character.
:param (boolean) trim: trims the remaining whitespace if True.
"""
s = [" " for _ in range(max(p) + 1 + offset + length)]
maxaddr = 0
if i is None:
for p_ in p:
for l in range(length):
maxaddr = p_ + offset + l
s[maxaddr] = char
else:
for l in range(length):
maxaddr = p[i] + offset + l
s[maxaddr] = char
ret = "".join(s)
if trim:
ret = ret[: maxaddr + 1]
return ret
def chomp_empty_strings(strings, c, reverse=False):
"""
Given a list of strings, some of which are the empty string "", replace the
empty strings with c and combine them with the closest non-empty string on
the left or "" if it is the first string.
Examples:
for c="_"
['hey', '', 'why', '', '', 'whoa', '', ''] -> ['hey_', 'why__', 'whoa__']
['', 'hi', '', "I'm", 'bob', '', ''] -> ['_', 'hi_', "I'm", 'bob__']
['hi', "i'm", 'a', 'good', 'string'] -> ['hi', "i'm", 'a', 'good', 'string']
Some special cases are:
[] -> []
[''] -> ['']
['', ''] -> ['_']
['', '', '', ''] -> ['___']
If reverse is true, empty strings are combined with closest non-empty string
on the right or "" if it is the last string.
"""
def _rev(l):
return [s[::-1] for s in l][::-1]
if reverse:
return _rev(chomp_empty_strings(_rev(strings), c))
if not len(strings):
return strings
if sum(map(len, strings)) == 0:
return [c * (len(strings) - 1)]
class _Chomper:
def __init__(self, c):
self.c = c
def __call__(self, x, y):
# x is list up to now
# y is next item in list
# x should be [""] initially, and then empty strings filtered out at the
# end
if len(y) == 0:
return x[:-1] + [x[-1] + self.c]
else:
return x + [y]
return list(filter(len, reduce(_Chomper(c), strings, [""])))
# taken from
# https://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Levenshtein_distance#Python
def levenshtein(s1, s2):
if len(s1) < len(s2):
return levenshtein(s2, s1) # len(s1) >= len(s2)
if len(s2) == 0:
return len(s1)
previous_row = range(len(s2) + 1)
for i, c1 in enumerate(s1):
current_row = [i + 1]
for j, c2 in enumerate(s2):
# j+1 instead of j since previous_row and current_row are one character longer
# than s2
insertions = previous_row[j + 1] + 1
deletions = current_row[j] + 1
substitutions = previous_row[j] + (c1 != c2)
current_row.append(min(insertions, deletions, substitutions))
previous_row = current_row
return previous_row[-1]
def find_closest_string(string, strings):
def _key(s):
# sort by levenshtein distance and lexographically to maintain a stable
# sort for different keys with the same levenshtein distance
return (levenshtein(s, string), s)
return sorted(strings, key=_key)[0]