Python type hints on a mapping to a mutable set
I have written a mapping to a mutable set in Python. I use it as a data structure for file hashes: it maps each hash to its duplicate files.
I want to add type hints and have read a lot about generics and related topics, but I have not managed to solve it well, so I am asking the community for help:
How does one type annotate the code below correctly?
I know I could inherit from collections.abc and thereby take advantage of some type hints. However, with a mixin of Mapping and MutableSet I would end up with duck-typed methods such as __iter__ that are difficult to trace (at least for me). Therefore I chose to write all necessary methods by hand.
I want to support all possible types, so something like
K = typing.TypeVar('K', bound = typing.Hashable)
should be involved, because hashability is required both for keys in a dict and for members of a set. The code below only deals with integers.
Thank you very much for your comments!
class MtoS:
    """Map each key to a set of values, exposed as a collection of pairs.

    A pair is any sequence where ``item[0]`` is the key and ``item[1]`` is
    the value. Keys are used as dict keys and values as set members, so both
    must be hashable.
    """

    def __init__(self, x=None):
        """Start empty, then add ``x``.

        A ``list`` is treated as a list of pairs. Any other truthy value is
        added as one single pair. Falsy values (``None``, ``()``) add nothing.
        """
        self._hashlu = {}
        if isinstance(x, list):
            for pair in x:
                self.add(pair)
        elif x:
            self.add(x)

    def __ior__(self, other):
        """Add every pair yielded by ``other`` and return ``self`` (``|=``)."""
        for pair in other:
            self.add(pair)
        return self

    def __len__(self):
        """Return the number of stored pairs, summed over all keys."""
        return sum(len(values) for values in self._hashlu.values())

    def add(self, item):
        """Store ``item[1]`` in the set kept for ``item[0]``."""
        # setdefault creates the empty set on first sight of a key.
        self._hashlu.setdefault(item[0], set()).add(item[1])

    def __iter__(self):
        """Yield ``(key, value)`` pairs; values are sorted within each key."""
        for key, values in self._hashlu.items():
            for value in sorted(values):
                yield key, value

    def __str__(self):
        """Return the ``str`` of the underlying dict of sets."""
        return str(self._hashlu)

    def __repr__(self):
        """Return ``ClassName()`` when empty, else ``ClassName([pairs])``."""
        name = type(self).__name__
        # Truthiness falls back to __len__, so "not self" means "no pairs".
        if not self:
            return f"{name}()"
        return f"{name}({list(self)!r})"

    def __getitem__(self, key):
        """Return the set stored for ``key``; raises ``KeyError`` if absent."""
        return self._hashlu[key]
if __name__ == "__main__":
    # Demo: two mappings that overlap on the pair (3, 4).
    m = MtoS([(1, 2), (3, 4)])
    o = MtoS([(3, 4), (3, 5), (4, 5), (4, 6)])

    print(m)  # {1: {2}, 3: {4}}
    print(m[1])  # {2}

    # In-place union: the value sets are merged per key.
    m |= o
    print(m)  # {1: {2}, 3: {4, 5}, 4: {5, 6}}
    print(len(m))  # 5
Solution 1:[1]
Generally, a data structure that can hold many types of items should use typing.Generic to let its users specify exactly which types it contains. If there are limits on the types that are valid (such as requiring them to be hashable), you can use bound types and typing.Protocols to specify the requirements.
So I'd annotate your code like this:
import typing
K = typing.TypeVar("K", bound=typing.Hashable)  # typevar for keys
V = typing.TypeVar("V", bound="HashableAndSortable")  # typevar for values


class HashableAndSortable(typing.Protocol, typing.Hashable):
    """Structural type for values: hashable and comparable with ``<``.

    Hashability is needed because values are stored in a ``set``; ``__lt__``
    is what ``sorted()`` relies on when the values are iterated in order.
    """

    def __lt__(self: V, other: V) -> bool: ...


KV = tuple[K, V]  # alias for (key, value)


class MtoS(typing.Generic[K, V]):
    """Annotated skeleton of a mapping from keys to mutable sets of values.

    Only the signatures are shown; the bodies are placeholders.
    """

    _hashlu: dict[K, set[V]]

    # A single Union spelling (no "|" between typing objects) keeps the
    # annotation evaluable on Python 3.9 as well.
    def __init__(self, x: typing.Union[KV, list[KV], None] = None) -> None:
        """Create the mapping from nothing, one pair, or a list of pairs."""
        ...

    # Any iterable of pairs is accepted; another MtoS qualifies because its
    # __iter__ yields pairs. The return type is a string forward reference
    # because the class name is not bound yet while its body executes.
    def __ior__(self, other: typing.Iterable[KV]) -> "MtoS[K, V]":
        """Merge the pairs of ``other`` into this mapping (``|=``)."""
        ...

    def __len__(self) -> int:
        """Return the number of stored pairs."""
        ...

    def add(self, item: KV) -> None:
        """Add one (key, value) pair."""
        ...

    def __iter__(self) -> typing.Iterator[KV]:
        """Iterate over the stored (key, value) pairs."""
        ...

    def __str__(self) -> str: ...

    def __repr__(self) -> str: ...

    def __getitem__(self, key: K) -> set[V]:
        """Return the set of values stored for ``key``."""
        ...
The hints for __ior__ are pretty ugly because we need to use strings for forward references. A nicer approach will be possible in Python 3.11, which implements PEP 673, allowing typing.Self to be used to annotate a type that's the same as the type of the self argument.
Solution 2:[2]
Finally, thanks to Blckknght, I've come up with this specialized solution:
import typing as tp
import pathlib as pl
K = tp.TypeVar("K", bound=tp.Hashable)  # keys: anything usable as a dict key
V = tp.TypeVar("V", bound=pl.PurePath)  # values: paths (hashable and sortable)


class MtoS(tp.Generic[K, V]):
    """Map each key to a mutable set of paths, exposed as (key, path) pairs."""

    # Backing store: key -> set of values recorded for that key.
    _hashlu: dict[K, set[V]]

    def __init__(
        self,
        x: tp.Union[None, tuple[K, V], list[tuple[K, V]]] = None,
    ) -> None:
        """Start empty, then add ``x``.

        A ``list`` is treated as a list of pairs. Any other truthy value is
        added as one single pair. Falsy values (``None``, ``()``) add nothing.
        """
        self._hashlu = {}
        if isinstance(x, list):
            for pair in x:
                self.add(pair)
        elif x:
            self.add(x)

    # Only iteration over pairs is required of ``other``, so any iterable of
    # pairs is accepted, not just another MtoS.
    def __ior__(self, other: tp.Iterable[tuple[K, V]]) -> "MtoS[K, V]":
        """Add every pair yielded by ``other`` and return ``self`` (``|=``)."""
        for pair in other:
            self.add(pair)
        return self

    def __len__(self) -> int:
        """Return the number of stored pairs, summed over all keys."""
        return sum(len(values) for values in self._hashlu.values())

    def add(self, item: tuple[K, V]) -> None:
        """Store ``item[1]`` in the set kept for ``item[0]``."""
        # setdefault creates the empty set on first sight of a key.
        self._hashlu.setdefault(item[0], set()).add(item[1])

    def __iter__(self) -> tp.Iterator[tuple[K, V]]:
        """Yield ``(key, value)`` pairs; values are sorted within each key."""
        for key, values in self._hashlu.items():
            for value in sorted(values):
                yield key, value

    def __str__(self) -> str:
        """Return the ``str`` of the underlying dict of sets."""
        return str(self._hashlu)

    def __repr__(self) -> str:
        """Return ``ClassName()`` when empty, else ``ClassName([pairs])``."""
        name = type(self).__name__
        # Truthiness falls back to __len__, so "not self" means "no pairs".
        if not self:
            return f"{name}()"
        return f"{name}({list(self)!r})"

    def __getitem__(self, key: K) -> set[V]:
        """Return the set stored for ``key``; raises ``KeyError`` if absent."""
        return self._hashlu[key]
if __name__ == "__main__":
    # Demo: the same scenario as the integer example, with paths as values.
    home = pl.Path.home()  # resolved once instead of once per entry

    m = MtoS([(1, home / "dummy2"), (3, home / "dummy4")])
    o = MtoS(
        [
            (3, home / "dummy4"),
            (3, home / "dummy5"),
            (4, home / "dummy5"),
            (4, home / "dummy6"),
        ]
    )

    print(m)
    print(m[1])

    # In-place union: the path sets are merged per key.
    m |= o
    print(m)
    print(len(m))
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
| Solution 1 | Blckknght |
| Solution 2 | de_chris |
