diff --git a/python/ql/src/Functions/IncorrectRaiseInSpecialMethod.py b/python/ql/src/Functions/IncorrectRaiseInSpecialMethod.py deleted file mode 100644 index e76c27145dbb..000000000000 --- a/python/ql/src/Functions/IncorrectRaiseInSpecialMethod.py +++ /dev/null @@ -1,16 +0,0 @@ -#Incorrect unhashable class -class MyMutableThing(object): - - def __init__(self): - pass - - def __hash__(self): - raise NotImplementedError("%r is unhashable" % self) - -#Make class unhashable in the standard way -class MyCorrectMutableThing(object): - - def __init__(self): - pass - - __hash__ = None diff --git a/python/ql/src/Functions/IncorrectRaiseInSpecialMethod.qhelp b/python/ql/src/Functions/IncorrectRaiseInSpecialMethod.qhelp index f4f0cd6920ab..42d7d421b0a6 100644 --- a/python/ql/src/Functions/IncorrectRaiseInSpecialMethod.qhelp +++ b/python/ql/src/Functions/IncorrectRaiseInSpecialMethod.qhelp @@ -9,7 +9,7 @@ When the expression a + b is evaluated the Python virtual machine w is not implemented it will call type(b).__radd__(b, a).

Since the virtual machine calls these special methods for common expressions, users of the class will expect these operations to raise standard exceptions. -For example, users would expect that the expression a.b might raise an AttributeError +For example, users would expect that the expression a.b may raise an AttributeError if the object a does not have an attribute b. If a KeyError were raised instead, then this would be unexpected and may break code that expected an AttributeError, but not a KeyError. @@ -20,18 +20,18 @@ Therefore, if a method is unable to perform the expected operation then its resp

-

If the method is meant to be abstract, then declare it so using the @abstractmethod decorator. +

If the method is intended to be abstract, and thus always raise an exception, then declare it so using the @abstractmethod decorator. Otherwise, either remove the method or ensure that the method raises an exception of the correct type.

@@ -39,31 +39,29 @@ Otherwise, either remove the method or ensure that the method raises an exceptio

-This example shows two unhashable classes. The first class is unhashable in a non-standard way which may cause maintenance problems. -The second, corrected, class uses the standard idiom for unhashable classes. +In the following example, the __add__ method of A raises a TypeError if other is of the wrong type. +However, it should return NotImplemented instead of rising an exception, to allow other classes to support adding to A. +This is demonstrated in the class B.

- +

-In this example, the first class is implicitly abstract; the __add__ method is unimplemented, -presumably with the expectation that it will be implemented by sub-classes. -The second class makes this explicit with an @abstractmethod decoration on the unimplemented __add__ method. +In the following example, the __getitem__ method of C raises a ValueError, rather than a KeyError or IndexError as expected.

- +

-In this last example, the first class implements a collection backed by the file store. -However, should an IOError be raised in the __getitem__ it will propagate to the caller. -The second class handles any IOError by reraising a KeyError which is the standard exception for -the __getitem__ method. +In the following example, the class __hash__ method of D raises NotImplementedError. +This causes D to be incorrectly identified as hashable by isinstance(obj, collections.abc.Hashable); so the correct +way to make a class unhashable is to set __hash__ = None.

- +
  • Python Language Reference: Special Method Names.
  • -
  • Python Library Reference: Exceptions.
  • +
  • Python Library Reference: Exceptions.
  • diff --git a/python/ql/src/Functions/IncorrectRaiseInSpecialMethod.ql b/python/ql/src/Functions/IncorrectRaiseInSpecialMethod.ql index 4bf52af9061f..3cd7e0fe9871 100644 --- a/python/ql/src/Functions/IncorrectRaiseInSpecialMethod.ql +++ b/python/ql/src/Functions/IncorrectRaiseInSpecialMethod.ql @@ -7,114 +7,188 @@ * error-handling * @problem.severity recommendation * @sub-severity high - * @precision very-high + * @precision high * @id py/unexpected-raise-in-special-method */ import python +import semmle.python.ApiGraphs +import semmle.python.dataflow.new.internal.DataFlowDispatch -private predicate attribute_method(string name) { - name = "__getattribute__" or name = "__getattr__" or name = "__setattr__" +/** Holds if `name` is the name of a special method for attribute access such as `a.b`, that should raise an `AttributeError`. */ +private predicate attributeMethod(string name) { + name = ["__getattribute__", "__getattr__", "__delattr__"] // __setattr__ excluded as it makes sense to raise different kinds of errors based on the `value` parameter } -private predicate indexing_method(string name) { - name = "__getitem__" or name = "__setitem__" or name = "__delitem__" +/** Holds if `name` is the name of a special method for indexing operations such as `a[b]`, that should raise a `LookupError`. */ +private predicate indexingMethod(string name) { + name = ["__getitem__", "__delitem__"] // __setitem__ excluded as it makes sense to raise different kinds of errors based on the `value` parameter } -private predicate arithmetic_method(string name) { - name in [ - "__add__", "__sub__", "__or__", "__xor__", "__rshift__", "__pow__", "__mul__", "__neg__", - "__radd__", "__rsub__", "__rdiv__", "__rfloordiv__", "__div__", "__rdiv__", "__rlshift__", - "__rand__", "__ror__", "__rxor__", "__rrshift__", "__rpow__", "__rmul__", "__truediv__", - "__rtruediv__", "__pos__", "__iadd__", "__isub__", "__idiv__", "__ifloordiv__", "__idiv__", - "__ilshift__", "__iand__", "__ior__", "__ixor__", "__irshift__", "__abs__", "__ipow__", - "__imul__", "__itruediv__", "__floordiv__", "__div__", "__divmod__", "__lshift__", "__and__" +/** Holds if `name` is the name of a special method for arithmetic operations. */ +private predicate arithmeticMethod(string name) { + name = + [ + "__add__", "__sub__", "__and__", "__or__", "__xor__", "__lshift__", "__rshift__", "__pow__", + "__mul__", "__div__", "__divmod__", "__truediv__", "__floordiv__", "__matmul__", "__radd__", + "__rsub__", "__rand__", "__ror__", "__rxor__", "__rlshift__", "__rrshift__", "__rpow__", + "__rmul__", "__rdiv__", "__rdivmod__", "__rtruediv__", "__rfloordiv__", "__rmatmul__", + "__iadd__", "__isub__", "__iand__", "__ior__", "__ixor__", "__ilshift__", "__irshift__", + "__ipow__", "__imul__", "__idiv__", "__idivmod__", "__itruediv__", "__ifloordiv__", + "__imatmul__", "__pos__", "__neg__", "__abs__", "__invert__", ] } -private predicate ordering_method(string name) { - name = "__lt__" - or - name = "__le__" - or - name = "__gt__" - or - name = "__ge__" - or - name = "__cmp__" and major_version() = 2 +/** Holds if `name is the name of a special method for ordering operations such as `a < b`. */ +private predicate orderingMethod(string name) { + name = + [ + "__lt__", + "__le__", + "__gt__", + "__ge__", + ] } -private predicate cast_method(string name) { - name = "__nonzero__" and major_version() = 2 - or - name = "__int__" - or - name = "__float__" - or - name = "__long__" - or - name = "__trunc__" - or - name = "__complex__" +/** Holds if `name` is the name of a special method for casting an object to a numeric type, such as `int(x)` */ +private predicate castMethod(string name) { + name = + [ + "__int__", + "__float__", + "__index__", + "__trunc__", + "__complex__" + ] // __bool__ excluded as it makes sense to allow it to always raise } -predicate correct_raise(string name, ClassObject ex) { - ex.getAnImproperSuperType() = theTypeErrorType() and +/** Holds if we allow a special method named `name` to raise `exec` as an exception. */ +predicate correctRaise(string name, Expr exec) { + execIsOfType(exec, "TypeError") and ( - name = "__copy__" or - name = "__deepcopy__" or - name = "__call__" or - indexing_method(name) or - attribute_method(name) + indexingMethod(name) or + attributeMethod(name) or + // Allow add methods to raise a TypeError, as they can be used for sequence concatenation as well as arithmetic + name = ["__add__", "__iadd__", "__radd__"] ) or - preferred_raise(name, ex) - or - preferred_raise(name, ex.getASuperType()) + exists(string execName | + preferredRaise(name, execName, _) and + execIsOfType(exec, execName) + ) } -predicate preferred_raise(string name, ClassObject ex) { - attribute_method(name) and ex = theAttributeErrorType() - or - indexing_method(name) and ex = Object::builtin("LookupError") - or - ordering_method(name) and ex = theTypeErrorType() - or - arithmetic_method(name) and ex = Object::builtin("ArithmeticError") - or - name = "__bool__" and ex = theTypeErrorType() +/** Holds if it is preferred for `name` to raise exceptions of type `execName`. `message` is the alert message. */ +predicate preferredRaise(string name, string execName, string message) { + attributeMethod(name) and + execName = "AttributeError" and + message = "should raise an AttributeError instead." + or + indexingMethod(name) and + execName = "LookupError" and + message = "should raise a LookupError (KeyError or IndexError) instead." + or + orderingMethod(name) and + execName = "TypeError" and + message = "should raise a TypeError or return NotImplemented instead." + or + arithmeticMethod(name) and + execName = "ArithmeticError" and + message = "should raise an ArithmeticError or return NotImplemented instead." + or + name = "__bool__" and + execName = "TypeError" and + message = "should raise a TypeError instead." } -predicate no_need_to_raise(string name, string message) { - name = "__hash__" and message = "use __hash__ = None instead" - or - cast_method(name) and message = "there is no need to implement the method at all." +/** Holds if `exec` is an exception object of the type named `execName`. */ +predicate execIsOfType(Expr exec, string execName) { + // Might make sense to have execName be an IPA type here. Or part of a more general API modeling builtin/stdlib subclass relations. + exists(string subclass | + execName = "TypeError" and + subclass = "TypeError" + or + execName = "LookupError" and + subclass = ["LookupError", "KeyError", "IndexError"] + or + execName = "ArithmeticError" and + subclass = ["ArithmeticError", "FloatingPointError", "OverflowError", "ZeroDivisionError"] + or + execName = "AttributeError" and + subclass = "AttributeError" + | + exec = API::builtin(subclass).getACall().asExpr() + or + exec = API::builtin(subclass).getASubclass().getACall().asExpr() + ) +} + +/** + * Holds if `meth` need not be implemented if it always raises. `message` is the alert message, and `allowNotImplemented` is true + * if we still allow the method to always raise `NotImplementedError`. + */ +predicate noNeedToAlwaysRaise(Function meth, string message, boolean allowNotImplemented) { + meth.getName() = "__hash__" and + message = "use __hash__ = None instead." and + allowNotImplemented = false + or + castMethod(meth.getName()) and + message = "this method does not need to be implemented." and + allowNotImplemented = true and + // Allow an always raising cast method if it's overriding other behavior + not exists(Function overridden | + overridden.getName() = meth.getName() and + overridden.getScope() = getADirectSuperclass+(meth.getScope()) and + not alwaysRaises(overridden, _) + ) +} + +/** Holds if `func` has a decorator likely marking it as an abstract method. */ +predicate isAbstract(Function func) { func.getADecorator().(Name).getId().matches("%abstract%") } + +/** Holds if `f` always raises the exception `exec`. */ +predicate alwaysRaises(Function f, Expr exec) { + directlyRaises(f, exec) and + strictcount(Expr e | directlyRaises(f, e)) = 1 and + not exists(f.getANormalExit()) } -predicate is_abstract(FunctionObject func) { - func.getFunction().getADecorator().(Name).getId().matches("%abstract%") +/** Holds if `f` directly raises `exec` using a `raise` statement. */ +predicate directlyRaises(Function f, Expr exec) { + exists(Raise r | + r.getScope() = f and + exec = r.getException() and + exec instanceof Call + ) } -predicate always_raises(FunctionObject f, ClassObject ex) { - ex = f.getARaisedType() and - strictcount(f.getARaisedType()) = 1 and - not exists(f.getFunction().getANormalExit()) and - /* raising StopIteration is equivalent to a return in a generator */ - not ex = theStopIterationType() +/** Holds if `exec` is a `NotImplementedError`. */ +predicate isNotImplementedError(Expr exec) { + exec = API::builtin("NotImplementedError").getACall().asExpr() } -from FunctionObject f, ClassObject cls, string message +/** Gets the name of the builtin exception type `exec` constructs, if it can be determined. */ +string getExecName(Expr exec) { result = exec.(Call).getFunc().(Name).getId() } + +from Function f, Expr exec, string message where - f.getFunction().isSpecialMethod() and - not is_abstract(f) and - always_raises(f, cls) and + f.isSpecialMethod() and + not isAbstract(f) and + directlyRaises(f, exec) and ( - no_need_to_raise(f.getName(), message) and not cls.getName() = "NotImplementedError" + exists(boolean allowNotImplemented, string subMessage | + alwaysRaises(f, exec) and + noNeedToAlwaysRaise(f, subMessage, allowNotImplemented) and + (allowNotImplemented = true implies not isNotImplementedError(exec)) and // don't alert if it's a NotImplementedError and that's ok + message = "This method always raises $@ - " + subMessage + ) or - not correct_raise(f.getName(), cls) and - not cls.getName() = "NotImplementedError" and - exists(ClassObject preferred | preferred_raise(f.getName(), preferred) | - message = "raise " + preferred.getName() + " instead" + not isNotImplementedError(exec) and + not correctRaise(f.getName(), exec) and + exists(string subMessage | preferredRaise(f.getName(), _, subMessage) | + if alwaysRaises(f, exec) + then message = "This method always raises $@ - " + subMessage + else message = "This method raises $@ - " + subMessage ) ) -select f, "Function always raises $@; " + message, cls, cls.toString() +select f, message, exec, getExecName(exec) diff --git a/python/ql/src/Functions/IncorrectRaiseInSpecialMethod2.py b/python/ql/src/Functions/IncorrectRaiseInSpecialMethod2.py deleted file mode 100644 index 405400bfe614..000000000000 --- a/python/ql/src/Functions/IncorrectRaiseInSpecialMethod2.py +++ /dev/null @@ -1,15 +0,0 @@ - -#Abstract base class, but don't declare it. -class ImplicitAbstractClass(object): - - def __add__(self, other): - raise NotImplementedError() - -#Make abstractness explicit. -class ExplicitAbstractClass: - __metaclass__ = ABCMeta - - @abstractmethod - def __add__(self, other): - raise NotImplementedError() - diff --git a/python/ql/src/Functions/IncorrectRaiseInSpecialMethod3.py b/python/ql/src/Functions/IncorrectRaiseInSpecialMethod3.py deleted file mode 100644 index 048d5043b4dc..000000000000 --- a/python/ql/src/Functions/IncorrectRaiseInSpecialMethod3.py +++ /dev/null @@ -1,27 +0,0 @@ - -#Incorrect file-backed table -class FileBackedTable(object): - - def __getitem__(self, key): - if key not in self.index: - raise IOError("Key '%s' not in table" % key) - else: - #May raise an IOError - return self.backing.get_row(key) - -#Correct by transforming exception -class ObjectLikeFileBackedTable(object): - - def get_from_key(self, key): - if key not in self.index: - raise IOError("Key '%s' not in table" % key) - else: - #May raise an IOError - return self.backing.get_row(key) - - def __getitem__(self, key): - try: - return self.get_from_key(key) - except IOError: - raise KeyError(key) - diff --git a/python/ql/src/Functions/examples/IncorrectRaiseInSpecialMethod.py b/python/ql/src/Functions/examples/IncorrectRaiseInSpecialMethod.py new file mode 100644 index 000000000000..d565a86cab27 --- /dev/null +++ b/python/ql/src/Functions/examples/IncorrectRaiseInSpecialMethod.py @@ -0,0 +1,22 @@ +class A: + def __init__(self, a): + self.a = a + + def __add__(self, other): + # BAD: Should return NotImplemented instead of raising + if not isinstance(other,A): + raise TypeError(f"Cannot add A to {other.__class__}") + return A(self.a + other.a) + +class B: + def __init__(self, a): + self.a = a + + def __add__(self, other): + # GOOD: Returning NotImplemented allows for the operation to fallback to other implementations to allow other classes to support adding to B. + if not isinstance(other,B): + return NotImplemented + return B(self.a + other.a) + + + diff --git a/python/ql/src/Functions/examples/IncorrectRaiseInSpecialMethod2.py b/python/ql/src/Functions/examples/IncorrectRaiseInSpecialMethod2.py new file mode 100644 index 000000000000..ba5f90f46708 --- /dev/null +++ b/python/ql/src/Functions/examples/IncorrectRaiseInSpecialMethod2.py @@ -0,0 +1,7 @@ +class C: + def __getitem__(self, idx): + if self.idx < 0: + # BAD: Should raise a KeyError or IndexError instead. + raise ValueError("Invalid index") + return self.lookup(idx) + diff --git a/python/ql/src/Functions/examples/IncorrectRaiseInSpecialMethod3.py b/python/ql/src/Functions/examples/IncorrectRaiseInSpecialMethod3.py new file mode 100644 index 000000000000..33541adc7e64 --- /dev/null +++ b/python/ql/src/Functions/examples/IncorrectRaiseInSpecialMethod3.py @@ -0,0 +1,4 @@ +class D: + def __hash__(self): + # BAD: Use `__hash__ = None` instead. + raise NotImplementedError(f"{self.__class__} is unhashable.") \ No newline at end of file diff --git a/python/ql/src/change-notes/2025-07-25-unexpected-raise-special-method.md b/python/ql/src/change-notes/2025-07-25-unexpected-raise-special-method.md new file mode 100644 index 000000000000..4b79dbc3b81e --- /dev/null +++ b/python/ql/src/change-notes/2025-07-25-unexpected-raise-special-method.md @@ -0,0 +1,5 @@ +--- +category: minorAnalysis +--- +* The `py/unexpected-raise-in-special-method` query has been modernized. It produces additional results in cases where the exception is +only raised conditionally. Its precision has been changed from `very-high` to `high`. \ No newline at end of file diff --git a/python/ql/test/query-tests/Functions/IncorrectRaiseInSpecialMethod/IncorrectRaiseInSpecialMethod.expected b/python/ql/test/query-tests/Functions/IncorrectRaiseInSpecialMethod/IncorrectRaiseInSpecialMethod.expected new file mode 100644 index 000000000000..3907a725ee18 --- /dev/null +++ b/python/ql/test/query-tests/Functions/IncorrectRaiseInSpecialMethod/IncorrectRaiseInSpecialMethod.expected @@ -0,0 +1,6 @@ +| test.py:6:5:6:33 | Function __getitem__ | This method always raises $@ - should raise a LookupError (KeyError or IndexError) instead. | test.py:7:15:7:33 | ZeroDivisionError() | ZeroDivisionError | +| test.py:9:5:9:32 | Function __getattr__ | This method always raises $@ - should raise an AttributeError instead. | test.py:10:15:10:33 | ZeroDivisionError() | ZeroDivisionError | +| test.py:12:5:12:23 | Function __bool__ | This method always raises $@ - should raise a TypeError instead. | test.py:13:15:13:26 | ValueError() | ValueError | +| test.py:15:5:15:22 | Function __int__ | This method always raises $@ - this method does not need to be implemented. | test.py:16:15:16:26 | ValueError() | ValueError | +| test.py:24:5:24:23 | Function __hash__ | This method always raises $@ - use __hash__ = None instead. | test.py:25:15:25:35 | NotImplementedError() | NotImplementedError | +| test.py:28:5:28:29 | Function __sub__ | This method raises $@ - should raise an ArithmeticError or return NotImplemented instead. | test.py:30:19:30:29 | TypeError() | TypeError | diff --git a/python/ql/test/query-tests/Functions/IncorrectRaiseInSpecialMethod/IncorrectRaiseInSpecialMethod.qlref b/python/ql/test/query-tests/Functions/IncorrectRaiseInSpecialMethod/IncorrectRaiseInSpecialMethod.qlref new file mode 100644 index 000000000000..a81e499ea66b --- /dev/null +++ b/python/ql/test/query-tests/Functions/IncorrectRaiseInSpecialMethod/IncorrectRaiseInSpecialMethod.qlref @@ -0,0 +1,2 @@ +query: Functions/IncorrectRaiseInSpecialMethod.ql +postprocess: utils/test/InlineExpectationsTestQuery.ql \ No newline at end of file diff --git a/python/ql/test/query-tests/Functions/IncorrectRaiseInSpecialMethod/test.py b/python/ql/test/query-tests/Functions/IncorrectRaiseInSpecialMethod/test.py new file mode 100644 index 000000000000..d5b1bc585f62 --- /dev/null +++ b/python/ql/test/query-tests/Functions/IncorrectRaiseInSpecialMethod/test.py @@ -0,0 +1,66 @@ +class A: + + def __add__(self, other): # No alert - Always allow NotImplementedError + raise NotImplementedError() + + def __getitem__(self, index): # $ Alert + raise ZeroDivisionError() + + def __getattr__(self, name): # $ Alert + raise ZeroDivisionError() + + def __bool__(self): # $ Alert + raise ValueError() + + def __int__(self): # $ Alert # Cast method need not be defined to always raise + raise ValueError() + + def __float__(self): # No alert - OK to raise conditionally + if self.z: + return 0 + else: + raise ValueError() + + def __hash__(self): # $ Alert # should use __hash__=None rather than stub implementation to make class unhashable + raise NotImplementedError() + +class B: + def __sub__(self, other): # $ Alert # should return NotImplemented instead + if not isinstance(other,B): + raise TypeError() + return self + + def __add__(self, other): # No alert - allow add to raise a TypeError, as it is sometimes used for sequence concatenation as well as arithmetic + if not isinstance(other,B): + raise TypeError() + return self + + def __setitem__(self, key, val): # No alert - allow setitem to raise arbitrary exceptions as they could be due to the value, rather than a LookupError relating to the key + if val < 0: + raise ValueError() + + def __getitem__(self, key): # No alert - indexing method allowed to raise TypeError or subclasses of LookupError. + if not isinstance(key, int): + raise TypeError() + if key < 0: + raise KeyError() + return 3 + + def __getattribute__(self, name): + if name != "a": + raise AttributeError() + return 2 + + def __div__(self, other): + if other == 0: + raise ZeroDivisionError() + return self + + +class D: + def __int__(self): + return 2 + +class E(D): + def __int__(self): # No alert - cast method may override to raise exception + raise TypeError() \ No newline at end of file diff --git a/python/ql/test/query-tests/Functions/general/IncorrectRaiseInSpecialMethod.expected b/python/ql/test/query-tests/Functions/general/IncorrectRaiseInSpecialMethod.expected deleted file mode 100644 index dd4429de02e9..000000000000 --- a/python/ql/test/query-tests/Functions/general/IncorrectRaiseInSpecialMethod.expected +++ /dev/null @@ -1,3 +0,0 @@ -| protocols.py:98:5:98:33 | Function __getitem__ | Function always raises $@; raise LookupError instead | file://:Compiled Code:0:0:0:0 | builtin-class ZeroDivisionError | builtin-class ZeroDivisionError | -| protocols.py:101:5:101:26 | Function __getattr__ | Function always raises $@; raise AttributeError instead | file://:Compiled Code:0:0:0:0 | builtin-class ZeroDivisionError | builtin-class ZeroDivisionError | -| protocols.py:104:5:104:23 | Function __bool__ | Function always raises $@; raise TypeError instead | file://:Compiled Code:0:0:0:0 | builtin-class ZeroDivisionError | builtin-class ZeroDivisionError | diff --git a/python/ql/test/query-tests/Functions/general/IncorrectRaiseInSpecialMethod.qlref b/python/ql/test/query-tests/Functions/general/IncorrectRaiseInSpecialMethod.qlref deleted file mode 100644 index 07fd22a93767..000000000000 --- a/python/ql/test/query-tests/Functions/general/IncorrectRaiseInSpecialMethod.qlref +++ /dev/null @@ -1 +0,0 @@ -Functions/IncorrectRaiseInSpecialMethod.ql \ No newline at end of file