Python-Aware Python

Ned Batchelder

@nedbat

Python code understanding Python code

What will happen here

What do we mean by code?

Kinds of code

Source code

  .. compiled ..

Bytecode in .pyc files

  .. interpreted ..

Function objects in memory

  .. executed ..

Action!

Source code

            class Greeter:
                def __init__(self, who):
                    self.who = who

                def greet(self):
                    print "Hello, %s!" % self.who

            greeter = Greeter("world")
            greeter.greet()
            

Abstract syntax tree

            Module(
              body=[
                ClassDef(
                  name='Greeter', bases=[], 
                  body=[
                    FunctionDef(
                      name='__init__', 
                      args=arguments(
                        args=[
                          Name(id='self', ctx=Param()), 
                          Name(id='who', ctx=Param())], 
                        vararg=None, kwarg=None, defaults=[]), 
                      body=[
                        Assign(
                          targets=[
                            Attribute(
                              value=Name(id='self', ctx=Load()), 
                              attr='who', ctx=Store()) ], 
                          value=Name(id='who', ctx=Load()))], 
            

Code object

            >>> def my_fun(x=12):
            ...     y = x * 3
            ...     return y
            ...
            >>> code_obj = my_fun.func_code
            >>> code_obj.co_name
            'my_fun'
            >>> code_obj.co_filename
            '<stdin>'
            >>> code_obj.co_argcount
            1
            >>> code_obj.co_varnames
            ('x', 'y')
            >>> code_obj.co_consts
            (None, 3)
            >>> code_obj.co_code
            '|\x00\x00d\x01\x00\x14}\x01\x00|\x01\x00S'
            

.pyc file

Function object

            >>> def my_fun(x=12):
            ...     y = x * 3
            ...     return y
            ...
            >>> my_fun.func_code
            <code object my_fun at 000000000217A918, file "<stdin>", line 1>
            >>> my_fun.func_name
            'my_fun'
            >>> my_fun.func_defaults
            (12,)
            >>> my_fun.func_defaults = (27,)
            >>> my_fun()
            81
            

Frame object

            Traceback (most recent call last):
              File "oops.py", line 10, in <module>
                i_hope_it_works(17)
              File "oops.py", line 8, in i_hope_it_works
                try_it(x)
              File "oops.py", line 5, in try_it
                try_again(x)
              File "oops.py", line 2, in try_again
                raise Exception("oops")
            Exception: oops
            

Let's build stuff

Trace function

Trace example: show1.py

            import sys

            def trace(frame, event, arg):
                filename = frame.f_code.co_filename
                lineno = frame.f_lineno
                print "%s @ %s: %s" % (filename, lineno, event)
                return trace

            def run_py(py_file):
                sys.settrace(trace)
                exec open(py_file) in {}
                sys.settrace(None)

            if __name__ == '__main__':
                run_py(sys.argv[1])
            

Output of show1.py

            greeter.py @ 1: call
            greeter.py @ 1: line
            greeter.py @ 1: call
            greeter.py @ 1: line
            greeter.py @ 2: line
            greeter.py @ 5: line
            greeter.py @ 5: return
            greeter.py @ 8: line
            greeter.py @ 2: call
            greeter.py @ 3: line
            greeter.py @ 3: return
            greeter.py @ 9: line
            greeter.py @ 5: call
            greeter.py @ 6: line
            Hello, world!
            greeter.py @ 6: return
            greeter.py @ 9: return
            

Show source: show2.py

            import linecache, sys

            class ShowAndTell(object):
                def __init__(self):
                    self.nest = 0

                def trace_fn(self, frame, event, arg):
                    filename = frame.f_code.co_filename
                    lineno = frame.f_lineno
                    indent = "  " * self.nest
                    info = "%s%s @ %s: %s" % (indent, filename, lineno, event)
                    src = linecache.getline(filename, lineno)
                    print "%-30s | %s" % (info, src[:-1])
                    if event == "call":
                        self.nest += 1
                    elif event == "return":
                        self.nest -= 1
                    return self.trace_fn
            greeter.py @ 1: call           | class Greeter:
              greeter.py @ 1: line         | class Greeter:
              greeter.py @ 1: call         | class Greeter:
                greeter.py @ 1: line       | class Greeter:
                greeter.py @ 2: line       |     def __init__(self, who):
                greeter.py @ 5: line       |     def greet(self):
                greeter.py @ 5: return     |     def greet(self):
              greeter.py @ 8: line         | greeter = Greeter("world")
              greeter.py @ 2: call         |     def __init__(self, who):
                greeter.py @ 3: line       |         self.who = who
                greeter.py @ 3: return     |         self.who = who
              greeter.py @ 9: line         | greeter.greet()
              greeter.py @ 5: call         |     def greet(self):
                greeter.py @ 6: line       |         print "Hello, %s!" % self.who
            Hello, world!
                greeter.py @ 6: return     |         print "Hello, %s!" % self.who
              greeter.py @ 9: return       | greeter.greet()
            
            class Greeter:
                def __init__(self, who):
                    self.who = who

                def greet(self):
                    print "Hello, %s!" % self.who

            greeter = Greeter("world")
            greeter.greet()
            

Trace function details

Finding expressions

Working with ASTs

            class MyAstVisitor(ast.NodeVisitor):
                def visit_Expr(self, node):
                    # .. do something with Expr node ..

                def visit_Assign(self, node):
                    # .. do something with Assign node ..

                # .. other visit_XXX as needed ..
            

Collecting safe expressions

Collecting safe expressions

            class ExpressionFinder(ast.NodeVisitor):

                def visit_Str(self, node):
                    return repr(node.s)

                def visit_Name(self, node):
                    self.add_expr(node, node.id)
                    return node.id

                def visit_BinOp(self, node):
                    left = self.visit(node.left)
                    right = self.visit(node.right)
                    if left and right:
                        expr = "(%s %s %s)" % (left, self.op(node.op), right)
                        self.add_expr(node, expr)
                        return expr

                # .. 10 more node types ..
            

Collecting safe expressions

            class ExpressionFinder(ast.NodeVisitor):
                # .. continued .. 

                def add_expr(self, node, expr):
                    """Add the expression `expr` found in node `node`."""
                    self.exprs[node.lineno].add(expr)

                def expressions(self, node):
                    """Return a dict mapping line numbers to expressions."""
                    self.exprs = collections.defaultdict(set)
                    self.visit(node)
                    return self.exprs


            def line_expressions(filename):
                source = open(filename).read()
                node = ast.parse(source, mode="exec")
                return ExpressionFinder().expressions(node)
            

Showing expressions: show3.py

            class ShowAndTell(object):
                #...

                def trace_fn(self, frame, event, arg):
                    
                    #...

                    vals = []
                    for expr in self.expressions(filename, lineno):
                        try:
                            val = eval(expr, frame.f_globals, frame.f_locals)
                            vals.append((expr, val))
                        except:
                            # Eval'ing can die, just move on.
                            pass
                    if vals:
                        print
                        print "\n".join(":: %s : %r" % ev for ev in vals)
                        print
            
            greeter.py @ 1: call           | class Greeter:
              greeter.py @ 1: line         | class Greeter:
              greeter.py @ 1: call         | class Greeter:
                greeter.py @ 1: line       | class Greeter:
                greeter.py @ 2: line       |     def __init__(self, who):
                greeter.py @ 5: line       |     def greet(self):
                greeter.py @ 5: return     |     def greet(self):
              greeter.py @ 8: line         | greeter = Greeter("world")

            :: Greeter : <class __builtin__.Greeter at 0x000000000200E780>

              greeter.py @ 2: call         |     def __init__(self, who):

            :: who : 'world'
            :: self : <__builtin__.Greeter instance at 0x000000000202B608>

                greeter.py @ 3: line       |         self.who = who

            :: who : 'world'
            :: self : <__builtin__.Greeter instance at 0x000000000202B608>

                greeter.py @ 3: return     |         self.who = who

            :: who : 'world'
            :: self : <__builtin__.Greeter instance at 0x000000000202B608>
            :: self.who : 'world'

              greeter.py @ 9: line         | greeter.greet()
            :: self.who : 'world'

              greeter.py @ 9: line         | greeter.greet()

            :: greeter : <__builtin__.Greeter instance at 0x000000000202B608>
            :: greeter.greet : <bound method Greeter.greet of <__builtin__.Greeter instance at 0x000000000202B608>>

              greeter.py @ 5: call         |     def greet(self):

            :: self : <__builtin__.Greeter instance at 0x000000000202B608>

                greeter.py @ 6: line       |         print "Hello, %s!" % self.who

            :: self : <__builtin__.Greeter instance at 0x000000000202B608>
            :: self.who : 'world'
            :: ('Hello, %s!' % self.who) : 'Hello, world!'

            Hello, world!
                greeter.py @ 6: return     |         print "Hello, %s!" % self.who

            :: self : <__builtin__.Greeter instance at 0x000000000202B608>
            :: self.who : 'world'
            :: ('Hello, %s!' % self.who) : 'Hello, world!'

              greeter.py @ 9: return       | greeter.greet()

            :: greeter : <__builtin__.Greeter instance at 0x000000000202B608>
            :: greeter.greet : <bound method Greeter.greet of <__builtin__.Greeter instance at 0x000000000202B608>>

            

Knowing more: inspect

Hiding less-interesting stuff

            def should_show_value(self, expr, val):
                if inspect.isroutine(val):
                    return False
                if inspect.isclass(val):
                    return False
                return True

            def trace_fn(self, frame, event, arg):
                #...
                vals = []
                for expr in self.expressions(filename, lineno):
                    try:
                        val = eval(expr, frame.f_globals, frame.f_locals)
                        if self.should_show_value(expr, val):
                            vals.append((expr, val))
                    except:
                        # Eval'ing can die, just move on.
                        pass
                if vals:
                    print
                    print "\n".join(":: %s : %r" % ev for ev in vals)
                    print

                #...
            
            greeter.py @ 1: call       | class Greeter:
            greeter.py @ 1: line       | class Greeter:
            greeter.py @ 1: call       | class Greeter:
            greeter.py @ 1: line       | class Greeter:
            greeter.py @ 2: line       |     def __init__(self, who):
            greeter.py @ 5: line       |     def greet(self):
            greeter.py @ 5: return     |     def greet(self):
            greeter.py @ 8: line       | greeter = Greeter("world")
            greeter.py @ 2: call       |     def __init__(self, who):

            :: who : 'world'
            :: self : <__builtin__.Greeter instance at 0x0000000002108F48>

            greeter.py @ 3: line       |         self.who = who

            :: who : 'world'
            :: self : <__builtin__.Greeter instance at 0x0000000002108F48>

            greeter.py @ 3: return     |         self.who = who

            :: who : 'world'
            :: self : <__builtin__.Greeter instance at 0x0000000002108F48>
            :: self.who : 'world'

            greeter.py @ 9: line       | greeter.greet()

            :: greeter : <__builtin__.Greeter instance at 0x0000000002108F48>

            greeter.py @ 5: call       |     def greet(self):

Odds & Ends

tokenize, token, symbol

            reader = open(filename, 'r').readline
            for typ, tok, (sr, sc), (er, ec), line in tokenize.generate_tokens(reader):
                #.. do something with each token ..
            

dis, opcode

            $ python -m dis greeter.py
            1           0 LOAD_CONST               0 ('Greeter')
                        3 LOAD_CONST               4 (())
                        6 LOAD_CONST               1 (<code object Greeter at 00000000023D34E0, file "greeter.py", line 1>)
                        9 MAKE_FUNCTION            0
                       12 CALL_FUNCTION            0
                       15 BUILD_CLASS
                       16 STORE_NAME               0 (Greeter)

            8          19 LOAD_NAME                0 (Greeter)
                       22 LOAD_CONST               2 ('world')
                       25 CALL_FUNCTION            1
                       28 STORE_NAME               1 (greeter)

            9          31 LOAD_NAME                1 (greeter)
                       34 LOAD_ATTR                2 (greet)
                       37 CALL_FUNCTION            0
                       40 POP_TOP
                       41 LOAD_CONST               3 (None)
                       44 RETURN_VALUE
            

Caution

Python is dynamic

Python is dynamic

            class Chameleon(object):
                def __init__(self, other):
                    self.other = other

                def __getattr__(self, name):
                    return getattr(self.other, name)
            

Python is dynamic

            import os

            if os.environ["EXTRA_LOGGING"]:
                import my_logger
                my_logger.init()

            ...
            

Easy to approximate

            :: greeter : <__builtin__.Greeter instance at 0x00000000023A3E88>
            

With great power...

Really: don't ever do this...

            # Example: Breaking out from a deeply nested loop:
            from goto import goto, label
            for i in range(1, 10):
                for j in range(1, 20):
                    for k in range(1, 30):
                        print i, j, k
                        if k == 3:
                            goto .end
            label .end
            print "Finished\n"
            

But what about this?

            def get_request():
              """Find the nearest caller's first "request" arg."""
              try:
                for f in inspect.stack():
                  frame = f[0]
                  code = frame.f_code
                  if code.co_argcount > 0 and code.co_varnames[0] == "request":
                    return frame.f_locals['request']
              finally:
                del frame
            

Wrapping up

Wrapping up

Thank you

http://nedbatchelder.com/text/aware.html

@nedbat

PS: I'm available for freelance work :)

Made with Cog, Slippy, and Fontin.