Wednesday, December 29, 2021

Idle puzzles

I've been amusing myself with these little puzzles. They're simple enough you can do them in your head, but I coded them up just for fun and to see if they worked.

;;; -*- Lisp -*-

(defpackage "PUZZLE"
  (:use)
  (:import-from "COMMON-LISP"
                "AND"
                "COND"
                "DEFUN"
                "IF"
                "FUNCALL"
                "FUNCTION"
                "LAMBDA"
                "LET"
                "MULTIPLE-VALUE-BIND"
                "NIL"
                "NOT"
                "OR"
                "T"
                "VALUES"
                "ZEROP"))

(defun puzzle::shr (n) (floor n 2))
(defun puzzle::shl0 (n) (* n 2))
(defun puzzle::shl1 (n) (1+ (* n 2)))

(in-package "PUZZLE")

;;; You can only use the symbols you can access in the PUZZLE package.

;;; Problem -1 (Example).  Define =

(defun = (l r)
  (cond ((zerop l) (zerop r))
        ((zerop r) nil)
        (t (multiple-value-bind (l* l0) (shr l)
             (multiple-value-bind (r* r0) (shr r)
               (if (zerop l0)
                   (and (zerop r0)
                        (= l* r*))
                   (and (not (zerop r0))
                        (= l* r*))))))))

;;; Problem 0.  Define >

;;; Problem 1.  Define (inc n), returns n + 1 for any non-negative n.

;;; Problem 2.  Define (dec n), returns n - 1 for any positive n.

;;; Problem 3.  Define (add l r), returns the sum of l and r where l
;;;             and r each are non-negative numbers.

;;; Problem 4.  Define (sub l r), returns the difference of l and r
;;;             where l and r are non-negative numbers and l >= r.

;;; Problem 5.  Define (mul l r), returns the product of l and r where
;;;             l and r are non-negative integers.

;;; Problem 6.  Define (pow l r), returns l raised to the r power,
;;;             where l is positive and r is non-negative.

;;; Problem 7.  Define (div l r), returns the quotient and remainder
;;;             of l/r.  l is non-negative and r is positive.

;;; Problem 8.  Define (log l r), returns the integer logarithm of l
;;;             base r 

My Solutions

;;; Problem 0.  Define >

(defun > (l r)
  (cond ((zerop l) nil)
        ((zerop r) t)
        (t (multiple-value-bind (l* l0) (shr l)
             (multiple-value-bind (r* r0) (shr r)
               (if (and (not (zerop l0)) (zerop r0))
                   (not (> r* l*))
                   (> l* r*)))))))

This one turned out to be trickier than I thought. I figured you’d basically discard the low order bit and just compare the high ones. And you do, but for this one case where the left bit is one and the right bit is zero. In this case, l > r if l* >= r*, so we swap the arguments and invert the sense of the conditional.

;;; Problem 1.  Define (inc n), returns n + 1 for any non-negative n.

(defun inc (n)
  (multiple-value-bind (n* n0) (shr n)
    (if (zerop n0)
        (shl1 n*)
        (shl0 (inc n*)))))

;;; Problem 2.  Define (dec n), returns n - 1 for any positive n.

(defun dec (n)
  (multiple-value-bind (n* n0) (shr n)
    (if (zerop n0)
        (shl1 (dec n*))
        (shl0 n*))))
;;; Problem 3.  Define (add l r), returns the sum of l and r where l
;;;             and r each are non-negative numbers.

(defun add (l r)
  (cond ((zerop l) r)
        ((zerop r) l)
        (t (multiple-value-bind (l* l0) (shr l)
             (multiple-value-bind (r* r0) (shr r)
               (if (zerop l0)
                   (if (zerop r0)
                       (shl0 (add l* r*))
                       (shl1 (add l* r*)))
                   (if (zerop r0)
                       (shl1 (add l* r*))
                       (shl0 (addc l* r*)))))))))

(defun addc (l r)
  (cond ((zerop l) (inc r))
        ((zerop r) (inc l))
        (t (multiple-value-bind (l* l0) (shr l)
             (multiple-value-bind (r* r0) (shr r)
               (if (zerop l0)
                   (if (zerop r0)
                       (shl1 (add l* r*))
                       (shl0 (addc l* r*)))
                   (if (zerop r0)
                       (shl0 (addc l* r*))
                       (shl1 (addc l* r*)))))))))

;;; Problem 4.  Define (sub l r), returns the difference of l and r
;;;             where l and r are non-negative numbers and l >= r.

(defun sub (l r)
  (cond ((zerop l) 0)
        ((zerop r) l)
        (t (multiple-value-bind (l* l0) (shr l)
             (multiple-value-bind (r* r0) (shr r)
               (if (zerop l0)
                   (if (zerop r0)
                       (shl0 (sub l* r*))
                       (shl1 (subb l* r*)))
                   (if (zerop r0)
                       (shl1 (sub l* r*))
                       (shl0 (sub l* r*)))))))))

(defun subb (l r)
  (cond ((zerop l) 0)
        ((zerop r) (dec l))
        (t (multiple-value-bind (l* l0) (shr l)
             (multiple-value-bind (r* r0) (shr r)
               (if (zerop l0)
                   (if (zerop r0)
                       (shl1 (subb l* r*))
                       (shl0 (subb l* r*)))
                   (if (zerop r0)
                       (shl0 (sub l* r*))
                       (shl1 (subb l* r*)))))))))

The presence of a carry or borrow is encoded by which procedure you are in. Effectively, we're encoding the carry or borrow in the program counter.

;;; Problem 5.  Define (mul l r), returns the product of l and r where
;;;             l and r are non-negative integers.

(defun fma (l r a)
  (if (zerop r)
      a
      (multiple-value-bind (r* r0) (shr r)
        (fma (shl0 l) r* (if (zerop r0) a (add l a))))))

(defun mul (l r) (fma l r 0))

This has a nice iterative solution if we define a “fused multiply add” operation that given l, r, and a, computes (+ (* l r) a).

Exponentiation has an obvious analogy to multiplication, but instead of doubling l each iteration, we square it, and we multiply into the accumulator rather than adding into it.

;;; Problem 6.  Define (pow l r), returns l raised to the r power,
;;;             where l is positive and r is non-negative.

(defun fem (b e m)
  (if (zerop e)
       m
       (multiple-value-bind (e* e0) (shr e)
         (fem (mul b b) e* (if (zerop e0) m (mul b m))))))

(defun pow (b e) (fem b e 1))

For division we use a curious recursion. To divide a big number n by a divisor d, we first pass the buck and divide by (* d 2) and get a quotient and remainder. The quotient we return is twice the quotient we got back, plus 1 if the remainder we got back is bigger than d. We either return the remainder we got back or we subtract d from it.

I find this curious because one usually performs a recursion by making one of the arguments in some way smaller on each recursive call. The recursion bottoms out when the argument can get no smaller. In this recursion, however, we keep trying to divide by bigger and bigger divisors until we cannot anymore.

;;; Problem 7.  Define (div l r), returns the quotient and remainder
;;;             of l/r.  l is non-negative and r is positive.

(defun div (n d)
  (if (> d n)
      (values 0 n)
      (multiple-value-bind (q r) (div n (shl0 d))
        (if (> d r)
            (values (shl0 q) r)
            (values (shl1 q) (sub r d))))))

Logarithm should be analagous (mutatis mutandis).

;;; Problem 8.  Define (log l r), returns the integer logarithm of l
;;;             base r 

(defun log (l r)
  (if (> r l)  
      (values 0 l)
      (multiple-value-bind (lq lr) (log l (mul r r))
        (if (> r lr)
            (values (shl0 lq) lr)
            (values (shl1 lq) (div lr r))))))

Thursday, October 14, 2021

Update October 2021

Here's a few things I've been playing with lately.

jrm-code-project/utilities has a few utilities that I commonly use. Included are utilities/lisp/promise and utilities/lisp/stream which provide S&ICP-style streams (lazy lists). utilities/lisp/utilities is a miscellaneous grab bag of functions and macros.

jrm-code-project/homographic is a toolkit for linear fractional transforms (homographic functions). In addition to basic LFT functionality, it provides examples of exact real arithmetic using streams of LFTs.

jrm-code-project/LambdaCalculus has some code for exploring lambda calculus.

jrm-code-project/CLRLisp is an experimental Lisp based on the .NET Common Language Runtime. The idea is that instead of trying to adapt a standard Lisp implementation to run on the .NET CLR, we just add a bare-bones eval and apply that use the CLR reflection layer and see what sort of Lisp naturally emerges. At this point, it only just shows signs of life: there are lambda expressions and function calls, but no definitions, conditionals, etc. You can eval lists: (System.Console.WriteLine "Hello World."), but I haven't written a reader and printer, so it is impractical for coding.

Monday, August 30, 2021

Tail recursion and fold-left

fold-left has this basic recursion:

(fold-left f init ())      = init
(fold-left f init (a . d)) = (fold-left f (f init a) d)
A straightforward implementation of this is
(defun fold-left (f init list)
  (if (null list)
      init
      (fold-left f (funcall f init (car list)) (cdr list))))
The straightforward implementation uses a slightly more space than necessary. The call to f occurs in a subproblem position, so there the stack frame for fold-left is preserved on each call and the result of the call is returned to that stack frame.

But the result of fold-left is the result of the last call to f, so we don't need to retain the stack frame for fold-left on the last call. We can end the iteration on a tail call to f on the final element by unrolling the loop once:

(defun fold-left (f init list)
  (if (null list)
      init
      (fold-left-1 f init (car list) (cdr list))))

(defun fold-left-1 (f init head tail)
  (if (null tail)
      (funcall f init head)
      (fold-left-1 f (funcall f init head) (car tail) (cdr tail))))

There aren't many problems where this would make a difference (a challenge to readers is to come up with a program that runs fine with the unrolled loop but causes a stack overflow with the straightforward implementation), but depending on how extreme your position on tail recursion is, this might be worthwhile.

Friday, August 27, 2021

A Floating-point Problem

Here's a 2x2 matrix:

[64919121   -159018721]
[41869520.5 -102558961]
We can multiply it by a 2 element vector like this:
(defun mxv (a b
            c d

            x
            y

            receiver)
  (funcall receiver
           (+ (* a x) (* b y))
           (+ (* c x) (* d y))))

* (mxv 64919121     -159018721
       41869520.5d0 -102558961
 
       3
       1

       #'list)

(35738642 2.30496005d7)
Given a matrix and a result, we want to find the 2 element vector that produces that result. To do this, we compute the inverse of the matrix:
(defun m-inverse (a b
                  c d

                  receiver)
  (let ((det (- (* a d) (* b c))))
    (funcall receiver
             (/ d det) (/ (- b) det)
             (/ (- c) det) (/ a det))))
and multiply the inverse matrix by the result:
(defun solve (a b
              c d

              x
              y

              receiver)
  (m-inverse a b
             c d
             (lambda (ia ib
                      ic id)
               (mxv ia ib
                    ic id

                    x
                    y
                    receiver))))
So we can try this on our matrix
* (solve 64919121     -159018721
         41869520.5d0 -102558961

         1
         0
         #'list)

(1.02558961d8 4.18695205d7)
and we get the wrong answer.

What's the right answer?

* (solve 64919121         -159018721
         (+ 41869520 1/2) -102558961

         1
         0
         #'list)

(205117922 83739041)
If we use double precision floating point, we get the wrong answer by a considerable margin.

I'm used to floating point calculations being off a little in the least significant digits, and I've seen how the errors can accumulate in an iterative calculation, but here we've lost all the significant digits in a straightforward non-iterative calculation. Here's what happened: The determinant of our matrix is computed by subtracting the product of the two diagonals. One diagonal is (* 64919121 -102558961) = -6658037598793281, where the other diagonal is (* (+ 41869520 1/2) -159018721) = -6658037598793280.5 This second diagonal product cannot be represented in double precision floating point, so it is rounded down to -6658037598793280. This is where the error is introduced. An error of .5 in a quantity of -6658037598793281 is small indeed, but we amplify this error when we subtract out the other diagonal. We still have an absolute error of .5, but now it occurs within a quantity of 1, which makes it relatively huge. This is called “catastrophic cancellation” because the subtraction “cancelled” all the significant digits (the “catastrophe” is presumably the amplification of the error).

I don't care for the term “catastrophic cancellation” because it places the blame on the operation of subtraction. But the subtraction did nothing wrong. The difference betweeen -6658037598793280 and -6658037598793281 is 1 and that is the result we got. It was the rounding in the prior step that introduced an incorrect value into the calculation. The subtraction just exposed this and made it obvious.

One could be cynical and reject floating point operations as being too unreliable. When we used exact rationals, we got the exactly correct result. But rational numbers are much slower than floating point and they have a tendancy to occupy larger and larger amounts of memory as the computation continues. Floating point is fast and efficient, but you have to be careful when you use it.

Wednesday, August 18, 2021

Fold right

fold-left takes arguments like this:

(fold-left function init list)
and computes
* (fold-left (lambda (l r) `(f ,l ,r)) 'init '(a b c))
(F (F (F INIT A) B) C)
Notice how init is the leftmost of all the arguments to the function, and each argument appears left to right as it is folded in.

Now look at the usual way fold-right is defined:

(fold-right function init list)
It computes
* (fold-right (lambda (l r) `(f ,l ,r)) 'init '(a b c))
(F A (F B (F C INIT)))
although init appears first and to the left of '(a b c) in the arguments to fold-right, it is actually used as the rightmost argument to the last application.

It seems to me that the arguments to fold-right should be in this order:

; (fold-right function list final)
* (fold-right (lambda (l r) `(f ,l ,r)) '(a b c) 'final)
(F A (F B (F C FINAL)))
The argument lists to fold-left and fold-right would no longer match, but I think switching things around so that the anti-symmetry of the arguments matches the anti-symmetry of the folding makes things clearer.

Friday, May 21, 2021

Stupid Y operator tricks

Here is the delta function: δ = (lambda (f) (f f)). Delta takes a function and tail calls that function on itself. What happens if we apply the delta function to itself? Since the delta function is the argument, it is tail called and applied to itself. Which leads again to itself being tail called and applied to itself. We have a situation of infinite regression: the output of (δ δ) ends up being a restatement of the output of (δ δ). Now in this case, regression is infinite and there is no base case, but imagine that somehow there were a base case, or that somehow we identified a value that an infinite regression equated to. Then each stage of the infinite regression just replicates the previous stage exactly. It is like having a perfectly silvered mirror: it just replicates the image presented to it exactly. By calling delta on delta, we've arranged our perfectly silvered mirror to reflect an image of itself. This leads to the “infinite hall of mirrors” effect.

So let's tweak the delta function so that instead of perfectly replicating the infinite regression, it applies a function g around the replication: (lambda (f) (g (f f))). If we apply this modified delta function to itself, each expansion of the infinite regression ends up wrapping an application of the g around it: (g (f f)) = (g (g (f f))) = (g (g (g (f f)))) = (g (g (g (g … )))). So our modified delta function gives us a nested infinite regression of applications of g. This is like our perfectly silvered mirror, but now the reflected image isn't mirrored exactly: we've put a frame on the mirror. When we arrange for the mirror to reflect itself, each nested reflection also has an image of the frame around the reflection, so we get a set of infinitely nested frames.

An infinite regression of (g (g (g (g … )))) is confusing. What does it mean? We can untangle this by unwrapping an application. (g (g (g (g … )))) is just a call to g. The argument to that call is weird, but we're just calling (g <something>). The result of the infinite regression (g (g (g (g … )))) is simply the result of the outermost call to g. We can use this to build a recursive function.

;; If factorial = (g (g (g (g … )))), then
;; factorial = (g factorial), where

(defun g (factorial)
  (lambda (x)
    (if (zerop x)
        1
        (* x (funcall factorial (- x 1))))))
The value returned by an inner invocation of g is the value that will be funcalled in the altenative branch of the conditional.

Y is defined thus:

Y = λg.(λf.g(f f))(λf.g(f f))

A straightforward implementation attempt would be
;; Non working y operator
(defun y (g)
  (let ((d (lambda (f) (funcall g (funcall f f)))))
    (funcall d d)))
but since lisp is a call-by-value language, it will attempt to (funcall f f) before funcalling g, and this will cause runaway recursion. We can avoid the runaway recursion by delaying the (funcall f f) with a strategically placed thunk
;; Call-by-value y operator
;; returns (g (lambda () (g (lambda () (g (lambda () … ))))))
(defun y (g)
  (let ((d (lambda (f) (funcall g (lambda () (funcall f f))))))
    (funcall d d)))
Since the recursion is now wrapped in a thunk, we have to funcall the thunk to force the recursive call. Here is an example where we see that:
* (funcall (Y (lambda (thunk)
                (lambda (x)
                  (if (zerop x)
                      1
                      (* x (funcall (funcall thunk) (- x 1)))))))
           6)
720
the (funcall thunk) invokes the thunk in order to get the actual recursive function, which we when then funcall on (- x 1).

By wrapping the self-application with a thunk, we've made the call site where we use the thunk more complicated. We can clean that up by wrapping the call to the thunk in something nicer:

* (funcall
    (y (lambda (thunk)
         (flet ((factorial (&rest args)
                  (apply (funcall thunk) args)))

           (lambda (x)
             (if (zerop x)
                 1
                 (* x (factorial (- x 1))))))))
      6)
720
And we can even go so far as to hoist that wrapper back up into the definiton of y
(defun y1 (g)
  (let ((d (lambda (f) (funcall g (lambda (&rest args) (apply (funcall f f) args))))))
    (funcall d d)))

* (funcall
    (y1 (lambda (factorial)
          (lambda (x)           
            (if (zerop x)
                1
                (* x (funcall factorial x))))))
    6)
720
y1 is an alternative formulation of the Y operator where we've η-expanded the recursive call to avoid the runaway recursion.

The η-expanded version of the applicative order Y operator has the advantage that it is convenient for defining recursive functions. The thunkified version is less convenient because you have to force the thunk before using it, but it allows you to use the Y operator to define recursive data structures as well as functions:

(Y
  (lambda (delayed-ones)
    (cons-stream 1 (delayed-ones))))
{1 …}

The argument to the thunkified Y operator is itself a procedure of one argument, the thunk. Y returns the result of calling its argument. Y should return a procedure, so the argument to Y should return a procedure. But it doesn't have to immediately return a procedure, it just has to eventually return a procedure, so we could, for example, print something before returning the procedure:

* (funcall (Y (lambda (thunk)
                (format t "~%Returning a procedure")
                (lambda (x)
                  (if (zerop x)
                      1
                      (* x (funcall (funcall thunk) (- x 1)))))))
         6)
Returning a procedure
Returning a procedure
Returning a procedure
Returning a procedure
Returning a procedure
Returning a procedure
720
There is one caveat. You must be able to return the procedure without attempting to make the recursive call.

Let's transform the returned function before returning it by applying an arbitrary function h to it:

(Y (lambda (thunk)
     (h (lambda (x)
          (if (zerop x)
              1
              … )))))
Ok, so now when we (funcall thunk) we don't get what we want, we've got an invocation of h around it. If we have an inverse to h, h-1, available, we can undo it:
(y (lambda (thunk)
      (h (lambda (x)
           (if (zerop x)
               1
               (* (funcall (h-1 (funcall thunk)) (- x 1))))))))
As a concrete example, we return a list and at the call site we extract the first element of that list before calling it:
* (funcall (car (y (lambda (thunk)
                     (list (lambda (x)
                             (if (zerop x)
                                 1
                                 (* x (funcall (car (funcall thunk)) (- x 1))))))))
           6)
720
So we can return a list of mutually recursive functions:
(y (lambda (thunk)
    (list
      ;; even?
      (lambda (n)
        (or (zerop n)
            (funcall (cadr (funcall thunk)) (- n 1))))

      ;; odd?
      (lambda (n)
        (and (not (zerop n))
             (funcall (car (funcall thunk)) (- n 1))))
      )))
If we use the η-expanded version of the Y operator, then we can adapt it to expect a list of mutually recursive functions on the recursive call:
(defun y* (&rest g-list)
  (let ((d (lambda (f)
             (map 'list (lambda (g)
                          (lambda (&rest args)
                            (apply (apply g (funcall f f)) args)))
                  g-list))))
     (funcall d d)))
which we could use like this:
* (let ((eo (y* (lambda (even? odd?)
                  (declare (ignore even?))
                  (lambda (n)
                    (or (zerop n)
                        (funcall odd? (- n 1)))))

                (lambda (even? odd?)
                  (declare (ignore odd?))
                  (lambda (n)
                    (and (not (zerop n))
                         (funcall even? (- n 1))))))))
     (let ((even? (car eo))
           (odd?  (cadr eo)))
       (do ((i 0 (+ i 1)))
           ((>= i 5))
         (format t "~%~d, ~s ~s"
                 i
                 (funcall even? i)
                 (funcall odd? i)))))))
                 
0, T NIL
1, NIL T
2, T NIL
3, NIL T
4, T NIL
Instead of returning a list of mutually recursive functions, we could return them as multiple values. We just have to be expecting multiple values at the call site:
(defun y* (&rest gs)
  (let ((d (lambda (f)
             (apply #'values
                    (map 'list
                         (lambda (g)
                           (lambda (&rest args)
                             (apply (multiple-value-call g (funcall f f)) args)))
                         gs)))))
    (funcall d d)))

MIT Scheme used to have a construct called a named lambda. A named lambda has an extra first argument that is automatically filled in with the function itself. So during evaluation of the body of a named lambda, the name is bound to the named lambda, enabling the function to call itself recursively:

(defmacro named-lambda ((name &rest args) &body body)
  `(y1 (lambda (,name)
         (lambda ,args
           ,@body))))

* (funcall (named-lambda (factorial x)
            (if (zerop x)
                1
                (* x (funcall factorial (- x 1)))))
         6)
720
This leads us to named let expressions. In a named let, the implicit lambda that performs the let bindings is a named lambda. Using that name to invoke the lambda on a different set of arguments is like recursively re-doing the let.
* (named-let fact ((x 6)) (if (zerop x) 1 (* x (funcall fact (- x 1)))))

720

In Scheme, you use letrec to define recursive or mutually recursive procedures. Internal definitions expand into an appropriate letrec. letrec achieves the necessary circularity not through the Y operator, but through side effects. It is hard to tell the difference, but there is a difference. Using the Y operator would allow you to have recursion, but avoid the implicit side effects in a letrec.

Oleg Kiselyov has more to say about the Y operator at http://okmij.org/ftp/Computation/fixed-point-combinators.html

Saturday, May 15, 2021

β-conversion

If you have an expression that is an application, and the operator of the application is a lambda expression, then you can β-reduce the application by substituting the arguments of the application for the bound variables of the lambda within the body of the lambda.

(defun beta (expression if-reduced if-not-reduced)
  (if (application? expression)
      (let ((operator (application-operator expression))
            (operands (application-operands expression)))
        (if (lambda? operator)
            (let ((bound-variables (lambda-bound-variables operator))
                  (body (lambda-body operator)))
              (if (same-length? bound-variables operands)
                  (funcall if-reduced
                           (xsubst body
                                   (table/extend* (table/empty)
                                                  bound-variables
                                                  operands)))
                  (funcall if-not-reduced)))
            (funcall if-not-reduced)))
      (funcall if-not-reduced)))

* (beta '((lambda (x y) (lambda (z) (* x y z))) a (+ z 3))
        #'identity (constantly nil))
(LAMBDA (#:Z460) (* A (+ Z 3) #:Z460))

A large, complex expression may or may not have subexpressions that can be β-reduced. If neither an expression or any of its subexpressions can be β-reduced, then we say the expression is in “β-normal form”. We may be able to reduce an expression to β-normal form by β-reducing where possible. A β-reduction can introduce further reducible expressions if we substitute a lambda expression for a symbol in operator position, so reducing to β-normal form is an iterative process where we continue to reduce any reducible expressions that arise from substitution.

(defun beta-normalize-step (expression)
  (expression-dispatch expression

    ;; case application
    (lambda (subexpressions)
      ;; Normal order reduction
      ;; First, try to beta reduce the outermost application,
      ;; otherwise, recursively descend the subexpressions, working
      ;; from left to right.
      (beta expression
            #'identity
            (lambda ()
              (labels ((l (subexpressions)
                         (if (null subexpressions)
                             '()
                             (let ((new-sub (beta-normalize-step (car subexpressions))))
                               (if (eq new-sub (car subexpressions))
                                   (let ((new-tail (l (cdr subexpressions))))
                                     (if (eq new-tail (cdr subexpressions))
                                         subexpressions
                                         (cons (car subexpressions) new-tail)))
                                   (cons new-sub (cdr subexpressions)))))))
                (let ((new-subexpressions (l subexpressions)))
                  (if (eq new-subexpressions subexpressions)
                      expression
                      (make-application new-subexpressions)))))))

    ;; case lambda
    (lambda (bound-variables body)
      (let ((new-body (beta-normalize-step body)))
        (if (eql new-body body)
            expression
            (make-lambda bound-variables new-body))))

    ;; case symbol
    (constantly expression)))

;;; A normalized expression is a fixed point of the
;;; beta-normalize-step function.
(defun beta-normalize (expression)
  (do ((expression expression (beta-normalize-step expression))
       (expression1 '() expression)
       (count 0 (+ count 1)))
      ((eq expression expression1)
       (format t "~%~d beta reductions" (- count 1))
       expression)))

You can compute just by using β-reduction. Here we construct an expression that reduces to the factorial of 3. We only have β-reduction, so we have to encode numbers with Church encoding.

(defun test-form ()
  (let ((table
         (table/extend*
          (table/empty)
          '(one
            three
            *
            pred
            zero?
            y)
          '(
            ;; Church numeral one
            (lambda (f) (lambda (x) (f x)))

            ;; Church numeral three 
            (lambda (f) (lambda (x) (f (f (f x)))))

            ;; * (multiply Church numerals)
            (lambda (m n)
              (lambda (f)
                (m (n f))))

            ;; pred (subtract 1 from Church numeral)
            (lambda (n)
              (lambda (f)
                (lambda (x) (((n (lambda (g)
                                   (lambda (h)
                                     (h (g f)))))
                              (lambda (u) x))
                             (lambda (u) u)))))

            ;; zero? (test if Church numeral is zero)
            (lambda (n t f) ((n (lambda (x) f)) t))

            ;; Y operator for recursion
            (lambda (f)
              ((lambda (x) (f (x x)))
               (lambda (x) (f (x x)))))

            )))
        (expr
         '((lambda (factorial)
             (factorial three))
           (y (lambda (fact)
                (lambda (x)
                  (zero? x
                   one
                   (* (fact (pred x)) x))))))))
    (xsubst expr table)))

* (test-form)

((LAMBDA (FACTORIAL) (FACTORIAL (LAMBDA (F) (LAMBDA (X) (F (F (F X)))))))
 ((LAMBDA (F) ((LAMBDA (X) (F (X X))) (LAMBDA (X) (F (X X)))))
  (LAMBDA (FACT)
    (LAMBDA (X)
      ((LAMBDA (N T F) ((N (LAMBDA (X) F)) T)) X
       (LAMBDA (F) (LAMBDA (X) (F X)))
       ((LAMBDA (M N) (LAMBDA (F) (M (N F))))
        (FACT
         ((LAMBDA (N)
            (LAMBDA (F)
              (LAMBDA (X)
                (((N (LAMBDA (G) (LAMBDA (H) (H (G F))))) (LAMBDA (U) X))
                 (LAMBDA (U) U)))))
          X))
        X))))))

* (beta-normalize (test-form))

127 beta reductions
(LAMBDA (F) (LAMBDA (X) (F (F (F (F (F (F X))))))))

This is the Church numeral for 6.

I find it pretty amazing that we can bootstrap ourselves up to arithmetic just by repeatedly β-reducing where we can. It doesn't seem like we're actually doing any work. We're just replacing names with what they stand for.

The β-substitution above replaces all the bound variables with their arguments if there is the correct number of arguments. One could easily implement a partial β-substitution that replaced only some of the bound variables. You'd still have an application, but some of the bound variables in the lambda would be eliminated and the corresponding argument would be removed.

Monday, May 10, 2021

Substitution

In McCarthy's early papers on Lisp, he notes that he needs a modified version of subst which needs to be aware of quoted expressions (and avoid substituting within them). He would also need a subst that was aware of lambda expressions. It would have to avoid substituting within the lambda if the name substituted matches one of the bound variables. To be useful for evaluation, it will have to deal with accidental variable capture when substituting within a lambda.

The root problem is that expressions are actually structured objects, but we are working with the list representation of those objects. Instead of substituting by operating on objects, we substitute on the list representation. We have to arrange for the syntactic substitution on the list representation to preserve the semantics of substitution on the objects they represent.

In the substitution model, we take a symbolic expression and replace some of the atoms in the expression with other expressions. We first need a way to discriminate between the different kinds of expressions. An expression is either an atomic symbol, or a list of expressions called an application. There are no other kinds of expressions.

(defun expression-dispatch (expression if-symbol if-application)
  (cond ((symbolp expression) (funcall if-symbol expression))
        ((consp expression)   (funcall if-application expression))
        (t (error "~s is not an expression." expression))))
Substitution is straightforward:
(defun xsubst (table expression)
  (expression-dispatch expression
    (lambda (symbol)
      (funcall table symbol #'identity (constantly symbol)))

    (lambda (subexpressions)
      (map 'list (lambda (subexpression) (xsubst table subexpression)) subexpressions))))

* (let ((table (table/extend (table/empty) 'x '(* a 42))))
    (xsubst table '(+ x y)))  
(+ (* A 42) Y)
We need a table of multiple substitutions so that we can substitute in parallel:
* (let ((table (table/extend
                 (table/extend (table/empty) 'x 'y)
                 'y 'x)))
    (xsubst table '(+ x y)))
(+ Y X)

So far, so good. Let's add lambda expressions. First, we need to add a new expression kind:

(defun expression-dispatch (expression if-symbol if-lambda if-application) 
  (cond ((symbolp expression) (funcall if-symbol expression))
        ((consp expression)
         (cond ((eq (car expression) 'lambda)
                (funcall if-lambda (cadr expression) (caddr expression)))
               (t (funcall if-application expression))))
        (t (error "~s is not an expression." expression))))

Substitution within a lambda expression is a bit tricky. First, you don't want to substitute a symbol if it is one of the bound variables of the lambda expression. Second, substituting a symbol may introduce more symbols. We don't want the new symbols to be accidentally captured by the bound variables in the lambda. If any new symbol has the same name as a bound variable, we have to rename the bound variable (and all its occurrances) to a fresh name so that it doesn't capture the new symbol being introduced. We'll need a helper function

(defun free-variables (expression)
   (expression-dispatch expression
     (lambda (symbol) (list symbol))
     (lambda (bound-variables body)
       (set-difference (free-variables body) bound-variables))
     (lambda (subexpressions)
       (fold-left #'union '() (map 'list #'free-variables subexpressions)))))
Now when we substitute within a lambda, we first find each free variable in the lambda, look it up in the substitution table, and collect the free variables of the substituted value:
(map 'list (lambda (var)
             (funcall table var #'free-variables (constantly '())))
      (free-variables expression))
This gives us the new free variables for each substitution. The union of all of these is the set of all the new free variables
(fold-left #'union '()
           (map 'list (lambda (var)
                        (funcall table var #'free-variables (constantly '())))
                 (free-variables expression)))
We have to rename the bound variables that are in this set:
 
(intersection 
  bound-variables
  (fold-left #'union '()
             (map 'list (lambda (var)
                          (funcall table var #'free-variables (constantly '())))
                  (free-variables expression))))
So we make a little table for renaming:
(defun make-alpha-table (variables)
  (fold-left (lambda (table variable)
               (table/extend table variable (gensym (symbol-name variable))))
             (table/empty)
             variables))

(let ((alpha-table
       (make-alpha-table
        (intersection 
          bound-variables
          (fold-left #'union '()
                     (map 'list (lambda (var)
                                  (funcall table var #'free-variables (constantly '())))
                          (free-variables expression)))))))
  …)
We rename the bound variables as necessary:
  (make-lambda
    (map 'list (lambda (symbol)
                 (funcall alpha-table symbol #'identity (constantly symbol)))
         bound-variables)
    …)
Finally, we redact the bound variables from the substitution table and append the alpha-table to make the substitutions we need for the lambda body
  (make-lambda
   (map 'list (lambda (symbol)
                (funcall alpha-table symbol #'identity (constantly symbol)))
        bound-variables)
   (xsubst (table/append alpha-table (table/redact* table bound-variables))
           body))))
The entire definition of xsubst is now this:
(defun xsubst (table expression)
  (expression-dispatch expression
    (lambda (symbol)
      (funcall table symbol #'identity (constantly symbol)))

    (lambda (bound-variables body)
      (let ((alpha-table
             (make-alpha-table
              (intersection
               bound-variables
               (fold-left #'union '()
                          (map 'list (lambda (var)
                                       (funcall table var
                                                #'free-variables
                                                (constantly '())))
                               (set-difference (free-variables body) bound-variables)))))))
        (make-lambda
         (map 'list (lambda (symbol)
                      (funcall alpha-table symbol #'identity (constantly symbol)))
              bound-variables)
         (xsubst (table/append alpha-table (table/redact* table bound-variables))
                 body))))

    (lambda (subexpressions)
      (make-application
       (map 'list (lambda (subexpression)
                    (xsubst table subexpression))
            subexpressions)))))
This is certainly more complicated than simple substitution, but we can see it does the right thing here:
* (xsubst (table/extend (table/empty) 'x '(* a y)) '(lambda (y) (+ x y)))
(LAMBDA (#:Y234) (+ (* A Y) #:Y234))

It should be obvious how to add quoted forms. This would require adding a new kind of expression to expression-dispatch and a new handling clause in xsubst that avoids substitution.

I'm not completely happy with how we've added lambda expressions to the expression syntax. Using the symbol lambda as a syntactic marker for lambda expressions causes problems if we also want to use that symbol as an argument or variable. Initially, it seems reasonable to be able to name an argument “lambda”. Within the body of the function, references to the variable lambda would refer to that argument. But what about references in the operator position? By defining lambda expressions as three element lists beginning with the symbol lambda we've made it ambiguous with two-argument applications whose operator is the variable lambda. We have to resolve this ambiguity. The current behavior is that we always interpret the symbol lambda as a syntactic marker so you simply cannot use a variable named lambda as a function.

Monday, May 3, 2021

Lightweight table

You don't need a data structure to make a lookup table. You can make a table just out of the lookup function. In this example, we start with a continuation passing style lookup function:

lookup (key if-found if-not-found)
  
    Invokes (funcall if-found value) if key is in the table,
    invokes (funcall if-not-found) otherwise.
An empty table just invokes the if-not-found continuation:
(defun table/empty ()
  (lambda (key if-found if-not-found)
    (declare (ignore key if-found))
    (funcall if-not-found)))
A table can be extended by wrapping it:
(defun table/extend (table key* value)
  (lambda (key if-found if-not-found)
    (if (eql key key*)
        (funcall if-found value)
        (funcall table key if-found if-not-found))))
So let's try it out:
(defvar *table-1* (table/extend 
                    (table/extend
                      (table/empty)
                      'foo 42)
                    'bar 69))

* (funcall *table-1* 'foo #'identity (constantly 'not-found))
42

* (funcall *table-1* 'quux #'identity (constantly 'not-found))
NOT-FOUND
You can also redact an entry from a table by wrapping the table:
(defun table/redact (table redacted)
  (lambda (key if-found if-not-found)
    (if (eql key redacted)
        (funcall if-not-found)
        (funcall table key if-found if-not-found))))

(defvar *table-2* (table/redact *table-1* 'foo))

* (funcall *table-2* 'foo #'identity (constantly 'not-found))
NOT-FOUND

Are there any advantages to implementing a table in this curious manner? Building a table by nesting a series of lookup steps leads to a linear lookup in linear space, so this kind of table should be more or less comparable to an alist for individual entries. Unlike a traditional table made with a data structure, you cannot enumerate the keys and values in the table. On the other hand, you gain the ability to map keys to values without having to enumerate the keys:

(defun table/bind-predicate (table predicate value)
  (lambda (key if-found if-not-found)
    (if (funcall predicate key)
        (funcall if-found value)
        (funcall table key if-found if-not-found))))

;;; bind all even numbers to the symbol 'EVEN
(defvar *table-3* 
  (table/bind-predicate *table-2* (lambda (n) (and (numberp n) (evenp n))) 'even))

* (funcall *table-3* 6 #'identity (constantly 'not-found))
EVEN
Or you can add a default value to an existing table:
(defun table/add-default (table default-value)
  (lambda (key if-found if-not-found)
    (declare (ignore if-not-found))
    (funcall table key
      if-found
      (lambda () (funcall if-found default-value)))))

(defvar *table-4* (table/add-default *table-3* 'default))

* (funcall *table-4* 'bar #'identity (constantly 'not-found))
69
    
* (funcall *table-4* 'xyzzy #'identity (constantly 'not-found))
DEFAULT

Perhaps the biggest disadvantage of this implementation is the difficulty in inspecting a table.

* *table-4*
#<CLOSURE (LAMBDA (KEY IF-FOUND IF-NOT-FOUND) :IN TABLE/ADD-DEFAULT) {1003CD641B}>
We can use the object inspector to peek inside the closure and maybe sleuth out what this table is made out of, but it isn't just an alist where we can print out the entries.

So far, we've defined a table as being a procedure with the (key if-found if-not-found) signature, but we can flip this around and say that any procedure with a (key if-found if-not-found) signature can be thought of as a table. For example, a regular expression matcher could be considered to be a table of strings (if that were a more useful model).

Monday, April 19, 2021

η-conversion and tail recursion

Consider this lambda expression: (lambda (x) (sqrt x)). This function simply calls sqrt on its argument and returns whatever sqrt returns. There is no argument you could provide to this function that would cause it to return a different result than you would get from calling sqrt directly. We say that this function and the sqrt function are extensionally equal. We can replace this lambda expression with a literal reference to the sqrt function without changing the value produced by our code.

You can go the other way, too. If you find a literal reference to a function, you can replace it with a lambda expression that calls the function. This is η-conversion. η-reduction is removing an unnecessary lambda wrapper, η-expansion is introducting one.

η-conversion comes with caveats. First, it only works on functions. If I have a string "foo", and I attempt to η-expand this into (lambda (x) ("foo" x)), I get nonsense. Second, a reduction strategy that incorporates η-reduction can be weaker than one that does not. Consider this expression: (lambda (x) ((compute-f) x)). We can η-reduce this to (compute-f), but this makes a subtle difference. When wrapped with the lambda, (compute-f) is evaluated just before it is applied to x. In fact, we won't call (compute-f) unless we invoke the result of the lambda expression somewhere. However, once η-reduced, (compute-f) is evaluated at the point the original lambda was evaluated, which can be quite a bit earlier.


When a function foo calls another function bar as a subproblem, an implicit continuation is passed to bar. bar invokes this continuation on the return value that it computes. We can characterize this continuation like this:

kbar = (lambda (return-value)
         (kfoo (finish-foo return-value)))
this just says that when bar returns, we'll finish running the code in foo and further continue by invoking the continuation supplied to foo.

If foo makes a tail call to bar, then foo is just returning what bar computes. There is no computation for foo to finish, so the continuation is just

kbar = (lambda (return-value)
         (kfoo return-value))
But this η-reduces to just kfoo, so we don't have to allocate a new trivial continuation when foo tail calls bar, we can just pass along the continuation that was passed to foo.

Tail recursion is equivalent to η-reducing the implicit continuations to functions where possible. A Scheme aficionado might prefer to say avoiding η-expanding where unnecessary.

This is a mathematical curiosity, but does it have practical significance? If you're programming in continuation passing style, you should be careful to η-reduce (or avoid η-expanding) your code.

Years ago I was writing an interpreter for the REBOL language. I was getting frustrated trying to make it tail recursive. I kept finding places in the interpreter where the REBOL source code was making a tail call, but the interpreter itself wasn't, so the stack would grow without bound. I decided to investigate the problem by rewriting the interpreter in continuation passing style and seeing why I couldn't η-convert the tail calls. Once in CPS, I could see that eval took two continuations and I could achieve tail recursion by η-reducing one of them.

Saturday, April 10, 2021

Can continuation passing style code perform well?

Continuation passing style is a powerful technique that allows you to abstract over control flow in your program. Here is a simple example: We want to look things up in a table, but sometimes the key we use is not associated with any value. In that case, we have to do something different, but the lookup code doesn't know what the caller wants to do, and the caller doesn't know how the lookup code works. Typically, we would arrange for the lookup code to return a special “key not found” value:

(let ((answer (lookup key table)))
   (if (eq answer 'key-not-found)
       ... handle missing key ...    
       ... compute something with answer...)

There are two minor problems with this approach. First, the “key not found” value has to be within the type returned by lookup. Consider a table that can only contain integers. Unfortunately, we cannot declare answer to be an integer because it might be the “key not found” value. Alternatively, we might decide to reserve a special integer to indicate “key not found”. The answer can then be declared an integer, but there is now a magic integer that cannot be stored in the table. Either way, answer is a supertype of what can be stored in the table, and we have to project it back down by testing it against “key not found”.

The second problem is one of redundancy. Presumably, somewhere in the code for lookup there is a conditional for the case that the key hasn't been found. We take a branch and return the “key not found” value. But now the caller tests the return value against “key not found” and it, too, takes a branch. We only take the true branch in the caller if the true branch was taken in the callee and we only take the false branch in the caller if the false branch was taken in the callee. In essence, we are branching on the exact same condition twice. We've reified the control flow, injected the reified value into the space of possible return values, passed it through the function call boundary, then projected and reflected the value back into control flow at the call site.

If we write this in continuation passing style, the call looks like this

(lookup key table
   (lambda (answer)
     …compute something with answer)
   (lambda ()
     …handle missing key…))
lookup will invoke the first lambda expression on the answer if it is found, but it will invoke the second lambda expression if the answer is not found. We no longer have a special “key not found” value, so answer can be exactly the type of what is stored in the table and we don't have to reserve a magic value. There is also no redundant conditional test in the caller.

This is pretty cool, but there is a cost. The first is that it takes practice to read continuation passing style code. I suppose it takes practice to read any code, but some languages make it extra cumbersome to pass around the lambda expressions. (Some seem actively hostile to the idea.) It's just more obscure to be passing around continuations when direct style will do.

The second cost is one of performance and efficiency. The lambda expressions that you pass in to a continuation passing style program will have to be closed in the caller's environment, and this likely means storage allocation. When the callee invokes one of the continuations, it has to perform a function call. Finally, the lexically scoped variables in the continuation will have to be fetched from the closure's environment. Direct style performs better because it avoids all the lexical closure machinery and can keep variables in the local stack frame. For these reasons, you might have reservations about writing code in continuation passing style if it needs to perform.

Continuation passing style looks complicated, but you don't need a Sufficiently Smart compiler to generate efficient code from it. Here is lookup coded up to illustrate:

(defun lookup (key table if-found if-not-found)
   (labels ((scan-entries (entries)
              (cond ((null entries) (funcall if-not-found))
                    ((eq (caar entries) key) (funcall if-found (cdar entries)))
                    (t (scan-entries (cdr entries))))))
     (scan-entries table)))
and a sample use might be
(defun probe (thing)
  (lookup thing *special-table*
    (lambda (value) (format t "~s maps to ~s." thing value))
    (lambda () (format t "~s is not special." thing))))

Normally, probe would have to allocate two closures to pass in to lookup, and the code in each closure would have to fetch the lexical value of key from the closure. But without changing either lookup or probe we can (declaim (inline lookup)). Obviously, inlining the call will eliminate the overhead of a function call, but watch what happens to the closures:

(defun probe (thing)
  ((lambda (key table if-found if-not-found)
     (labels ((scan-entries (table)
                (cond ((null entries) (funcall if-not-found))
                      ((eq (caar entries) key) (funcall if-found (cdar entries)))
                      (t (scan-entries (cdr entries))))))
        (scan-entries table)))
    thing *special-table*
    (lambda (value) (format t "~s maps to ~s." thing value))
    (lambda () (format t "~s has no mapping." thing))))
A Decent Compiler will easily notice that key is just an alias for thing and that table is just an alias for *special-table*, so we get:
(defun probe (thing)
  ((lambda (if-found if-not-found)
     (labels ((scan-entries (entries)
                (cond ((null entries) (funcall if-not-found))
                      ((eq (caar entries) thing) (funcall if-found (cdar entries)))
                      (t (scan-entries (cdr entries))))))
        (scan-entries *special-table*)))
    (lambda (value) (format t "~s maps to ~s." thing value))
    (lambda () (format t "~s has no mapping." thing))))
and the expressions for if-found and if-not-found are side-effect free, so they can be inlined (and we expect the compiler to correctly avoid unexpected variable capture):
(defun probe (thing)
  ((lambda ()
     (labels ((scan-entries (entries)
                (cond ((null entries)
                       (funcall (lambda () (format t "~s has no mapping." thing))))
                      ((eq (caar entries) thing)
                       (funcall (lambda (value) (format t "~s maps to ~s." thing value))
                                (cdar entries)))
                      (t (scan-entries (cdr entries))))))
        (scan-entries *special-table*)))))
and the immediate calls to literal lambdas can be removed:
(defun probe (thing)
   (labels ((scan-entries (entries)
              (cond ((null entries) (format t "~s has no mapping." thing))
                    ((eq (caar entries) thing)
                     (format t "~s maps to ~s." thing (cdar value))))
                    (t (scan-entries (cdr entries))))))
      (scan-entries *special-table*)))

Our Decent Compiler has removed all the lexical closure machinery and turned the calls to the continuations into direct code. This code has all the features we desire: there is no special “key not found” value to screw up our types, there is no redundant branch: the (null entries) test directly branches into the appropriate handling code, we do not allocate closures, and the variables that would have been closed over are now directly apparent in the frame.

It's a bit vacuous to observe that an inlined function performs better. Of course it does. At the very least you avoid a procedure call. But if you inline a continuation passing style function, any Decent Compiler will go to town and optimize away the continuation overhead. It's an unexpected bonus.

On occasion I find that continuation passing style is just the abstraction for certain code that is also performance critical. I don't give it a second thought. Continuation passing style can result in high-performance code if you simply inline the critical calls.

Saturday, April 3, 2021

Early LISP Part II (Apply redux)

By April of 1959, issues with using subst to implement β-reduction became apparent. In the April 1959 Quarterly Progress Report of the Research Laboratory of Electronics, McCarthy gives an updated definition of the universal S-function apply:

    apply[f;args]=eval[cons[f;appq[args]];NIL]
where
    appq[m]=[null[m]→NIL;T→cons[list[QUOTE;car[m]];appq[cdr[m]]]]
and
       eval[e;a]=[
atom[e]→eval[assoc[e;a];a];
atom[car[e]]→[
car[e]=QUOTE→cadr[e];
car[e]=ATOM→atom[eval[cadr[e];a]];
car[e]=EQ→[eval[cadr[e];a]=eval[caddr[e];a]];
car[e]=COND→evcon[cdr[e];a];
car[e]=CAR→car[eval[cadr[e];a]];
car[e]=CDR→cdr[eval[cadr[e];a]];
car[e]=CONS→cons[eval[cadr[e];a];eval[caddr[e];a]];
T→eval[cons[assoc[car[e];a];evlis[cdr[e];a]];a]];
caar[e]=LABEL→eval[cons[caddar[e];cdr[e]];cons[list[cadar[e];car[e]];a]];
caar[e]=LAMBDA→eval[caddar[e];append[pair[cadar[e];cdr[e]];a]]

and
    evcon[c;a]=[eval[caar[c];a]→eval[cadar[c];a];T→evcon[cdr[c];a]]
and
    evlis[m;a]= [null[m]→NIL;T→cons[list[QUOTE;eval[car[m];a]];
evlis[cdr[m];a]]

I find this a lot easier to understand if we transcribe it into modern Common LISP:

;;; Hey Emacs, this is -*- Lisp -*-

(in-package "CL-USER")

;; Avoid smashing the standard definitions.
(shadow "APPLY")
(shadow "ASSOC")
(shadow "EVAL")

(defun apply (f args)
  (eval (cons f (appq args)) nil))

(defun appq (m)
  (cond ((null m) nil)
        (t (cons (list 'QUOTE (car m)) (appq (cdr m))))))

(defun eval (e a)
  (cond ((atom e) (eval (assoc e a) a))
        ((atom (car e))
         (cond ((eq (car e) 'QUOTE) (cadr e))
               ((eq (car e) 'ATOM)  (atom (eval (cadr e) a)))
               ((eq (car e) 'EQ)    (eq (eval (cadr e) a) (eval (caddr e) a)))
               ((eq (car e) 'COND)  (evcon (cdr e) a))
               ((eq (car e) 'CAR)   (car (eval (cadr e) a)))
               ((eq (car e) 'CDR)   (cdr (eval (cadr e) a)))
               ((eq (car e) 'CONS)  (cons (eval (cadr e) a) (eval (caddr e) a)))
               (t (eval (cons (assoc (car e) a) (evlis (cdr e) a)) a))))
        ((eq (caar e) 'LABEL) (eval (cons (caddar e) (cdr e))
                                    (cons (list (cadar e) (car e)) a)))
        ((eq (caar e) 'LAMBDA) (eval (caddar e)
                                     (append (pair (cadar e) (cdr e)) a)))))

(defun evcon (c a)
  (cond ((eval (caar c) a) (eval (cadar c) a))
        (t (evcon (cdr c) a))))

(defun evlis (m a)
  (cond ((null m) nil)
        (t (cons (list 'QUOTE (eval (car m) a)) (evlis (cdr m) a)))))

;;; Modern helpers
(defun assoc (k l)
  (cadr (cl:assoc k l)))

(defun pair (ls rs)
  (map 'list #'list ls rs))

(defun testit ()
  (apply '(label ff (lambda (x) (cond ((atom x) x) ((quote t) (ff (car x))))))
         (list '((a . b) . c))))

There are a few things to notice about this. First, there is no code that inspects the value cell or function cell of a symbol. All symbols are evaluated by looking up the value in the association list a, so this evaluator uses one namespace. Second, the recursive calls to eval when evaluating combinations (the last clause of the inner cond and the LABEL and LAMBDA clauses) are in tail position, so this evaluator could be coded up tail-recursively. (It is impossible to say without inspecting the IBM 704 assembly code.)

What is most curious about this evaluator is the first clause in the outer cond in eval. This is where variable lookup happens. As you can see, we look up the variable by calling assoc, but once we obtain the value, we call eval on it. This LISP isn't storing values in the environment, but rather expressions that evaluate to values. If we look at the LAMBDA clause of the cond, the one that handles combinations that begin with lambda expressions, we can see that it does not evaluate the arguments to the lambda but instead associates the bound variables with the arguments' expressions. This therefore has call-by-name semantics rather than the modern call-by-value semantics.

By April 1960 we see these changes:

(defun eval (e a)
  (cond ((atom e) (assoc e a))
        ((atom (car e))
         (cond ((eq (car e) 'QUOTE) (cadr e))
               ((eq (car e) 'ATOM)  (atom (eval (cadr e) a)))
               ((eq (car e) 'EQ)    (eq (eval (cadr e) a) (eval (caddr e) a)))
               ((eq (car e) 'COND)  (evcon (cdr e) a))
               ((eq (car e) 'CAR)   (car (eval (cadr e) a)))
               ((eq (car e) 'CDR)   (cdr (eval (cadr e) a)))
               ((eq (car e) 'CONS)  (cons (eval (cadr e) a) (eval (caddr e) a)))
               (t (eval (cons (assoc (car e) a) (evlis (cdr e) a)) a))))
        ((eq (caar e) 'LABEL) (eval (cons (caddar e) (cdr e))
                                    (cons (list (cadar e) (car e)) a)))
        ((eq (caar e) 'LAMBDA) (eval (caddar e)
                                     (append (pair (cadar e) (evlis (cdr e) a)) a)))))
Note how evaluating an atom now simply looks up the value of the atom in the association list and evaluation of a combination of a lambda involves evaluating the arguments eagerly. This is a call-by-value interpreter.

Monday, March 29, 2021

Early LISP

In AI Memo 8 of the MIT Research Laboratory of Electronics (March 4, 1959), John McCarthy gives a definition of the universal S-function apply:

     apply is defined by
     apply[f;args]=eval[combine[f;args]]
     eval is defined by
eval[e]=[
first[e]=NULL→[null[eval[first[rest[e]]]]→T;1→F]
first[e]=ATOM→[atom[eval[first[rest[e]]]]→T;1→F]
first[e]=EQ→[eval[first[rest[e]]]=eval[first[rest[rest[e]]]]→T;
     1→F]
first[e]=QUOTE→first[rest[e]];
first[e]=FIRST→first[eval[first[rest[e]]]];
first[e]=REST→rest[eval[first[rest[e]]];
first[e]=COMBINE→combine[eval[first[rest[e]]];eval[first[rest[rest
     [e]]]]];
first[e]=COND→evcon[rest[e]]
first[first[e]]=LAMBDA→evlam[first[rest[first[e]]];first[rest[rest
    [first[e]]]];rest[e]];
first[first[e]]=LABELS→eval[combine[subst[first[e];first[rest
    [first[e]]];first[rest[rest[first[e]]]]];rest[e]]]]
where: evcon[c]=[eval[first[first[c]]]=1→eval[first[rest[first[c]]]];
           1→evcon[rest[c]]]
and
evlam[vars;exp;args]=[null[vars]→eval[exp];1→evlam[
     rest[vars];subst[first[vars];first[args];exp];rest[args]]]
McCarthy asserts that “if f is an S-expression for an S-function φ and args is a list of the form (arg1, …, argn) where arg1, ---, argn are arbitrary S-expressions then apply[f,args] and φ(arg1, …, argn) are defined for the same values of arg1, … argn and are equal when defined.”

I find it hard to puzzle through these equations, so I've transcribed them into S-expressions to get the following:

;;; Hey Emacs, this is -*- Lisp -*-

(in-package "CL-USER")

;; Don't clobber the system definitions.
(shadow "APPLY")
(shadow "EVAL")

(defun apply (f args)
  (eval (combine f args)))

(defun eval (e)
  (cond ((eq (first e) 'NULL)    (cond ((null (eval (first (rest e)))) t)
                                       (1 nil)))
        ((eq (first e) 'ATOM)    (cond ((atom (eval (first (rest e)))) t)
                                       (1 nil)))
        ((eq (first e) 'EQ)      (cond ((eq (eval (first (rest e)))
                                            (eval (first (rest (rest e))))) t)
                                       (1 nil)))
        ((eq (first e) 'QUOTE)   (first (rest e)))
        ((eq (first e) 'FIRST)   (first (eval (first (rest e)))))
        ((eq (first e) 'REST)    (rest  (eval (first (rest e)))))
        ((eq (first e) 'COMBINE) (combine (eval (first (rest e)))
                                          (eval (first (rest (rest e))))))
        ((eq (first e) 'COND)    (evcon (rest e)))
        ((eq (first (first e)) 'LAMBDA) (evlam (first (rest (first e)))
                                               (first (rest (rest (first e))))
                                               (rest e)))
        ((eq (first (first e)) 'LABELS) (eval (combine (subst (first e)
                                                              (first (rest (first e)))
                                                              (first (rest (rest (first e)))))
                                                       (rest e))))))

(defun evcon (c)
  (cond ((eval (first (first c))) (eval (first (rest (first c)))))
        (1 (evcon (rest c)))))

(defun evlam (vars exp args)
  (cond ((null vars) (eval exp))
        (1 (evlam (rest vars)
                  (subst (first args)
                         (first vars)
                         exp)
                  (rest args)))))
We just have to add a definition for combine as a synonym for cons and this should run:
* (eval '(eq (first (combine 'a 'b) (combine 'a 'c))))
T

As Steve “Slug” Russell observed, eval is an interpreter for Lisp. This version of eval uses an interesting evaluation strategy. If you look carefully, you'll see that there is no conditional clause for handling variables. Instead, when a lambda expression appears as the operator in a combination, the body of the lambda expression is walked and the bound variables are substituted with the expressions (not the values!) that represent the arguments. This is directly inspired by β-reduction from lambda calculus.

This is buggy, as McCarthy soon discovered. In the errata published one week later, McCarthy points out that the substitution process doesn't respect quoting, as we can see here:

* (eval '((lambda (name) (combine 'your (combine 'name (combine 'is (combine name nil))))) 'john))
(YOUR 'JOHN IS JOHN)
With a little thought, we can easily generate other name collisions. Notice, for example, that the substitution will happily substitute within the bound variable list of nested lambdas.

Substitution like this is inefficient. The body of the lambda is walked once for each bound variable to be substituted, then finally walked again to evaluate it. Later versions of Lisp will save the bound variables in an environment structure and substitute them incrementally during a single evaluation pass of the lambda body.