Friday, August 8, 2014

Mini regex golf 3: set cover

I'm computing the set cover by incrementally adding items to be covered. Naturally, the order in which you add items changes the way the program progresses. I added code that picks an item to be added each iteration rather than just pulling the car off the front of a list.
(define (cover8 value->keys-table better-solution)

  (define (add-v-k-entry solution-set v-k-entry)
    (let ((value (car v-k-entry))
          (keys  (cdr v-k-entry)))

      (write-string "Adding value ") (write value) (newline)
      (write-string "   with keys ") (write keys) (newline)
      (write-string "   to ") (write (length solution-set))
      (write-string " partial solutions.") (newline)

      (let ((new-solutions
             (map make-new-solution (cartesian-product solution-set keys))))

        (let ((trimmed-solutions 
                (trim-partial-solutions value->keys-table new-solutions)))

          (write-string "Returning ") (write (length trimmed-solutions))
          (write-string " of ") (write (length new-solutions))
          (write-string " new partial solutions.") (newline)

          trimmed-solutions))))

  (define (cover v-k-entries)
    (cond ((pair? v-k-entries)
           (pick-v-k-entry value->keys-table v-k-entries
                           (lambda (selected remaining)
                             (add-v-k-entry (cover remaining) selected))))
          ((null? v-k-entries)
           (list '()))
          (else (improper-list-error 'cover v-k-entries))))

  (let ((minimized (minimize-vktable value->keys-table better-solution)))
    (least-elements (cover minimized) better-solution)))

(define (pick-v-k-entry value->keys-table v-k-entries receiver)
  (define (score v-k-entry)
    (let* ((matched-all 
     (count-matching-items value->keys-table
      (lambda (other)
        (there-exists? (cdr v-k-entry)
                 (lambda (key) (member key (cdr other)))))))
           (matched-remaining
            (count-matching-items v-k-entries
                                  (lambda (other)
                                    (there-exists? (cdr v-k-entry)
                                       (lambda (key) (member key (cdr other)))))))
           (matched-forward (- matched-all matched-remaining)))
      (cons matched-remaining matched-forward)))

  (let ((scored (map (lambda (v-k-entry) (cons (score v-k-entry) v-k-entry))
                      v-k-entries)))

    (let ((picked 
    (cdar
     (least-elements scored
       (lambda (left right)
         (let* ((len-l (length (cdr left)))
         (len-r (length (cdr right)))
         (lmr (caar left))
         (lmf (cdar left))
         (rmr (caar right))
         (rmf (cdar right)))
    (or (> len-l len-r)
        (and (= len-l len-r)
      (or (> lmf rmf)
          (and (= lmf rmf)
        (< lmr rmr)))))
    ))))))

      (display "Picking ") (write picked) (newline)
      (receiver picked (delete picked v-k-entries)))))

(define (trim-partial-solutions value->keys-table partial-solutions)
    (let ((equivalent-solutions
           (map (lambda (entry) (cons (cdr entry) (car entry)))
                (collect-equivalent-partial-solutions value->keys-table
                                                      partial-solutions))))
      (write-string "  Deleting ")
      (write (- (length partial-solutions) (length equivalent-solutions)))
      (write-string " equivalent partial solutions.")
      (newline)

      (remove-dominated-solutions value->keys-table
                                  (map lowest-scoring-equivalent-partial-solution
                                       equivalent-solutions))))
Finally, it turns out that computing dominating partial solutions is expensive, so I changed the set operations to use a bitmap representation:
(define (remove-dominated-solutions value->keys-table partial-solutions)
  (let ((before-length (length partial-solutions))
        (all-values (get-values value->keys-table))) 
    (let ((table
           ;; put the long ones in first
           (sort
            (map (lambda (partial-solution)
                   (cons partial-solution
                     (lset->bset all-values 
                       (map car (partial-solution-matches value->keys-table 
                                                          partial-solution)))))
                 partial-solutions)
            (lambda (left right)
              (> (length (bset->lset all-values (cdr left)))
                 (length (bset->lset all-values (cdr right))))))))

      (let ((answer (map car
                         (fold-left (lambda (answer solution)
                                      (if (there-exists? answer 
                                                         (dominates-solution? solution))
                                          answer
                                          (cons solution answer)))
                                    '()
                                    table))))
        (let ((after-length (length answer)))
          (write-string "  Removing ") (write (- before-length after-length))
          (write-string " dominated solutions.")
          (newline)
          answer)))))

(define (dominates-solution? solution)
  (let* ((partial-solution (car solution))
         (partial-solution-score (score partial-solution))
         (solution-matches-raw (cdr solution)))
    (lambda (other-solution)
      (let* ((other-partial-solution (car other-solution))
             (other-matches-raw (cdr other-solution)))
        (and
         (bset-superset? other-matches-raw solution-matches-raw)
         (<= (score other-partial-solution) partial-solution-score))))))

(define (get-values v-k-table)
  (fold-left (lambda (answer entry) (lset-adjoin equal? answer (car entry)))
             '()
             v-k-table))

(define (bset-element->bit universe element)
  (cond ((null? element) 0)
        (else (expt 2 (list-index (lambda (item) (eq? item element)) universe)))))

(define (bset-adjoin universe bset element)
  (bset-union bset (bset-element->bit universe element)))

(define (lset->bset universe lset)
  (fold-left (lambda (answer element)
               (bset-adjoin universe answer element))
             0
             lset))

(define (bset->lset universe bset)
  (cond ((zero? bset) '())
        ((even? bset) (bset->lset (cdr universe) (/ bset 2)))
        (else (cons (car universe) (bset->lset (cdr universe) (/ (- bset 1) 2))))))

(define (bset-union left right) (bitwise-ior left right))

(define (bset-superset? bigger smaller)
  ;; Is every element of smaller in bigger?
  (zero? (bitwise-andc2 smaller bigger)))
This code can now find the shortest regular expression consisting of letters and dots (and ^$) that matches one set of strings but not another.

Depending on the strings, this can take quite a bit of time to run. Dotted expressions cause a combinatorical explosion in matching regexps (or substrings), but what makes it worse is that the dotted expressions tend to span different sets of strings. If two different dotted expressions, each with different matching sets of strings, appear in a single string, then the number of partial solutions will be multiplied by two as we try each different dotted expression.

It is characteristic of NP problems that it is easy to determine if you have a good solution, but quite hard to find it among a huge number of other, poor solutions. This problem exhibits this characteristic, but there is a bit more structure in the problem that we are exploiting. The word lists are drawn from the English language. This makes some bigrams, trigrams, etc. far, far, more likely to appear than others.

Short words are much easier to process than longer ones because they simply contain fewer things to match. On the other hand, longer words tend to be dominated by shorter ones anyway.

To be continued...

Thursday, August 7, 2014

Mini regex golf 2: adding regular expressions

It wasn't too hard to add regular expressions to the substring version. What took a while was just tinkering with the code, breaking it, fixing it again, noticing an optimization, tinkering, etc. etc. In any case it works and here is some of it.
(define (make-extended-ngram-table winners losers)
  (let* ((initial-ngrams (generate-ngrams winners losers)))
    (write-string "Initial ngrams: ") (write (length initial-ngrams))
    (newline)
    (map (lambda (winner)
           (cons winner
                 (keep-matching-items initial-ngrams
                    (lambda (ngram) (re-string-search-forward ngram winner)))))
         winners)))

(define (generate-ngrams winners losers)
  (write-string "Generating ngrams...")(newline)
  (let ((losing-ngram? (string-list-matcher losers)))
    (fold-left (lambda (answer winner)
                 (lset-union equal? answer (extended-ngrams losing-ngram? winner)))
               '()
               winners)))

(define (string-list-matcher string-list)
  (lambda (test-ngram)
    (there-exists? string-list
                   (lambda (string)
                     (re-string-search-forward test-ngram string)))))

(define *dotification-limit* 4)
(define *generate-ends-of-words* #t)
(define *generate-dotted* #t)

(define (ngrams-of-length n string)
  (do ((start    0 (1+ start))
       (end      n (1+ end))
       (answer '() (lset-adjoin string=? answer (substring string start end))))
      ((> end (string-length string)) answer)))

(define (generate-dotted answer losing-ngram?)
  (do ((tail answer (cdr tail))
       (answer '() (let ((item (car tail)))
                     (fold-left (lambda (answer dotted)
                                  (if (losing-ngram? dotted)
                                      answer
                                      (lset-adjoin string=? answer dotted)))
                                answer
                                (dotify item)))))
      ((not (pair? tail))
       (if (null? tail)
           answer
           (improper-list-error 'generate-dotted tail)))))

(define (dotify word)
  (cond ((string=? word "") (list ""))
        ((> (string-length word) *dotification-limit*) (list word))
        (else
         (fold-left (lambda (answer dotified)
                      (fold-left (lambda (answer replacement)
                                   (lset-adjoin equal? answer 
                                        (string-append replacement dotified)))
                                 answer
                                 (replacements (substring word 0 1))))
                    '()
                    (dotify (substring word 1 (string-length word)))))))

(define (replacements string)
  (if (or (string=? string "^")
          (string=? string "$"))
      (list string)
      (list string ".")))

(define (extended-ngrams losing-ngram? string)
  (let ((string (if *generate-ends-of-words*
                    (string-append "^" string "$")
                    string)))
    (do ((n 1    (+ n 1))
         (answer '() (lset-union
                      string=? answer
                      (delete-matching-items (ngrams-of-length n string)
                                             losing-ngram?))))
        ((> n (string-length string))
         (if *generate-dotted*
             (generate-dotted answer losing-ngram?)
             answer)))))
Adding the dotification greatly increases the number of ways to match words:
1 ]=> (extended-ngrams (string-list-matcher losers) "lincoln")

;Value 15: ("li" "ln" "ln$" "oln" ".ln" "col" "lin" "li." "^li" "o.n$" "oln$" ".ln$" "col." "c.ln" "..ln" "coln" ".oln" "co.n" "n.ol" "..ol" "ncol" ".col" "nc.l" "i.co" "inco" "i..o" "in.o" "lin." "li.." "l.nc" "linc" "l..c" "li.c" "^li." "^lin" "coln$" "ncoln" "incol" "linco" "^linc" "ncoln$" "incoln" "lincol" "^linco" "incoln$" "lincoln" "^lincol" "lincoln$" "^lincoln" "^lincoln$")
The table that maps words to their extended ngrams is quite large, but it can be reduced in size without affecting the solution to the set cover problem. If two regexps match exactly the same set of winning strings, then one can be substituted for the other in any solution, so we can discard all but the shortest of these. If a regexp matches a proper superset of another regexp, and the other regexp is at least the same length or longer, then the first regexp dominates the second one, so we can discard the second one.
(define (minimize-keys value->keys-table better-solution)
  (let* ((all-keys (get-keys value->keys-table))
         (equivalents (collect-equivalent-partial-solutions value->keys-table
                         (map list all-keys)))
         (reduced (map (lambda (equivalent)
                         (cons (car equivalent)
                               (car (least-elements (cdr equivalent)
                                                    better-solution))))
                       equivalents))
         (dominants (collect-dominant-partial-solutions reduced better-solution))
         (good-keys (fold-left (lambda (answer candidate)
                                 (lset-adjoin equal? answer (cadr candidate)))
                               '()
                               dominants)))

    (define (rebuild-entry entry)
      (cons (car entry) (keep-matching-items (cdr entry)
                             (lambda (item) (member item good-keys)))))

    (write-string "Deleting ") (write (- (length all-keys) (length good-keys)))
    (write-string " of ") (write (length all-keys)) (write-string " keys.  ")
    (write (length good-keys)) (write-string " keys remain.")(newline)
    (map rebuild-entry value->keys-table)))

(define (partial-solution-matches value->keys-table partial-solution)
  (keep-matching-items
   value->keys-table
   (lambda (entry)
     (there-exists? partial-solution (lambda (key) (member key (cdr entry)))))))

(define (collect-equivalent-partial-solutions value->keys-table partial-solutions)
  (let ((answer-table (make-equal-hash-table)))

    (for-each (lambda (partial-solution)
                (hash-table/modify! answer-table
                                   (map car (partial-solution-matches 
                                               value->keys-table 
                                               partial-solution))
                                    (list)
                                    (lambda (other)
                                      (lset-adjoin equal? other partial-solution))))
              partial-solutions)

    (hash-table->alist answer-table)))

(define (collect-dominant-partial-solutions equivalents better-solution)
  (define (dominates? left right)
    (and (superset? (car left) (car right))
         (not (better-solution (cdr right) (cdr left)))))

  (let ((sorted (sort equivalents 
                      (lambda (l r) (> (length (car l)) (length (car r)))))))
    (fold-left (lambda (answer candidate)
                 (if (there-exists? answer (lambda (a) (dominates? a candidate)))
                     answer
                     (lset-adjoin equal? answer candidate)))
               '()
               sorted)))
We can minimize the value->key-table in another way. If two values in the table are matched by the exact same set of keys, then we can delete one without changing the solution. If a value is matched by a small set of keys, and if another values is matched by a superset of these keys, then we can delete the larger one because if the smaller one matches, the larger one must match as well.
(define (minimize-values v-k-table)
  (let ((size-before (length v-k-table)))

    (define (dominated-value? entry)
      (let ((entry-value (car entry))
            (entry-keylist (cdr entry)))
        (there-exists? v-k-table
          (lambda (other-entry)
            (and (not (eq? entry other-entry))
                 (let ((other-value (car other-entry))
                       (other-keylist (cdr other-entry)))
                   (let ((result (and (superset? entry-keylist other-keylist)
                                      (not (superset? other-keylist entry-keylist)))))
                     (if result
                         (begin (display "Removing ")
                                (write entry-value)
                                (display " dominated by ")
                                (write other-value)
                                (display ".")
                                (newline)
                                ))
                     result)))))))

    (define (equivalent-value-in-answer? answer entry)
      (let ((entry-value (car entry))
            (entry-keylist (cdr entry)))
        (there-exists? answer
          (lambda (other-entry)
            (let ((other-value (car other-entry))
                  (other-keylist (cdr other-entry)))
              (let ((result (equal? entry-keylist other-keylist)))
                (if result
                    (begin (display "Removing ")
                           (write entry-value)
                           (display " equivalent to ")
                           (write other-value)
                           (display ".")
                           (newline)
                           ))
                result))))))

    (define (add-entry answer entry)
      (if (or (equivalent-value-in-answer? answer entry)
              (dominated-value? entry))
          answer
          (cons entry answer)))

    (let ((answer (fold-left add-entry '() v-k-table)))
      (write-string "Removed ") (write (- size-before (length answer)))
      (write-string " dominated and equivalent values.")
      (newline)
      answer)))
Each time we remove values or keys, we might make more keys and values equivalent or dominated, so we iterate until we can no longer remove anything.
(define (minimize-vktable value->keys-table better-solution)
  (let* ((before-size (fold-left + 0 (map length value->keys-table)))
         (new-table
          (minimize-values
           (minimize-keys value->keys-table better-solution)))
         (after-size (fold-left + 0 (map length new-table))))
    (if (= before-size after-size)
        value->keys-table
        (minimize-vktable new-table better-solution))))
The minimized table for the presidents looks like this:
(("washington" "sh" "g..n" "n..o" ".h.n" "a..i")
 ("adams" "a.a" "am" "ad")
 ("madison" "m..i" "i..n" "is." "i.o" "di" "ma" "ad")
 ("monroe" "r.e$" "oe")
 ("van-buren" "u..n" "r.n" ".b" "bu" "-")
 ("harrison" "r..s" "r.i" "i..n" "is." "i.o" "a..i")
 ("polk" "po")
 ("taylor" "ay." "ta")
 ("pierce" "ie." "rc" "r.e$")
 ("buchanan" "bu" "a.a" ".h.n")
 ("lincoln" "i..o" "li")
 ("grant" "an.$" "a.t" "ra" "r.n" "g..n")
 ("hayes" "h..e" "ye" "ay.")
 ("garfield" "el.$" "i.l" "ga" "ie." "r.i" ".f" "a..i")
 ("cleveland" "v.l" "an.$")
 ("mckinley" "n.e" "nl" "i.l" "m..i")
 ("roosevelt" ".se" "oo" "v.l" "el.$" "r..s")
 ("taft" "a.t" "ta" ".f")
 ("wilson" "ls" "i..o")
 ("harding" "r.i" "di" "a..i")
 ("coolidge" "oo" "li")
 ("hoover" "ho" "oo")
 ("truman" "u..n" "ma")
 ("eisenhower" "ho" ".se" "h..e" "i..n" "is.")
 ("kennedy" "nn" "n.e")
 ("johnson" "j")
 ("nixon" "^n" "i..n" "i.o" "n..o")
 ("carter" "rt" "a.t")
 ("reagan" "ga" "a.a")
 ("bush" "bu" "sh")
 ("obama" ".b" "ma" "a.a" "am"))
As you can see, we have reduced the original 2091 matching regexps to fifty.

Changes to the set-cover code coming soon....

Friday, August 1, 2014

Mini regex golf

I was intrigued by Peter Norvig's articles about regex golf.

To make things easier to think about, I decided to start with the simpler problem of looking for substrings. Here's code to extract the ngrams of a string:
(define (ngrams-of-length n string)
  (do ((start    0 (1+ start))
       (end      n (1+ end))
       (answer '() (lset-adjoin string=? answer (substring string start end))))
      ((> end (string-length string)) answer)))

(define (ngrams string)
  (do ((n 1 (+ n 1))
       (answer '() (append (ngrams-of-length n string) answer)))
      ((> n (string-length string)) answer)))
A solution is simply a list of ngrams. (Although not every list of ngrams is a solution!)
(define (solution? solution winners losers)
  (let ((matches-solution? (ngram-list-matcher solution)))
    (and (for-all? winners matches-solution?)
         (not (there-exists? losers matches-solution?)))))

(define (ngram-list-matcher ngram-list)
  (lambda (test-string)
    (there-exists? ngram-list 
     (lambda (ngram)
       (string-search-forward ngram test-string)))))
We also want to know if an ngram appears in a given list of strings.
(define (string-list-matcher string-list)
  (lambda (test-ngram)
    (there-exists? string-list
     (lambda (string)
       (string-search-forward test-ngram string)))))

(fluid-let ((*unparser-list-breadth-limit* 10))
    (let ((matches-loser? (string-list-matcher losers)))
      (for-each
       (lambda (winner) (write-string winner) (write-string ": ") 
        (write (reverse (delete-matching-items (ngrams winner) matches-loser?)))
        (newline))
       winners)))

washington: ("sh" "hi" "gt" "to" "was" "ash" "shi" "hin" "ngt" "gto" ...)
adams: ("ad" "am" "ms" "ada" "dam" "ams" "adam" "dams" "adams")
jefferson: ("j" "je" "ef" "ff" "fe" "rs" "jef" "eff" "ffe" "fer" ...)
madison: ("ma" "ad" "di" "mad" "adi" "dis" "iso" "madi" "adis" "diso" ...)
monroe: ("oe" "onr" "nro" "roe" "monr" "onro" "nroe" "monro" "onroe" "monroe")
jackson: ("j" "ja" "ac" "ks" "jac" "ack" "cks" "kso" "jack" "acks" ...)
van-buren: ("-" "va" "n-" "-b" "bu" "van" "an-" "n-b" "-bu" "bur" ...)
harrison: ("har" "arr" "rri" "ris" "iso" "harr" "arri" "rris" "riso" "ison" ...)
polk: ("po" "pol" "olk" "polk")
taylor: ("ta" "yl" "lo" "tay" "ayl" "ylo" "lor" "tayl" "aylo" "ylor" ...)
pierce: ("rc" "ce" "pie" "ier" "erc" "rce" "pier" "ierc" "erce" "pierc" ...)
buchanan: ("bu" "uc" "ch" "na" "buc" "uch" "cha" "ana" "nan" "buch" ...)
lincoln: ("li" "ln" "lin" "col" "oln" "linc" "inco" "ncol" "coln" "linco" ...)
grant: ("ra" "gra" "ran" "ant" "gran" "rant" "grant")
hayes: ("ye" "hay" "aye" "yes" "haye" "ayes" "hayes")
garfield: ("ga" "rf" "fi" "gar" "arf" "rfi" "fie" "iel" "eld" "garf" ...)
cleveland: ("lev" "vel" "ela" "clev" "leve" "evel" "vela" "elan" "cleve" "level" ...)
mckinley: ("nl" "mck" "inl" "nle" "mcki" "kinl" "inle" "nley" "mckin" "ckinl" ...)
roosevelt: ("oo" "os" "lt" "roo" "oos" "ose" "sev" "vel" "elt" "roos" ...)
taft: ("ta" "af" "ft" "taf" "aft" "taft")
wilson: ("ls" "ils" "lso" "wils" "ilso" "lson" "wilso" "ilson" "wilson")
harding: ("di" "har" "ard" "rdi" "din" "hard" "ardi" "rdin" "ding" "hardi" ...)
coolidge: ("oo" "li" "coo" "ool" "oli" "lid" "cool" "ooli" "olid" "lidg" ...)
hoover: ("ho" "oo" "hoo" "oov" "hoov" "oove" "hoove" "oover" "hoover")
truman: ("tr" "ru" "ma" "tru" "rum" "uma" "man" "trum" "ruma" "uman" ...)
eisenhower: ("ei" "nh" "ho" "ow" "eis" "ise" "sen" "enh" "nho" "how" ...)
kennedy: ("nn" "ed" "dy" "ken" "enn" "nne" "ned" "edy" "kenn" "enne" ...)
johnson: ("j" "jo" "oh" "hn" "joh" "ohn" "hns" "john" "ohns" "hnso" ...)
nixon: ("ni" "ix" "xo" "nix" "ixo" "xon" "nixo" "ixon" "nixon")
carter: ("rt" "car" "art" "rte" "cart" "arte" "rter" "carte" "arter" "carter")
reagan: ("ea" "ag" "ga" "rea" "eag" "aga" "gan" "reag" "eaga" "agan" ...)
bush: ("bu" "us" "sh" "bus" "ush" "bush")
clinton: ("li" "to" "cli" "lin" "int" "nto" "ton" "clin" "lint" "into" ...)
obama: ("ob" "ba" "am" "ma" "oba" "bam" "ama" "obam" "bama" "obama")
We can discard ngrams like "shi" because the shorter ngram "sh" will also match.
(define (dominant-ngrams string losing-ngram?)
  (do ((n 1 (+ n 1))
       (answer '() (append
                     (delete-matching-items
                      (ngrams-of-length n string)
                      (lambda (item)
                        (or (there-exists? answer
                                           (lambda (ngram)
                                             (string-search-forward ngram item)))
                            (losing-ngram? item))))
                    answer)))
      ((> n (string-length string)) answer)))


(fluid-let ((*unparser-list-breadth-limit* 10))
    (let ((matches-loser? (string-list-matcher losers)))
      (for-each
       (lambda (winner) (write-string winner) (write-string ": ") 
        (write (dominant-ngrams winner matches-loser?))
        (newline))
       winners)))

washington: ("was" "to" "gt" "hi" "sh")
adams: ("ms" "am" "ad")
jefferson: ("rs" "fe" "ff" "ef" "j")
madison: ("iso" "di" "ad" "ma")
monroe: ("nro" "onr" "oe")
jackson: ("ks" "ac" "j")
van-buren: ("ren" "ure" "bu" "va" "-")
harrison: ("iso" "ris" "rri" "arr" "har")
polk: ("olk" "po")
taylor: ("lo" "yl" "ta")
pierce: ("ier" "pie" "ce" "rc")
buchanan: ("na" "ch" "uc" "bu")
lincoln: ("inco" "col" "ln" "li")
grant: ("ant" "ra")
hayes: ("hay" "ye")
garfield: ("eld" "iel" "fi" "rf" "ga")
cleveland: ("ela" "vel" "lev")
mckinley: ("mck" "nl")
roosevelt: ("vel" "sev" "lt" "os" "oo")
taft: ("ft" "af" "ta")
wilson: ("ls")
harding: ("ard" "har" "di")
coolidge: ("li" "oo")
hoover: ("oo" "ho")
truman: ("ma" "ru" "tr")
eisenhower: ("wer" "sen" "ise" "ow" "ho" "nh" "ei")
kennedy: ("ken" "dy" "ed" "nn")
johnson: ("hn" "oh" "j")
nixon: ("xo" "ix" "ni")
carter: ("car" "rt")
reagan: ("ga" "ag" "ea")
bush: ("sh" "us" "bu")
clinton: ("int" "to" "li")
obama: ("ma" "am" "ba" "ob")
It's time to tackle the set cover problem. We want a set of ngrams that match all the strings. Obviously, if we pick an ngram from each of the strings we want to cover, we'll have a solution. For instance,
(let ((matches-loser? (string-list-matcher losers)))
  (solution? (delete-duplicates
                 (map
                    (lambda (winner) (car (dominant-ngrams winner matches-loser?)))
                    winners))
                winners losers))
;Value: #t
We can cycle through all the possible solutions and then select the best one.
(define (mini-golf0 winners losers)
  (lowest-scoring
   (cover0 (make-dominant-ngram-table
            winners
            (delete-losing-superstrings winners losers)))))

(define (delete-losing-superstrings winners losers)
  (delete-matching-items
   losers
   (lambda (loser)
     (there-exists? winners
                    (lambda (winner)
                      (string-search-forward winner loser))))))

(define (make-dominant-ngram-table winners losers)
  (let ((losing-ngram? (string-list-matcher losers)))
    (map (lambda (winner)
           (cons winner (dominant-ngrams winner losing-ngram?)))
         winners)))

(define (cover0 v-k-table)
  (let ((empty-solution-set (list '())))
    (fold-left add-v-k-entry0 empty-solution-set v-k-table)))

(define (add-v-k-entry0 solution-set v-k-entry)
  (let ((value (car v-k-entry))
        (keys  (cdr v-k-entry)))

    (write-string "Adding value ") (write value) (newline)
    (write-string "   with keys ") (write keys) (newline)
    (write-string "   to ") (write (length solution-set))
    (write-string " partial solutions.") (newline)

    (let ((new-solutions
           (map make-new-solution (cartesian-product solution-set keys))))

      (write-string "Returning ") (write (length new-solutions))
      (write-string " new partial solutions.") (newline)

      new-solutions)))

(define (lowest-scoring list)
  (least-elements list (lambda (l r) (< (score l) (score r)))))

(define (cartesian-product left-list right-list)
  (fold-left (lambda (answer left)
               (fold-left (lambda (answer right)
                            (cons (cons left right) answer))
                          answer
                          right-list))
             '()
             left-list))

(define (make-new-solution cp-term)
  (let ((solution (car cp-term))
        (key (cdr cp-term)))
    (lset-adjoin equal? solution key)))

(define (improper-list-error procedure thing)
  (error (string-append "Improper list found by " procedure ": ") thing))

(define (least-elements list <)
  (define (accumulate-least answer item)
    (cond ((< (car answer) item) answer)
          ((< item (car answer)) (cons item '()))
          (else (cons item answer))))

  (cond ((pair? list) (fold-left accumulate-least
                                 (cons (car list) '())
                                 (cdr list)))
        ((null? list) (error "List must have at least one element." list))
        (else (improper-list-error 'LEAST-ELEMENTS list))))

(define (score solution)
  (do ((tail solution (cdr tail))
       (score -1      (+ score (string-length (car tail)) 1)))
      ((not (pair? tail))
       (if (null? tail)
           score
           (improper-list-error 'score solution)))))
This works for small sets:
1 ]=> (mini-golf0 boys girls)
Adding value "jacob"
   with keys ("ob" "c" "j")
   to 1 partial solutions.
Returning 3 new partial solutions.
Adding value "mason"
   with keys ("as")
   to 3 partial solutions.
Returning 3 new partial solutions.
Adding value "ethan"
   with keys ("an" "ha")
   to 3 partial solutions.
Returning 6 new partial solutions.
Adding value "noah"
   with keys ("ah" "oa" "no")
   to 6 partial solutions.
Returning 18 new partial solutions.
Adding value "william"
   with keys ("lia" "lli" "ill" "am" "w")
   to 18 partial solutions.
Returning 90 new partial solutions.
Adding value "liam"
   with keys ("lia" "am")
   to 90 partial solutions.
Returning 180 new partial solutions.
Adding value "jayden"
   with keys ("en" "de" "yd" "ay" "j")
   to 180 partial solutions.
Returning 900 new partial solutions.
Adding value "michael"
   with keys ("ae" "ha" "c")
   to 900 partial solutions.
Returning 2700 new partial solutions.
Adding value "alexander"
   with keys ("de" "nd" "an" "le" "al" "r" "x")
   to 2700 partial solutions.
Returning 18900 new partial solutions.
Adding value "aiden"
   with keys ("en" "de" "id")
   to 18900 partial solutions.
Returning 56700 new partial solutions.
;Value 41: (("de" "am" "ah" "ha" "as" "j")
            ("de" "am" "ah" "ha" "as" "j")
            ("de" "am" "oa" "ha" "as" "j")
            ("de" "am" "oa" "ha" "as" "j")
            ("de" "am" "no" "ha" "as" "j")
            ("de" "am" "no" "ha" "as" "j")
            ("de" "am" "ah" "ha" "as" "c")
            ("de" "am" "ah" "ha" "as" "c")
            ("de" "am" "oa" "ha" "as" "c")
            ("de" "am" "oa" "ha" "as" "c")
            ("de" "am" "no" "ha" "as" "c")
            ("de" "am" "no" "ha" "as" "c")
            ("de" "am" "ah" "an" "as" "c")
            ("de" "am" "ah" "an" "as" "c")
            ("en" "am" "ah" "an" "as" "c")
            ("de" "am" "oa" "an" "as" "c")
            ("de" "am" "oa" "an" "as" "c")
            ("en" "am" "oa" "an" "as" "c")
            ("de" "am" "no" "an" "as" "c")
            ("de" "am" "no" "an" "as" "c")
            ("en" "am" "no" "an" "as" "c"))
But you can see that we won't be able to go much beyond this because there are just too many combinations. We can cut down on the intermediate partial solutions by noticing that many of them are redundant. We don't need to keep partial solutions that cannot possibly lead to a shortest final solution. The various partial solutions each (potentially) match different sets of words. We only need keep the shortest solution for each different set of matched words. Furthermore, if a solution's matches are a superset of another's matches, and the other is the same length or longer, then the solution is dominated by the other and will always be at least the length of the longer.
(define (mini-golf1 winners losers)
  (cover1
   (make-dominant-ngram-table winners (delete-losing-superstrings winners losers))
   lowest-scoring))

(define (cover1 v-k-table lowest-scoring)
  (let ((empty-solution-set (list '())))

    (define (add-v-k-entry solution-set v-k-entry)
      (let ((value (car v-k-entry))
            (keys  (cdr v-k-entry)))

        (write-string "Adding value ") (write value) (newline)
        (write-string "   with keys ") (write keys) (newline)
        (write-string "   to ") (write (length solution-set))
        (write-string " partial solutions.") (newline)

        (let ((new-solutions
               (map make-new-solution (cartesian-product solution-set keys))))

          (let ((trimmed-solutions (trim-partial-solutions new-solutions)))

            (write-string "Returning ") (write (length trimmed-solutions))
            (write-string " of ") (write (length new-solutions))
            (write-string " new partial solutions.") (newline)

            trimmed-solutions))))

    (define (trim-partial-solutions partial-solutions)
      (let ((equivalent-solutions (collect-equivalent-partial-solutions partial-solutions)))
        (write-string "  Deleting ")
        (write (- (length partial-solutions) (length equivalent-solutions)))
        (write-string " equivalent partial solutions.")
        (newline)

        (remove-dominated-solutions
         (map lowest-scoring-equivalent-partial-solution equivalent-solutions))))

    (define (lowest-scoring-equivalent-partial-solution entry)
      (first (lowest-scoring (car entry))))

    (define (collect-equivalent-partial-solutions alist)
      ;; Add each entry in turn.
      (fold-left (lambda (equivalents partial-solution)
                   (add-equivalent-partial-solution
                    partial-solution
                    (partial-solution-matches partial-solution)
                    equivalents))
                 '() alist))

    (define (partial-solution-matches partial-solution)
      (keep-matching-items v-k-table
        (lambda (entry)
          (there-exists? partial-solution
                         (lambda (key) (member key (cdr entry)))))))

    (define (remove-dominated-solutions partial-solutions)
      (let ((before-length (length partial-solutions)))
        (let ((answer  (map car (fold-left (lambda (answer solution)
                                             (if (there-exists? answer (dominates-solution? solution))
                                                 answer
                                                 (cons solution answer)))
                                           '()
                                           (map (lambda (partial-solution)
                                                  (cons partial-solution (partial-solution-matches partial-solution)))
                                                partial-solutions)))))
          (let ((after-length (length answer)))
            (write-string "  Deleting ") (write (- before-length after-length))
            (write-string " dominated solutions.")
            (newline)
            answer))))

    (lowest-scoring
     (fold-left add-v-k-entry empty-solution-set v-k-table))))

(define (dominates-solution? solution)
  (let ((partial-solution (car solution))
        (solution-matches (cdr solution)))
    (lambda (other-solution)
      (let ((other-partial-solution (car other-solution))
            (other-matches (cdr other-solution)))
        (and (not (equal? solution-matches other-matches))
             (superset? other-matches solution-matches)
             (<= (score other-partial-solution) (score partial-solution)))))))

(define (add-equivalent-partial-solution solution value alist)
  (cond ((pair? alist)
         (let ((entry (car alist))
               (tail (cdr alist)))
           (let ((entry-solutions (car entry))
                 (entry-value (cdr entry)))
             (if (equal? value entry-value)
                 (if (member solution entry-solutions)
                     alist
                     (cons (cons (cons solution entry-solutions) value)
                           tail))
                 (cons entry (add-equivalent-partial-solution solution value tail))))))
        ((null? alist) (list (cons (list solution) value)))
        (else (improper-list-error 'collect-equivalents alist))))
1 ]=> (mini-golf1 winners losers)
Adding value "washington"
   with keys ("was" "to" "gt" "hi" "sh")
   to 1 partial solutions.
  Deleting 2 equivalent partial solutions.
  Removing 1 dominated solutions.
Returning 2 of 5 new partial solutions.
Adding value "adams"
   with keys ("ms" "am" "ad")
   to 2 partial solutions.
  Deleting 0 equivalent partial solutions.
  Removing 2 dominated solutions.
Returning 4 of 6 new partial solutions.
Adding value "jefferson"
   with keys ("rs" "fe" "ff" "ef" "j")
   to 4 partial solutions.
  Deleting 12 equivalent partial solutions.
  Removing 4 dominated solutions.
Returning 4 of 20 new partial solutions.
Adding value "madison"
   with keys ("iso" "di" "ad" "ma")
   to 4 partial solutions.
  Deleting 2 equivalent partial solutions.
  Removing 2 dominated solutions.
Returning 12 of 16 new partial solutions.
Adding value "monroe"
   with keys ("nro" "onr" "oe")
   to 12 partial solutions.
  Deleting 24 equivalent partial solutions.
  Removing 0 dominated solutions.
Returning 12 of 36 new partial solutions.
Adding value "jackson"
   with keys ("ks" "ac" "j")
   to 12 partial solutions.
  Deleting 24 equivalent partial solutions.
  Removing 0 dominated solutions.
Returning 12 of 36 new partial solutions.
Adding value "van-buren"
   with keys ("ren" "ure" "bu" "va" "-")
   to 12 partial solutions.
  Deleting 36 equivalent partial solutions.
  Removing 0 dominated solutions.
Returning 24 of 60 new partial solutions.
Adding value "harrison"
   with keys ("iso" "ris" "rri" "arr" "har")
   to 24 partial solutions.
  Deleting 96 equivalent partial solutions.
  Removing 12 dominated solutions.
Returning 12 of 120 new partial solutions.
Adding value "polk"
   with keys ("olk" "po")
   to 12 partial solutions.
  Deleting 12 equivalent partial solutions.
  Removing 0 dominated solutions.
Returning 12 of 24 new partial solutions.
Adding value "taylor"
   with keys ("lo" "yl" "ta")
   to 12 partial solutions.
  Deleting 12 equivalent partial solutions.
  Removing 12 dominated solutions.
Returning 12 of 36 new partial solutions.
Adding value "pierce"
   with keys ("ier" "pie" "ce" "rc")
   to 12 partial solutions.
  Deleting 36 equivalent partial solutions.
  Removing 0 dominated solutions.
Returning 12 of 48 new partial solutions.
Adding value "buchanan"
   with keys ("na" "ch" "uc" "bu")
   to 12 partial solutions.
  Deleting 39 equivalent partial solutions.
  Removing 3 dominated solutions.
Returning 6 of 48 new partial solutions.
Adding value "lincoln"
   with keys ("inco" "col" "ln" "li")
   to 6 partial solutions.
  Deleting 15 equivalent partial solutions.
  Removing 6 dominated solutions.
Returning 3 of 24 new partial solutions.
Adding value "grant"
   with keys ("ant" "ra")
   to 3 partial solutions.
  Deleting 3 equivalent partial solutions.
  Removing 0 dominated solutions.
Returning 3 of 6 new partial solutions.
Adding value "hayes"
   with keys ("hay" "ye")
   to 3 partial solutions.
  Deleting 3 equivalent partial solutions.
  Removing 0 dominated solutions.
Returning 3 of 6 new partial solutions.
Adding value "garfield"
   with keys ("eld" "iel" "fi" "rf" "ga")
   to 3 partial solutions.
  Deleting 9 equivalent partial solutions.
  Removing 3 dominated solutions.
Returning 3 of 15 new partial solutions.
Adding value "cleveland"
   with keys ("ela" "vel" "lev")
   to 3 partial solutions.
  Deleting 3 equivalent partial solutions.
  Removing 0 dominated solutions.
Returning 6 of 9 new partial solutions.
Adding value "mckinley"
   with keys ("mck" "nl")
   to 6 partial solutions.
  Deleting 6 equivalent partial solutions.
  Removing 0 dominated solutions.
Returning 6 of 12 new partial solutions.
Adding value "roosevelt"
   with keys ("vel" "sev" "lt" "os" "oo")
   to 6 partial solutions.
  Deleting 24 equivalent partial solutions.
  Removing 0 dominated solutions.
Returning 6 of 30 new partial solutions.
Adding value "taft"
   with keys ("ft" "af" "ta")
   to 6 partial solutions.
  Deleting 12 equivalent partial solutions.
  Removing 0 dominated solutions.
Returning 6 of 18 new partial solutions.
Adding value "wilson"
   with keys ("ls")
   to 6 partial solutions.
  Deleting 0 equivalent partial solutions.
  Removing 0 dominated solutions.
Returning 6 of 6 new partial solutions.
Adding value "harding"
   with keys ("ard" "har" "di")
   to 6 partial solutions.
  Deleting 12 equivalent partial solutions.
  Removing 2 dominated solutions.
Returning 4 of 18 new partial solutions.
Adding value "coolidge"
   with keys ("li" "oo")
   to 4 partial solutions.
  Deleting 4 equivalent partial solutions.
  Removing 0 dominated solutions.
Returning 4 of 8 new partial solutions.
Adding value "hoover"
   with keys ("oo" "ho")
   to 4 partial solutions.
  Deleting 4 equivalent partial solutions.
  Removing 2 dominated solutions.
Returning 2 of 8 new partial solutions.
Adding value "truman"
   with keys ("ma" "ru" "tr")
   to 2 partial solutions.
  Deleting 4 equivalent partial solutions.
  Removing 1 dominated solutions.
Returning 1 of 6 new partial solutions.
Adding value "eisenhower"
   with keys ("wer" "sen" "ise" "ow" "ho" "nh" "ei")
   to 1 partial solutions.
  Deleting 6 equivalent partial solutions.
  Removing 0 dominated solutions.
Returning 1 of 7 new partial solutions.
Adding value "kennedy"
   with keys ("ken" "dy" "ed" "nn")
   to 1 partial solutions.
  Deleting 3 equivalent partial solutions.
  Removing 0 dominated solutions.
Returning 1 of 4 new partial solutions.
Adding value "johnson"
   with keys ("hn" "oh" "j")
   to 1 partial solutions.
  Deleting 2 equivalent partial solutions.
  Removing 0 dominated solutions.
Returning 1 of 3 new partial solutions.
Adding value "nixon"
   with keys ("xo" "ix" "ni")
   to 1 partial solutions.
  Deleting 2 equivalent partial solutions.
  Removing 0 dominated solutions.
Returning 1 of 3 new partial solutions.
Adding value "carter"
   with keys ("car" "rt")
   to 1 partial solutions.
  Deleting 1 equivalent partial solutions.
  Removing 0 dominated solutions.
Returning 1 of 2 new partial solutions.
Adding value "reagan"
   with keys ("ga" "ag" "ea")
   to 1 partial solutions.
  Deleting 2 equivalent partial solutions.
  Removing 0 dominated solutions.
Returning 1 of 3 new partial solutions.
Adding value "bush"
   with keys ("sh" "us" "bu")
   to 1 partial solutions.
  Deleting 2 equivalent partial solutions.
  Removing 0 dominated solutions.
Returning 1 of 3 new partial solutions.
Adding value "clinton"
   with keys ("int" "to" "li")
   to 1 partial solutions.
  Deleting 2 equivalent partial solutions.
  Removing 0 dominated solutions.
Returning 1 of 3 new partial solutions.
Adding value "obama"
   with keys ("ma" "am" "ba" "ob")
   to 1 partial solutions.
  Deleting 3 equivalent partial solutions.
  Removing 0 dominated solutions.
Returning 1 of 4 new partial solutions.
;Value 47: (("rt" "ni" "nn" "ho" "ls" "nl" "vel" "ga" "ye" "ra" "li" "rc" "ta" "po" "har" "bu" "oe" "ma" "j" "ad" "sh"))
The cover procedure takes a table that maps values to the keys that cover them. If we can reduce the size of that table without changing the solution, we'll run faster. If there are two entries in the table such that the keys of one are a superset of the keys of the other, we can discard the superset: the smaller of the two entries will be in the solution, and any key that matches the smaller one will automatically match the larger one as well. Also, if two values have the same set of keys that match them, we need only include one of the values in the table.
(define (delete-dominated-values v-k-table)
  (let ((size-before (length v-k-table)))

    (define (dominated-value? entry)
      (let ((entry-value (car entry))
            (entry-keylist (cdr entry)))
        (there-exists? v-k-table
          (lambda (other-entry)
            (and (not (eq? entry other-entry))
                 (let ((other-value (car other-entry))
                       (other-keylist (cdr other-entry)))
                   (and (superset? entry-keylist other-keylist)
                        (not (equal? other-keylist entry-keylist)))))))))

    (define (equivalent-value-in-answer? answer entry)
      (let ((entry-value (car entry))
            (entry-keylist (cdr entry)))
        (there-exists? answer
          (lambda (other-entry)
            (let ((other-value (car other-entry))
                  (other-keylist (cdr other-entry)))
              (equal? entry-keylist other-keylist))))))

    (define (add-entry answer entry)
      (if (or (equivalent-value-in-answer? answer entry)
              (dominated-value? entry))
          answer
          (cons entry answer)))

    (let ((answer (fold-left add-entry '() v-k-table)))
      (write-string "Removed ") (write (- size-before (length answer)))
      (write-string " dominated and equivalent values.")
      (newline)
      answer)))

(define (superset? bigger smaller)
  (for-all? smaller (lambda (s) (member s bigger))))

(define (mini-golf2 winners losers)
  (cover1
   (delete-dominated-values
    (make-dominant-ngram-table winners (delete-losing-superstrings winners losers)))
   lowest-scoring))

;;;;;;;;
;; Delete dominated keys from the keylists.

(define (mini-golf3 winners losers)
  (cover1
   (delete-dominated-keys-and-values
    (make-dominant-ngram-table winners (delete-losing-superstrings winners losers))
    (lambda (left right)
      (or (< (string-length left) (string-length right))
          (and (= (string-length left) (string-length right))
               (string<? left right)))))
   lowest-scoring))

(define (delete-dominated-keys-and-values v-k-table better-key)
  (let ((before-size (fold-left * 1 (map length v-k-table))))
    (let ((new-table (delete-dominated-values
                      (delete-dominated-keys v-k-table better-key))))
      (let ((after-size (fold-left * 1 (map length new-table))))
        (if (= before-size after-size)
            v-k-table
            (delete-dominated-keys-and-values new-table better-key))))))

(define (delete-dominated-keys v-k-table better-key)
  (let ((all-keys (get-all-keys v-k-table)))

    (define (lookup-key key)
      (cons key
            (map car
                 (keep-matching-items v-k-table
                                      (lambda (v-k-entry)
                                        (member key (cdr v-k-entry)))))))

    (let ((k-v-table (map lookup-key all-keys)))

      (define (dominated-key? key)
        (let ((values (cdr (assoc key k-v-table))))
          (there-exists? k-v-table
                         (lambda (entry)
                           (let ((entry-key (car entry))
                                 (entry-values (cdr entry)))
                             (and (superset? entry-values values)
                                  (not (equal? values entry-values))
                                  (or (< (string-length entry-key) (string-length key))
                                      (and (= (string-length entry-key) (string-length key))
                                           (string<? entry-key key)))))))))

      (define (equivalent-key-in-answer? answer key)
        (let ((values (cdr (assoc key k-v-table))))
          (there-exists? answer
                         (lambda (entry-key)
                           (let ((entry-values (cdr (lookup-key entry-key))))
                             (equal? values entry-values))))))

      (define (add-keys answer key)
        (if (or (dominated-key? key)
                (equivalent-key-in-answer? answer key))
            answer
            (cons key answer)))

      (let ((good-keys (fold-left add-keys '() (sort all-keys better-key))))
        (write-string "Removed ") (write (- (length all-keys) (length good-keys)))
        (write-string " of ") (write (length all-keys)) (write-string " keys.")(newline)

        (map (lambda (entry)
               (cons (car entry)
                     (keep-matching-items (cdr entry) (lambda (key) (member key good-keys)))))
             v-k-table)))))

(define (get-all-keys v-k-table)
  (fold-left (lambda (answer entry)
               (fold-left (lambda (answer key)
                            (lset-adjoin equal? answer key))
                          answer
                          (cdr entry)))
             '()
             v-k-table))
Trimming the table this way helps a lot. We can now compute the dogs vs. cats.
1 ]=> (mini-golf3 dogs cats)

Removed 294 of 405 keys.
Removed 44 dominated and equivalent values.
Removed 25 of 93 keys.
Removed 15 dominated and equivalent values.
Removed 7 of 62 keys.
Removed 0 dominated and equivalent values.
Removed 0 of 55 keys.
Removed 0 dominated and equivalent values.
Adding value "BORZOIS"
   with keys ("OIS" "BOR" "RZ")
   to 1 partial solutions.
  Deleting 0 equivalent partial solutions.
  Removing 0 dominated solutions.
Returning 3 of 3 new partial solutions.
Adding value "GIANT SCHNAUZERS"
   with keys ("SCH" "HN")
   to 3 partial solutions.
  Deleting 0 equivalent partial solutions.
  Removing 0 dominated solutions.
Returning 6 of 6 new partial solutions.
Adding value "BASENJIS"
   with keys ("JI")
   to 6 partial solutions.
  Deleting 0 equivalent partial solutions.
  Removing 0 dominated solutions.
Returning 6 of 6 new partial solutions.
Adding value "ENGLISH SETTERS"
   with keys ("TERS" "ETT")
   to 6 partial solutions.
  Deleting 0 equivalent partial solutions.
  Removing 0 dominated solutions.
Returning 12 of 12 new partial solutions.
Adding value "JAPANESE CHIN"
   with keys ("CHI")
   to 12 partial solutions.
  Deleting 0 equivalent partial solutions.
  Removing 0 dominated solutions.
Returning 12 of 12 new partial solutions.
Adding value "BOUVIERS DES FLANDRES"
   with keys ("S F" "DES" " DE" "IER" "FL" "VI")
   to 12 partial solutions.
  Deleting 8 equivalent partial solutions.
  Removing 8 dominated solutions.
Returning 56 of 72 new partial solutions.
Adding value "PEKINGESE"
   with keys ("EKI")
   to 56 partial solutions.
  Deleting 0 equivalent partial solutions.
  Removing 0 dominated solutions.
Returning 56 of 56 new partial solutions.
Adding value "BELGIAN MALINOIS"
   with keys (" MAL" "OIS" "LG")
   to 56 partial solutions.
  Deleting 96 equivalent partial solutions.
  Removing 0 dominated solutions.
Returning 72 of 168 new partial solutions.
Adding value "GERMAN WIREHAIRED POINTERS"
   with keys ("TERS" "D P")
   to 72 partial solutions.
  Deleting 108 equivalent partial solutions.
  Removing 0 dominated solutions.
Returning 36 of 144 new partial solutions.
Adding value "CHOW CHOWS"
   with keys ("W ")
   to 36 partial solutions.
  Deleting 0 equivalent partial solutions.
  Removing 0 dominated solutions.
Returning 36 of 36 new partial solutions.
Adding value "SAMOYEDS"
   with keys ("DS")
   to 36 partial solutions.
  Deleting 0 equivalent partial solutions.
  Removing 0 dominated solutions.
Returning 36 of 36 new partial solutions.
Adding value "DOGUES DE BORDEAUX"
   with keys ("BOR" " DE" "GU")
   to 36 partial solutions.
  Deleting 88 equivalent partial solutions.
  Removing 2 dominated solutions.
Returning 18 of 108 new partial solutions.
Adding value "DALMATIANS"
   with keys ("ANS" "LM")
   to 18 partial solutions.
  Deleting 0 equivalent partial solutions.
  Removing 0 dominated solutions.
Returning 36 of 36 new partial solutions.
Adding value "LHASA APSOS"
   with keys ("LH")
   to 36 partial solutions.
  Deleting 0 equivalent partial solutions.
  Removing 0 dominated solutions.
Returning 36 of 36 new partial solutions.
Adding value "CANE CORSO"
   with keys (" COR" "ORS")
   to 36 partial solutions.
  Deleting 0 equivalent partial solutions.
  Removing 0 dominated solutions.
Returning 72 of 72 new partial solutions.
Adding value "ALASKAN MALAMUTES"
   with keys (" MAL" "TES" "LAS" "KA")
   to 72 partial solutions.
  Deleting 184 equivalent partial solutions.
  Removing 0 dominated solutions.
Returning 104 of 288 new partial solutions.
Adding value "WHIPPETS"
   with keys ("IP")
   to 104 partial solutions.
  Deleting 0 equivalent partial solutions.
;GC #199: took:   0.20   (1%) CPU time,   0.10   (1%) real time; free: 16754359
  Removing 0 dominated solutions.
Returning 104 of 104 new partial solutions.
Adding value "SHIBA INU"
   with keys ("SHI" " I")
   to 104 partial solutions.
  Deleting 0 equivalent partial solutions.
  Removing 0 dominated solutions.
Returning 208 of 208 new partial solutions.
Adding value "AKITAS"
   with keys ("AK")
   to 208 partial solutions.
  Deleting 0 equivalent partial solutions.
  Removing 0 dominated solutions.
Returning 208 of 208 new partial solutions.
Adding value "RHODESIAN RIDGEBACKS"
   with keys ("DES" "DG" "OD")
   to 208 partial solutions.
  Deleting 304 equivalent partial solutions.
  Removing 144 dominated solutions.
Returning 176 of 624 new partial solutions.
Adding value "BICHONS FRISES"
   with keys ("S F" "FR")
   to 176 partial solutions.
  Deleting 224 equivalent partial solutions.
  Removing 16 dominated solutions.
Returning 112 of 352 new partial solutions.
Adding value "PAPILLONS"
   with keys ("API")
   to 112 partial solutions.
  Deleting 0 equivalent partial solutions.
  Removing 0 dominated solutions.
Returning 112 of 112 new partial solutions.
Adding value "COLLIES"
   with keys ("IES")
   to 112 partial solutions.
  Deleting 0 equivalent partial solutions.
  Removing 0 dominated solutions.
Returning 112 of 112 new partial solutions.
Adding value "VIZSLAS"
   with keys ("LAS" "IZ" "VI")
   to 112 partial solutions.
;GC #200: took:   0.10   (0%) CPU time,   0.10   (1%) real time; free: 16757322
  Deleting 272 equivalent partial solutions.
  Removing 0 dominated solutions.
Returning 64 of 336 new partial solutions.
Adding value "BRITTANYS"
   with keys ("ITT")
   to 64 partial solutions.
  Deleting 0 equivalent partial solutions.
  Removing 0 dominated solutions.
Returning 64 of 64 new partial solutions.
Adding value "PUGS"
   with keys ("GS")
   to 64 partial solutions.
  Deleting 0 equivalent partial solutions.
  Removing 0 dominated solutions.
Returning 64 of 64 new partial solutions.
Adding value "HAVANESE"
   with keys ("HAVANE")
   to 64 partial solutions.
  Deleting 0 equivalent partial solutions.
  Removing 0 dominated solutions.
Returning 64 of 64 new partial solutions.
Adding value "COCKER SPANIELS"
   with keys ("ANI" "LS")
   to 64 partial solutions.
  Deleting 80 equivalent partial solutions.
  Removing 0 dominated solutions.
Returning 48 of 128 new partial solutions.
Adding value "MASTIFFS"
   with keys ("FS")
   to 48 partial solutions.
  Deleting 0 equivalent partial solutions.
  Removing 0 dominated solutions.
Returning 48 of 48 new partial solutions.
Adding value "MALTESE"
   with keys ("TES" "LT")
   to 48 partial solutions.
  Deleting 72 equivalent partial solutions.
  Removing 0 dominated solutions.
Returning 24 of 96 new partial solutions.
Adding value "PEMBROKE WELSH CORGIS"
   with keys (" COR" "LS")
   to 24 partial solutions.
  Deleting 32 equivalent partial solutions.
  Removing 0 dominated solutions.
Returning 16 of 48 new partial solutions.
Adding value "BOSTON TERRIERS"
   with keys ("IER" " T")
   to 16 partial solutions.
  Deleting 24 equivalent partial solutions.
  Removing 4 dominated solutions.
Returning 4 of 32 new partial solutions.
Adding value "POMERANIANS"
   with keys ("ANS" "ANI")
   to 4 partial solutions.
  Deleting 6 equivalent partial solutions.
  Removing 0 dominated solutions.
Returning 2 of 8 new partial solutions.
Adding value "GREAT DANES"
   with keys ("GR")
   to 2 partial solutions.
  Deleting 0 equivalent partial solutions.
  Removing 0 dominated solutions.
Returning 2 of 2 new partial solutions.
Adding value "DOBERMAN PINSCHERS"
   with keys ("SCH" " PI")
   to 2 partial solutions.
  Deleting 3 equivalent partial solutions.
  Removing 0 dominated solutions.
Returning 1 of 4 new partial solutions.
Adding value "SHIH TZU"
   with keys ("SHI" " T")
   to 1 partial solutions.
  Deleting 1 equivalent partial solutions.
  Removing 0 dominated solutions.
Returning 1 of 2 new partial solutions.
Adding value "ROTTWEILERS"
   with keys ("EI")
   to 1 partial solutions.
  Deleting 0 equivalent partial solutions.
  Removing 0 dominated solutions.
Returning 1 of 1 new partial solutions.
Adding value "POODLES"
   with keys ("DL" "OD")
   to 1 partial solutions.
  Deleting 1 equivalent partial solutions.
  Removing 0 dominated solutions.
Returning 1 of 2 new partial solutions.
Adding value "BOXERS"
   with keys ("OX")
   to 1 partial solutions.
  Deleting 0 equivalent partial solutions.
  Removing 0 dominated solutions.
Returning 1 of 1 new partial solutions.
Adding value "BEAGLES"
   with keys ("AGL")
   to 1 partial solutions.
  Deleting 0 equivalent partial solutions.
  Removing 0 dominated solutions.
Returning 1 of 1 new partial solutions.
Adding value "LABRADOR RETRIEVERS"
   with keys ("VE")
   to 1 partial solutions.
  Deleting 0 equivalent partial solutions.
  Removing 0 dominated solutions.
Returning 1 of 1 new partial solutions.
;Value 50: (("VE" "AGL" "OX" "EI" "GR" " T" "FS" "LS" "HAVANE" "GS" "ITT" "IES" "API" "FR" "OD" "AK" " I" "IP" "TES" "ORS" "LH" "ANS" "GU" "DS" "W " "EKI" "VI" "CHI" "TERS" "JI" "SCH" "OIS"))
We appear to have the substring version of regex golf under control. Can we extend it to actual regular expressions? Of course we can. In the next installment...

Friday, October 18, 2013

Unexciting code

Source code control isn't supposed to be interesting or exciting for the user. Blogging about source code control isn't going to be interesting or surprising for the reader, either. I'll spare you the walkthrough. You're old enough to figure it out without me holding your hand. It'll be more interesting if I blog about the problems and bugs we encountered.

Conman was the configuration manager built on the core ChangeSafe code. The master catalog in conman holds the highest level objects being managed.
(defclass master-catalog ()
  ;; subsystems and products (PC's) in this repository
  ((products   :initform nil
               :version-technique :composite-set
               :accessor master-catalog/products)


   (subsystems :initform nil
               :version-technique :composite-set
               :accessor master-catalog/subsystems)

   ;; All satellite repositories known to this master.  We don't allow
   ;; satellite repository names to change, though the projects they
   ;; contain may be renamed.  **WARNING** BE CAREFUL IF YOU OPERATE
   ;; in non-latest metaversion views of this list or you might try to
   ;; create a satellite which already exists.  Only update this list
   ;; using the latest metaversion.
   (satellite-repositories :initform nil :version-technique :composite-set)

   ;; Projects (a.k.a. ChangeSafe "classes") contained in satellite
   ;; repositories.  The descriptors contain the key mapping from a
   ;; project name to a satellite-repository-name.  We could almost
   ;; make this just a (project-name . satellite-name) mapping, but we
   ;; need to version project-name, and we also want to cache in the
   ;; master some information in the satellites so that we don't
   ;; always have to examine the satellites for often accessed
   ;; information.
   (classes :accessor master-catalog/classes
            :initform nil
            :version-technique :composite-set)

   ;; Cset-relationship-tuples is conceptually a list of sublists,
   ;; where each sublist is a tuple.  For every master cid which
   ;; results in the creation of satellite cids, a tuple is added
   ;; which enumerates the master cid and the satellite cids which it
   ;; caused to be created.  e.g. '((master.cid.1 satellite-1.cid.1
   ;; satellite-2.cid.1)) Because we want portable references, blah
   ;; blah blah, we actually reference DIDS of CHANGE-SET objects
   ;; rather than the cids.  We may instead wish to store CID-OBJECT
   ;; references.  TBD.

   ;; Right now, this information is maintained only for change
   ;; transactions which arise from WITH-CM-MASTER-TXN and
   ;; WITH-CM-SATELLITE-TXN.  This is ok, since those are the
   ;; interesting txns which manipulate satellite databases.

   ;; Note that because of the high volume of csets we expect to
   ;; track, we actually represent this information as a vector of
   ;; vectors to achieve space compaction.
   (cset-relationship-tuples :initform (make-instance 'persistent-vector
                                                           :initial-element nil
                                                           :size 1)
                             :version-technique :nonversioned)

   (cset-rel-tuples-index :initform (make-instance 'persistent-vector
                                                        :initial-element -1
                                                        :size 1)
                          :version-technique :nonversioned)

   ;; BOTH these slots are updated ONLY by vm-txn-note-change-set,
   ;; except for schema upgrading.

   ;; The cset-rel-tuples-index slot is a conceptual hash table into the
   ;; cset-relationship-tuples slot. This is used by
   ;; master-catalog-lookup-cset-relationship
   ;; to avoid an extremely costly linear search of cset-relationship-tuples.
   ;; This is very important for cset_add, cset_remove, and csets_from_file.

   ;; The idea is that the did-string of the master-cid's did is hashed.
   ;; Reducing that hash modulo the number of entries in cset-rel-tuples-index,
   ;; finds a "home" index of cset-rel-tuples-index. Using the sb32 value
   ;; in that element, we either have a -1 (so the entry is not in the
   ;; hash table) or we get an index into cset_relationship_tuples.
   ;; If there is no hash collision, that element of cset_relationship_tuples
   ;; will contain the desired master-cid did we are looking for. If it
   ;; isn't the one we want, we have had a hash collision, and we resolve it
   ;; by linear probing in the next-door (circularly) element of
   ;; cset-rel-tuples-index.
   ;; The number of elements of cset-rel-tuples-index is always a prime number,
   ;; and is also maintained to be more than twice as large as the number of
   ;; entries in cset-relationship-tuples. That is important, to prevent
   ;; clustering and slow searching. So when it grows, cset-rel-tuples-index
   ;; grows by a reasonable factor (about 2) so that it always contains
   ;; at least half "holes", that is, -1.  Further, we want to avoid frequent
   ;; growth, because growing requires computing every entry in the hash table
   ;; again. That makes for a big transaction, as every element of the
   ;; cid-relationship-tuple vector has to be mapped in, and rehashed with
   ;; the new size of cset-rel-tuples-index.
   ;; Space considerations: In Jack's db, there are roughly 40,000 elements
   ;; currently in the cset-relationship-tuples.  Suppose we had 100,000
   ;; elements. In Jack's db, it appears that the tuples are about 2 elements
   ;; each, average. Suppose it were 9. Then the tuples would take 4*(1+9)=40
   ;; bytes each, so 40*100,000 = 4Mb total (plus another 400,000 for the
   ;; cset-relationship-tuples vector itself).  This is large, but not likely
   ;; to be a cause of breakage anytime soon.


   ;; The cset-name-hashtable maps cset names to csets.  While the HP
   ;; model of ChangeSafe doesn't allow changing the name of a cset,
   ;; we allow this in general.  So this hash table is keyed by cset
   ;; name, and valued by all csets which EVER bore that name in their
   ;; versioned name component.  The hash value is therefore a list.
   ;; In the case of system augmented names (by
   ;; change_create/master_change), there shouldn't be any collisions.
   ;; We also use this slot to hash unaugmented user names to csets,
   ;; and those are far more likely to have collisions (one key ->
   ;; multiple csets).  In the case of un-augmented names, this is
   ;; expected. In the case of augmented names, this is an error.
   (cset-name-hashtable :version-technique nil
                        :initform (make-instance 'persistent-hash-table :size 1023)
                        :reader master-catalog/cset-name-hashtable)
   )
  (:documentation "Catalog/hierarchy-root of versioned information maintained in the master repository.")
  (:metaclass versioned-standard-class)
  (:schema-version 0))
Pretty straightforward, no? No? Let's list the products in the master catalog:
(defun cmctl/list-products (conman-request)
  "cheesy hack to list the products"
  (let ((master-repository-name (conman-request/repository-dbpath conman-request))
        (reason "list products")
        (userid (conman-request/user-name conman-request)))
    (call-with-master-catalog-transaction
     master-repository-name
     userid
     :master-metaversion :latest-metaversion
     :version :latest-version
     :reason reason
     :transaction-type :read-only
     :receiver (lambda (master-repository master-transaction master-catalog)
                 (declare (ignore master-repository master-transaction))
                 (collect 'list
                          (map-fn 't (lambda (product)
                                       (list (distributed-object-identifier product)
                                             (named-object/name product)
                                             (described-object/description product)))
                                  (master-catalog/scan-products master-catalog)))))))
That isn't very edifying.

Start from the end:
(master-catalog/scan-products master-catalog)
defined as
(defun master-catalog/scan-products (master-catalog)
  (declare (optimizable-series-function))
  (versioned-object/scan-composite-versioned-slot master-catalog 'products))
The optimizable-series-function declaration indicates that we are using Richard Waters's series package. This allows us to write functions that can be assembled into an efficient pipeline for iterating over a collection of objects. This code:
(collect 'list
   (map-fn 't (lambda (product)
                (list (distributed-object-identifier product)
                      (named-object/name product)
                      (described-object/description product)))
           (master-catalog/scan-products master-catalog)))
takes each product in turn, creates a three element list of the identifier, the project name, and the product description, and finally collects the three-tuples in a list to be returned to the caller. Here is what it looks like macroexpanded:
(COMMON-LISP:LET* ((#:OUT-917 MASTER-CATALOG))
  (COMMON-LISP:LET (#:OUT-914)
    (SETQ #:OUT-914 'PRODUCTS)
    (COMMON-LISP:LET (#:OUT-913 #:OUT-912)
      (SETQ #:OUT-913 (SLOT-VALUE-UNVERSIONED #:OUT-917 #:OUT-914))
      (SETQ #:OUT-912
              (IF *VERSIONED-VALUE-CID-SET-OVERRIDE*
                  (PROGN
                   (DEBUG-MESSAGE 4 "Using override cid-set to scan slot ~s"
                    #:OUT-914)
                   *VERSIONED-VALUE-CID-SET-OVERRIDE*)
                  (TRANSACTION/CID-SET *TRANSACTION*)))
      (COMMON-LISP:LET (#:OUT-911 #:OUT-910 #:OUT-909)
        (MULTIPLE-VALUE-SETQ (#:OUT-911 #:OUT-910 #:OUT-909)
          (CVI-GET-ION-VECTOR-AND-INDEX #:OUT-913 #:OUT-912))
        (IF #:OUT-911
            NIL
            (PROGN
             (IF (COMMON-LISP:LET ((#:G717-902 #:OUT-910))
                   (IF #:G717-902
                       #:G717-902
                       (THE T (CVI/DEFAULT-ALLOWED #:OUT-913))))
                 NIL
                 (PROGN
                  (SLOT-UNBOUND (CLASS-OF #:OUT-917) #:OUT-917 #:OUT-914)))))
        (DEBUG-MESSAGE 5 "Active ion vector for retrieval:  ~s" #:OUT-911)
        (COMMON-LISP:LET (#:OUT-908)
          (SETQ #:OUT-908
                  (IF #:OUT-911
                      #:OUT-911
                      (THE T #())))
          (COMMON-LISP:LET (#:ELEMENTS-907
                            (#:LIMIT-905 (ARRAY-TOTAL-SIZE #:OUT-908))
                            (#:INDEX-904 -1)
                            (#:INDEX-903 (- -1 1))
                            #:ITEMS-915
                            #:ITEMS-900
                            (#:LASTCONS-897 (LIST NIL))
                            #:LST-898)
            (DECLARE (TYPE SERIES::VECTOR-INDEX+ #:LIMIT-905)
                     (TYPE SERIES::-VECTOR-INDEX+ #:INDEX-904)
                     (TYPE INTEGER #:INDEX-903)
                     (TYPE CONS #:LASTCONS-897)
                     (TYPE LIST #:LST-898))
            (SETQ #:LST-898 #:LASTCONS-897)
            (TAGBODY
             #:LL-918
              (INCF #:INDEX-904)
              (LOCALLY
               (DECLARE (TYPE SERIES::VECTOR-INDEX+ #:INDEX-904))
               (IF (= #:INDEX-904 #:LIMIT-905)
                   (GO SERIES::END))
               (SETQ #:ELEMENTS-907
                       (ROW-MAJOR-AREF #:OUT-908
                                       (THE SERIES::VECTOR-INDEX
                                            #:INDEX-904))))
              (INCF #:INDEX-903)
              (IF (MINUSP #:INDEX-903)
                  (GO #:LL-918))
              (SETQ #:ITEMS-915
                      ((LAMBDA (ION-SOUGHT)
                         (CVI-INSERTION-RECORD/GET-VALUE-FOR-ION
                          (SVREF #:OUT-909 ION-SOUGHT) ION-SOUGHT))
                       #:ELEMENTS-907))
              (SETQ #:ITEMS-900
                      ((LAMBDA (PRODUCT)
                         (LIST (DISTRIBUTED-OBJECT-IDENTIFIER PRODUCT)
                               (NAMED-OBJECT/NAME PRODUCT)
                               (DESCRIBED-OBJECT/DESCRIPTION PRODUCT)))
                       #:ITEMS-915))
              (SETQ #:LASTCONS-897
                      (SETF (CDR #:LASTCONS-897) (CONS #:ITEMS-900 NIL)))
              (GO #:LL-918)
             SERIES::END)
            (CDR #:LST-898)))))))
To be continued...

Sunday, October 13, 2013

Next step

We build a simple project/branch/version hierarchical abstraction, and we implement it on top of the core layer.

  • a project is a collection of branches that represent alternative ways the state evolves. Every project has a main branch.
  • A branch is a collection of versions that represent the evolution of the branch state over time.
  • A version is a collection of change-sets with some trappings.
  • A change-set is our unit of change.
When we do a read/write transaction, we'll add a new change set to the repository. Read-only transactions will specify a version for reading the core objects. Or two versions for generating diffs. Or maybe even three versions for a three-way merge.

Under this development model, developers will sync their workspace with the most chronologically recent version of the main branch. Their new change sets will be visible only to them, unless (until, we hope) they are "promoted" into the development branch.

We wanted to encourage frequent incremental check-ins. We wanted hook this up to Emacs autosave. Frequent check-ins of much smaller diffs breaks even.

Once you get to the point where your code passes all the tests, you promote it into the branch so everyone can use it. Every now and then the admins will "fork a relase", do some "cherrypicks" and make a release branch in the project.

Once you get to the point where your code passes all the tests, you promote it into the branch so everyone can use it. Every now and then the admins will "fork a relase", do some "cherrypicks" and make a release branch in the project.

The core code does all the heavy lifting, this is window dressing. The style: minimalist. Skin the change model.

This is turning into the code equivalent of a power-point lecture. You can read it yourself, I'm not going to walk you through it.

So that's it? Is that all there is? Many source code or change management systems do something more or less similar to this with files and directory trees instead of CLOS objects. Cute hack, but why all the fuss? Hasn't this been done?

If it seems obvious to you how we'd implement some of the usual source code control operations, good. We don't have to train you.

I wont insult your intelligence explaining in detail how to do a rollback by removing a change-set from a version. Use SETF, of course.

I hope nobody notices the 300 lb chicken sitting next to that shady-looking egg in the corner.

And something for philosopher/implementors to worry about: If I demote the change-set that instantiates an object, where does the object go? Is it possible create a reference to an uninstantiated object? What happens if you try to follow it?


Monday, September 23, 2013

Putting it all together

A ChangeSafe repository is implemented as a transient wrapper object around a persistent object. The wrapper object caches some immutable metadata. You'd hate to have to run a transaction in the middle of the print function in order to print the repository name. The wrapper also contains metadata associated with the backing store that the repository is using.

Oh yeah, there is something interesting going on in the wrapper, We keep track of the ongoing transactions by mapping the user-id to a list of transaction contexts (every nested transaction by a user "pushes" a new txn-context).

Anyway, it's the repository-persistent-information that has the interesting stuff:
(defclass repository-persistent-information ()
  (
   (type  :initarg :type
          :initform (error "Required initarg :type omitted.")
          :reader repository-persistent-information/type
          :type repository-type)

   ;; Database parent is the root extent for an extent database, or the master database for a satellite.
   ;; Root extents or master repositories won't have a parent
   (parent-repository :initarg :parent-repository
                      :initform nil
                      :reader repository-persistent-information/parent
                      :type (optional relative-pathname))

   ;; Satellite repositories is non-nil only for master repositories.
   (satellite-repositories :initform nil
                           :initarg :satellite-repositories
                           :accessor repository-persistent-information/satellite-repositories)

   (canonical-class-dictionary :initform (make-instance 'canonical-class-dictionary)
                               :reader repository-persistent-information/canonical-class-dictionary)
   (cid-master-table :initform (make-instance 'cid-master-table)
                     :reader repository-persistent-information/cid-master-table)
   (root-mapper  :initarg :root-mapper
                 :initform (error "Required initarg :root-mapper omitted.")
                 :reader repository-persistent-information/root-mapper)
   (cid-mapper   :initarg :cid-mapper
                 :initform (error "Required initarg :cid-mapper omitted.")
                 :reader repository-persistent-information/cid-mapper)
   (local-mapper :initarg :local-mapper
                 :initform (error "Required initarg :local-mapper omitted.")
                 :reader repository-persistent-information/local-mapper)
   (locally-named-roots :initarg :locally-named-roots
                        :initform (error "Required initarg :locally-named-roots omitted.")
                        :reader repository-persistent-information/locally-named-roots)
   (anonymous-user :initarg :anonymous-user
                   :initform nil
                   :reader repository-persistent-information/anonymous-user))
  (:default-initargs :node-id +object-id-of-root+)  ;; force this to always be the root object.
  (:documentation "Persistent information describing a repositiory, and stored in the repository")
  (:metaclass persistent-standard-class)
  (:schema-version 0))

The repository-type is just a keyword:
(defconstant *repository-types* '(:basic :master :satellite :transport :extent :workspace)
  "Type of repositories.  Note that all but :EXTENT types of repositories
   serve as root extents for databases which have multiple extents, and therefore imply extent.")
The parent-repository and the
satellite-repositories are for juggling multiple "satellite" repositories for holding particular subsets of changes (for, say, geographically distributing the servers for different product groups).

The canonical-class-dictionary is an intern table for objects.

The cid-master-table is (logically) the collection of audit-records. A CID (after change id) is represented as an integer index into the master table.

The root-mapper is a mapping table from distributed identifiers to objects.

The cid-mapper is a mapping table from the distributed identifier that represents the CID to the integer index of that CID in the master table. It is a subtable of the local mapper.

The local-mapper is submapping of the root-mapping, but a supermapping of the cid-mapper.

The locally-named-rootsis a hash table for storing the root objects of the repository.

Finally, there is the anonymous-user slot, which is the user id assigned for bootstrapping.

And all this crap is in support of this procedure:
(defun call-with-repository-transaction (&key repository 
                                              transaction-type
                                              user-id-specifier
                                              reason

                                              ;; generally, you only want to specify these two
                                              meta-cid-set-specifier
                                              cid-set-specifier 
                                              ;; but if you are doing a comparison,
                                              ;; specify these as well
                                              aux-meta-cid-set-specifier
                                              aux-cid-set-specifier 

                                              receiver)
  (check-type user-id-specifier (or keyword distributed-identifier))
  (check-type transaction-type repository-transaction-type)
  (check-type reason string)
  ;; implementation omitted for brevity, ha ha
  )

Naturally we need to specify the :repository, the :transaction-type is one of
(defconstant *repository-transaction-types* '(:read-only
                                              :read-write
                                              :read-cons
                                              :read-only-compare
                                              :read-cons-nonversioned
                                              :read-only-nonversioned
                                              :read-write-nonversioned))
The :user-id-specifier should be a distributed-identifier of a core-user instance.

The :reason is a human readable string describing the transaction.

The :meta-cid-set-specifier is mumble, mumble... just a sec...

The :cid-set-specifier is how you specify which CIDs will form the basis view for the transaction. We allow this to be a procedure that returns a cid-set object, and we will call this procedure as we are setting up the transaction and use the :meta-cid-set-specifier to specify the CIDs to form the versioned view the procedure will see.

The :meta-cid-set-specifier can be the symbol :latest-metaversion, a timestamp, or a cid-set. :latest-metaversion means to use all CIDS while resolving the :cid-set-specifier, a timestamp is useful for rewinding the world, and the main use for using an explicit cid-set is for synchronizing views between master and satellite repositories.

The :receiver is invoked within the dynamic extent of a transaction. It is passed a core-txn object that contains the metadata associated with the transaction.

The ChangeSafe core components are the repository that holds changes and associated meta-information, and simple versioned CLOS objects. It is only useful as a foundation layer, though.

Next up, another level of abstraction...

Tuesday, September 10, 2013

Mix in a little CLOS

The obvious idea here is to make CLOS objects where the slots are implemented as versioned value objects. Then we override slot-value-using-class. You might consider this a stupid CLOS trick. You could just as well establish an abstraction layer through other means, but the point is to create an understandable abstraction model. It is easy to understand what is going to happen if we override slot-value-using-class.

We use the MOP to create a new kind of slot so that we can compose values on the fly when the programmer calls slot-value-using-class. We also override (setf slot-value-using-class) so that it calls the "diff" computing code. Again, the point is to make it easy to understand what is happening.

The end result is the versioned-standard-object. An instance of a versioned-standard-object (or any of it's inheritors, naturally), has all its slots implemented versioned value objects. The programmer should specify versioned-standard-class as the metaclass in the class definition.

(defclass test-class ()
  ((nvi-slot  :version-technique :nonlogged
              :accessor test-class/nvi-slot)
   (lnvi-slot :version-technique :logged
              :accessor test-class/lnvi-slot)
   (svi-slot  :version-technique :scalar
              :accessor test-class/svi-slot))
  (:metaclass versioned-standard-class)
  (:schema-version 0))
In this example, the test class has some of the different kinds of versioned values that are named by the version technique. A :nonlogged slot is the "escape mechanism". It's a fancy name for "Just turn off the versioning, and use this here value."

A :logged slot is less drastic. There's no versioning behavior, it's just a persistent slot, but we'll keep a list of the transactions that modified it.

Finally, the :scalar version technique is one where the last chronologically participating change has the value.

A versioned slot using the :composite-sequence uses a set of diffs to represent the versioned slot value, and these are composed as described in an earlier post.
(defclass test-cvi-class ()
  ((cvi-slot-a :version-technique :composite-sequence
               :accessor test-cvi-class/cvi-slot-a)
   (cvi-slot-b :version-technique :composite-sequence)
   (cvi-slot-c :version-technique :composite-sequence :initform nil))
  (:metaclass versioned-standard-class)
  (:schema-version 0))

Once this is working, we have what we need to bootstrap the rest of the versioned storage.