Introducing term enumeration.

This commit introduces `leon.purescala.DataGen`, an object that contains two static methods; `generate` and `findModels`. `generate` is a term generator based on composition of streams. It can generate hundreds of instances of recursive types in less than a tenth of a second. `findModels` leverages `generate` to perform (small-)model finding. Pass it an expression and an evaluator (preferably `CodeGenEvaluator`) and watch it find models to your formula. The commit also includes a small regression test suite. (Note that although we have currently no use for this, the `generate` function can in principle be used to generate arbitrary terms. E.g. you could pass variables as fixed generators for certain types.)

Introducing term enumeration.
8dee703b · Philippe Suter · 63477d6c · 8dee703b · 8dee703b
Commit 8dee703b authored 12 years ago by Philippe Suter
--- a/src/main/scala/leon/purescala/DataGen.scala
+++ b/src/main/scala/leon/purescala/DataGen.scala
+package leon
+package purescala
+
+import purescala.Common._
+import purescala.Trees._
+import purescala.TreeOps._
+import purescala.TypeTrees._
+import purescala.Definitions._
+
+import evaluators._
+
+import scala.collection.mutable.{Map=>MutableMap}
+
+/** Utility functions to generate values of a given type.
+  * In fact, it could be used to generate *terms* of a given type,
+  * e.g. by passing trees representing variables for the "bounds". */
+object DataGen {
+  private val defaultBounds : Map[TypeTree,Seq[Expr]] = Map(
+    Int32Type -> Seq(IntLiteral(0), IntLiteral(1), IntLiteral(-1), IntLiteral(2))
+  )
+
+  private val boolStream : Stream[Expr] =
+    Stream.cons(BooleanLiteral(true), Stream.cons(BooleanLiteral(false), Stream.empty))
+
+  private def intStream : Stream[Expr] = Stream.cons(IntLiteral(0), intStream0(1))
+  private def intStream0(n : Int) : Stream[Expr] = Stream.cons(IntLiteral(n), intStream0(if(n > 0) -n else -(n-1)))
+
+  private def natStream : Stream[Expr] = natStream0(0)
+  private def natStream0(n : Int) : Stream[Expr] = Stream.cons(IntLiteral(n), natStream0(n+1))
+
+  // TODO can we cache something, maybe? It seems like every type should correspond to a unique stream?
+  // We should make sure the cache depends on the bounds (i.e. is not reused for different bounds.)
+  def generate(tpe : TypeTree, bounds : Map[TypeTree,Seq[Expr]] = defaultBounds) : Stream[Expr] = bounds.get(tpe).map(_.toStream).getOrElse {
+    tpe match {
+      case BooleanType =>
+        boolStream
+
+      case Int32Type =>
+        intStream
+
+      case TupleType(bses) =>
+        naryProduct(bses.map(generate(_, bounds))).map(Tuple)
+
+      case act : AbstractClassType =>
+        // We prioritize base cases among the children.
+        // Otherwise we run the risk of infinite recursion when
+        // generating lists.
+        val ccChildren = act.classDef.knownChildren.collect(_ match {
+            case ccd : CaseClassDef => ccd
+          }
+        )
+        val (leafs,conss) = ccChildren.partition(_.fields.size == 0)
+
+        // The stream for leafs...
+        val leafsStream = leafs.toStream.flatMap { ccd =>
+          generate(classDefToClassType(ccd), bounds)
+        }
+
+        // ...to which we append the streams for constructors.
+        leafsStream.append(interleave(conss.map { ccd =>
+          generate(classDefToClassType(ccd), bounds)
+        }))
+
+      case cct : CaseClassType =>
+        val ccd = cct.classDef
+        if(ccd.fields.isEmpty) {
+          Stream.cons(CaseClass(ccd, Nil), Stream.empty)
+        } else {
+          val fieldTypes = ccd.fields.map(_.tpe)
+          val subStream = naryProduct(fieldTypes.map(generate(_, bounds)))
+          subStream.map(prod => CaseClass(ccd, prod))
+        }
+
+      case _ => Stream.empty
+    }
+  }
+
+  def findModels(expr : Expr, evaluator : Evaluator, maxModels : Int, maxTries : Int, bounds : Map[TypeTree,Seq[Expr]] = defaultBounds) : Stream[Map[Identifier,Expr]] = {
+    val freeVars : Seq[Identifier] = variablesOf(expr).toSeq
+
+    evaluator.compile(expr, freeVars).map { evalFun =>
+      val sat = EvaluationResults.Successful(BooleanLiteral(true))
+
+      naryProduct(freeVars.map(id => generate(id.getType, bounds)))
+        .take(maxTries)
+        .filter(s => evalFun(s) == sat)
+        .take(maxModels)
+        .map(s => freeVars.zip(s).toMap)
+
+    } getOrElse {
+      Stream.empty
+    }
+  }
+
+  // The following are stream utilities.
+
+  // Takes a series of streams and merges them, round-robin style.
+  private def interleave[T](streams : Seq[Stream[T]]) : Stream[T] = int0(streams)
+  private def int0[T](streams : Seq[Stream[T]]) : Stream[T] = {
+    var ss = streams
+    while(!ss.isEmpty && ss.head.isEmpty) {
+      ss = ss.tail
+    }
+    if(ss.isEmpty) return Stream.empty
+    if(ss.size == 1) return ss(0)
+    // TODO: This circular-shifts the list. I'd be interested in a constant time
+    // operation. Perhaps simply by choosing the right data-structure?
+    Stream.cons(ss.head.head, interleave(ss.tail :+ ss.head.tail))
+  }
+
+  // Takes a series of streams and enumerates their cartesian product.
+  private def naryProduct[T](streams : Seq[Stream[T]]) : Stream[List[T]] = {
+    val dimensions = streams.size
+
+    if(dimensions == 0)
+      return Stream.cons(Nil, Stream.empty)
+
+    if(streams.exists(_.isEmpty))
+      return Stream.empty
+
+    val indices = if(streams.forall(_.hasDefiniteSize)) {
+      val max = streams.map(_.size).max
+      diagCount(dimensions).take(max)
+    } else {
+      diagCount(dimensions)
+    }
+
+    var allReached : Boolean = false
+    val bounds : Array[Int] = Array.fill(dimensions)(Int.MaxValue)
+
+    indices.takeWhile(_ => !allReached).flatMap { indexList =>
+      var d = 0
+      var continue = true
+      var is = indexList
+      var ss = streams.toList
+
+      if(indexList.sum >= bounds.max) {
+        allReached = true
+      }
+
+      var tuple : List[T] = Nil
+
+      while(continue && d < dimensions) {
+        var i = is.head
+        if(i > bounds(d)) {
+          continue = false
+        } else try {
+          // TODO can we speed up by caching the random access into
+          // the stream in an indexedSeq? After all, `i` increases
+          // slowly.
+          tuple = (ss.head)(i) :: tuple
+          is = is.tail
+          ss = ss.tail
+          d += 1
+        } catch {
+          case e : IndexOutOfBoundsException =>
+            bounds(d) = i - 1
+            continue = false
+        }
+      }
+      if(continue) Some(tuple.reverse) else None
+    }
+  }
+
+  // Utility function for n-way cartesian product.
+  // Counts over indices in `dim` dimensions in a fair, diagonal, way.
+  private def diagCount(dim : Int) : Stream[List[Int]] = diag0(dim, 0)
+  private def diag0(dim : Int, nextSum : Int) : Stream[List[Int]] = summingTo(nextSum, dim).append(diag0(dim, nextSum + 1))
+  // All tuples of dimension `n` whose sum is `sum`.
+  private def summingTo(sum : Int, n : Int) : Stream[List[Int]] = {
+    // assert(sum >= 0)
+    if(sum < 0) {
+      Stream.empty
+    } else if(n == 1) {
+      Stream.cons(sum :: Nil, Stream.empty) 
+    } else {
+      (0 to sum).toStream.flatMap(fst => summingTo(sum - fst, n - 1).map(fst :: _))
+    }
+  }
+}
--- a/src/test/scala/leon/test/purescala/DataGen.scala
+++ b/src/test/scala/leon/test/purescala/DataGen.scala
+package leon.test
+package purescala
+
+import leon._
+import leon.plugin.{TemporaryInputPhase,ExtractionPhase}
+
+import leon.purescala.Common._
+import leon.purescala.Trees._
+import leon.purescala.Definitions._
+import leon.purescala.TypeTrees._
+import leon.purescala.DataGen._
+
+import leon.evaluators._
+
+import org.scalatest.FunSuite
+
+class DataGen extends FunSuite {
+  private implicit lazy val leonContext = LeonContext(
+    settings = Settings(
+      synthesis = false,
+      xlang     = false,
+      verify    = false
+    ),
+    files = List(),
+    reporter = new SilentReporter
+  )
+
+  private def parseString(str : String) : Program = {
+    val pipeline = TemporaryInputPhase andThen ExtractionPhase
+
+    val errorsBefore   = leonContext.reporter.errorCount
+    val warningsBefore = leonContext.reporter.warningCount
+
+    val program = pipeline.run(leonContext)((str, Nil))
+  
+    assert(leonContext.reporter.errorCount   === errorsBefore)
+    assert(leonContext.reporter.warningCount === warningsBefore)
+
+    program
+  }
+
+  test("Booleans") {
+    generate(BooleanType).toSet.size === 2
+    generate(TupleType(Seq(BooleanType,BooleanType))).toSet.size === 4
+  }
+
+  test("Lists") {
+    val p = """|object Program {
+               |  sealed abstract class List
+               |  case class Cons(head : Int, tail : List) extends List
+               |  case object Nil extends List
+               |
+               |  def size(lst : List) : Int = lst match {
+               |    case Cons(_, xs) => 1 + size(xs)
+               |    case Nil => 0
+               |  }
+               |
+               |  def isSorted(lst : List) : Boolean = lst match {
+               |    case Nil => true
+               |    case Cons(_, Nil) => true
+               |    case Cons(x, xs @ Cons(y, ys)) => x < y && isSorted(xs)
+               |  }
+               |
+               |  def content(lst : List) : Set[Int] = lst match {
+               |    case Nil => Set.empty[Int]
+               |    case Cons(x, xs) => Set(x) ++ content(xs)
+               |  }
+               |
+               |  def insertSpec(elem : Int, list : List, res : List) : Boolean = {
+               |    isSorted(res) && content(res) == (content(list) ++ Set(elem))
+               |  }
+               |}""".stripMargin
+
+    val prog = parseString(p)
+
+    val listType : TypeTree = classDefToClassType(prog.mainObject.classHierarchyRoots.head)
+    val sizeDef    : FunDef = prog.definedFunctions.find(_.id.name == "size").get
+    val sortedDef  : FunDef = prog.definedFunctions.find(_.id.name == "isSorted").get
+    val contentDef : FunDef = prog.definedFunctions.find(_.id.name == "content").get
+    val insSpecDef : FunDef = prog.definedFunctions.find(_.id.name == "insertSpec").get
+
+    val consDef : CaseClassDef = prog.mainObject.caseClassDef("Cons")
+
+    generate(listType).take(100).toSet.size === 100
+
+    val evaluator = new CodeGenEvaluator(leonContext, prog)
+
+    val a = Variable(FreshIdentifier("a").setType(Int32Type))
+    val b = Variable(FreshIdentifier("b").setType(Int32Type))
+    val x = Variable(FreshIdentifier("x").setType(listType))
+    val y = Variable(FreshIdentifier("y").setType(listType))
+
+    val sizeX    = FunctionInvocation(sizeDef, Seq(x))
+    val contentX = FunctionInvocation(contentDef, Seq(x))
+    val contentY = FunctionInvocation(contentDef, Seq(y))
+    val sortedX  = FunctionInvocation(sortedDef, Seq(x))
+    val sortedY  = FunctionInvocation(sortedDef, Seq(y))
+
+    assert(findModels(
+      GreaterThan(sizeX, IntLiteral(0)),
+      evaluator,
+      10,
+      500
+    ).size === 10)
+
+    assert(findModels(
+      And(Equals(contentX, contentY), sortedY),
+      evaluator,
+      10,
+      500
+    ).size === 10)
+
+    assert(findModels(
+      And(Seq(Equals(contentX, contentY), sortedX, sortedY, Not(Equals(x, y)))),
+      evaluator,
+      1,
+      500
+    ).isEmpty, "There should be no models for this problem")
+
+    assert(findModels(
+      And(Seq(
+        LessThan(a, b),
+        FunctionInvocation(sortedDef, Seq(CaseClass(consDef, Seq(a, x)))),
+        FunctionInvocation(insSpecDef, Seq(b, x, y))
+      )),
+      evaluator,
+      10,
+      500
+    ).size >= 5, "There should be at least 5 models for this problem.")
+  }
+}