Gaaah, fixed a bug. A minor one, but still a bug.

For our test, we shouldn't have \psi(u,...); it should be \psi(1,...), because u is not what provides the randomness. It's the minibatch variation that provides the randomness. I re-ran the figures that were affected. Fortunately, for that problem, the results are similar.
BIDData · Mar 21, 2017 · 853f4cc · 853f4cc
1 parent 1595e69
commit 853f4cc
Show file tree

Hide file tree

Showing 3 changed files with 8 additions and 8 deletions.
diff --git a/scripts/analyze_mhtest_logreg.ssc b/scripts/analyze_mhtest_logreg.ssc
@@ -17,7 +17,7 @@
  */ 
 
 // Double-check these.
-val total = 5000
+val total = 3000
 val datadir = "mhtest_analysis/data_mhtest/"
 
 // On stout OR bitter:

diff --git a/scripts/test_mh.ssc b/scripts/test_mh.ssc
@@ -24,7 +24,7 @@
  * random-ness!!
  */
 
-val seed = 4
+val seed = 69
 println("Our seed: " +seed)
 setseed(seed)
 
@@ -94,7 +94,7 @@ mopts.verboseMH = false
 mopts.collectData = true            // CAUTION! May save a lot!
 mopts.collectDataDir = "mhtest_analysis/data_mhtest/"   // Clear this directory
 mopts.exitTheta = true              // breaks out of program ...
-mopts.exitThetaAmount = 5000        // means we collect this many thetas
+mopts.exitThetaAmount = 3000        // means we collect this many thetas
 mopts.initThetaHere = true          // break symmetry at the start
 mopts.burnIn = -1                   // change stuff after this sample (set to -1 to ignore)
 mopts.tempAfterBurnin = 1           // after burn-in, change temp to this

diff --git a/src/main/scala/BIDMach/updaters/MHTest.scala b/src/main/scala/BIDMach/updaters/MHTest.scala
@@ -21,7 +21,7 @@ import edu.berkeley.bid.CUMACH
  * 
  * - In particular, we need \Delta* and Var(\Delta*). Since \Delta* is of the form:
  * 
- * 		\Delta* = - log(u) + (1/b)\sum_{i=1}^b Y_i 
+ * 		\Delta* = -\psi + (1/b)\sum_{i=1}^b Y_i 
  * 
  * for IID random variables Y_i, which represent a log of a probability ratio,
  * it suffices to compute the statistics as follows:
@@ -61,7 +61,7 @@ class MHTest(override val opts:MHTest.Opts = new MHTest.Options) extends Updater
   var b:Long = 0                       // Current minibatch size (also `b` in the paper).
   var N:Long = 0                       // Maximum minibatch size (i.e. all training data).
   var n:Float = 0f                     // The *number* of minibatches we are using.
-  var logu:Float = 0f                  // log u, since we assume a symmetric proposer.
+  var psi:Float = 0f                   // \psi = log (1 * prop_ratio * prior_ratio)
   var T:Int = 1                        // The temperature of the distribution.
   var t:Int = 0                        // Current number of samples of theta.
   var sumOfValues:Float = 0f           // \sum_{i=1}^b (N/T)*log(p(x_i|theta')/p(x_i|theta)).
@@ -148,7 +148,7 @@ class MHTest(override val opts:MHTest.Opts = new MHTest.Options) extends Updater
     // (Part 2) Update our \Delta* and sample variance of \Delta*.
     sumOfSquares += sum((diff)*@(diff)).v
     sumOfValues += sum(diff).v
-    val deltaStar = sumOfValues/b.v - logu
+    val deltaStar = sumOfValues/b.v - psi
     val sampleVariance = (sumOfSquares/b.v - ((sumOfValues/b.v)*(sumOfValues/b.v))) / b.v
     val numStd = deltaStar / math.sqrt(sampleVariance)
     var accept = false
@@ -221,7 +221,7 @@ class MHTest(override val opts:MHTest.Opts = new MHTest.Options) extends Updater
   def beforeEachMinibatch() {
     if (opts.verboseMH) println("\n\tNew minibatch!")
     randomWalkProposer()
-    logu = ln(rand(1,1)).v
+    psi = ln(1).v // WARNING, symmetric proposals ONLY, since \psi(1,\theta,theta')=0.
     newMinibatch = false
     b = 0
     n = 0
@@ -313,7 +313,7 @@ class MHTest(override val opts:MHTest.Opts = new MHTest.Options) extends Updater
 
   /** This is for debugging. */
   def debugPrints(sampleVariance:Float, deltaStar:Float) {
-    println("b="+b+", n="+n+", logu="+logu+ ", b-mbSize="+(b - model.datasource.opts.batchSize).toInt)
+    println("b="+b+", n="+n+", psi="+psi+ ", b-mbSize="+(b - model.datasource.opts.batchSize).toInt)
     println("mean(scores0) = "+mean(scores0,2).dv+", mean(scores1) = "+mean(scores1,2).dv)
     println("sampleVar = " +sampleVariance)
     println("delta* = " + deltaStar)