Description
Uses a collapsed (marginalizes over a
, b
,
amp
numerically) gibbs sampler which samples from the
conditionals with slice sampling. Also re-parameterizes density with
(sqrt(gwidth^2+lwidth^2), gwidth-lwidth)
instead of
(gwidth, lwitdh)
if
min(gwidth/lwidth, lwidth/gwidth) > 0.05
(otherwise
transformation becomes unstable). Finds global maxima in posterior using
population based golden section search and uses it as the starting point
for the slice sampler. About x10 times faster than the previous
implementation. Using Zell Prior could be another x10.
Data
library('plotly')
Loading required package: ggplot2
Attaching package: 'plotly'
The following object is masked from 'package:ggplot2':
last_plot
The following object is masked from 'package:stats':
filter
The following object is masked from 'package:graphics':
layout
library(spectralmc)
#toluene data
cdata <- get_toluene_measured_data()
x <- cdata$x
y <- cdata$y
Algorithm
peak_spawner <- function() {
params <- list(
amp = rnorm(1, 1, 1), # doesnt matter
lwidth = rnorm(1, 0.1, 0.001), # matters!!!
gwidth = rnorm(1, 5, 0.001), # matters!!!
pos = runif(1, 1, 1), # doesnt matter
a = 0, # doesnt matter
b = 0 # doesnt matter
)
return(params)
}
fit_kp <- 30
max_a <- 1e-2
lower <- c(-Inf, -max_a, 0)
upper <- c(Inf, max_a, Inf)
noise_sigma <- 100 # estimated by eye; intentionally to big such that tiny peaks are overnoised
signal_model <- voigt.model
res <- chains.rjmcmc_like_collapsed_slicer_gibbs(
x,
y,
signal_model,
peak_spawner,
lower,
upper,
noise_sigma,
fit_kp = fit_kp,
add_peak_every = 10,
iter = 20000,
print_bar = TRUE,
half_steps = TRUE,
decorr_trafo = TRUE,
save_loc = save_loc,
checkpoint_every = 50,
)
Results
Fit
All
plt.fit.chain.interactive(x, y, res$samples, signal_model, step_width = 200)
Burn-in
plt.fit.chain.interactive(x, y, res$samples[seq(1, 500)], signal_model, step_width = 5)
Traceplots
plt.traceplot(res$samples, "pos")
plt.traceplot(res$samples, "amp")
plt.traceplot(res$samples, "lwidth")
plt.traceplot(res$samples, "gwidth")
plt.traceplot(res$samples, "a")
plt.traceplot(res$samples, "b")
Posterior plot
plt.posterior(res$post_vals, steps = NULL)
Fit vs True
plt.samples_vs_true(samples, 'gwidth')
plt.samples_vs_true(samples, 'lwidth')
Todo
- Critical Bug amp, a, b are not sampled instead they
are set to roughly their means, easy to fix.
- Not a bug: Uses a safeguard which prevents messing
up the fit if the slice sampler fails, this however destroys the gibbs
sampler property and causes rejection plateaus.
---
title: "Collapsed Gibbs Sampler with GSS Slice Sampling"
author: Jan Meißner^[RWTH Aachen, philipp.meissner@rwth-aachen.de]
output:
  html_notebook:
    toc: true
    toc_depth: 5
    toc_float:
      collapsed: false
    number_sections: true
---

<script type="text/javascript">
  function jump_header(key){
    $(':header:contains('+key+')')[0].scrollIntoView();
  }

  function jump_marked(key){
    $('#' + key)[0].scrollIntoView();
  }

  function plotZoom(el){
      if(document.fullscreen) {
        document.exitFullscreen()
      } else {
        $(el).closest('.js-plotly-plot')[0].requestFullscreen();
      }
  }

  $( document ).ready(function() {
    $(".modebar-btn.plotlyjsicon.modebar-btn--logo").replaceWith(
    `
    <a rel="tooltip" onclick=plotZoom(this) class="modebar-btn fullscreen-btn" data-title="Full Screen" data-attr="zoom" data-val="auto" data-toggle="false" data-gravity="n" >
      <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon" height="1em" width="1em">
        <!--! Font Awesome Pro 6.1.1 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license (Commercial License) Copyright 2022 Fonticons, Inc. -->
        <path d="M128 32H32C14.31 32 0 46.31 0 64v96c0 17.69 14.31 32 32 32s32-14.31 32-32V96h64c17.69 0 32-14.31 32-32S145.7 32 128 32zM416 32h-96c-17.69 0-32 14.31-32 32s14.31 32 32 32h64v64c0 17.69 14.31 32 32 32s32-14.31 32-32V64C448 46.31 433.7 32 416 32zM128 416H64v-64c0-17.69-14.31-32-32-32s-32 14.31-32 32v96c0 17.69 14.31 32 32 32h96c17.69 0 32-14.31 32-32S145.7 416 128 416zM416 320c-17.69 0-32 14.31-32 32v64h-64c-17.69 0-32 14.31-32 32s14.31 32 32 32h96c17.69 0 32-14.31 32-32v-96C448 334.3 433.7 320 416 320z"/>
      </svg>
    </a>
    `);
  });
</script>


# Description
Uses a collapsed (marginalizes over `a`, `b`, `amp` numerically)
gibbs sampler which samples from the conditionals with slice sampling. Also re-parameterizes density with `(sqrt(gwidth^2+lwidth^2), gwidth-lwidth)` instead of `(gwidth, lwitdh)` if `min(gwidth/lwidth, lwidth/gwidth) > 0.05` (otherwise transformation becomes unstable). Finds global maxima in posterior using population based golden section search and uses it as the starting point for the slice sampler. About x10 times faster than the previous implementation. Using Zell Prior could be another x10.

# Data
```{r echo = FALSE}
#setwd('C:/Users/Jan/Desktop/ISW_SpectraBayes/ISW_SpectraBayes/bayes_mcmc')

get_toluene_measured_data <- function(){
  path <- "C:/Users/Jan/Desktop/ISW_SpectraBayes/ISW_SpectraBayes/bayes_mcmc/notebooks/golden_gibbs_tolune/toluene_measured.csv"
  toluene_measured <- read.csv(path, header=FALSE, sep=";")
  x <- as.vector(toluene_measured[,1])
  ytrue <- as.vector(toluene_measured[,2])
  return(list(x=x,y=ytrue))
}
```
```{r}
library('plotly')
library(spectralmc)

#toluene data
cdata <- get_toluene_measured_data()
x <- cdata$x
y <- cdata$y
```
# Algorithm
```{r eval = FALSE}
peak_spawner <- function() {
  params <- list(
          amp = rnorm(1, 1, 1), # doesnt matter
          lwidth = rnorm(1, 0.1, 0.001), # matters!!!
          gwidth = rnorm(1, 5, 0.001), # matters!!!
          pos = runif(1, 1, 1), # doesnt matter
          a = 0, # doesnt matter
          b = 0 # doesnt matter
  )
  return(params)
}

fit_kp <- 30
max_a <- 1e-2
lower <- c(-Inf, -max_a, 0)
upper <- c(Inf, max_a, Inf)
noise_sigma <- 100 # estimated by eye; intentionally to big such that tiny peaks are overnoised
signal_model <- voigt.model

res <- chains.rjmcmc_like_collapsed_slicer_gibbs(
        x,
        y,
        signal_model,
        peak_spawner,
        lower,
        upper,
        noise_sigma,
        fit_kp = fit_kp,
        add_peak_every = 10,
        iter = 20000,
        print_bar = TRUE,
        half_steps = TRUE,
        decorr_trafo = TRUE,
        save_loc = save_loc,
        checkpoint_every = 50,
)
```
```{r  echo=FALSE}
setwd('C:/Users/Jan/Desktop/ISW_SpectraBayes/ISW_SpectraBayes/bayes_mcmc')
res <- readRDS("notebooks/golden_gibbs_tolune/res.RData")
fit_kp <- 30
max_a <- 1e-2
lower <- c(-Inf, -max_a, 0)
upper <- c(Inf, max_a, Inf)
noise_sigma <- 100 # estimated by eye; intentionally to big such that tiny peaks are overnoised
signal_model <- voigt.model
```
# Results

## Fit

### All
```{r}
plt.fit.chain.interactive(x, y, res$samples, signal_model, step_width = 200)
```

### Burn-in
```{r}
plt.fit.chain.interactive(x, y, res$samples[seq(1, 500)], signal_model, step_width = 5)
```
### Traceplots
```{r}
plt.traceplot(res$samples, "pos")
plt.traceplot(res$samples, "amp")
plt.traceplot(res$samples, "lwidth")
plt.traceplot(res$samples, "gwidth")
plt.traceplot(res$samples, "a")
plt.traceplot(res$samples, "b")
```
### Posterior plot
```{r}
plt.posterior(res$post_vals, steps = NULL)
```

```{r  echo=FALSE}

plt.samples_vs_true <- function (samples, othername = 'gwidth') {
  flatsg_data <- spectralmc::utils.flatten_samples(samples)
  n <- length(samples[[1]]$pos)

  all_pos <- c()
  for (i in seq(n)){
    all_pos <- c(all_pos, flatsg_data$flat_samples[paste0('pos[',i,']')])
  }
  all_pos <- Reduce(c,all_pos)

  all_other <- c()
  for (i in seq(n)){
    all_other <- c(all_other, flatsg_data$flat_samples[paste0(othername,'[',i,']')])
  }
  all_other <- Reduce(c,all_other)

  peaks_true <- peaks_true[c(1,3,4), seq_len(ncol(peaks_true))]
  true_pos <- unlist(peaks_true[1,seq_len(ncol(peaks_true))])
  if (othername == 'gwidth'){
    true_other <- unlist(peaks_true[2,seq_len(ncol(peaks_true))])
  } else {
    true_other <- unlist(peaks_true[3,seq_len(ncol(peaks_true))])
  }

  fig <- plot_ly(x = all_pos, y = all_other, type = 'scatter', name = 'Samples',
                 mode = 'markers', marker = list(size = 5, opacity=0.5))
  fig <- fig %>% plotly::add_markers(x = true_pos, y = true_other, type = 'scatter', name = 'True Params', mode = 'markers', marker = list(color = 'red', symbol = 'x', opacity=1))
  fig <- fig %>% plotly::layout(title = "Peak Parameters: Samples vs. True",
                                xaxis = list(title = "Position", zeroline = FALSE),
                                yaxis = list(title = othername, exponentformat = 'e'))
  fig
}

library('plotly')

setwd('C:/Users/Jan/Desktop/ISW_SpectraBayes/ISW_SpectraBayes/bayes_mcmc')
res <- readRDS("notebooks/golden_gibbs_tolune/res.RData")
path <- "C:/Users/Jan/Desktop/ISW_SpectraBayes/ISW_SpectraBayes/bayes_mcmc/notebooks/golden_gibbs_tolune/toluene_peaks.csv"

peaks_true <- read.csv(path, header=FALSE, sep=",")

samples <- res$samples[1500:length(res$samples)]
samples <- samples[seq(1, length(samples), length.out = 50)]
```
### Fit vs True
```{r}
plt.samples_vs_true(samples, 'gwidth')
plt.samples_vs_true(samples, 'lwidth')
```

# Todo
- **Critical Bug** amp, a, b are not sampled instead they are set to roughly their means (not really either); Easy to fix.
- **Not a bug**: Uses a safeguard which prevents messing up the fit if the slice sampler fails, this however destroys the gibbs sampler property and causes rejection plateaus.
- **Done**: nu, be transformation becomes bad (why?) if either lwidth or gwidth is really small compared to the other, should switch to nu,be only if `min(lwidth/gwidth, gwidth/lwidth) > 0.05`
