| Field | Value |
|---|---|
| Title | Generative Neural Networks |
| Description | Tools to set up, train, store, load, investigate and analyze generative neural networks. In particular, functionality for generative moment matching networks is provided. |
| Authors | Marius Hofert [aut, cre] |
| Maintainer | Marius Hofert <[email protected]> |
| License | GPL (>= 3) |
| Version | 0.0-4 |
| Built | 2025-01-24 03:56:51 UTC |
| Source | https://github.com/cran/gnn |
Catches results, warnings and errors.
```r
catch(expr)
```
| Argument | Description |
|---|---|
| expr | expression to be evaluated, typically a function call. |
This function is particularly useful in large(r) simulation studies, so that a single error does not make the whole run fail before it finishes.
A list with components:

| Component | Description |
|---|---|
| value | value of expr (or NULL in case of an error). |
| warning | warning message (see warning()), or NULL if no warning was generated. |
| error | error message (see stop()), or NULL if no error occurred. |
Marius Hofert (based on doCallWE() and tryCatch.W.E() in the R package simsalapar).
```r
library(gnn) # for being standalone
catch(log(2))
catch(log(-1))
catch(log("a"))
```
Feedforward method for objects of S3 class "gnn_GNN".
```r
## S3 method for class 'gnn_GNN'
ffGNN(x, data)
```
| Argument | Description |
|---|---|
| x | object of S3 class "gnn_GNN". |
| data | matrix of input data to be fed through x (one row per sample). |
The output matrix of x when fed with data.
Marius Hofert
```r
if(TensorFlow_available()) { # rather restrictive (due to R-Forge, winbuilder)
    library(gnn) # for being standalone
    ## Define dummy model
    d <- 2 # bivariate case
    GMMN <- FNN(c(d, 300, d)) # Feedforward NN with MMD loss (a GMMN; random weights)
    ## Feedforward
    n <- 3
    set.seed(271)
    X <- ffGNN(GMMN, data = matrix(runif(n * d), ncol = d))
    stopifnot(dim(X) == c(n, d))
}
```
Finding the numbers of the boxes that given (multivariate) points fall into (the default is similar to findInterval(), but other methods are provided, too).
```r
find_box(x, endpoints = NULL,
         method = c("per.dim", "lexicographic", "nested", "diagonal"),
         rightmost.closed = TRUE, left.open = TRUE, ...)
```
| Argument | Description |
|---|---|
| x | matrix (or vector) of points whose box numbers are to be determined. |
| endpoints | box endpoints: a numeric vector of endpoints used in every dimension, or a list with one numeric vector of endpoints per dimension; see the examples. |
| method | character string indicating the method to be used; one of "per.dim", "lexicographic", "nested" or "diagonal". |
| rightmost.closed | see findInterval(). |
| left.open | see findInterval(). |
| ... | additional arguments passed to the underlying findInterval(). |
The box numbers can be used, for example, to color points; see the examples below.

| Method | Return value |
|---|---|
| "per.dim" | (n, d)-matrix of box numbers per dimension. |
| "lexicographic", "nested", "diagonal" | n-vector with box numbers. |

Note that, as for findInterval(), 0 means 'in no box'.
Marius Hofert
```r
## Example data
n <- 1000
d <- 2
set.seed(271)
U <- matrix(runif(n * d), ncol = d) # (n, d)-matrix of data (here: in [0,1]^d)

### 1 Basic example calls ######################################################

## Define endpoints and evaluate for different methods
epts <- seq(0, 1, by = 1/5) # 5 boxes per dimension
find_box(U, endpoints = epts)[1:10,] # default "per.dim" (first 10 points only)
boxes.lexi <- find_box(U, endpoints = epts, method = "lexicographic")
boxes.nest <- find_box(U, endpoints = epts, method = "nested")
boxes.diag <- find_box(U, endpoints = epts, method = "diagonal")

## Special cases
## First row of U (n = 1)
U[1,] # ~= (0.25, 0.14)
stopifnot(find_box(U[1, 1:2], endpoints = epts) == c(2, 1))
stopifnot(find_box(U[1, 1:2], endpoints = epts, method = "lexicographic") == 1)
## Note concerning the last line: It's 1 because all other boxes are empty
stopifnot(find_box(U[1, 1:2], endpoints = epts, method = "nested") == 2)
stopifnot(find_box(U[1, 1:2], endpoints = epts, method = "diagonal") == 0)
## Single number U[1,1] (d = 1)
U[1,1] # ~= 0.25
stopifnot(find_box(U[1,1], endpoints = epts) == 2)
stopifnot(find_box(U[1,1], endpoints = epts, method = "lexicographic") == 1)
stopifnot(find_box(U[1,1], endpoints = epts, method = "nested") == 2)
stopifnot(find_box(U[1,1], endpoints = epts, method = "diagonal") == 2)

### 2 Coloring points in lexicographic ordering ################################

## Define color palette
library(RColorBrewer)
basecols <- c("#000000", brewer.pal(8, name = "Dark2")[c(8,7,3,1,5,4,2,6)])
mypal <- function(n) rep_len(basecols, length.out = n)

## Colors
ncols <- diff(range(boxes.lexi)) + 1 # maximal number of colors needed
palette(mypal(ncols)) # set palette according to maximum number of colors needed

## Boxes of equal size
boxes.lexi <- find_box(U, endpoints = epts, method = "lexicographic")
cols <- if(min(boxes.lexi) == 0) boxes.lexi + 1 else boxes.lexi
plot(U, pch = 20, xlab = expression(U[1]), ylab = expression(U[2]), col = cols)
abline(v = epts, h = epts, col = "gray50") # guides

## Boxes of different sizes and numbers
epts. <- list(seq(0.2, 1, by = 1/5), seq(1/3, 1, by = 1/3))
boxes.lexi <- find_box(U, endpoints = epts., method = "lexicographic")
cols <- if(min(boxes.lexi) == 0) boxes.lexi + 1 else boxes.lexi
plot(U, pch = 20, xlab = expression(U[1]), ylab = expression(U[2]), col = cols)
abline(v = epts.[[1]], h = epts.[[2]], col = "gray50")

### 3 Coloring points along the diagonal in a nested way #######################

## Boxes of equal size (with 'middle' part)
boxes.nest <- find_box(U, endpoints = epts, method = "nested")
cols <- if(min(boxes.nest) == 0) boxes.nest + 1 else boxes.nest # color numbers
plot(U, pch = 20, xlab = expression(U[1]), ylab = expression(U[2]), col = cols)
abline(v = epts, h = epts, col = "gray50") # guides

## Boxes of different sizes (without 'middle' part; have to be the same number of
## boxes per dimension, otherwise there is no obvious 'diagonal')
epts. <- lapply(1:d, function(j) c(0, 0.1, 0.3, 0.6, 1)) # 4 boxes per dimension
boxes.nest <- find_box(U, endpoints = epts., method = "nested")
cols <- if(min(boxes.nest) == 0) boxes.nest + 1 else boxes.nest # color numbers
plot(U, pch = 20, xlab = expression(U[1]), ylab = expression(U[2]), col = cols)
abline(v = epts.[[1]], h = epts.[[2]], col = "gray50") # guides

### 4 Coloring points along the diagonal #######################################

## Boxes of equal size
boxes.diag <- find_box(U, endpoints = epts, method = "diagonal")
cols <- if(min(boxes.diag) == 0) boxes.diag + 1 else boxes.diag # color numbers
plot(U, pch = 20, xlab = expression(U[1]), ylab = expression(U[2]), col = cols)
abline(v = epts, h = epts, col = "gray50") # guides

## Boxes of different sizes (have to be the same number of
## boxes per dimension, otherwise there is no obvious 'diagonal')
epts. <- lapply(1:d, function(j) c(0, 0.05, 0.1, 0.3, 0.6, 1))
boxes.diag <- find_box(U, endpoints = epts., method = "diagonal")
cols <- if(min(boxes.diag) == 0) boxes.diag + 1 else boxes.diag # color numbers
plot(U, pch = 20, xlab = expression(U[1]), ylab = expression(U[2]), col = cols)
abline(v = epts.[[1]], h = epts.[[2]], col = "gray50") # guides
```
Functions and methods for training generative neural networks.
```r
## S3 method for class 'gnn_GNN'
fitGNN(x, data, batch.size = nrow(data), n.epoch = 100, prior = NULL,
       max.n.prior = 5000, verbose = 2, ...)
## S3 method for class 'gnn_GNN'
fitGNNonce(x, data, batch.size = nrow(data), n.epoch = 100, prior = NULL,
           verbose = 2, file = NULL, name = NULL, ...)
## S3 method for class 'gnn_GNN'
is.trained(x)
## S3 method for class 'list'
is.trained(x)
```
| Argument | Description |
|---|---|
| x | object of S3 class "gnn_GNN" (for the "list" method of is.trained(), a list of such objects). |
| data | matrix containing the training data (one row per observation). |
| batch.size | number of samples used per stochastic gradient step. |
| n.epoch | number of epochs (one epoch equals one pass through the complete training dataset while updating the GNN's parameters through stochastic gradient steps). |
| prior | matrix of prior samples to be used for training, or NULL, in which case prior samples are generated internally (see rPrior()). |
| max.n.prior | maximum number of prior samples stored in the prior slot of the returned object. |
| verbose | integer specifying the amount of progress information printed during training. |
| file | NULL or a character string specifying a file (an .rda) in which the trained GNN is stored via save_gnn(); if the file already exists, the stored object can be reused instead of training again. |
| name | NULL or a character string giving the name under which the trained GNN is saved in file; see save_gnn(). |
| ... | additional arguments passed to the underlying (keras) fit() (for example, callbacks; see the examples of FNN()). |
fitGNN(): the trained x.

fitGNNonce(): an object of the same class as x with the trained GNN.

is.trained() for "gnn_GNN": logical indicating whether x is trained.

is.trained() for "list": logical of length length(x) indicating, for each component, whether it is trained.
Marius Hofert
FNN(), save_gnn(), load_gnn().
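The following is a small sketch added for illustration (it is not one of the package's original examples); it assumes TensorFlow is available and that the file argument of fitGNNonce() stores the trained model (via save_gnn()) so that a rerun can reuse it:

```r
if(TensorFlow_available()) { # rather restrictive (due to R-Forge, winbuilder)
    library(gnn) # for being standalone
    ## Dummy training data in [0,1]^d
    d <- 2
    set.seed(271)
    U <- matrix(runif(1000 * d), ncol = d)
    ## Define and train a small GMMN (very few epochs; for illustration only)
    fnn <- FNN(c(d, 300, d))
    stopifnot(!is.trained(fnn))
    file <- tempfile("fnn_trained", fileext = ".rda") # assumed to be written via save_gnn()
    fnn <- fitGNNonce(fnn, data = U, batch.size = 250, n.epoch = 5, file = file)
    stopifnot(is.trained(fnn))
    if(file.exists(file)) file.remove(file) # clean-up
}
```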
Constructor for a generative feedforward neural network (FNN) model, an object of S3 class "gnn_FNN".
```r
FNN(dim = c(2, 2), activation = c(rep("relu", length(dim) - 2), "sigmoid"),
    batch.norm = FALSE, dropout.rate = 0, loss.fun = "MMD", n.GPU = 0, ...)
```
| Argument | Description |
|---|---|
| dim | integer vector of length at least two giving the dimensions of the input layer, the hidden layer(s) (if any) and the output layer. |
| activation | character vector of length length(dim) - 1 specifying the activation functions of the hidden layer(s) and the output layer. |
| loss.fun | character string specifying the loss function, e.g., "MMD" (the default), "CvM" or "MSE"; see loss(). |
| batch.norm | logical indicating whether batch normalization layers are to be added after each hidden layer. |
| dropout.rate | numeric value in [0, 1] specifying the dropout rate; 0 (the default) means no dropout layers are added. |
| n.GPU | non-negative integer specifying the number of GPUs to be used (if available). |
| ... | additional arguments passed to the underlying loss(). |
The S3 class "gnn_FNN" is a subclass of the S3 class "gnn_GNN", which in turn is a subclass of "gnn_Model".
FNN() returns an object of S3 class "gnn_FNN" with components:

| Component | Description |
|---|---|
| model | FNN model (a keras object inheriting from the R6 classes "keras.engine.training.Model", "keras.engine.network.Network", "keras.engine.base_layer.Layer" and "python.builtin.object", or a raw object). |
| type | character string indicating the type of model. |
| dim | see above. |
| activation | see above. |
| batch.norm | see above. |
| dropout.rate | see above. |
| n.param | number of trainable, non-trainable and total number of parameters. |
| loss.type | type of loss function (character). |
| n.train | number of training samples (NA_integer_ unless trained). |
| batch.size | batch size (NA_integer_ unless trained). |
| n.epoch | number of epochs (NA_integer_ unless trained). |
| loss | numeric(n.epoch) containing the loss function values per epoch. |
| time | object of S3 class "proc_time" containing the training time (if trained). |
| prior | matrix containing a (sub-)sample of the prior (if trained). |
Marius Hofert and Avinash Prasad
Li, Y., Swersky, K. and Zemel, R. (2015). Generative moment matching networks. Proceedings of Machine Learning Research, 37 (International Conference on Machine Learning), 1718–1727. See http://proceedings.mlr.press/v37/li15.pdf (2019-08-24).

Dziugaite, G. K., Roy, D. M. and Ghahramani, Z. (2015). Training generative neural networks via maximum mean discrepancy optimization. AUAI Press, 258–267. See http://www.auai.org/uai2015/proceedings/papers/230.pdf (2019-08-24).

Hofert, M., Prasad, A. and Zhu, M. (2020). Quasi-random sampling for multivariate distributions via generative neural networks. Journal of Computational and Graphical Statistics, doi:10.1080/10618600.2020.1868302.

Hofert, M., Prasad, A. and Zhu, M. (2020). Multivariate time-series modeling with generative neural networks. See https://arxiv.org/abs/2002.10645.

Hofert, M., Prasad, A. and Zhu, M. (2020). Applications of multivariate quasi-random sampling with neural networks. See https://arxiv.org/abs/2012.08036.
```r
if(TensorFlow_available()) { # rather restrictive (due to R-Forge, winbuilder)
    library(gnn) # for being standalone

    ## Training data
    d <- 2 # bivariate case
    P <- matrix(0.9, nrow = d, ncol = d); diag(P) <- 1 # correlation matrix
    ntrn <- 60000 # training data sample size
    set.seed(271)
    library(nvmix)
    X <- abs(rNorm(ntrn, scale = P)) # componentwise absolute values of N(0,P) sample

    ## Plot a subsample
    m <- 2000 # subsample size for plots
    opar <- par(pty = "s")
    plot(X[1:m,], xlab = expression(X[1]), ylab = expression(X[2])) # plot |X|
    U <- apply(X, 2, rank) / (ntrn + 1) # pseudo-observations of |X|
    plot(U[1:m,], xlab = expression(U[1]), ylab = expression(U[2])) # visual check

    ## Model 1: A basic feedforward neural network (FNN) with MSE loss function
    fnn <- FNN(c(d, 300, d), loss.fun = "MSE") # define the FNN
    fnn <- fitGNN(fnn, data = U, n.epoch = 40) # train with batch optimization
    plot(fnn, kind = "loss") # plot the loss after each epoch

    ## Model 2: A GMMN (FNN with MMD loss function)
    gmmn <- FNN(c(d, 300, d)) # define the GMMN (initialized with random weights)
    ## For training we need to use a mini-batch optimization (batch size < nrow(U)).
    ## For a fair comparison (same number of gradient steps) to NN, we use 500
    ## samples (25% = 4 gradient steps/epoch) for 10 epochs for GMMN.
    library(keras) # for callback_early_stopping()
    ## We monitor the loss function and stop earlier if the loss function
    ## over the last patience-many epochs has changed by less than min_delta
    ## in absolute value. Then we keep the weights that led to the smallest
    ## loss seen throughout training.
    gmmn <- fitGNN(gmmn, data = U, batch.size = 500, n.epoch = 10,
                   callbacks = callback_early_stopping(monitor = "loss",
                                                       min_delta = 1e-3,
                                                       patience = 3,
                                                       restore_best_weights = TRUE))
    plot(gmmn, kind = "loss") # plot the loss after each epoch
    ## Note:
    ## - Obviously, in a real-world application, batch.size and n.epoch
    ##   should be (much) larger (e.g., batch.size = 5000, n.epoch = 300).
    ## - Training is not reproducible (due to keras).

    ## Model 3: A FNN with CvM loss function
    fnnCvM <- FNN(c(d, 300, d), loss.fun = "CvM")
    fnnCvM <- fitGNN(fnnCvM, data = U, batch.size = 500, n.epoch = 10,
                     callbacks = callback_early_stopping(monitor = "loss",
                                                         min_delta = 1e-3,
                                                         patience = 3,
                                                         restore_best_weights = TRUE))
    plot(fnnCvM, kind = "loss") # plot the loss after each epoch

    ## Sample from the different models
    set.seed(271)
    V.fnn <- rGNN(fnn, size = m)
    set.seed(271)
    V.gmmn <- rGNN(gmmn, size = m)
    set.seed(271)
    V.fnnCvM <- rGNN(fnnCvM, size = m)

    ## Joint plot of training subsample with GMMN PRNs. Clearly, the MSE
    ## cannot be used to learn the distribution correctly.
    layout(matrix(1:4, ncol = 2, byrow = TRUE))
    plot(U[1:m,], xlab = expression(U[1]), ylab = expression(U[2]), cex = 0.2)
    mtext("Training subsample", side = 4, line = 0.4, adj = 0)
    plot(V.fnn, xlab = expression(V[1]), ylab = expression(V[2]), cex = 0.2)
    mtext("Trained NN with MSE loss", side = 4, line = 0.4, adj = 0)
    plot(V.gmmn, xlab = expression(V[1]), ylab = expression(V[2]), cex = 0.2)
    mtext("Trained NN with MMD loss", side = 4, line = 0.4, adj = 0)
    plot(V.fnnCvM, xlab = expression(V[1]), ylab = expression(V[2]), cex = 0.2)
    mtext("Trained NN with CvM loss", side = 4, line = 0.4, adj = 0)

    ## Joint plot of training subsample with GMMN QRNs
    library(qrng) # for sobol()
    V.fnn. <- rGNN(fnn, size = m, method = "sobol", randomize = "Owen")
    V.gmmn. <- rGNN(gmmn, size = m, method = "sobol", randomize = "Owen")
    V.fnnCvM. <- rGNN(fnnCvM, size = m, method = "sobol", randomize = "Owen")
    plot(U[1:m,], xlab = expression(U[1]), ylab = expression(U[2]), cex = 0.2)
    mtext("Training subsample", side = 4, line = 0.4, adj = 0)
    plot(V.fnn., xlab = expression(V[1]), ylab = expression(V[2]), cex = 0.2)
    mtext("Trained NN with MSE loss", side = 4, line = 0.4, adj = 0)
    plot(V.gmmn., xlab = expression(V[1]), ylab = expression(V[2]), cex = 0.2)
    mtext("Trained NN with MMD loss", side = 4, line = 0.4, adj = 0)
    plot(V.fnnCvM., xlab = expression(V[1]), ylab = expression(V[2]), cex = 0.2)
    mtext("Trained NN with CvM loss", side = 4, line = 0.4, adj = 0)
    layout(1)
    par(opar)
}
```
Basic functions and methods for objects of S3 class "gnn_GNN".
```r
## S3 method for class 'gnn_GNN'
print(x, ...)
## S3 method for class 'gnn_GNN'
str(object, ...)
## S3 method for class 'gnn_GNN'
summary(object, ...)
## S3 method for class 'gnn_GNN'
dim(x)
## S3 method for class 'gnn_GNN'
is.GNN(x)
## S3 method for class 'list'
is.GNN(x)
```
| Argument | Description |
|---|---|
| x | object of S3 class "gnn_GNN" (for is.GNN(), also a list of such objects). |
| object | object of S3 class "gnn_GNN". |
| ... | currently not used. |
print(): return value of the print() method for objects of class "list".

str(): nothing, as str() returns nothing when applied to objects of class "list".

summary(): return value of the summary() method for objects of class "list".

dim(): slot dim of x, so a vector of the dimensions of the input, hidden and output layers.

is.GNN(): logical of length equal to the length of x indicating, for each component, whether it is an object of class "gnn_GNN".
Marius Hofert
```r
if(TensorFlow_available()) { # rather restrictive (due to R-Forge, winbuilder)
    library(gnn) # for being standalone
    d <- 2
    dim <- c(d, 300, d) # dimensions of the input, hidden and output layers
    GMMN <- FNN(dim) # define the GMMN model
    stopifnot(is.GNN(GMMN)) # check for being a GNN
    GMMN # print() method
    str(GMMN) # str() method
    summary(GMMN) # summary() method
    stopifnot(dim(GMMN) == c(d, 300, d)) # dim() method
}
```
Implementation of various loss functions to measure statistical discrepancy between two datasets.
```r
loss(x, y, type = c("MMD", "CvM", "MSE", "BCE"), ...)
MMD(x, y, ...)
CvM(x, y)
```
| Argument | Description |
|---|---|
| x | 2d-tensor or matrix containing one of the two datasets to be compared. |
| y | 2d-tensor or matrix containing the other dataset. |
| type | character string indicating the type of loss function: "MMD" (maximum mean discrepancy; the default), "CvM" (Cramér-von Mises), "MSE" (mean squared error) or "BCE" (binary cross entropy). |
| ... | additional arguments passed to the underlying functions, most notably MMD(). |
loss() returns a 0d tensor containing the loss.

MMD() and CvM() return a 0d tensor (if x and y are tensors) or numeric(1) (if x or y are R matrices).
Marius Hofert and Avinash Prasad
Kingma, D. P. and Welling, M. (2014). Stochastic gradient VB and the variational auto-encoder. Second International Conference on Learning Representations (ICLR). See https://keras.rstudio.com/articles/examples/variational_autoencoder.html
Rémillard, B. and Scaillet, O. (2009). Testing for equality between two copulas. Journal of Multivariate Analysis 100, 377–386.
FNN(), where loss() is used.
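A minimal usage sketch added for illustration (it is not among the package's original examples), assuming TensorFlow is available since the losses are computed via tensors:

```r
if(TensorFlow_available()) { # rather restrictive (due to R-Forge, winbuilder)
    library(gnn) # for being standalone
    set.seed(271)
    X <- matrix(runif(100 * 2), ncol = 2) # one dataset
    Y <- matrix(runif(100 * 2), ncol = 2) # another dataset from the same distribution
    MMD(X, Y) # numeric(1) since X and Y are R matrices
    CvM(X, Y) # numeric(1)
    loss(X, Y, type = "MMD") # via the loss() wrapper (returns a 0d tensor)
}
```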
Functions for plotting.
```r
## S3 method for class 'gnn_GNN'
plot(x, kind = c("scatter", "loss"), max.n.samples = NULL,
     type = NULL, xlab = NULL, ylab = NULL, y2lab = NULL,
     labels = "X", pair = NULL, ...)
```
| Argument | Description |
|---|---|
| x | trained object of class "gnn_GNN". |
| kind | character string indicating the kind of plot: a scatter plot of a generated sample ("scatter") or a plot of the loss function values per epoch of training ("loss"). |
| max.n.samples | maximal number of samples to be plotted. |
| type | line type; see plot(). |
| xlab | x-axis label; see plot(). |
| ylab | y-axis label; see plot(). |
| y2lab | secondary y-axis label. |
| labels | character string used to construct the axis labels of the scatter plot. |
| pair | numeric(2) specifying the pair of components to be plotted (relevant for dimensions larger than two). |
| ... | additional arguments passed to the underlying plot functions. |
Plot by side-effect.
Marius Hofert
fitGNN().
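A brief sketch added for illustration (not from the original examples), assuming TensorFlow is available; the short training run merely produces a trained object that the plot method accepts:

```r
if(TensorFlow_available()) { # rather restrictive (due to R-Forge, winbuilder)
    library(gnn) # for being standalone
    d <- 2
    set.seed(271)
    U <- matrix(runif(2000 * d), ncol = d) # dummy training data in [0,1]^d
    fnn <- fitGNN(FNN(c(d, 300, d)), data = U, batch.size = 500, n.epoch = 5)
    plot(fnn, kind = "loss") # loss function values per epoch (as in the FNN() examples)
    plot(fnn) # default kind = "scatter"
}
```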
Keras objects cannot be saved like other R objects. The methods as.raw() and as.keras() can be used to convert the model slots of objects of S3 class "gnn_GNN" to "raw" objects (which can be saved) or "keras.engine.training.Model" objects (which can be trained).
```r
## S3 method for class 'gnn_GNN'
as.raw(x)
## S3 method for class 'gnn_GNN'
as.keras(x)
```
| Argument | Description |
|---|---|
| x | object of S3 class "gnn_GNN". |
Object of S3 class "gnn_GNN" with the model slot converted by the respective method if necessary.
Marius Hofert
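A small sketch of converting back and forth, added for illustration (not part of the original examples); it assumes TensorFlow is available and that the model slot of an object converted with as.raw() is a "raw" object:

```r
if(TensorFlow_available()) { # rather restrictive (due to R-Forge, winbuilder)
    library(gnn) # for being standalone
    GMMN <- FNN() # model slot is a keras object
    GMMN.raw <- as.raw(GMMN) # convert the model slot to a "raw" object (e.g., for saving)
    stopifnot(is.raw(GMMN.raw[["model"]])) # assumption: the converted slot is of type "raw"
    GMMN.keras <- as.keras(GMMN.raw) # convert back to a trainable keras model
    stopifnot(inherits(GMMN.keras[["model"]], "keras.engine.training.Model"))
}
```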
Sampling method for objects of S3 class "gnn_GNN".
```r
## S3 method for class 'gnn_GNN'
rGNN(x, size, prior = NULL, pobs = FALSE, ...)
```
| Argument | Description |
|---|---|
| x | object of S3 class "gnn_GNN". |
| size | sample size, a positive integer. |
| prior | one of NULL (the default; prior samples are then generated via rPrior(), by default independent N(0,1) samples) or a matrix of prior samples to be fed through x. |
| pobs | logical indicating whether pseudo-observations of the generated samples are to be returned. |
| ... | additional arguments passed to the underlying rPrior() if prior = NULL. |
(size, dim(x)[1])-matrix of samples.
Marius Hofert
```r
if(TensorFlow_available()) { # rather restrictive (due to R-Forge, winbuilder)
    library(gnn) # for being standalone
    ## Define dummy model
    d <- 2 # bivariate case
    GMMN <- FNN(c(d, 300, d)) # Feedforward NN with MMD loss (a GMMN; random weights)
    ## Sampling
    n <- 3
    (X1 <- rGNN(GMMN, size = n)) # default (independent N(0,1) samples as prior)
    (X2 <- rGNN(GMMN, size = n, # passing additional arguments to rPrior()
                qmargins = qexp, method = "sobol", seed = 271))
    (X3 <- rGNN(GMMN, prior = matrix(rexp(n * d), ncol = d))) # providing 'prior'
    stopifnot(dim(X1) == c(n, d), dim(X2) == c(n, d), dim(X3) == c(n, d))
}
```
Fixes the removal of file extensions by file_path_sans_ext() in the case where file names contain digits after the last dot (a pattern often used to incorporate numbers into file names).
```r
rm_ext(x)
```
| Argument | Description |
|---|---|
| x | file name(s) with extension(s) to be stripped off. |
The file name without its extension (if the file name had an extension).
Marius Hofert
```r
library(gnn) # for being standalone
myfilepath1 <- "/myusername/my_filename_with_dots_0.25_0.50_0.75.rda"
myfilepath2 <- "/myusername/my_filename_with_dots_0.25_0.50_0.75"
myfilepath3 <- "/myusername/my_filename_with_dots_0.25_0.50_0.75."
myfilepath4 <- "/myusername/my_filename_with_dots_0.25_0.50_0.75._"
myfilepath5 <- "/myusername/my_filename_with_dots_0.25_0.50_0.75._*.rda"
library(tools)
file_path_sans_ext(myfilepath2) # fails (only case)
stopifnot(rm_ext(myfilepath1) == file_path_sans_ext(myfilepath1))
stopifnot(rm_ext(myfilepath2) == myfilepath2)
stopifnot(rm_ext(myfilepath3) == file_path_sans_ext(myfilepath3))
stopifnot(rm_ext(myfilepath4) == file_path_sans_ext(myfilepath4))
stopifnot(rm_ext(myfilepath5) == file_path_sans_ext(myfilepath5))
```
Sampling from a prior distribution.
```r
rPrior(n, copula, qmargins = qnorm, method = c("pseudo", "sobol"), ...)
```
| Argument | Description |
|---|---|
| n | sample size, a positive integer. |
| copula | object of class "Copula" (from the R package copula) specifying the dependence structure (copula) of the prior. |
| qmargins | marginal quantile function, or a list of length dim(copula) of such, specifying the margins of the prior. |
| method | character string indicating the sampling method: "pseudo" for pseudo-random numbers or "sobol" for quasi-random numbers based on a Sobol' sequence. |
| ... | additional arguments passed to the underlying sampling function; for method = "sobol", for example, a seed (see sobol() of the R package qrng). |
(n, dim(copula))-matrix of samples.
Marius Hofert
```r
library(gnn) # for being standalone
n <- 5
d <- 3
library(copula)
cop <- claytonCopula(2, dim = d)
X1 <- rPrior(n, copula = cop) # Clayton copula and N(0,1) margins
X2 <- rPrior(n, copula = cop, qmargins = qexp) # Exp(1) margins
X3 <- rPrior(n, copula = cop, qmargins = qexp, method = "sobol", seed = 271)
stopifnot(dim(X1) == c(n, d), dim(X2) == c(n, d), dim(X3) == c(n, d))
```
Save and load .rda files with conversion to objects of class "raw" (for save_gnn()) or "keras.engine.training.Model" (for load_gnn()).
```r
save_gnn(..., file, name = NULL)
load_gnn(file)
```
| Argument | Description |
|---|---|
| ... | objects to be saved in file (for objects of class "gnn_GNN", the model slot is converted with as.raw() before saving). |
| file | character string specifying the file (typically with extension .rda) to save to or load from. |
| name | NULL or a character string giving the name under which the object is saved in file. |
save_gnn(): nothing (generates an .rda file by side-effect).

load_gnn(): the loaded object(s). Those of class "gnn_GNN" are converted with as.keras() before they are returned; this also applies to a component of a loaded object of class list.
Marius Hofert
See the underlying functions load() and save() (among others).
```r
if(TensorFlow_available()) { # rather restrictive (due to R-Forge, winbuilder)
    library(gnn) # for being standalone
    file <- tempfile("foo", fileext = ".rda")
    GMMN1 <- FNN()
    save_gnn(GMMN1, file = file) # converts GMMN via as.raw()
    GMMN2 <- load_gnn(file) # converts loaded object via as.keras()
    stopifnot(is.GNN(GMMN2), inherits(GMMN2[["model"]], "keras.engine.training.Model"))
    rm(GMMN1, GMMN2) # clean-up
    stopifnot(file.remove(file))
}
```
A simple (and restrictive) check whether TensorFlow is available.
```r
TensorFlow_available()
```
Essentially calls "pip list | grep tensorflow"
via
system()
. Only available on non-Windows operating
systems; returns FALSE
on Windows.
logical indicating whether TensorFlow was found.
Marius Hofert
```r
library(gnn) # for being standalone
TensorFlow_available()
```
Functions and methods for extracting and printing timings in human-readable format.
```r
as.human(x, fmt = "%.2f")
human.time(expr, print = TRUE, ...)
## S3 method for class 'gnn_GNN'
time(x, human = FALSE, ...)
## S3 method for class 'gnn_proc_time'
print(x, ...)
```
| Argument | Description |
|---|---|
| x | for as.human(), a timing (as returned by proc.time()); for time(), an object of S3 class "gnn_GNN"; for print(), an object of S3 class "gnn_proc_time". |
| fmt | format string as required by sprintf(). |
| expr | see system.time(). |
| print | logical indicating whether the timing is printed. |
| human | logical indicating whether the timing is printed in human-readable format (via as.human()). |
| ... | for human.time(), additional arguments passed to the underlying functions; otherwise currently not used. |
as.human(), human.time(): named character(3) providing user, system and elapsed time in human-readable format.

time(): object of class "gnn_proc_time".

print(): x (invisibly).
Marius Hofert
```r
if(TensorFlow_available()) { # rather restrictive (due to R-Forge, winbuilder)
    library(gnn) # for being standalone
    human.time(Sys.sleep(0.1)) # print human-readable time
    (proc.obj <- human.time(Sys.sleep(0.1), print = FALSE)) # save the timing (character(3))
    fnn <- FNN()
    time(fnn) # default print method for objects of class "gnn_proc_time"
    time(fnn, human = TRUE) # human-readable print method for such objects
}
```
Dimension-reduction transformations applied to an input data matrix. Currently only the principal component transformation and its inverse are provided.
```r
PCA_trafo(x, mu, Gamma, inverse = FALSE, ...)
```
| Argument | Description |
|---|---|
| x | (n, d)-matrix of data (or of principal components if inverse = TRUE) to be transformed. |
| mu | if inverse = TRUE, the vector of centers to be used for the inverse transformation, typically the component mu of the output of PCA_trafo(); its length determines the dimension to transform to. |
| Gamma | if inverse = TRUE, the matrix of principal axes to be used for the inverse transformation, typically the component Gamma of the output of PCA_trafo(); must be of dimension (length(mu), k) for k >= ncol(x). |
| inverse | logical indicating whether the inverse transformation is applied. |
| ... | additional arguments passed to the underlying function computing the principal components. |
Conceptually, the principal component transformation transforms a vector $X$ to a vector $Y = \Gamma^\top (X - \mu)$, where $\mu$ is the mean vector of $X$ and $\Gamma$ is the $(d, d)$-matrix whose columns contain the orthonormal eigenvectors of cov($X$).

The corresponding (conceptual) inverse transformation is $X = \mu + \Gamma Y$.

See McNeil et al. (2015, Section 6.4.5).
If inverse = TRUE, the transformed data, whose rows contain $X = \mu + \Gamma Y$, where $Y$ is one row of x; see the Details section above for the notation.

If inverse = FALSE, a list containing:

| Component | Description |
|---|---|
| PCs | (n, d)-matrix of principal components. |
| cumvar | cumulative variances; the jth entry provides the fraction of the variance explained by the first j principal components. |
| sd | sample standard deviations of the transformed data. |
| lambda | eigenvalues of cov(x). |
| mu | d-vector of centers of x (see also the Details section), typically provided to PCA_trafo(, inverse = TRUE). |
| Gamma | (d, d)-matrix of principal axes (see also the Details section), typically provided to PCA_trafo(, inverse = TRUE). |
Marius Hofert
McNeil, A. J., Frey, R., and Embrechts, P. (2015). Quantitative Risk Management: Concepts, Techniques, Tools. Princeton University Press.
```r
library(gnn) # for being standalone
## Generate data
library(copula)
set.seed(271)
X <- qt(rCopula(1000, gumbelCopula(2, dim = 10)), df = 3.5)
pairs(X, gap = 0, pch = ".")

## Principal component transformation
PCA <- PCA_trafo(X)
Y <- PCA$PCs
PCA$cumvar[3] # fraction of variance explained by the first 3 principal components
which.max(PCA$cumvar > 0.9) # number of principal components it takes to explain 90%

## Biplot (plot of the first two principal components = data transformed with
## the first two principal axes)
plot(Y[,1:2])

## Transform back and compare
X. <- PCA_trafo(Y, mu = PCA$mu, Gamma = PCA$Gamma, inverse = TRUE)
stopifnot(all.equal(X., X))

## Note: One typically transforms back with only some of the principal axes
X. <- PCA_trafo(Y[,1:3], mu = PCA$mu, # mu determines the dimension to transform to
                Gamma = PCA$Gamma, # must be of dim. (length(mu), k) for k >= ncol(x)
                inverse = TRUE)
stopifnot(dim(X.) == c(1000, 10))
## Note: We (typically) transform back to the original dimension.
pairs(X., gap = 0, pch = ".") # pairs of back-transformed first three PCs
```
Transformations applied to each marginal component sample to map given data to a different range.
```r
range_trafo(x, lower, upper, inverse = FALSE)
logis_trafo(x, mean = 0, sd = 1, slope = 1, intercept = 0, inverse = FALSE)
```
| Argument | Description |
|---|---|
| x | matrix of data to be transformed (componentwise). |
| lower | value or vector (one entry per column of x) of lower endpoints for the range transformation. |
| upper | value or vector (one entry per column of x) of upper endpoints for the range transformation. |
| mean | value or vector (one entry per column of x) of locations of the logistic transformation. |
| sd | value or vector (one entry per column of x) of standard deviations determining the scale of the logistic transformation. |
| slope | value or vector (one entry per column of x) of slopes of the affine transformation applied after the logistic transformation. |
| intercept | value or vector (one entry per column of x) of intercepts of the affine transformation applied after the logistic transformation. |
| inverse | logical indicating whether the inverse transformation is applied. |
An object of the same type and dimensions as x containing the componentwise transformed data.
Marius Hofert
```r
library(gnn) # for being standalone
## Generate data
n <- 100
set.seed(271)
x <- cbind(rnorm(n), (1-runif(n))^(-1/2)-1) # normal and Pareto(2) margins
plot(x)

## Range transformation
ran <- apply(x, 2, range) # column j = range of the jth column of x
x.ran <- range_trafo(x, lower = ran[1,], upper = ran[2,]) # marginally transform to [0,1]
plot(x.ran) # => now range [0,1] but points a bit clustered around small y-values
x. <- range_trafo(x.ran, lower = ran[1,], upper = ran[2,], inverse = TRUE) # transform back
stopifnot(all.equal(x., x)) # check

## Logistic transformation
x.logis <- logis_trafo(x) # marginally transform to [0,1] via plogis()
plot(x.logis) # => y-range is [1/2, 1] which can be harder to train
x. <- logis_trafo(x.logis, inverse = TRUE) # transform back
stopifnot(all.equal(x., x)) # check

## Logistic transformation with scaling to all of [0,1] in the second coordinate
x.logis.scale <- logis_trafo(x, slope = 2, intercept = -1)
plot(x.logis.scale) # => now y-range is scaled to [0,1]
x. <- logis_trafo(x.logis.scale, slope = 2, intercept = -1, inverse = TRUE) # transform back
stopifnot(all.equal(x., x)) # check

## Logistic transformation with sample mean and standard deviation and then
## transforming the range to [0,1] with a range transformation (note that
## slope = 2, intercept = -1 would not help here as the y-range is not [1/2, 1])
mu <- colMeans(x)
sig <- apply(x, 2, sd)
x.logis.fit <- logis_trafo(x, mean = mu, sd = sig) # marginally plogis(, location, scale)
plot(x.logis.fit) # => y-range is not [1/2, 1] => use range transformation
ran <- apply(x.logis.fit, 2, range)
x.logis.fit.ran <- range_trafo(x.logis.fit, lower = ran[1,], upper = ran[2,])
plot(x.logis.fit.ran) # => now y-range is [1/2, 1]
x. <- logis_trafo(range_trafo(x.logis.fit.ran, lower = ran[1,], upper = ran[2,],
                              inverse = TRUE),
                  mean = mu, sd = sig, inverse = TRUE) # transform back
stopifnot(all.equal(x., x)) # check

## Note that for heavy-tailed data, plogis() can fail to stay inside (0,1)
## even with adapting to sample mean and standard deviation. We now present
## a case where we see that using a fitted logistic distribution function
## is *just* good enough to numerically keep the data inside (0,1).
set.seed(271)
x <- cbind(rnorm(n), (1-runif(n))^(-2)-1) # normal and Pareto(1/2) margins
plot(x) # => heavy-tailed in y-coordinate

## Transforming with standard logistic distribution function
x.logis <- logis_trafo(x)
stopifnot(any(x.logis[,2] == 1))
## => There is a value numerically indistinguishable from 1 to which applying
##    the inverse transform will lead to Inf
stopifnot(any(is.infinite(logis_trafo(x.logis, inverse = TRUE))))

## Now adapt the logistic distribution to share the mean and standard deviation
## with the data
mu <- colMeans(x)
sig <- apply(x, 2, sd)
x.logis.scale <- logis_trafo(x, mean = mu, sd = sig)
stopifnot(all(x.logis.scale[,2] != 1)) # => no values equal to 1 anymore

## Alternatively, log() the data first, thus working with a log-logistic
## distribution as transformation
lx <- cbind(x[,1], log(x[,2])) # 2nd coordinate only
lmu <- c(mu[1], mean(lx[,2]))
lsig <- c(sig[1], sd(lx[,2]))
x.llogis <- logis_trafo(lx, mean = lmu, sd = lsig)
x. <- logis_trafo(x.llogis, mean = lmu, sd = lsig, inverse = TRUE)
x.. <- cbind(x.[,1], exp(x.[,2])) # undo log()
stopifnot(all.equal(x.., x))
```