diff --git a/R/as.textmodel2.R b/R/as.textmodel2.R index 1567753..e82ff1e 100644 --- a/R/as.textmodel2.R +++ b/R/as.textmodel2.R @@ -1,12 +1,20 @@ #' @rdname as.textmodel_lss #' @export +#' @param spatial \[experimental\] if `FALSE`, return a probabilistic model. +#' See the details. +#' @details +#' If `spatial = TRUE`, it returns a spatial model; otherwise a probabilistic model. +#' While the polarity scores of words are their cosine similarity to seed words in +#' spatial models, they are the predicted probabilities that the seed words occur in +#' their contexts in probabilistic models. +#' #' @method as.textmodel_lss textmodel_word2vec as.textmodel_lss.textmodel_word2vec <- function(x, seeds, terms = NULL, nested_weight = TRUE, + spatial = FALSE, verbose = FALSE, - spatial = TRUE, ...) { #args <- list(terms = terms, seeds = seeds) diff --git a/R/textmodel_lss.R b/R/textmodel_lss.R index 1fc32ef..cfe8b78 100644 --- a/R/textmodel_lss.R +++ b/R/textmodel_lss.R @@ -51,11 +51,8 @@ #' in `seed_weighted` in the object. #' #' When `x` is a tokens or tokens_xptr object, [wordvector::textmodel_word2vec] -#' is called internally with `type = "skip-gram"` and other arguments passed via `...`. -#' If `spatial = TRUE`, it return a spatial model; otherwise a probabilistic model. -#' While the polarity scores of words are their cosine similarity to seed words in -#' spatial models, they are predicted probability that the seed words to occur in -#' their contexts. The probabilistic models are still experimental, so use them with caution. +#' is called internally with `type = "skip-gram"` and other arguments passed via +#' `...`. #' #' `nested_weight = TRUE` to limit the impact of glob patterns used in seed words. 
#' When it is `FALSE`, the weights of the seed words are all equal being the inverse of diff --git a/R/textmodel_lss2.R b/R/textmodel_lss2.R index 3a70604..5a67c65 100644 --- a/R/textmodel_lss2.R +++ b/R/textmodel_lss2.R @@ -1,5 +1,4 @@ #' @rdname textmodel_lss -#' @param spatial \[experimental\] if `FALSE`, return a probabilistic model. See the details. #' @export #' @inheritParams wordvector::textmodel_word2vec #' @importFrom quanteda dfm dfm_group @@ -10,7 +9,6 @@ textmodel_lss.tokens <- function(x, seeds, terms = NULL, k = 200, nested_weight = TRUE, include_data = FALSE, group_data = FALSE, - spatial = TRUE, verbose = FALSE, ...) { @@ -25,8 +23,8 @@ textmodel_lss.tokens <- function(x, seeds, terms = NULL, k = 200, w2v <- wordvector::textmodel_word2vec(x, dim = k, min_count = min_count, type = "skip-gram", tolower = tolower, normalize = FALSE, verbose = verbose, ...) - result <- as.textmodel_lss(w2v, seeds = seeds, terms = terms, spatial = spatial, - nested_weight = nested_weight, verbose = FALSE) + result <- as.textmodel_lss(w2v, seeds = seeds, terms = terms, + nested_weight = nested_weight, verbose = FALSE, ...) result$type <- "word2vec" result$call <- try(match.call(sys.function(-1), call = sys.call(-1)), silent = TRUE) diff --git a/man/as.textmodel_lss.Rd b/man/as.textmodel_lss.Rd index f3913c6..be1f4e3 100644 --- a/man/as.textmodel_lss.Rd +++ b/man/as.textmodel_lss.Rd @@ -30,8 +30,8 @@ as.textmodel_lss(x, ...) seeds, terms = NULL, nested_weight = TRUE, + spatial = FALSE, verbose = FALSE, - spatial = TRUE, ... ) } @@ -63,7 +63,8 @@ the number of glob pattern matches.} \item{verbose}{show messages if \code{TRUE}.} -\item{spatial}{if \code{TRUE}, return a spatial model. Otherwise, a probabilistic model.} +\item{spatial}{[experimental] if \code{FALSE}, return a probabilistic model. +See the details.} } \value{ a dummy \link{textmodel_lss} object @@ -79,4 +80,9 @@ if it was trained originally using SVD. 
If \code{x} is a dense matrix, it is treated as a column-oriented word vectors with which polarity of words are computed. If \code{x} is a named numeric vector, the values are treated as polarity scores of the words in the names. + +If \code{spatial = TRUE}, it returns a spatial model; otherwise a probabilistic model. +While the polarity scores of words are their cosine similarity to seed words in +spatial models, they are the predicted probabilities that the seed words occur in +their contexts in probabilistic models. } diff --git a/man/as.textmodel_lss.textmodel_wordvector.Rd b/man/as.textmodel_lss.textmodel_wordvector.Rd index c351f5b..00b8d58 100644 --- a/man/as.textmodel_lss.textmodel_wordvector.Rd +++ b/man/as.textmodel_lss.textmodel_wordvector.Rd @@ -9,8 +9,8 @@ seeds, terms = NULL, nested_weight = TRUE, + spatial = FALSE, verbose = FALSE, - spatial = TRUE, ... ) } diff --git a/man/textmodel_lss.Rd b/man/textmodel_lss.Rd index cf03965..c8286db 100644 --- a/man/textmodel_lss.Rd +++ b/man/textmodel_lss.Rd @@ -52,7 +52,6 @@ textmodel_lss(x, ...) nested_weight = TRUE, include_data = FALSE, group_data = FALSE, - spatial = TRUE, verbose = FALSE, ... ) @@ -112,8 +111,6 @@ size of the corpus. Used only when \code{x} is a \code{fcm}.} this in \code{x} are removed before training.} \item{tolower}{if \code{TRUE}, lower-case all the words in the model.} - -\item{spatial}{[experimental] if \code{FALSE}, return a probabilistic model. See the details.} } \description{ Latent Semantic Scaling (LSS) is a semi-supervised algorithm for document scaling based on @@ -141,11 +138,8 @@ between seed words' computed and original scores. Weighted scores are saved in \code{seed_weighted} in the object. When \code{x} is a tokens or tokens_xptr object, \link[wordvector:textmodel_word2vec]{wordvector::textmodel_word2vec} -is called internally with \code{type = "skip-gram"} and other arguments passed via \code{...}. -If \code{spatial = TRUE}, it return a spatial model; otherwise a probabilistic model. 
-While the polarity scores of words are their cosine similarity to seed words in -spatial models, they are predicted probability that the seed words to occur in -their contexts. The probabilistic models are still experimental, so use them with caution. +is called internally with \code{type = "skip-gram"} and other arguments passed via +\code{...}. \code{nested_weight = TRUE} to limit the impact of glob patterns used in seed words. When it is \code{FALSE}, the weights of the seed words are all equal being the inverse of diff --git a/tests/testthat/test-as.textmodel2.R b/tests/testthat/test-as.textmodel2.R index 2bbe729..1bbba1f 100644 --- a/tests/testthat/test-as.textmodel2.R +++ b/tests/testthat/test-as.textmodel2.R @@ -7,7 +7,7 @@ test_that("as.textmodel_lss works with textmodel_wordvector", { # spatial wdv <- readRDS("../data/word2vec.RDS") - lss <- as.textmodel_lss(wdv, seed) + lss <- as.textmodel_lss(wdv, seed, spatial = TRUE) expect_equal(lss$beta_type, "similarity") expect_equal(lss$embedding, t(wdv$values)) diff --git a/tests/testthat/test-textmodel_lss2.R b/tests/testthat/test-textmodel_lss2.R index e21f6a8..c7e8d0c 100644 --- a/tests/testthat/test-textmodel_lss2.R +++ b/tests/testthat/test-textmodel_lss2.R @@ -14,7 +14,7 @@ test_that("textmodel_lss works when spatial = TRUE", { skip_on_cran() # without data - lss1 <- textmodel_lss(toks_test, seed_test, k = 10) + lss1 <- textmodel_lss(toks_test, seed_test, k = 10, spatial = TRUE) expect_s3_class(lss1, "textmodel_lss") expect_equal(lss1$k, 10) @@ -30,7 +30,8 @@ test_that("textmodel_lss works when spatial = TRUE", { ) # with data - lss2 <- textmodel_lss(toks_test, seed_test, k = 10, include_data = TRUE) + lss2 <- textmodel_lss(toks_test, seed_test, k = 10, include_data = TRUE, + spatial = TRUE) expect_s3_class(lss2, "textmodel_lss") expect_equal(lss2$concatenator, concatenator(toks_test)) @@ -42,7 +43,7 @@ test_that("textmodel_lss works when spatial = TRUE", { # with terms lss3 <- textmodel_lss(toks_test, 
seed_test, k = 10, terms = feat_test, - include_data = TRUE, group_data = TRUE) + include_data = TRUE, group_data = TRUE, spatial = TRUE) expect_s3_class(lss3, "textmodel_lss") expect_true(all(names(lss3$beta) %in% feat_test)) @@ -53,7 +54,7 @@ test_that("textmodel_lss works when spatial = TRUE", { # with tokens_xptr lss4 <- textmodel_lss(as.tokens_xptr(toks_test), seed_test, k = 10, - include_data = TRUE) + include_data = TRUE, spatial = TRUE) expect_s3_class(lss4, "textmodel_lss") expect_equal(docnames(lss4$data), docnames(toks_test)) @@ -61,7 +62,7 @@ test_that("textmodel_lss works when spatial = TRUE", { # warning expect_warning( textmodel_lss(toks_test, seed_test, k = 10, - include_data = FALSE, group_data = TRUE), + include_data = FALSE, group_data = TRUE, spatial = TRUE), "group_data is ignored when include_data = FALSE" ) @@ -99,7 +100,8 @@ test_that("textmodel_lss works when spatial = FALSE", { ) # with data - lss2 <- textmodel_lss(toks_test, seed_test, k = 10, include_data = TRUE, spatial = FALSE) + lss2 <- textmodel_lss(toks_test, seed_test, k = 10, include_data = TRUE, + spatial = FALSE) expect_s3_class(lss2, "textmodel_lss") expect_equal(lss2$concatenator, concatenator(toks_test))