Skip to content

Commit

Permalink
Resolve conflict
Browse files Browse the repository at this point in the history
Merge branch 'add-wordvector' of https://github.com/koheiw/LSX into add-wordvector

# Conflicts:
#	NEWS.md
  • Loading branch information
koheiw committed Dec 13, 2024
2 parents 614b62f + 6600d4a commit e118750
Show file tree
Hide file tree
Showing 6 changed files with 25 additions and 10 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: LSX
Type: Package
Title: Semi-Supervised Algorithm for Document Scaling
Version: 1.4.1
Version: 1.4.2
Authors@R: person("Kohei", "Watanabe", email = "[email protected]", role = c("aut", "cre", "cph"))
Description: A word embeddings-based semi-supervised model for document scaling Watanabe (2020) <doi:10.1080/19312458.2020.1832976>.
LSS allows users to analyze large and complex corpora on arbitrary dimensions with seed words, exploiting the efficiency of word embeddings (SVD, GloVe).
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

* Add `as.textmodel_lss()` for objects from the **wordvector** package.
* Reduce dependent packages by moving **rsparse**, **irlba** and **rsvd** to Suggests.
* Fix handling of phrasal patterns in `textplot_terms()`.
* Improve objects created by `as.textmodel_lss.textmodel_lss()`.

## Changes in v1.4.1

Expand Down
4 changes: 3 additions & 1 deletion R/as.textmodel.R
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,8 @@ as.textmodel_lss.matrix <- function(x, seeds,
seeds_weighted = seed,
embedding = x,
similarity = simil$seed,
call = try(match.call(sys.function(-1), call = sys.call(-1)), silent = TRUE)
call = try(match.call(sys.function(-1), call = sys.call(-1)), silent = TRUE),
version = utils::packageVersion("LSX")
)
return(result)
}
Expand Down Expand Up @@ -93,6 +94,7 @@ as.textmodel_lss.textmodel_lss <- function(x, ...) {
if (is.null(x$embedding))
stop("x must be a valid textmodel_lss object")
result <- as.textmodel_lss(x$embedding, ...)
result$concatenator <- x$concatenator
result$data <- x$data
result$frequency <- x$frequency[names(result$beta)]
return(result)
Expand Down
11 changes: 11 additions & 0 deletions R/textplot.R
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,19 @@ textplot_terms.textmodel_lss <- function(x, highlighted = NULL,
case_insensitive = TRUE,
concatenator = concatenator
)

# flag nested patterns (see quanteda::dfm_lookup)
if (length(ids)) {
m <- factor(names(ids), levels = unique(names(ids)))
dup <- unlist(lapply(split(ids, m), duplicated), use.names = FALSE)
} else {
dup <- logical()
}

key <- attr(ids, "key")
ids <- ids[lengths(ids) == 1 & !dup] # drop phrasal and nested patterns
id <- unlist(ids)

if (!is.null(key) && !is.null(id)) {
temp$group <- factor(names(id[match(temp$id, id)]), levels = key)
} else {
Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test-as.textmodel.R
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ test_that("as.textmodel_lss works with textmodel_lss", {
expect_equal(lss$embedding, lss_test$embedding)
expect_identical(lss$data, lss_test$data)
expect_identical(lss$frequency, lss_test$frequency)
expect_identical(names(lss$frequency), names(lss$frequency))
expect_identical(lss$concatenator, lss_test$concatenator)

expect_error(
as.textmodel_lss(lss_test, seed, slice = 100),
Expand Down
14 changes: 7 additions & 7 deletions tests/testthat/test-textplot.R
Original file line number Diff line number Diff line change
Expand Up @@ -100,18 +100,18 @@ test_that("textplot_terms works even when frequency has zeros (#85)", {
})

test_that("textplot_terms works with dictionary", {
toks <- tokens_compound(toks_test, data_dictionary_LSD2015)
dfmt <- dfm(toks) %>%
dfm_subset(Year > 2000)

dict <- dictionary(list("american" = c("american *"),
"president" = c("president *")))
toks <- tokens_subset(toks_test, Year > 2000) %>%
tokens_compound(dict)
dfmt <- dfm(toks)
seed <- c("nice*" = 1, "positive*" = 1, "bad*" = -1, "negative*" = -1)
suppressWarnings(
lss <- textmodel_lss(dfmt, seed, k = 10)
)
expect_silent(print(
textplot_terms(lss, data_dictionary_LSD2015, max_highlighted = 10)
))
expect_silent(print(
textplot_terms(lss, dictionary(list(phrase = "hard work")))
textplot_terms(lss, dict, max_highlighted = 10)
))
expect_silent(print(
textplot_terms(lss, dictionary(list(none = "xxxxx")))
Expand Down

0 comments on commit e118750

Please sign in to comment.