Skip to content

Commit

Permalink
Merge pull request #1452 from dbetebenner/master
Browse files Browse the repository at this point in the history
Fixing/creating createSuperCohort function
  • Loading branch information
dbetebenner authored Oct 10, 2024
2 parents 35a08bf + cf1166c commit 3bdc641
Show file tree
Hide file tree
Showing 8 changed files with 65 additions and 69 deletions.
4 changes: 2 additions & 2 deletions CITATION.cff
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ authors:
given-names: "Shang"
title: "SGP: Student Growth Percentiles & Percentile Growth Trajectories"
version: 2.2-0.0
version: 2.2-0.1
doi: 10.5281/zenodo.3634024
date-released: 2024-10-6
date-released: 2024-10-10
url: "https://sgp.io"
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: SGP
Type: Package
Title: Student Growth Percentiles & Percentile Growth Trajectories
Version: 2.2-0.0
Date: 2024-10-6
Version: 2.2-0.1
Date: 2024-10-10
Authors@R: c(person(given=c("Damian", "W."), family="Betebenner", email="dbetebenner@nciea.org", role=c("aut", "cre")),
person(given=c("Adam", "R."), family="Van Iwaarden", email="avaniwaarden@nciea.org", role="aut"),
person(given="Ben", family="Domingue", email="ben.domingue@gmail.com", role="aut"),
Expand Down
8 changes: 8 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
# SGP 2.2-0.0

## User Visible Changes:

* Alignment of scale score targets with time frame of projections
* Alignment of scale score targets with cutscores
* General bug fixes and meta-data updates to SGPstateData

# SGP 2.1-0.0

## User Visible Changes:
Expand Down
100 changes: 40 additions & 60 deletions R/createSuperCohortData.R
Original file line number Diff line number Diff line change
@@ -1,62 +1,42 @@
### createSuperCohortData
###
### Builds a "super-cohort" data set: for every configuration in sgp.config, each
### window of years in base_data that fits the configuration's grade/year
### progression is extracted, its YEAR values are overwritten with the most recent
### years found in base_data, and all windows are stacked into one data.table.
###
### Arguments:
###   base_data        data.table containing (at least) the columns YEAR, CONTENT_AREA,
###                    GRADE and ID (these are the columns joined/keyed on below).
###   sgp.config       list of configurations; each element supplies
###                    'sgp.content.areas', 'sgp.grade.sequences' and
###                    'sgp.grade.sequences.lags'.
###   target_years     years defining the super-cohort (alternative to num_priors).
###   num_priors       number of priors (alternative to target_years); when supplied
###                    alone, target_years is taken to be the last num_priors + 1 years.
###   indicate_cohort  if TRUE, add a COHORT column identifying the source window.
###
### Returns: the row-bound data.table of all per-configuration super-cohorts.
### Stops with an error when neither target_years nor num_priors is given, or when
### the number of target years is not smaller than the number of years in base_data.
`createSuperCohortData` <-
function(
    base_data,
    sgp.config,
    target_years, ## Provide either target_years OR num_priors. length(target_years) = num_priors + 1
    num_priors, ## Provide either num_priors OR target_years. num_priors = length(target_years) - 1
    indicate_cohort=FALSE
    ) {

    ### Local bindings for column names referenced through data.table non-standard evaluation
    YEAR <- YEAR_NEW <- GRADE <- ID <- COHORT <- NULL

    ### Parameters
    data.years <- sort(unique(base_data$YEAR))
    tmp.cohort.list <- list()

    ### Test parameters (scalar conditions, hence && rather than &)
    if (missing(target_years) && missing(num_priors)) stop("Provide either targets years for super-cohort or number of priors of desired SGP analyses.")
    ### num_priors priors plus the current year gives num_priors + 1 target years
    if (missing(target_years)) target_years <- tail(data.years, num_priors + 1L)
    if (length(target_years) >= length(data.years)) stop("Super-cohort construction is possible only when number of target_years is less than number of years in supplied data.")

    ### Loop over configurations
    for (sgp.config.iter in sgp.config) {
        tmp.content_areas <- sgp.config.iter[['sgp.content.areas']]
        tmp.grades <- sgp.config.iter[['sgp.grade.sequences']]
        tmp.lags <- sgp.config.iter[['sgp.grade.sequences.lags']]
        tmp.year.offsets <- c(0L, cumsum(tmp.lags)) ### position of each year in a window relative to its first year

        ### Number of windows that fit inside the available years. Skip the configuration
        ### when none fit (1:0 would otherwise iterate backwards over invalid indices).
        tmp.num.windows <- length(data.years) - sum(tmp.lags)
        if (tmp.num.windows < 1L) next

        tmp.list <- vector("list", tmp.num.windows)
        for (data.years.iter in seq_len(tmp.num.windows)) {
            ### Lookup mapping each (CONTENT_AREA, YEAR, GRADE) of this window to the year it is relabelled to.
            ### NOTE(review): YEAR_NEW is sized by the number of content areas; presumably this
            ### equals the length of the grade sequence -- confirm against callers.
            grade_year_content_area_map <- data.table(
                CONTENT_AREA = tmp.content_areas,
                YEAR = data.years[data.years.iter + tmp.year.offsets],
                GRADE = tmp.grades,
                YEAR_NEW = tail(data.years, length(tmp.content_areas)))

            ### The join returns a new data.table, so := below does not modify base_data
            tmp.window.dt <- base_data[grade_year_content_area_map, on=c("CONTENT_AREA", "YEAR", "GRADE")][,YEAR:=YEAR_NEW][,YEAR_NEW:=NULL]

            if (indicate_cohort) {
                tmp.window.dt[,COHORT:=paste(unlist(grade_year_content_area_map[,1:3]), collapse="_")]
            }

            tmp.list[[data.years.iter]] <- tmp.window.dt
        }

        ### Stack the windows once, after the loop, then de-duplicate
        tmp.dt <- rbindlist(tmp.list)
        setkey(tmp.dt, YEAR, GRADE, ID)
        tmp.dt <- tmp.dt[unique(tmp.dt[,.(YEAR, GRADE, ID)]), mult="last"] ### Remove duplicates created by collapsing data into YEAR_NEW taking LAST (most recent) case

        tmp.cohort.list[[paste(tmp.content_areas[1], paste(tmp.grades, collapse=""), sep="_")]] <- tmp.dt
    }
    return(rbindlist(tmp.cohort.list))
} ### END createSuperCohortData
2 changes: 1 addition & 1 deletion R/zzz.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,6 @@ function(libname, pkgname) {
### .onAttach
###
### Package attach hook: in interactive sessions, prints a single startup banner with
### the installed SGP version (formatted as major.minor-patch.build) and release date.
### libname and pkgname are supplied by R when the hook is called and are not used here.
`.onAttach` <-
function(libname, pkgname) {
    if (interactive()) {
        ### Re-join the version components with the separators ".", "-", "." (e.g. 2.2-0.1)
        tmp.version <- paste(paste0(unlist(strsplit(as.character(packageVersion("SGP")), "[.]")), c(".", "-", ".", "")), collapse="")
        packageStartupMessage(magenta$bold('SGP',tmp.version,' (10-10-2024). For help: >help("SGP") or visit sgp.io'))
    }
}
4 changes: 2 additions & 2 deletions inst/CITATION
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,12 @@ bibentry(
person(given = c("Yi"), family = "Shang")
),
year = "2024",
note = "R package version 2.2-0.0",
note = "R package version 2.2-0.1",
url = "https://sgp.io",
textVersion = paste(
"Damian W. Betebenner, Adam R. Van Iwaarden, Benjamin Domingue and Yi Shang (2024).",
"SGP: Student Growth Percentiles & Percentile Growth Trajectories.",
"(R package version 2.2-0.0)",
"(R package version 2.2-0.1)",
"URL: https://sgp.io"
)
)
8 changes: 8 additions & 0 deletions inst/NEWS
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
# SGP 2.2-0.0

## User Visible Changes:

* Alignment of scale score targets with time frame of projections
* Alignment of scale score targets with cutscores
* General bug fixes and meta-data updates to SGPstateData

# SGP 2.1-0.0

## User Visible Changes:
Expand Down
4 changes: 2 additions & 2 deletions man/SGP-package.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ growth projections to be calculated across assessment transitions by equating th
\tabular{ll}{
Package: \tab SGP\cr
Type: \tab Package\cr
Version: \tab 2.2-0.0\cr
Date: \tab 2024-10-6\cr
Version: \tab 2.2-0.1\cr
Date: \tab 2024-10-10\cr
License: \tab GPL-3\cr
LazyLoad: \tab yes\cr
}
Expand Down

0 comments on commit 3bdc641

Please sign in to comment.