diff --git a/DESCRIPTION b/DESCRIPTION index 79d6dc3..741b046 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: GetLattesData Title: Reading Bibliometric Data from Lattes Platform -Version: 1.4.4 +Version: 1.5 Authors@R: person("Marcelo", "Perlin", email = "marceloperlin@gmail.com", role = c("aut", "cre")) Description: A simple API for downloading and reading xml data directly from Lattes . Depends: @@ -10,10 +10,13 @@ License: GPL-2 Encoding: UTF-8 BugReports: https://github.com/msperlin/GetLattesData/issues URL: https://github.com/msperlin/GetLattesData/ -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.1 Suggests: knitr, rmarkdown, testthat, ggplot2, - readxl + readxl, + purrr, + xml2, + tibble VignetteBuilder: knitr diff --git a/NEWS.md b/NEWS.md index eac0497..84b01eb 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +## Version 1.5 (2024-05-04) + +- added projects in output + ## Version 1.4.4 (2023-11-27) - added date of last update diff --git a/R/gld_get_lattes_data_from_zip.R b/R/gld_get_lattes_data_from_zip.R index 43e94f2..2972790 100644 --- a/R/gld_get_lattes_data_from_zip.R +++ b/R/gld_get_lattes_data_from_zip.R @@ -58,6 +58,7 @@ gld_get_lattes_data_from_zip <- function(zip.files, tbooks <- do.call(args = lapply(my.l, function(x) x$tbooks), what = dplyr::bind_rows) tconferences <- do.call(args = lapply(my.l, function(x) x$tconferences), what = dplyr::bind_rows) t_df_atprof <- do.call(args = lapply(my.l, function(x) x$t_df_atprof), what = dplyr::bind_rows) + tprojects <- do.call(args = lapply(my.l, function(x) x$tprojects), what = dplyr::bind_rows) }) # do Qualis @@ -90,6 +91,7 @@ gld_get_lattes_data_from_zip <- function(zip.files, USE.NAMES=F)) if (!all(is.na(idx))) { + tpublic.published$SJR <- df.sjr$SJR[idx] tpublic.published$H.SJR <- df.sjr$`H index`[idx] @@ -152,6 +154,8 @@ gld_get_lattes_data_from_zip <- function(zip.files, tpublic.accepted$order.aut <- as.numeric(tpublic.accepted$order.aut) tpublic.accepted$n.authors <- 
as.numeric(tpublic.accepted$n.authors) + tprojects$`ANO-INICIO` <- as.numeric(tprojects$`ANO-INICIO`) + tprojects$`ANO-FIM` <- as.numeric(tprojects$`ANO-FIM`) }) @@ -175,6 +179,8 @@ gld_get_lattes_data_from_zip <- function(zip.files, t_df_atprof <- dplyr::as_tibble(lapply(t_df_atprof, my.enc.fct)) + tprojects <- dplyr::as_tibble(lapply(tprojects, my.enc.fct)) + # return data l.out <- list(tpesq = tpesq, tpublic.published = tpublic.published, @@ -182,7 +188,8 @@ gld_get_lattes_data_from_zip <- function(zip.files, tsupervisions = tsupervisions, tbooks = tbooks, tconferences = tconferences, - t_atprof = t_df_atprof) + t_atprof = t_df_atprof, + tprojects = tprojects) return(l.out) diff --git a/R/gld_read_lattes_zip.R b/R/gld_read_lattes_zip.R index b9d7cf7..6f43f7f 100644 --- a/R/gld_read_lattes_zip.R +++ b/R/gld_read_lattes_zip.R @@ -307,7 +307,6 @@ gld_read_zip <- function(zip.in){ AT_PROF <- do.call(c, list(my.l$`DADOS-GERAIS`$`ATUACOES-PROFISSIONAIS`)) - df_atprof <- dplyr::bind_rows( lapply(AT_PROF, parse_at_prof) ) @@ -321,6 +320,44 @@ gld_read_zip <- function(zip.in){ cat(paste0('\n\tFound ', nrow(df_atprof), ' employment registries')) + # research projects (NOTE(review): `name` and `id.file` assigned below are not listed in `to_keep`, so the later column subset drops them; confirm whether that is intended) + + my_xml <- xml2::read_xml(zip.in) + + all_proj <- xml2::xml_find_all(my_xml, ".//PROJETO-DE-PESQUISA") + + if (length(all_proj) != 0) { + + l_info <- all_proj |> + xml2::xml_attrs() + + df_projects <- purrr::map_df( + l_info, + function(x) tibble::as_tibble(t(x)) + ) + + df_projects$name <- data.tpesq$name + df_projects$id.file <- basename(zip.in) + + # all_names <- c("SEQUENCIA-PROJETO", "ANO-INICIO", "ANO-FIM", "NOME-DO-PROJETO", + # "SITUACAO", "NATUREZA", "NUMERO-GRADUACAO", "NUMERO-ESPECIALIZACAO", + # "NUMERO-MESTRADO-ACADEMICO", "NUMERO-MESTRADO-PROF", "NUMERO-DOUTORADO", + # "DESCRICAO-DO-PROJETO", "IDENTIFICADOR-PROJETO", "DESCRICAO-DO-PROJETO-INGLES", + # "NOME-DO-PROJETO-INGLES", "FLAG-POTENCIAL-INOVACAO", "NOME-COORDENADOR-CERTIFICACAO", + # "DATA-CERTIFICACAO", "NUMERO_TECNICO_NIVEL_MEDIO") + 
to_keep <- c("SEQUENCIA-PROJETO", "ANO-INICIO", "ANO-FIM", "NOME-DO-PROJETO", + "SITUACAO", "NATUREZA", "NUMERO-GRADUACAO", "NUMERO-ESPECIALIZACAO", + "NUMERO-MESTRADO-ACADEMICO", "NUMERO-MESTRADO-PROF", "NUMERO-DOUTORADO", + "IDENTIFICADOR-PROJETO", "FLAG-POTENCIAL-INOVACAO", "NOME-COORDENADOR-CERTIFICACAO", + "DATA-CERTIFICACAO", "NUMERO_TECNICO_NIVEL_MEDIO") + + df_projects <- df_projects[, to_keep] + + } else { + df_projects <- dplyr::tibble() + } + + cat(paste0('\n\tFound ', nrow(df_projects), ' projects')) # output my.l <- list(tpesq = data.tpesq, @@ -329,7 +366,8 @@ gld_read_zip <- function(zip.in){ tsupervisions = data.supervisions, tbooks = data.books, tconferences = data.conferences, - t_df_atprof = df_atprof) + t_df_atprof = df_atprof, + tprojects = df_projects) return(my.l) diff --git a/inst/extdata/3262699324398819.zip b/inst/extdata/3262699324398819.zip index 1c6e3c0..c9fd5e3 100644 Binary files a/inst/extdata/3262699324398819.zip and b/inst/extdata/3262699324398819.zip differ diff --git a/vignettes/gld_vignette-ReadLattes.R b/vignettes/gld_vignette-ReadLattes.R index a780d67..06082f7 100644 --- a/vignettes/gld_vignette-ReadLattes.R +++ b/vignettes/gld_vignette-ReadLattes.R @@ -12,7 +12,7 @@ f.in <- c(system.file('extdata/3262699324398819.zip', package = 'GetLattesData') field.qualis = 'ADMINISTRAÇÃO PÚBLICA E DE EMPRESAS, CIÊNCIAS CONTÁBEIS E TURISMO' # get data -l.out <- gld_get_lattes_data_from_zip(f.in, +l.out <- gld_get_lattes_data_from_zip(f.in, field.qualis = field.qualis ) @@ -50,3 +50,5 @@ my.tab <- tpublic.published %>% knitr::kable(my.tab) +tprojects <- l.out$tprojects +dplyr::glimpse(tprojects)