Skip to content

Commit

Permalink
added projects (research and extension) in output
Browse files Browse the repository at this point in the history
  • Loading branch information
msperlin committed May 4, 2024
1 parent 252aeee commit e84174c
Show file tree
Hide file tree
Showing 6 changed files with 61 additions and 7 deletions.
9 changes: 6 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: GetLattesData
Title: Reading Bibliometric Data from Lattes Platform
Version: 1.4.4
Version: 1.5
Authors@R: person("Marcelo", "Perlin", email = "marceloperlin@gmail.com", role = c("aut", "cre"))
Description: A simple API for downloading and reading xml data directly from Lattes <http://lattes.cnpq.br/>.
Depends:
Expand All @@ -10,10 +10,13 @@ License: GPL-2
Encoding: UTF-8
BugReports: https://github.com/msperlin/GetLattesData/issues
URL: https://github.com/msperlin/GetLattesData/
RoxygenNote: 7.2.3
RoxygenNote: 7.3.1
Suggests: knitr,
rmarkdown,
testthat,
ggplot2,
readxl
readxl,
purrr,
xml2,
tibble
VignetteBuilder: knitr
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
## Version 1.5 (2024-05-04)

- added research projects to the output (new `tprojects` element in the returned list)

## Version 1.4.4 (2023-11-27)

- added date of last update
Expand Down
9 changes: 8 additions & 1 deletion R/gld_get_lattes_data_from_zip.R
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ gld_get_lattes_data_from_zip <- function(zip.files,
tbooks <- do.call(args = lapply(my.l, function(x) x$tbooks), what = dplyr::bind_rows)
tconferences <- do.call(args = lapply(my.l, function(x) x$tconferences), what = dplyr::bind_rows)
t_df_atprof <- do.call(args = lapply(my.l, function(x) x$t_df_atprof), what = dplyr::bind_rows)
tprojects <- do.call(args = lapply(my.l, function(x) x$tprojects), what = dplyr::bind_rows)
})

# do Qualis
Expand Down Expand Up @@ -90,6 +91,7 @@ gld_get_lattes_data_from_zip <- function(zip.files,
USE.NAMES=F))

if (!all(is.na(idx))) {

tpublic.published$SJR <- df.sjr$SJR[idx]
tpublic.published$H.SJR <- df.sjr$`H index`[idx]

Expand Down Expand Up @@ -152,6 +154,8 @@ gld_get_lattes_data_from_zip <- function(zip.files,
tpublic.accepted$order.aut <- as.numeric(tpublic.accepted$order.aut)
tpublic.accepted$n.authors <- as.numeric(tpublic.accepted$n.authors)

tprojects$`ANO-INICIO` <- as.numeric(tprojects$`ANO-INICIO`)
tprojects$`ANO-FIM` <- as.numeric(tprojects$`ANO-FIM`)

})

Expand All @@ -175,14 +179,17 @@ gld_get_lattes_data_from_zip <- function(zip.files,

t_df_atprof <- dplyr::as_tibble(lapply(t_df_atprof, my.enc.fct))

tprojects <- dplyr::as_tibble(lapply(tprojects, my.enc.fct))

# return data
l.out <- list(tpesq = tpesq,
tpublic.published = tpublic.published,
tpublic.accepted = tpublic.accepted,
tsupervisions = tsupervisions,
tbooks = tbooks,
tconferences = tconferences,
t_atprof = t_df_atprof)
t_atprof = t_df_atprof,
tprojects = tprojects)

return(l.out)

Expand Down
42 changes: 40 additions & 2 deletions R/gld_read_lattes_zip.R
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,6 @@ gld_read_zip <- function(zip.in){

AT_PROF <- do.call(c, list(my.l$`DADOS-GERAIS`$`ATUACOES-PROFISSIONAIS`))


df_atprof <- dplyr::bind_rows(
lapply(AT_PROF, parse_at_prof)
)
Expand All @@ -321,6 +320,44 @@ gld_read_zip <- function(zip.in){

cat(paste0('\n\tFound ', nrow(df_atprof), ' employment registries'))

# research projects

my_xml <- xml2::read_xml(zip.in)

all_proj <- xml2::xml_find_all(my_xml, ".//PROJETO-DE-PESQUISA")

if (length(all_proj) != 0) {

l_info <- all_proj |>
xml2::xml_attrs()

df_projects <- purrr::map_df(
l_info,
function(x) tibble::as_tibble(t(x))
)

df_projects$name <- data.tpesq$name
df_projects$id.file <- basename(zip.in)

# all_names <- c("SEQUENCIA-PROJETO", "ANO-INICIO", "ANO-FIM", "NOME-DO-PROJETO",
# "SITUACAO", "NATUREZA", "NUMERO-GRADUACAO", "NUMERO-ESPECIALIZACAO",
# "NUMERO-MESTRADO-ACADEMICO", "NUMERO-MESTRADO-PROF", "NUMERO-DOUTORADO",
# "DESCRICAO-DO-PROJETO", "IDENTIFICADOR-PROJETO", "DESCRICAO-DO-PROJETO-INGLES",
# "NOME-DO-PROJETO-INGLES", "FLAG-POTENCIAL-INOVACAO", "NOME-COORDENADOR-CERTIFICACAO",
# "DATA-CERTIFICACAO", "NUMERO_TECNICO_NIVEL_MEDIO")
to_keep <- c("SEQUENCIA-PROJETO", "ANO-INICIO", "ANO-FIM", "NOME-DO-PROJETO",
"SITUACAO", "NATUREZA", "NUMERO-GRADUACAO", "NUMERO-ESPECIALIZACAO",
"NUMERO-MESTRADO-ACADEMICO", "NUMERO-MESTRADO-PROF", "NUMERO-DOUTORADO",
"IDENTIFICADOR-PROJETO", "FLAG-POTENCIAL-INOVACAO", "NOME-COORDENADOR-CERTIFICACAO",
"DATA-CERTIFICACAO", "NUMERO_TECNICO_NIVEL_MEDIO")

df_projects <- df_projects[, to_keep]

} else {
df_projects <- dplyr::tibble()
}

cat(paste0('\n\tFound ', nrow(df_projects), ' projects'))

# output
my.l <- list(tpesq = data.tpesq,
Expand All @@ -329,7 +366,8 @@ gld_read_zip <- function(zip.in){
tsupervisions = data.supervisions,
tbooks = data.books,
tconferences = data.conferences,
t_df_atprof = df_atprof)
t_df_atprof = df_atprof,
tprojects = df_projects)

return(my.l)

Expand Down
Binary file modified inst/extdata/3262699324398819.zip
Binary file not shown.
4 changes: 3 additions & 1 deletion vignettes/gld_vignette-ReadLattes.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ f.in <- c(system.file('extdata/3262699324398819.zip', package = 'GetLattesData')
field.qualis = 'ADMINISTRAÇÃO PÚBLICA E DE EMPRESAS, CIÊNCIAS CONTÁBEIS E TURISMO'

# get data
l.out <- gld_get_lattes_data_from_zip(f.in,
l.out <- gld_get_lattes_data_from_zip(f.in,
field.qualis = field.qualis )


Expand Down Expand Up @@ -50,3 +50,5 @@ my.tab <- tpublic.published %>%

knitr::kable(my.tab)

tprojects <- l.out$tprojects
dplyr::glimpse(tprojects)

0 comments on commit e84174c

Please sign in to comment.