sbegueria · muschellij2 · Oct 16, 2018 · Oct 16, 2018
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -0,0 +1,8 @@
+^codecov\.yml$
+^\.travis\.yml$
+^appveyor\.yml$
+^LICENSE\.md$
+^blah\.Rproj$
+^\.Rproj\.user$
+^README\.Rmd$
+^.*\.Rproj$
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,2 @@
+inst/doc
+.Rproj.user
diff --git a/.travis.yml b/.travis.yml
@@ -0,0 +1,17 @@
+language: R
+sudo: no
+cache: packages
+after_success: if [ "$TRAVIS_OS_NAME" == "linux" ]; then Rscript -e 'covr::codecov(type
+  = "all")'; fi
+warnings_are_errors: yes
+before_deploy: if [ "$TRAVIS_OS_NAME" == "osx" ]; then rm -f *.tar.gz; fi
+r_check_args: --as-cran --install-args=--build
+deploy:
+  provider: releases
+  skip_cleanup: yes
+  file_glob: yes
+  file: blah*.t*gz
+  'on':
+    tags: yes
+    repo: muschellij2/blah
+
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -0,0 +1,31 @@
+Type: Package
+Package: bibliometRics
+Title: Bibliometric Analysis of Scientific Production
+Version: 0.1.0
+Authors@R: 
+    c(
+        person(given ="Santiago", family = "Beguería", 
+        role = c("aut", "cre"),
+        email = "santiago.begueria@csic.es",
+        comment = c(ORCID = "0000-0002-3974-2947")),
+        person(given = "John",
+           family = "Muschelli",
+           role = c("aut"),
+           email = "muschellij2@gmail.com",
+           comment = c(ORCID = "0000-0001-6469-1750"))
+    )
+Description: Analyzes the production of a single author, a
+    working team, or department from the perspective of citations.
+License: GPL-3
+Imports:
+    utils,
+    graphics
+Suggests: 
+    covr,
+    knitr,
+    rmarkdown,
+    testthat
+Encoding: UTF-8
+LazyData: true
+VignetteBuilder: knitr
+RoxygenNote: 6.1.0
diff --git a/LICENSE b/LICENSE
diff --git a/NAMESPACE b/NAMESPACE
@@ -0,0 +1,15 @@
+# Generated by roxygen2: do not edit by hand
+
+export(bibliometric)
+export(biblioplot)
+export(biblioplot2)
+export(citation_rank)
+export(egghe)
+export(format_pub)
+export(hirsch)
+export(ifactor)
+export(read.baselines)
+export(read.wos)
+import(graphics)
+importFrom(tools,toTitleCase)
+importFrom(utils,read.table)
diff --git a/NEWS.md b/NEWS.md
@@ -0,0 +1,3 @@
+# bibliometRics 0.1.0
+
+* Added a `NEWS.md` file to track changes to the package.
diff --git a/R/bibliometRics.R b/R/bibliometRics.R
@@ -0,0 +1,182 @@
+
+
+
+
+# read.scopus <- function(infile) {
+
+	# # author
+	# au <- read.table(infile,sep=',',nrows=1,stringsAsFactors=FALSE)[,2]
+	# au <- substr(au,9,nchar(au))
+
+	# # read publications
+	# dat <- read.table(infile,sep=',',stringsAsFactors=FALSE,skip=7)[,c(1:7,9,12)]
+	# colnames(dat) <- c('year','title','authors','issn','journal','volume','issue','cit','cittotal')
+
+	# return()
+# }
+# bib <- read.scopus('scopus_SBP_win.csv')
+
+
+#' Calculate Bibliometrics
+#'
+#' Summarises a publication record as a single row of indicators:
+#' publication and citation counts, h- and g-index, impact factors,
+#' counts of publications reaching the top citation classes of a
+#' discipline baseline, and a weighted score built from those classes.
+#'
+#' @param bib A list with an element \code{author} (the author name) and
+#' an element \code{pubs}, which is a \code{data.frame} with columns
+#' \code{'Authors'}, \code{'Publication Year'} and
+#' \code{'Total Citations'}.  Usually from \code{\link{read.wos}}
+#' @param base Baseline citation thresholds, with one named element per
+#' discipline.  Defaults to \code{\link{read.baselines}}
+#' @param discipline Name of Discipline, must be in the names of
+#' \code{base}.  An exact name is preferred; otherwise the value is used
+#' as a regular expression that must match exactly one name.  If
+#' \code{NULL}, the first element of \code{base} is used.
+#'
+#' @return A one-row \code{data.frame} with columns \code{name},
+#' \code{ini} (first publication year), \code{years} (last minus first
+#' publication year), \code{pubs}, \code{lead} (publications whose first
+#' listed author matches the part of \code{bib$author} before the first
+#' comma), \code{pubs_year}, \code{hin}, \code{hin_year}, \code{gin},
+#' \code{gin_year}, \code{cit_tot}, \code{cit_year}, \code{cit_art}
+#' (mean citations per publication), \code{ifact2}, \code{ifact5},
+#' \code{i10}, \code{i25}, \code{i50} (publications with at least 10, 25
+#' and 50 citations), \code{cit_max}, \code{pubs09}, \code{pubs09_lead},
+#' \code{pubs099}, \code{iscore} and \code{iscore_lead}.  All values are
+#' combined with the author name in one vector before the
+#' \code{data.frame} is built, so numeric indicators are returned in
+#' their text form.  The \code{*_year} rates are divided by \code{years},
+#' which is 0 when every publication shares one year.
+#' @export
+#'
+#' @examples
+#' wos_file = system.file("sbegueria.txt", package = "bibliometRics")
+#' bib <- read.wos(wos_file)
+#' bib$author = gsub('[ñÑ]', 'N', bib$author)
+#' bib$pubs$Authors = gsub('[ñÑ]', 'N', bib$pubs$Authors)
+#'
+#' base = read.baselines()
+#' discipline = 'GEOSCIENCES'
+#' bibliometric(bib, base, 'GEOSCIENCES')
+bibliometric <-
+  function(bib,
+           base = read.baselines(),
+           discipline = NULL) {
+    # pick the baseline of the requested discipline
+    if (is.null(discipline)) {
+      w <- 1
+    } else {
+      # an exact name wins, so that a discipline whose name is contained
+      # in another one (e.g. 'CHEMISTRY' in 'BIOCHEMISTRY') stays usable
+      w <- which(names(base) == discipline)
+      if (length(w) == 0) {
+        w <- grep(discipline, names(base))
+      }
+      if (length(w) == 0) {
+        stop("discipline '", discipline, "' not found in names of 'base'",
+             call. = FALSE)
+      }
+      if (length(w) > 1) {
+        # base[[w]] with several indices would index recursively
+        stop("discipline '", discipline, "' matches several names of ",
+             "'base': ", paste(names(base)[w], collapse = ', '),
+             call. = FALSE)
+      }
+    }
+    quant <- base[[w]]
+
+    au <- bib$author
+    dat <- bib$pubs
+
+    # publications
+    pubs <- nrow(dat)
+
+    # publications as lead author: first name of each ';'-separated
+    # author list, matched against the part of the author name that
+    # precedes the first comma
+    leads <- vapply(strsplit(dat[, 'Authors'], ';'),
+                    function(a) a[1],
+                    character(1))
+    auu <- strsplit(au, ',')[[1]][1]
+    pubs_lead <- grep(auu, leads, ignore.case = TRUE)
+
+    # impact factor
+    ifact2 <- round(ifactor(bib, n = 2), 2)
+    ifact5 <- round(ifactor(bib, n = 5), 2)
+
+    # citations
+    cit_tot <- sum(dat[, 'Total Citations'])
+    cit_max <- max(dat[, 'Total Citations'])
+    cit_art <- round(mean(dat[, 'Total Citations']), 2)
+    ini <- min(dat[, 'Publication Year'])
+    span <- max(dat[, 'Publication Year']) - ini
+
+    # number of pubs with n citations
+    icit <- function(x, n = 10) {
+      length(which(x[, 'Total Citations'] >= n))
+    }
+    i10 <- icit(dat)
+    i25 <- icit(dat, 25)
+    i50 <- icit(dat, 50)
+
+    # h-index
+    hin <- hirsch(bib)
+
+    # g-index
+    gin <- egghe(bib)
+
+    # pubs >0.9, >0.99 (classes 1-4 and 1-3 of citation_rank)
+    r <- citation_rank(bib, quant = quant)
+    p09 <- sum(as.numeric(r) <= 4, na.rm = TRUE)
+    p099 <- sum(as.numeric(r) <= 3, na.rm = TRUE)
+
+    # pubs >0.9, as lead author; without any lead publication there is
+    # nothing to rank and the lead counts below are 0
+    if (length(pubs_lead) > 0) {
+      r_lead <- citation_rank(bib, pubs_lead, quant)
+    } else {
+      r_lead <- integer(0)
+    }
+    p09_lead <- sum(as.numeric(r_lead) <= 4, na.rm = TRUE)
+
+    # i-score: each publication contributes 1 / (1 - q) for the class
+    # it reaches, and 1 for the lowest class
+    scores <-
+      c(1 / (1 - 0.9999),
+        1 / (1 - 0.999),
+        1 / (1 - 0.99),
+        1 / (1 - 0.9),
+        1 / (1 - 0.8),
+        1 / (1 - 0.5),
+        1 / 1)
+    iscore <- sum(scores[as.numeric(r)], na.rm = TRUE)
+
+    # i-score, as lead author
+    isc_lead <- sum(scores[as.numeric(r_lead)], na.rm = TRUE)
+
+    # output
+    out <- as.data.frame(t(
+      c(
+        name = au,
+        ini,
+        span,
+        pubs,
+        length(pubs_lead),
+        round(pubs / span, 2),
+        hin,
+        round(hin / span, 2),
+        gin,
+        round(gin / span, 2),
+        cit_tot,
+        round(cit_tot / span, 2),
+        cit_art,
+        ifact2,
+        ifact5,
+        i10,
+        i25,
+        i50,
+        cit_max,
+        p09,
+        p09_lead,
+        p099,
+        round(iscore, 2),
+        round(isc_lead, 2)
+      )
+    ))
+    colnames(out) <-
+      c(
+        'name',
+        'ini',
+        'years',
+        'pubs',
+        'lead',
+        'pubs_year',
+        'hin',
+        'hin_year',
+        'gin',
+        'gin_year',
+        'cit_tot',
+        'cit_year',
+        'cit_art',
+        'ifact2',
+        'ifact5',
+        'i10',
+        'i25',
+        'i50',
+        'cit_max',
+        'pubs09',
+        'pubs09_lead',
+        'pubs099',
+        'iscore',
+        'iscore_lead'
+      )
+    return(out)
+  }
+#bibliometric(bib, baseline)
+
+
+
+
+
+
+
+
diff --git a/R/citation_rank.R b/R/citation_rank.R
@@ -0,0 +1,58 @@
+
+#' Calculate citation ranking (quantiles)
+#'
+#' Assigns every publication to the first citation class whose threshold
+#' its citation count reaches, using the baseline column that matches
+#' its publication year.
+#'
+#' @param x A list with an element called  \code{pubs}, which is a
+#' \code{data.frame} with columns \code{'Publication Year'} and
+#' \code{'Total Citations'}.  Usually
+#' from \code{\link{read.wos}}
+#' @param w Optional row indices of \code{x$pubs} selecting the
+#' publications to rank.  \code{NULL} (the default) ranks all of them.
+#' @param quant Baseline thresholds of one discipline: a
+#' \code{data.frame} with one column per publication year (the year must
+#' appear in the column name) whose rows follow the order of the
+#' returned classes, from \code{'>q0.9999'} downwards.  Usually one
+#' element of \code{\link{read.baselines}}
+#'
+#' @return A factor with one element per ranked publication and levels
+#' \code{'>q0.9999'}, \code{'>q0.999'}, \code{'>q0.99'}, \code{'>q0.9'},
+#' \code{'>q0.8'}, \code{'>q0.5'} and \code{'>q0'}.  A publication that
+#' reaches no threshold gets \code{'>q0'}; one whose year matches no
+#' column of \code{quant} gets \code{NA}.
+#' @export
+#'
+#' @examples
+#' wos_file = system.file("sbegueria.txt", package = "bibliometRics")
+#' bib <- read.wos(wos_file)
+#' base = read.baselines()
+#' citation_rank(bib, quant=base$GEOSCIENCES)
+#' citation_rank(bib, w = 1:5, quant=base$GEOSCIENCES)
+citation_rank <- function(x, w = NULL, quant = NULL) {
+  rank_levels <- c(
+    '>q0.9999',
+    '>q0.999',
+    '>q0.99',
+    '>q0.9',
+    '>q0.8',
+    '>q0.5',
+    '>q0'
+  )
+
+  pubs <- x$pubs
+  if (!is.null(w)) {
+    pubs <- pubs[w, , drop = FALSE]
+  }
+  years <- pubs[, 'Publication Year']
+  cites <- pubs[, 'Total Citations']
+
+  # one slot per publication, so an empty selection gives an empty
+  # factor instead of failing on mismatched column lengths
+  ranks <- rep(NA_character_, length(years))
+  for (i in seq_along(years)) {
+    if (is.na(years[i])) {
+      next
+    }
+    # baseline column of the publication year; no match leaves NA
+    col <- grep(years[i], names(quant))
+    if (length(col) == 0) {
+      next
+    }
+    # first row whose threshold the citation count reaches
+    hit <- which(cites[i] - quant[, col] >= 0)[1]
+    if (is.na(hit)) {
+      ranks[i] <- rank_levels[7]
+    } else {
+      ranks[i] <- rank_levels[hit]
+    }
+  }
+  factor(ranks, levels = rank_levels)
+}
+#citation_rank(bib, c(9,10), quant)
+#citation_rank(bib, quant)
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		# bibliometRics 0.1.0

		* Added a `NEWS.md` file to track changes to the package.