-
Notifications
You must be signed in to change notification settings - Fork 20
/
cols
executable file
·34 lines (30 loc) · 1.05 KB
/
cols
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
#!/usr/bin/env bash
# cols: apply a command to a subset of the columns and merge back with the remaining columns.
#
# Assumes that the input data is comma-delimited and that it has a header.
# Depends on csvcut, which is part of csvkit: http://csvkit.readthedocs.org
#
# Example usage 1: reverse sort column 'a'
# $ printf 'a,b\n1,2\n3,4\n5,6\n' | cols -c a body sort -nr
#
# Example usage 2: apply PCA (using tapkee) to all numerical features (-C selects all but the specified columns) of the Iris data set:
# $ < iris.csv cols -C species body tapkee --method pca | header -r x,y,species
#
# See also: header and body (https://github.com/jeroenjanssens/command-line-tools-for-data-science)
#
# Author: http://jeroenjanssens.com
# Positional parameters: <csvcut flag> <column spec> <command ...>
#   ARG      first argument; passed to csvcut as its column-selection flag
#   COLUMNS  second argument; the column specification passed to csvcut
#   EXPR     every remaining argument joined by single spaces; eval'ed later
#   ARG_INV  ARG with each 'c' turned into 'C' and each 'C' into 'c', used to
#            select the complementary set of columns
# NOTE(review): COLUMNS is also a variable the bash manual lists as set by the
# shell itself (terminal width, see `checkwinsize`) -- confirm it cannot be
# overwritten between this assignment and its use.
ARG="${1}"
shift
COLUMNS="${1}"
shift
EXPR="${*}"
ARG_INV="$(printf '%s\n' "${ARG}" | tr 'cC' 'Cc')"
#######################################
# Delete the temporary file whose path is stored in OTHER_COLUMNS.
# Globals:   OTHER_COLUMNS (read)
# Arguments: none
# Returns:   0 when there is nothing to delete, otherwise the status of rm
#######################################
finish() {
  # The trap below is installed before OTHER_COLUMNS is assigned, so the
  # variable may still be unset or empty when this runs; nothing to do then.
  if [ -n "${OTHER_COLUMNS:-}" ]; then
    # Quoted so that a path containing whitespace or glob characters is
    # removed as a single file; "--" keeps a leading "-" from being read as
    # an option.
    rm -f -- "$OTHER_COLUMNS"
  fi
}
# Run the cleanup on every exit path of the script.
trap finish EXIT
# Fall back to /tmp when TMPDIR is unset or empty.
if [ -z "${TMPDIR:-}" ]; then
  TMPDIR=/tmp
fi

# Scratch file that receives a complete copy of standard input. The path is
# quoted so a TMPDIR containing whitespace still yields a single argument.
OTHER_COLUMNS=$(mktemp "${TMPDIR}/cols-XXXXXXXX") || {
  echo "cols: could not create a temporary file in ${TMPDIR}" >&2
  exit 1
}

# Spool all of stdin to the scratch file BEFORE starting the pipeline. Both
# csvcut invocations below read that file; if the second one ran while the
# input was still being written it could reach end-of-file early and emit
# fewer rows than the first.
cat > "$OTHER_COLUMNS" || exit 1

# 1. csvcut selects the columns chosen by ARG/COLUMNS.
# 2. EXPR (the command given on the command line) transforms them. It is
#    eval'ed on purpose, so it must never be built from untrusted input.
#    It is quoted so the shell does not word-split or glob-expand the command
#    string before eval parses it.
# 3. paste joins the result, comma-separated, with the columns selected by
#    the inverted flag ARG_INV from the same scratch file.
csvcut "$ARG" "$COLUMNS" "$OTHER_COLUMNS" \
  | eval "${EXPR}" \
  | paste -d, - <(csvcut "${ARG_INV}" "$COLUMNS" "$OTHER_COLUMNS")