From 501cd665e78b9036046df7af995b2bec80484b36 Mon Sep 17 00:00:00 2001 From: Paul Wessel Date: Mon, 21 Aug 2023 14:02:21 +0200 Subject: [PATCH] Augment gmtspatial with -N+i (#7747) * Augment gmtspatial with -N+i The -N+i setting will treat each input point individually and determine which polygon it falls into (if any). If a polygon ID is found then we write that point's full record out and add a final column with the ID. * Check that -N+i has no other modifiers --- doc/rst/source/gmtspatial.rst | 13 ++++-- src/gmtspatial.c | 79 ++++++++++++++++++++++++++++------- 2 files changed, 73 insertions(+), 19 deletions(-) diff --git a/doc/rst/source/gmtspatial.rst b/doc/rst/source/gmtspatial.rst index 6adf9977d94..735085ab22a 100644 --- a/doc/rst/source/gmtspatial.rst +++ b/doc/rst/source/gmtspatial.rst @@ -20,7 +20,7 @@ Synopsis [ |-F|\ [**l**] ] [ |-I|\ [**e**\|\ **i**] ] [ |-L|\ *dist*\ /*noise*\ /*offset* ] -[ |-N|\ *pfile*\ [**+a**][**+p**\ *start*][**+r**][**+z**] ] +[ |-N|\ *pfile*\ [**+a**][**+i**][**+p**\ *start*][**+r**][**+z**] ] [ |-Q|\ [*unit*][**+c**\ *min*\ [/*max*]][**+h**][**+l**][**+p**][**+s**\ [**a**\|\ **d**]] ] [ |SYN_OPT-R| ] [ |-S|\ **b**\ *width*\|\ **h**\|\ **i**\|\ **u**\|\ **s**\|\ **j** ] @@ -143,7 +143,7 @@ Optional Arguments .. _-N: -**-N**\ *pfile*\ [**+a**][**+p**\ *start*][**+r**][**+z**] +**-N**\ *pfile*\ [**+a**][**+i**][**+p**\ *start*][**+r**][**+z**] Determine if one (or all, with **+a**) points of each feature in the input data are inside any of the polygons given in the *pfile*. If inside, then report which polygon it is; the polygon ID is either @@ -156,7 +156,9 @@ Optional Arguments polygon contains a feature or **+z** to have the IDs added as an extra data column on output. Segments that fail to be inside a polygon are not written out. If more than one polygon contains the - same segment we skip the second (and further) scenarios. + same segment we skip the second (and further) scenarios. Alternatively, + just use modifier **+i** and we will determine the polygon ID for + every **i**\ ndividual input point and add it as the last column. .. _-Q: @@ -282,6 +284,11 @@ run:: gmt spatial lines.txt -F > polygons.txt +To append the polygon ID of every individual point in cloud.txt that is inside the +polygons in the file poly.txt and write that ID as the last column per output row, run:: + + gmt spatial cloud.txt -Npoly.txt+i > cloud_IDs.txt + To compute the area of all geographic polygons in the multisegment file polygons.txt, run:: diff --git a/src/gmtspatial.c b/src/gmtspatial.c index 9d50b90bcd0..5ac3b615bd2 100644 --- a/src/gmtspatial.c +++ b/src/gmtspatial.c @@ -41,6 +41,8 @@ #define GMT_N_MODE_NOTSET 0 #define GMT_N_MODE_REPORT 1 #define GMT_N_MODE_ADD_ID 2 +#define GMT_N_MODE_CLOUD 3 + #define GMT_W 3 #define POL_UNION 1 @@ -764,7 +766,7 @@ static int usage (struct GMTAPI_CTRL *API, int level) { const char *name = gmt_show_name_and_purpose (API, THIS_MODULE_LIB, THIS_MODULE_CLASSIC_NAME, THIS_MODULE_PURPOSE); if (level == GMT_MODULE_PURPOSE) return (GMT_NOERROR); GMT_Usage (API, 0, "usage: %s [] [-A[a]] [-C] [-D[+a][+c|C][+d][+f][+p][+s]] [-E+n|p] " - "[-F[l]] [-I[i|e]] [-L%s//] [-N[+a][+p][+r][+z]] [-Q[][+c[/]][+h][+l][+p][+s[a|d]]] [%s] " + "[-F[l]] [-I[i|e]] [-L%s//] [-N[+a][+i][+p][+r][+z]] [-Q[][+c[/]][+h][+l][+p][+s[a|d]]] [%s] " "[-Sb|h|i|j|s|u] [-T[]] [-W[][+f|l]] [%s] [%s] [%s] [%s] [%s] [%s] [%s] [%s] [%s] [%s] [%s] [%s] [%s] [%s] [%s]\n", name, GMT_DIST_OPT, GMT_Rgeo_OPT, GMT_V_OPT, GMT_a_OPT, GMT_b_OPT, GMT_d_OPT, GMT_e_OPT, GMT_f_OPT, GMT_g_OPT, GMT_h_OPT, GMT_i_OPT, GMT_j_OPT, GMT_o_OPT, GMT_q_OPT, GMT_s_OPT, GMT_colon_OPT, GMT_PAR_OPT); @@ -808,7 +810,7 @@ static int usage (struct GMTAPI_CTRL *API, int level) { GMT_Usage (API, 1, "\n-L%s//", GMT_DIST_OPT); GMT_Usage (API, -2, "Remove tile Lines. These are superfluous lines along the -R border. " "Append (in m) [0], coordinate noise [1e-10], and max offset from gridline [1e-10]."); - GMT_Usage (API, 1, "\n-N[+a][+p][+r][+z]"); + GMT_Usage (API, 1, "\n-N[+a][+i][+p][+r][+z]"); GMT_Usage (API, -2, "Determine ID of polygon (in ) enclosing each input feature. The ID is set as follows:"); GMT_Usage (API, 3, "%s If OGR/GMT polygons, get polygon ID via -a for Z column, else", GMT_LINE_BULLET); GMT_Usage (API, 3, "%s Interpret segment labels (-Z) as polygon IDs, else", GMT_LINE_BULLET); @@ -816,6 +818,7 @@ static int usage (struct GMTAPI_CTRL *API, int level) { GMT_Usage (API, 3, "%s Append +p to set origin for auto-incrementing polygon IDs [0].", GMT_LINE_BULLET); GMT_Usage (API, -2, "Additional modifiers are available:"); GMT_Usage (API, 3, "+a All points of a feature (line, polygon) must be inside the ID polygon [any point]."); + GMT_Usage (API, 3, "+i Determine ID polygon of all individual input points, add ID to record and output."); GMT_Usage (API, 3, "+r No table output; just report which polygon a feature is inside."); GMT_Usage (API, 3, "+z Append the ID as a new output data column [Default adds -Z to segment header]."); GMT_Usage (API, 1, "\n-Q[][+c[/]][+h][+l][+p][+s[a|d]]"); @@ -866,9 +869,11 @@ static int parse (struct GMT_CTRL *GMT, struct GMTSPATIAL_CTRL *Ctrl, struct GMT * returned when registering these sources/destinations with the API. */ - unsigned int pos, n_errors = 0; + bool got_i = false; + unsigned int pos, n_errors = 0, got_n = 0; int n; - char txt_a[GMT_LEN64] = {""}, txt_b[GMT_LEN64] = {""}, txt_c[GMT_LEN64] = {""}, p[GMT_LEN256] = {""}, *s = NULL, *c = NULL; + char txt_a[GMT_LEN64] = {""}, txt_b[GMT_LEN64] = {""}, txt_c[GMT_LEN64] = {""}, p[GMT_LEN256] = {""}; + char *s = NULL, *c = NULL, *q = NULL; struct GMT_OPTION *opt = NULL; struct GMTAPI_CTRL *API = GMT->parent; @@ -976,25 +981,35 @@ static int parse (struct GMT_CTRL *GMT, struct GMTSPATIAL_CTRL *Ctrl, struct GMT break; case 'N': /* Determine containing polygons for features */ n_errors += gmt_M_repeated_module_option (API, Ctrl->N.active); + if (gmt_validate_modifiers (GMT, opt->arg, 'N', "aiprz", GMT_MSG_ERROR)) n_errors++; if ((s = strchr (opt->arg, '+')) == NULL) { /* No modifiers */ Ctrl->N.file = strdup (opt->arg); continue; } - s[0] = '\0'; Ctrl->N.file = strdup (opt->arg); s[0] = '+'; - pos = 0; - while (gmt_strtok (s, "+", &pos, p)) { - switch (p[0]) { + else { /* Hide modifiers until we duplicate the polygon name */ + s[0] = '\0'; + Ctrl->N.file = strdup (opt->arg); + s[0] = '+'; + } + q = gmt_first_modifier (GMT, opt->arg, "aiprz"); + pos = 0; txt_a[0] = 0; + while (gmt_getmodopt (GMT, 'N', q, "aiprz", &pos, txt_a, &n_errors) && n_errors == 0) { + switch (txt_a[0]) { case 'a': /* All points must be inside polygon */ - Ctrl->N.all = true; + Ctrl->N.all = true; got_n++; + break; + case 'i': /* add polygon ID for individual input points */ + Ctrl->N.mode = GMT_N_MODE_CLOUD; + got_i = true; got_n++; break; case 'p': /* Set start of running numbers [0] */ - Ctrl->N.ID = (p[1]) ? atoi (&p[1]) : 1; + Ctrl->N.ID = (txt_a[1]) ? atoi (&txt_a[1]) : 1; got_n++; break; case 'r': /* Just give a report */ - Ctrl->N.mode = GMT_N_MODE_REPORT; + Ctrl->N.mode = GMT_N_MODE_REPORT; got_n++; break; case 'z': /* Add polygon ID as final column */ - Ctrl->N.mode = GMT_N_MODE_ADD_ID; + Ctrl->N.mode = GMT_N_MODE_ADD_ID; got_n++; break; } } @@ -1148,6 +1163,7 @@ static int parse (struct GMT_CTRL *GMT, struct GMTSPATIAL_CTRL *Ctrl, struct GMT n_errors += gmt_M_check_condition (GMT, Ctrl->L.active && Ctrl->L.s_cutoff < 0.0, "Option -L requires a positive cutoff in meters\n"); n_errors += gmt_M_check_condition (GMT, Ctrl->D.active && Ctrl->D.file && gmt_access (GMT, Ctrl->D.file, R_OK), "Option -D: Cannot read file %s!\n", Ctrl->D.file); n_errors += gmt_M_check_condition (GMT, Ctrl->T.active && Ctrl->T.file && gmt_access (GMT, Ctrl->T.file, R_OK), "Option -T: Cannot read file %s!\n", Ctrl->T.file); + n_errors += gmt_M_check_condition (GMT, got_i && got_n > 1, "Option -N: Cannot combine +i with other modifiers\n"); return (n_errors ? GMT_PARSE_ERROR : GMT_NOERROR); } @@ -1967,10 +1983,12 @@ EXTERN_MSC int GMT_gmtspatial (void *V_API, int mode, void *args) { if (Ctrl->N.active) { /* Report the polygons that contain the given features */ bool check_next; - uint64_t tbl, row, first, last, n, p, np, seg, seg2, n_inside; + uint64_t tbl, row, col, n, p, np, seg, seg2, n_inside; + int64_t kk; unsigned int *count = NULL, nmode; int ID = -1; - char seg_label[GMT_LEN64] = {""}, record[GMT_BUFSIZ] = {""}, *kind[2] = {"%%d points", "All points"}; + char seg_label[GMT_LEN64] = {""}, record[GMT_BUFSIZ] = {""}, *used = NULL; + double *out = NULL; struct GMT_DATASET *C = NULL; struct GMT_DATATABLE *T = NULL; struct GMT_DATASEGMENT *S = NULL, *S2 = NULL; @@ -1985,7 +2003,18 @@ EXTERN_MSC int GMT_gmtspatial (void *V_API, int mode, void *args) { Return (GMT_DIM_TOO_SMALL); } gmt_reenable_bghio_opts (GMT); /* Recover settings provided by user (if -b -g -h -i were used at all) */ - nmode = (Ctrl->N.mode == GMT_N_MODE_REPORT) ? GMT_IS_NONE : GMT_IS_LINE; + if (Ctrl->N.mode == GMT_N_MODE_CLOUD) { + nmode = GMT_IS_POINT; + used = gmt_M_memory (GMT, NULL, D->n_records, char); + out = gmt_M_memory (GMT, NULL, D->n_columns + 1, double); + Out.data = out; Out.text = NULL; + if ((error = GMT_Set_Columns (GMT->parent, GMT_OUT, D->n_columns + 1, GMT_COL_FIX_NO_TEXT)) != GMT_NOERROR) + Return (GMT_RUNTIME_ERROR); + } + else { + nmode = (Ctrl->N.mode == GMT_N_MODE_REPORT) ? GMT_IS_NONE : GMT_IS_LINE; + Out.text = record; + } if (GMT_Init_IO (API, GMT_IS_DATASET, nmode, GMT_OUT, GMT_ADD_DEFAULT, 0, options) != GMT_NOERROR) { /* Registers default output destination, unless already set */ Return (API->error); } @@ -2000,7 +2029,6 @@ EXTERN_MSC int GMT_gmtspatial (void *V_API, int mode, void *args) { T = C->table[0]; /* Only one input file so only one table */ count = gmt_M_memory (GMT, NULL, D->n_segments, unsigned int); - Out.text = record; for (seg2 = 0; seg2 < T->n_segments; seg2++) { /* For all polygons */ S2 = T->segment[seg2]; SH = gmt_get_DS_hidden (S2); @@ -2019,10 +2047,25 @@ EXTERN_MSC int GMT_gmtspatial (void *V_API, int mode, void *args) { else /* Increment running polygon ID */ ID++; + kk = -1; /* Start at first point when first incremented (for -N+c) */ for (tbl = p = 0; tbl < D->n_tables; tbl++) { for (seg = 0; seg < D->table[tbl]->n_segments; seg++, p++) { S = D->table[tbl]->segment[seg]; if (S->n_rows == 0) continue; + if (Ctrl->N.mode == GMT_N_MODE_CLOUD) { /* Find polygon containing this point */ + for (row = 0; row < S->n_rows; row++) { /* Check all points if they are inside */ + kk++; + if (used[kk]) continue; /* Already found it is inside another polygon */ + if (gmt_inonout (GMT, S->data[GMT_X][row], S->data[GMT_Y][row], S2) == GMT_INSIDE) { + used[kk] = 1; /* Flag it is found */ + for (col = 0; col < S->n_columns; col++) out[col] = S->data[col][row]; /* Copy over this row */ + out[col] = (double)ID; + GMT_Put_Record (API, GMT_WRITE_DATA, &Out); + } + } + continue; /* All done for this point & polygon combination */ + } + for (row = n = 0, check_next = true; check_next && row < S->n_rows; row++) { /* Check one or all points if they are inside */ if (Ctrl->N.all && n < row) /* At least one point has been found outside so with +a we stop checking for more */ check_next = false; @@ -2090,6 +2133,10 @@ EXTERN_MSC int GMT_gmtspatial (void *V_API, int mode, void *args) { Return (API->error); } gmt_M_free (GMT, count); + if (Ctrl->N.mode == GMT_N_MODE_CLOUD) { + gmt_M_free (GMT, used); + gmt_M_free (GMT, out); + } Return (GMT_NOERROR); } if (Ctrl->S.active && Ctrl->S.mode == POL_SPLIT) { /* Split polygons at dateline */