Skip to content

Commit

Permalink
csvparser: add filterx-func-format-csv function
Browse files Browse the repository at this point in the history
Signed-off-by: shifter <shifter@axoflow.com>
  • Loading branch information
bshifter committed May 31, 2024
1 parent 26ea041 commit 424d765
Show file tree
Hide file tree
Showing 5 changed files with 302 additions and 1 deletion.
2 changes: 2 additions & 0 deletions modules/csvparser/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ set(CSVPARSER_SOURCES
csvparser-plugin.c
filterx-func-parse-csv.h
filterx-func-parse-csv.c
filterx-func-format-csv.h
filterx-func-format-csv.c
)

add_module(
Expand Down
4 changes: 3 additions & 1 deletion modules/csvparser/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@ modules_csvparser_libcsvparser_la_SOURCES = \
modules/csvparser/csvparser-parser.h \
modules/csvparser/csvparser-plugin.c \
modules/csvparser/filterx-func-parse-csv.h \
modules/csvparser/filterx-func-parse-csv.c
modules/csvparser/filterx-func-parse-csv.c \
modules/csvparser/filterx-func-format-csv.h \
modules/csvparser/filterx-func-format-csv.c

modules_csvparser_libcsvparser_la_CPPFLAGS = \
$(AM_CPPFLAGS) \
Expand Down
6 changes: 6 additions & 0 deletions modules/csvparser/csvparser-plugin.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "plugin.h"
#include "plugin-types.h"
#include "filterx-func-parse-csv.h"
#include "filterx-func-format-csv.h"

extern CfgParser csvparser_parser;

Expand All @@ -41,6 +42,11 @@ static Plugin csvparser_plugins[] =
.name = "parse_csv",
.construct = filterx_function_construct_parse_csv,
},
{
.type = LL_CONTEXT_FILTERX_FUNC,
.name = "format_csv",
.construct = filterx_function_construct_format_csv,
},
};

gboolean
Expand Down
255 changes: 255 additions & 0 deletions modules/csvparser/filterx-func-format-csv.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,255 @@
/*
* Copyright (c) 2024 shifter
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published
* by the Free Software Foundation, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* As an additional exemption you are allowed to compile & link against the
* OpenSSL libraries as published by the OpenSSL project. See the file
* COPYING for details.
*
*/

#include "filterx-func-format-csv.h"
#include "filterx/expr-literal.h"
#include "filterx/object-string.h"
#include "filterx/object-null.h"
#include "filterx/object-dict-interface.h"
#include "filterx/object-list-interface.h"
#include "filterx/filterx-eval.h"

#include "scratch-buffers.h"
#include "utf8utils.h"

/* Usage hint appended to the error messages raised by format_csv(). */
#define FILTERX_FUNC_FORMAT_CSV_USAGE "Usage: format_csv(csv_dict or csv_array)"

/* Instance state of the format_csv() filterx function. */
typedef struct FilterXFunctionFormatCSV_
{
/* base function object; must stay first as the instance is cast to/from FilterXExpr */
FilterXFunction super;
/* the single positional argument; must evaluate to a dict or a list */
FilterXExpr *input;
/* one-character field separator; the constructor defaults it to ',' */
gchar delimiter;
/* optional "columns" named argument; when set and the input is a dict,
 * it is evaluated to a list of keys selecting the values to emit, in order */
FilterXExpr *columns;
} FilterXFunctionFormatCSV;

/*
 * Append the repr of a single value to the output buffer as one CSV field.
 *
 * The signature matches the callback passed to filterx_dict_iter(), and the
 * function is also called directly when iterating lists and column lists.
 *
 * key:       unused.
 * value:     the object to format; NULL is treated as a failure.
 * user_data: a two-element gpointer array, [0] is the
 *            FilterXFunctionFormatCSV instance, [1] is the GString being built.
 *
 * Returns FALSE when value is NULL or its repr cannot be produced, TRUE
 * otherwise. Dict and list values are skipped (with a debug message) and
 * still count as success.
 */
static gboolean
_append_to_buffer(FilterXObject *key, FilterXObject *value, gpointer user_data)
{
  if (!value)
    return FALSE;

  FilterXFunctionFormatCSV *self = ((gpointer *) user_data)[0];
  GString *buffer = ((gpointer *) user_data)[1];

  if (filterx_object_is_type(value, &FILTERX_TYPE_NAME(dict)) ||
      filterx_object_is_type(value, &FILTERX_TYPE_NAME(list)))
    {
      msg_debug("FilterX: format_csv(): skipping object, type not supported",
                evt_tag_str("type", value->type->name));
      return TRUE;
    }

  /*
   * The delimiter is a single gchar, not a NUL-terminated string, so it must
   * be appended as a character; passing its address to g_string_append()
   * would read past the field.
   *
   * NOTE: the separator is only emitted once the buffer is non-empty, so
   * leading fields with an empty repr do not produce a separator.
   */
  if (buffer->len)
    g_string_append_c(buffer, self->delimiter);

  gsize len_before_value = buffer->len;
  if (!filterx_object_repr_append(value, buffer))
    return FALSE;

  gsize value_len = buffer->len - len_before_value;

  /* TODO: make the characters here configurable. */
  if (memchr(buffer->str + len_before_value, self->delimiter, value_len) != NULL)
    {
      ScratchBuffersMarker marker;
      GString *value_buffer = scratch_buffers_alloc_and_mark(&marker);

      /* length-aware copy, consistent with the memchr() scan above */
      g_string_truncate(value_buffer, 0);
      g_string_append_len(value_buffer, buffer->str + len_before_value, value_len);

      /* re-emit the field enclosed in double quotes, escaping embedded quotes */
      g_string_truncate(buffer, len_before_value);
      g_string_append_c(buffer, '"');
      append_unsafe_utf8_as_escaped_binary(buffer, value_buffer->str, value_buffer->len, "\"");
      g_string_append_c(buffer, '"');

      scratch_buffers_reclaim_marked(marker);
    }

  return TRUE;
}

/*
 * Evaluate format_csv(): render the input dict or list as a single CSV line.
 *
 *  - list input: every element is appended in order.
 *  - dict input with "columns": columns must evaluate to a list of keys, the
 *    matching values are appended in that order.
 *  - dict input without "columns": all values are appended in the order
 *    filterx_dict_iter() visits them.
 *
 * Returns a new string object on success, or NULL on any failure. Every
 * object reference obtained here is dropped on all exit paths.
 */
static FilterXObject *
_eval(FilterXExpr *s)
{
  FilterXFunctionFormatCSV *self = (FilterXFunctionFormatCSV *) s;

  FilterXObject *csv_data = filterx_expr_eval_typed(self->input);
  if (!csv_data)
    {
      filterx_eval_push_error("Failed to evaluate input. " FILTERX_FUNC_FORMAT_CSV_USAGE, s, NULL);
      return NULL;
    }

  gboolean success = FALSE;
  FilterXObject *cols = NULL;
  GString *formatted = scratch_buffers_alloc();
  gpointer user_data[] = { self, formatted };

  if (filterx_object_is_type(csv_data, &FILTERX_TYPE_NAME(list)))
    {
      guint64 size;
      if (!filterx_object_len(csv_data, &size))
        goto exit;

      success = TRUE;
      for (guint64 i = 0; i < size && success; i++)
        {
          FilterXObject *elt = filterx_list_get_subscript(csv_data, i);
          success = _append_to_buffer(NULL, elt, user_data);
          filterx_object_unref(elt);
        }
    }
  else if (filterx_object_is_type(csv_data, &FILTERX_TYPE_NAME(dict)))
    {
      if (!self->columns)
        {
          success = filterx_dict_iter(csv_data, _append_to_buffer, user_data);
          goto exit;
        }

      cols = filterx_expr_eval(self->columns);
      if (!cols)
        goto exit;

      if (!filterx_object_is_type(cols, &FILTERX_TYPE_NAME(list)))
        {
          filterx_eval_push_error("columns must be a list. " FILTERX_FUNC_FORMAT_CSV_USAGE, s, cols);
          goto exit;
        }

      guint64 size;
      if (!filterx_object_len(cols, &size))
        goto exit;

      success = TRUE;
      for (guint64 i = 0; i < size && success; i++)
        {
          FilterXObject *col = filterx_list_get_subscript(cols, i);
          FilterXObject *elt = filterx_object_get_subscript(csv_data, col);
          /* a missing column yields a NULL elt, which fails the whole call */
          success = _append_to_buffer(col, elt, user_data);
          filterx_object_unref(col);
          filterx_object_unref(elt);
        }
    }
  else
    {
      filterx_eval_push_error("input must be a dict or list. " FILTERX_FUNC_FORMAT_CSV_USAGE, s, csv_data);
    }

exit:
  /* single cleanup point: the early failure paths used to leak these references */
  filterx_object_unref(cols);
  filterx_object_unref(csv_data);
  return success ? filterx_string_new(formatted->str, formatted->len) : NULL;
}

/*
 * free_fn of the format_csv() expression: unrefs the argument expressions
 * held by the instance, then chains to the generic function destructor.
 */
static void
_free(FilterXExpr *s)
{
  FilterXFunctionFormatCSV *func = (FilterXFunctionFormatCSV *) s;

  filterx_expr_unref(func->columns);
  filterx_expr_unref(func->input);

  filterx_function_free_method(&func->super);
}

/*
 * Look up the "columns" named argument and hand back whatever expression the
 * argument container returns for it. error is not used here.
 */
static FilterXExpr *
_extract_columns_expr(FilterXFunctionArgs *args, GError **error)
{
  FilterXExpr *columns_expr =
    filterx_function_args_get_named_expr(args, FILTERX_FUNC_FORMAT_CSV_ARG_NAME_COLUMNS);

  return columns_expr;
}

/*
 * Parse the optional "delimiter" named argument into self->delimiter.
 *
 * An absent argument leaves the current delimiter untouched and succeeds.
 * When present it has to be a string literal of exactly one character;
 * otherwise error is set and FALSE is returned.
 */
static gboolean
_extract_delimiter_arg(FilterXFunctionFormatCSV *self, FilterXFunctionArgs *args, GError **error)
{
  gboolean is_set;
  gsize len;
  const gchar *value = filterx_function_args_get_named_literal_string(args,
                       FILTERX_FUNC_FORMAT_CSV_ARG_NAME_DELIMITER,
                       &len, &is_set);

  if (!is_set)
    return TRUE;

  if (value && len == 1)
    {
      self->delimiter = value[0];
      return TRUE;
    }

  if (!value)
    {
      g_set_error(error, FILTERX_FUNCTION_ERROR, FILTERX_FUNCTION_ERROR_CTOR_FAIL,
                  "delimiter must be a string literal. " FILTERX_FUNC_FORMAT_CSV_USAGE);
    }
  else
    {
      g_set_error(error, FILTERX_FUNCTION_ERROR, FILTERX_FUNCTION_ERROR_CTOR_FAIL,
                  "delimiter must be a single character. " FILTERX_FUNC_FORMAT_CSV_USAGE);
    }

  return FALSE;
}

/*
 * Populate the instance from the call arguments: exactly one positional
 * argument (the input expression), plus the optional "delimiter" and
 * "columns" named arguments.
 *
 * Returns FALSE with error set when the positional argument count is not one,
 * the input expression is missing, or the delimiter is invalid.
 */
static gboolean
_extract_arguments(FilterXFunctionFormatCSV *self, FilterXFunctionArgs *args, GError **error)
{
  if (filterx_function_args_len(args) != 1)
    {
      g_set_error(error, FILTERX_FUNCTION_ERROR, FILTERX_FUNCTION_ERROR_CTOR_FAIL,
                  "invalid number of arguments. " FILTERX_FUNC_FORMAT_CSV_USAGE);
      return FALSE;
    }

  FilterXExpr *input_expr = filterx_function_args_get_expr(args, 0);
  self->input = input_expr;
  if (!input_expr)
    {
      g_set_error(error, FILTERX_FUNCTION_ERROR, FILTERX_FUNCTION_ERROR_CTOR_FAIL,
                  "input must be set. " FILTERX_FUNC_FORMAT_CSV_USAGE);
      return FALSE;
    }

  gboolean delimiter_ok = _extract_delimiter_arg(self, args, error);
  if (!delimiter_ok)
    return FALSE;

  self->columns = _extract_columns_expr(args, error);
  return TRUE;
}

/*
 * Construct a format_csv() function instance from its call arguments.
 *
 * The delimiter defaults to ',' unless overridden by the arguments. args is
 * freed here on both the success and the failure path. On failure the
 * half-built instance is unreferenced and NULL is returned with error set by
 * the argument extraction.
 */
FilterXFunction *
filterx_function_format_csv_new(const gchar *function_name, FilterXFunctionArgs *args, GError **error)
{
  FilterXFunctionFormatCSV *self = g_new0(FilterXFunctionFormatCSV, 1);
  filterx_function_init_instance(&self->super, function_name);

  self->delimiter = ',';
  self->super.super.free_fn = _free;
  self->super.super.eval = _eval;

  gboolean args_ok = _extract_arguments(self, args, error);
  filterx_function_args_free(args);

  if (args_ok)
    return &self->super;

  filterx_expr_unref(&self->super.super);
  return NULL;
}

/*
 * Plugin construct hook for "format_csv": returns the constructor function
 * as an opaque pointer. The plugin argument is not used.
 */
gpointer
filterx_function_construct_format_csv(Plugin *self)
{
  gpointer constructor = (gpointer) filterx_function_format_csv_new;

  return constructor;
}
36 changes: 36 additions & 0 deletions modules/csvparser/filterx-func-format-csv.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
/*
* Copyright (c) 2024 shifter
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published
* by the Free Software Foundation, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* As an additional exemption you are allowed to compile & link against the
* OpenSSL libraries as published by the OpenSSL project. See the file
* COPYING for details.
*
*/

#ifndef FILTERX_FUNC_FORMAT_CSV_H_INCLUDED
#define FILTERX_FUNC_FORMAT_CSV_H_INCLUDED


#include "plugin.h"
#include "filterx/expr-function.h"

/* Names of the optional named arguments accepted by format_csv(). */
#define FILTERX_FUNC_FORMAT_CSV_ARG_NAME_COLUMNS "columns"
#define FILTERX_FUNC_FORMAT_CSV_ARG_NAME_DELIMITER "delimiter"

/*
 * Construct a format_csv() function instance from its call arguments.
 * args is freed by the constructor; returns NULL on invalid arguments.
 */
FilterXFunction *filterx_function_format_csv_new(const gchar *function_name, FilterXFunctionArgs *args, GError **error);
/* Plugin construct hook: returns filterx_function_format_csv_new as an opaque pointer. */
gpointer filterx_function_construct_format_csv(Plugin *self);

#endif

0 comments on commit 424d765

Please sign in to comment.