Skip to content

Commit

Permalink
Adding XLS UI elements for ftp source
Browse files Browse the repository at this point in the history
  • Loading branch information
psainics committed Dec 10, 2023
1 parent e4a511c commit 6477595
Show file tree
Hide file tree
Showing 3 changed files with 130 additions and 6 deletions.
20 changes: 17 additions & 3 deletions docs/FTPSource-batchsource.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,29 @@ Properties
**Password:** Password to use for authentication.

**Format:** Format of the data to read.
The format must be one of 'blob', 'csv', 'delimited', 'json', 'text', 'tsv', or the
The format must be one of 'blob', 'csv', 'delimited', 'json', 'text', 'tsv', 'xls', or the
name of any format plugin that you have deployed to your environment. Note that FTP does
not support seeking in a file, so formats like avro and parquet cannot be used.
If the format is a macro, only the formats listed above can be used.
If the format is 'blob', every input file will be read into a separate record.
The 'blob' format also requires a schema that contains a field named 'body' of type 'bytes'.
If the format is 'text', the schema must contain a field named 'body' of type 'string'.

**Get Schema:** Auto-detects schema from file. Supported formats are: csv, delimited, tsv, blob and text.
**Sample Size:** The maximum number of rows that will get investigated for automatic data type detection.
The default value is 1000.

**Override:** A list of columns with the corresponding data types for which automatic data type detection is
skipped.

**Terminate If Empty Row:** Whether to terminate the file reading if an empty row is encountered.
The default value is false.

**Select Sheet Using:** Select the sheet by name or number. Default is 'Sheet Number'.

**Sheet Value:** The name/number of the sheet to read from. If not specified, the first sheet will be read.
Sheet numbers are 0-based, i.e. the first sheet is 0.

**Get Schema:** Auto-detects schema from file. Supported formats are: csv, delimited, tsv, xls, blob and text.

Blob - is set by default as field named 'body' of type bytes.

Expand All @@ -47,7 +61,7 @@ JSON - is not supported. You must manually provide the output schema.
**Delimiter:** Delimiter to use when the format is 'delimited'. This will be ignored for other formats.

**Use First Row as Header:** Whether to use the first line of each file as the column headers. Supported formats are '
text', 'csv', 'tsv', and 'delimited'.
text', 'csv', 'tsv', 'xls', and 'delimited'.

**Enable Quoted Values:** Whether to treat content between quotes as a value. This value will only be used if the format
is 'csv', 'tsv' or 'delimited'. For example, if this is set to true, a line that looks like `1, "a, b, c"` will output
Expand Down
32 changes: 30 additions & 2 deletions src/main/java/io/cdap/plugin/batch/source/ftp/FTPConfig.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import com.google.gson.Gson;
import io.cdap.cdap.api.annotation.Description;
import io.cdap.cdap.api.annotation.Macro;
import io.cdap.cdap.api.annotation.Name;
import io.cdap.cdap.api.data.schema.Schema;
import io.cdap.cdap.api.plugin.PluginConfig;
import io.cdap.cdap.etl.api.FailureCollector;
Expand Down Expand Up @@ -58,6 +59,9 @@ public class FTPConfig extends PluginConfig implements FileSourceProperties {
private static final Type MAP_STRING_STRING_TYPE = new TypeToken<Map<String, String>>() {
}.getType();
private static final List<String> LOCATION_PROPERTIES = Arrays.asList("type", "host", "path", "user", "password");
private static final String NAME_SHEET = "sheet";
private static final String NAME_SHEET_VALUE = "sheetValue";
private static final String NAME_TERMINATE_IF_EMPTY_ROW = "terminateIfEmptyRow";

@Macro
@Nullable
Expand Down Expand Up @@ -112,13 +116,18 @@ public class FTPConfig extends PluginConfig implements FileSourceProperties {

@Macro
@Nullable
@Description("Whether to use first row as header. Supported formats are 'text', 'csv', 'tsv', " +
@Description("The maximum number of rows that will get investigated for automatic data type detection.")
private Long sampleSize;

@Macro
@Nullable
@Description("Whether to use first row as header. Supported formats are 'text', 'csv', 'tsv', 'xls', " +
"'delimited'. Default value is false.")
private final Boolean skipHeader;

@Macro
@Description("Format of the data to read. Supported formats are 'avro', 'blob', 'csv', 'delimited', 'json', "
+ "'parquet', 'text', or 'tsv'. If no format is given, it will default to 'text'.")
+ "'parquet', 'text', or 'tsv', 'xls'. If no format is given, it will default to 'text'.")
private final String format;

@Macro
Expand Down Expand Up @@ -150,6 +159,25 @@ public class FTPConfig extends PluginConfig implements FileSourceProperties {
@Description("Maximum time in milliseconds to wait for connection initialization before time out.")
private final Integer connectTimeout;

/**
 * How the sheet to read is selected. Per the description below, the sheet is chosen either by name or by
 * number and 'Sheet Number' is the documented default; no default is assigned in this declaration itself.
 * The property is macro-enabled and optional.
 */
@Name(NAME_SHEET)
@Macro
@Nullable
@Description("Select the sheet by name or number. Default is 'Sheet Number'.")
private String sheet;

/**
 * Name or number of the sheet to read from. Per the description below, the first sheet is read when this
 * is not specified and sheet numbers start at 0. The property is macro-enabled and optional.
 */
@Name(NAME_SHEET_VALUE)
@Macro
@Nullable
// The trailing space after "read." keeps the two concatenated sentences from running together.
@Description("The name/number of the sheet to read from. If not specified, the first sheet will be read. " +
"Sheet Number are 0 based, ie first sheet is 0.")
private String sheetValue;

/**
 * Flag controlling what happens when an empty row is encountered. It is declared as a String rather than
 * a Boolean. The property is macro-enabled and optional.
 * NOTE(review): the description below says the pipeline is terminated, while docs/FTPSource-batchsource.md
 * says file reading is terminated - confirm which wording is accurate.
 */
@Name(NAME_TERMINATE_IF_EMPTY_ROW)
@Macro
@Nullable
@Description("Whether to terminate the pipeline if an empty row is encountered. Default is 'false'.")
private String terminateIfEmptyRow;

@VisibleForTesting
private FTPConfig(@Nullable String referenceName, String type, String host, @Nullable Integer port, String path,
String user, String password, @Nullable String fileSystemProperties,
Expand Down
84 changes: 83 additions & 1 deletion widgets/FTPSource-batchsource.json
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,10 @@
{
"label": "tsv",
"value": "tsv"
},
{
"label": "xls",
"value": "xls"
}
]
}
Expand All @@ -96,6 +100,15 @@
"widget-type": "get-schema",
"widget-category": "plugin"
},
{
"widget-type": "number",
"label": "Sample Size",
"name": "sampleSize",
"widget-attributes": {
"default": "1000",
"minimum": "1"
}
},
{
"widget-type": "textbox",
"label": "Delimiter",
Expand Down Expand Up @@ -151,6 +164,42 @@
"label": "False"
}
}
},
{
"widget-type": "toggle",
"label": "Terminate If Empty Row",
"name": "terminateIfEmptyRow",
"widget-attributes": {
"default": "false",
"on": {
"value": "true",
"label": "True"
},
"off": {
"value": "false",
"label": "False"
}
}
},
{
"widget-type": "select",
"label": "Select Sheet Using",
"name": "sheet",
"widget-attributes": {
"values": [
"Sheet Name",
"Sheet Number"
],
"default": "Sheet Number"
}
},
{
"widget-type": "textbox",
"label": "Sheet Value",
"name": "sheetValue",
"widget-attributes": {
"default": "0"
}
}
]
},
Expand Down Expand Up @@ -257,13 +306,46 @@
{
"name": "skipHeader",
"condition": {
"expression": "format == 'delimited' || format == 'csv' || format == 'tsv'"
"expression": "format == 'delimited' || format == 'csv' || format == 'tsv' || format == 'xls'"
},
"show": [
{
"name": "skipHeader"
}
]
},
{
"name": "sheet",
"condition": {
"expression": "format == 'xls'"
},
"show": [
{
"name": "sheet"
}
]
},
{
"name": "sheetValue",
"condition": {
"expression": "format == 'xls'"
},
"show": [
{
"name": "sheetValue"
}
]
},
{
"name": "terminateIfEmptyRow",
"condition": {
"expression": "format == 'xls'"
},
"show": [
{
"name": "terminateIfEmptyRow"
}
]
}
],
"jump-config": {
Expand Down

0 comments on commit 6477595

Please sign in to comment.