提交 e2d80d97 编写于 作者: S ShadelessFox

#10702 Allow customizing a column's minimal length and samples count


Former-commit-id: 9e62680f
上级 7ae502c9
......@@ -150,6 +150,10 @@ dataTransfer.producer.stream.processor.csv.property.timestampFormat.name = Date/
dataTransfer.producer.stream.processor.csv.property.timestampFormat.description = Date/time format pattern. Use this to clarify the date format in CSV file, not to change output data.\nSearch for 'java DateTimeFormatter' for format details.
dataTransfer.producer.stream.processor.csv.property.timestampZone.name = Timezone ID
dataTransfer.producer.stream.processor.csv.property.timestampZone.description = Timezone ID. By default local machine timezone is used.\n3 ways to specify zone:\n\t-Local zone offset (+3, -04:30)\n\t-Specific zone offset (GMT+2, UTC+01:00)\n\t-Region based (UTC, ECT, PST, etc)
dataTransfer.producer.stream.processor.csv.property.columnTypeSamplesCount.name = Column samples count
dataTransfer.producer.stream.processor.csv.property.columnTypeSamplesCount.description = Maximum number of sample rows used to guess the length and data type of imported columns.
dataTransfer.producer.stream.processor.csv.property.columnTypeMinimalLength.name = Column minimal length
dataTransfer.producer.stream.processor.csv.property.columnTypeMinimalLength.description = Minimum length assigned to imported columns. Sample data longer than this value increases the column length accordingly.
task.category.name.common = Common
......
......@@ -59,6 +59,10 @@ dataTransfer.producer.stream.processor.csv.property.delimiter.description = \u04
dataTransfer.producer.stream.processor.csv.property.delimiter.name = \u0420\u0430\u0437\u0434\u0435\u043B\u0438\u0442\u0435\u043B\u044C \u0441\u0442\u043E\u043B\u0431\u0446\u043E\u0432
dataTransfer.producer.stream.processor.csv.property.encoding.label = \u041A\u043E\u0434\u0438\u0440\u043E\u0432\u043A\u0430
dataTransfer.producer.stream.processor.csv.property.extension.label = \u0420\u0430\u0441\u0448\u0438\u0440\u0435\u043D\u0438\u0435
dataTransfer.producer.stream.processor.csv.property.columnTypeSamplesCount.name = \u041A\u043E\u043B\u0438\u0447\u0435\u0441\u0442\u0432\u043E \u043F\u0440\u043E\u0431 \u043A\u043E\u043B\u043E\u043D\u043A\u0438
dataTransfer.producer.stream.processor.csv.property.columnTypeSamplesCount.description = \u0423\u0441\u0442\u0430\u043D\u0430\u0432\u043B\u0438\u0432\u0430\u0435\u0442 \u043C\u0430\u043A\u0441\u0438\u043C\u0430\u043B\u044C\u043D\u043E\u0435 \u043A\u043E\u043B\u0438\u0447\u0435\u0441\u0442\u0432\u043E \u043F\u0440\u043E\u0431, \u0438\u0441\u043F\u043E\u043B\u044C\u0437\u0443\u0435\u043C\u044B\u0445 \u0434\u043B\u044F \u0443\u0433\u0430\u0434\u044B\u0432\u0430\u043D\u0438\u044F \u0434\u043B\u0438\u043D\u044B \u0438 \u0442\u0438\u043F\u0430 \u0438\u043C\u043F\u043E\u0440\u0442\u0438\u0440\u0443\u0435\u043C\u044B\u0445 \u0434\u0430\u043D\u043D\u044B\u0445.
dataTransfer.producer.stream.processor.csv.property.columnTypeMinimalLength.name = \u041C\u0438\u043D\u0438\u043C\u0430\u043B\u044C\u043D\u0430\u044F \u0434\u043B\u0438\u043D\u0430 \u043A\u043E\u043B\u043E\u043D\u043A\u0438
dataTransfer.producer.stream.processor.csv.property.columnTypeMinimalLength.description = \u0423\u0441\u0442\u0430\u043D\u0430\u0432\u043B\u0438\u0432\u0430\u0435\u0442 \u043C\u0438\u043D\u0438\u043C\u0430\u043B\u044C\u043D\u0443\u044E \u0434\u043B\u0438\u043D\u0443 \u043A\u043E\u043B\u043E\u043D\u043A\u0438.
dataTransfer.processor.json.property.printTableName.label = \u041D\u0430\u043F\u0435\u0447\u0430\u0442\u0430\u0442\u044C \u0438\u043C\u044F \u0442\u0430\u0431\u043B\u0438\u0446\u044B
dataTransfer.processor.json.property.formatDateISO.label = \u0424\u043E\u0440\u043C\u0430\u0442 \u0434\u0430\u0442\u044B \u0432 ISO 8601
dataTransfer.processor.json.property.extension.label = \u0420\u0430\u0441\u0448\u0438\u0440\u0435\u043D\u0438\u0435 \u0444\u0430\u0439\u043B\u0430
......
......@@ -52,6 +52,8 @@
<property id="emptyStringNull" label="%dataTransfer.producer.stream.processor.csv.property.emptyStringNull.name" type="boolean" description="%dataTransfer.producer.stream.processor.csv.property.emptyStringNull.description" defaultValue="" required="false"/>
<property id="timestampFormat" label="%dataTransfer.producer.stream.processor.csv.property.timestampFormat.name" type="string" description="%dataTransfer.producer.stream.processor.csv.property.timestampFormat.description" defaultValue="yyyy-MM-dd[ HH:mm:ss[.SSS]]" required="false"/>
<property id="timestampZone" label="%dataTransfer.producer.stream.processor.csv.property.timestampZone.name" type="string" description="%dataTransfer.producer.stream.processor.csv.property.timestampZone.description" defaultValue="" required="false"/>
<property id="columnTypeSamplesCount" label="%dataTransfer.producer.stream.processor.csv.property.columnTypeSamplesCount.name" type="integer" description="%dataTransfer.producer.stream.processor.csv.property.columnTypeSamplesCount.description" defaultValue="1000" required="false"/>
<property id="columnTypeMinimalLength" label="%dataTransfer.producer.stream.processor.csv.property.columnTypeMinimalLength.name" type="integer" description="%dataTransfer.producer.stream.processor.csv.property.columnTypeMinimalLength.description" defaultValue="1" required="false"/>
</propertyGroup>
</processor>
</node>
......
......@@ -54,11 +54,6 @@ public class DataImporterCSV extends StreamImporterAbstract {
private static final String PROP_EMPTY_STRING_NULL = "emptyStringNull";
private static final String PROP_ESCAPE_CHAR = "escapeChar";
// Default length for new column. This is a "lower" bound, so sample data could be longer than this threshold
private static final int DEFAULT_COLUMN_LENGTH = 1024;
// Amount of sample rows used to determine approximate type and data length of the column
private static final int MAX_COLUMN_SAMPLES = 1000;
public enum HeaderPosition {
none,
top,
......@@ -74,6 +69,9 @@ public class DataImporterCSV extends StreamImporterAbstract {
Map<String, Object> processorProperties = getSite().getProcessorProperties();
HeaderPosition headerPosition = getHeaderPosition(processorProperties);
final int columnSamplesCount = Math.max(CommonUtils.toInt(processorProperties.get(PROP_COLUMN_TYPE_SAMPLES), 1000), 0);
final int columnMinimalLength = Math.max(CommonUtils.toInt(processorProperties.get(PROP_COLUMN_TYPE_LENGTH), 1), 1);
try (Reader reader = openStreamReader(inputStream, processorProperties)) {
try (CSVReader csvReader = openCSVReader(reader, processorProperties)) {
String[] header = getNextLine(csvReader);
......@@ -89,12 +87,12 @@ public class DataImporterCSV extends StreamImporterAbstract {
if (CommonUtils.isEmptyTrimmed(column)) {
column = "Column" + (i + 1);
}
StreamDataImporterColumnInfo columnInfo = new StreamDataImporterColumnInfo(entityMapping, i, column, null, DEFAULT_COLUMN_LENGTH, DBPDataKind.UNKNOWN);
StreamDataImporterColumnInfo columnInfo = new StreamDataImporterColumnInfo(entityMapping, i, column, null, columnMinimalLength, DBPDataKind.UNKNOWN);
columnInfo.setMappingMetadataPresent(headerPosition != HeaderPosition.none);
columnsInfo.add(columnInfo);
}
for (int sample = 0; sample < MAX_COLUMN_SAMPLES; sample++) {
for (int sample = 0; sample < columnSamplesCount; sample++) {
String[] line;
if (sample == 0 && headerPosition == HeaderPosition.none) {
......
......@@ -44,6 +44,8 @@ public abstract class StreamImporterAbstract implements IStreamDataImporter {
protected static final String PROP_TIMESTAMP_FORMAT = "timestampFormat";
protected static final String PROP_TIMESTAMP_ZONE = "timestampZone";
protected static final String PROP_COLUMN_TYPE_SAMPLES = "columnTypeSamplesCount";
protected static final String PROP_COLUMN_TYPE_LENGTH = "columnTypeMinimalLength";
private IStreamDataImporterSite site;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册