From 3928386766b5b64f7f5e5dda6664589f3f23dcc7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=A2=A6=E5=A2=83=E8=BF=B7=E7=A6=BB?= Date: Mon, 2 May 2022 11:13:30 +0800 Subject: [PATCH] add StringUtils and test --- build.sbt | 1 - .../org/bitlap/csv/core/StringUtils.scala | 28 +++++++++++++++++++ .../core/test/CsvableAndScalableTest.scala | 21 ++++++++++++++ .../csv/core/test/StringUtilsTest.scala | 7 +++++ 4 files changed, 56 insertions(+), 1 deletion(-) diff --git a/build.sbt b/build.sbt index 3c088de..759e53d 100644 --- a/build.sbt +++ b/build.sbt @@ -1,4 +1,3 @@ -import sbt.librarymanagement.InclExclRule import sbt.{ Def, Test } import sbtrelease.ReleaseStateTransformations._ diff --git a/csv-core/src/main/scala/org/bitlap/csv/core/StringUtils.scala b/csv-core/src/main/scala/org/bitlap/csv/core/StringUtils.scala index 1b693f4..901569f 100644 --- a/csv-core/src/main/scala/org/bitlap/csv/core/StringUtils.scala +++ b/csv-core/src/main/scala/org/bitlap/csv/core/StringUtils.scala @@ -22,6 +22,8 @@ package org.bitlap.csv.core import scala.collection.mutable.ListBuffer +import java.util.regex.Pattern +import scala.util.matching.Regex /** * split csv column value by columnSeparator. @@ -31,6 +33,32 @@ import scala.collection.mutable.ListBuffer */ object StringUtils { + private val regex: Regex = "\\{(.*?)\\}".r + private val kvr: Regex = "(.*):(.*)".r + private val pattern: Pattern = Pattern.compile(regex.toString()) + + def extraJsonPairs(input: String): String = { + val matcher = pattern.matcher(input) + while (matcher.find) { + val tail = matcher.group().tail.init + if (tail != null && tail.nonEmpty) { + return tail + } else return null + } + + null + } + + def extraJsonValues[T <: Product](jsonString: String)(func: (String, String) => T): List[T] = { + val pairs = extraJsonPairs(jsonString) + if (pairs == null) return Nil + val jsonElements = pairs.split(",") + val kvs = jsonElements.collect { + case kvr(k, v) if k.length > 2 && v.length > 2 => k.init.tail -> v.init.tail + } + kvs.toList.map(f => func(f._1, f._2)) + } + def splitColumns(line: String, columnSeparator: Char): List[String] = { val listBuffer = ListBuffer[String]() val columnBuffer = ListBuffer[Char]() diff --git a/csv-core/src/test/scala/org/bitlap/csv/core/test/CsvableAndScalableTest.scala b/csv-core/src/test/scala/org/bitlap/csv/core/test/CsvableAndScalableTest.scala index 25980da..d88b10e 100644 --- a/csv-core/src/test/scala/org/bitlap/csv/core/test/CsvableAndScalableTest.scala +++ b/csv-core/src/test/scala/org/bitlap/csv/core/test/CsvableAndScalableTest.scala @@ -21,6 +21,8 @@ package org.bitlap.csv.core.test +import org.bitlap.csv.core.StringUtils + import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.should.Matchers import org.bitlap.csv.core.ScalableBuilder @@ -122,4 +124,23 @@ class CsvableAndScalableTest extends AnyFlatSpec with Matchers { assert(metrics.head.get.dimensions.head.key == "city") assert(metrics.head.get.dimensions.head.value == "北京") } + + "CsvableAndScalable3" should "ok when using StringUtils" in { + val metrics = csvData + .split("\n") + .map(csv => + ScalableBuilder[Metric2] + .setField[Seq[Dimension3]]( + _.dimensions, + dims => StringUtils.extraJsonValues[Dimension3](dims)((k, v) => Dimension3(k, v)) + ) + .build(csv) + .toScala + ) + + println(metrics.toList) + + assert(metrics.head.get.dimensions.head.key == "city") + assert(metrics.head.get.dimensions.head.value == "北京") + } } diff --git a/csv-core/src/test/scala/org/bitlap/csv/core/test/StringUtilsTest.scala b/csv-core/src/test/scala/org/bitlap/csv/core/test/StringUtilsTest.scala index bf580e2..fe02723 100644 --- a/csv-core/src/test/scala/org/bitlap/csv/core/test/StringUtilsTest.scala +++ b/csv-core/src/test/scala/org/bitlap/csv/core/test/StringUtilsTest.scala @@ -37,4 +37,11 @@ class StringUtilsTest extends AnyFlatSpec with Matchers { println(csv) assert(csv.size == 8) } + + "StringUtilsTest2" should "ok" in { + val line = """abc,"{""a"":""b"",""c"":""d""}",d,12,2,false,0.1,0.23333""" + val csv = StringUtils.extraJsonValues[Dimension3](line)((k, v) => Dimension3(k, v)) + println(csv) + assert(csv.toString() == "List(Dimension3(a,b), Dimension3(c,d))") + } } -- GitLab