diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py
index e39296525d0f0fd1b6945a4cdd3cc5c9c4889b50..cc2d187558c8380cf4eaeac744183cc9ef5cbbfd 100644
--- a/tests/integration/test_storage_s3/test.py
+++ b/tests/integration/test_storage_s3/test.py
@@ -1,7 +1,9 @@
+import gzip
 import json
 import logging
 import os
 import random
+import StringIO
 import threading
 
 import helpers.client
@@ -57,6 +59,11 @@ def prepare_s3_bucket(cluster):
     minio_client.make_bucket(cluster.minio_restricted_bucket)
 
 
+def put_s3_file_content(cluster, bucket, filename, data):
+    buf = StringIO.StringIO(data)
+    cluster.minio_client.put_object(bucket, filename, buf, len(data))
+
+
 # Returns content of given S3 file as string.
 def get_s3_file_content(cluster, bucket, filename):
     # type: (ClickHouseCluster, str) -> str
@@ -344,3 +351,79 @@ def test_infinite_redirect(cluster):
             exception_raised = True
     finally:
         assert exception_raised
+
+
+def test_storage_s3_get_gzip(cluster):
+    bucket = cluster.minio_bucket
+    instance = cluster.instances["dummy"]
+    filename = "test_get_gzip.bin"
+    name = "test_get_gzip"
+    data = [
+        "Sophia Intrieri,55",
+        "Jack Taylor,71",
+        "Christopher Silva,66",
+        "Clifton Purser,35",
+        "Richard Aceuedo,43",
+        "Lisa Hensley,31",
+        "Alice Wehrley,1",
+        "Mary Farmer,47",
+        "Samara Ramirez,19",
+        "Shirley Lloyd,51",
+        "Santos Cowger,0",
+        "Richard Mundt,88",
+        "Jerry Gonzalez,15",
+        "Angela James,10",
+        "Norman Ortega,33",
+        ""
+    ]
+    buf = StringIO.StringIO()
+    compressed = gzip.GzipFile(fileobj=buf, mode="wb")
+    compressed.write("\n".join(data))
+    compressed.close()
+    put_s3_file_content(cluster, bucket, filename, buf.getvalue())
+
+    try:
+        run_query(instance, "CREATE TABLE {} (name String, id UInt32) ENGINE = S3('http://{}:{}/{}/{}', 'CSV', 'gzip')".format(
+            name, cluster.minio_host, cluster.minio_port, bucket, filename))
+
+        # The comparison result must be asserted, otherwise a wrong query result goes unnoticed.
+        assert run_query(instance, "SELECT sum(id) FROM {}".format(name)).splitlines() == ["565"]
+
+    finally:
+        run_query(instance, "DROP TABLE {}".format(name))
+
+
+def test_storage_s3_put_gzip(cluster):
+    bucket = cluster.minio_bucket
+    instance = cluster.instances["dummy"]
+    filename = "test_put_gzip.bin"
+    name = "test_put_gzip"
+    data = [
+        "'Joseph Tomlinson',5",
+        "'Earnest Essary',44",
+        "'Matha Pannell',24",
+        "'Michael Shavers',46",
+        "'Elias Groce',38",
+        "'Pamela Bramlet',50",
+        "'Lewis Harrell',49",
+        "'Tamara Fyall',58",
+        "'George Dixon',38",
+        "'Alice Walls',49",
+        "'Paula Mais',24",
+        "'Myrtle Pelt',93",
+        "'Sylvia Naffziger',18",
+        "'Amanda Cave',83",
+        "'Yolanda Joseph',89"
+    ]
+    try:
+        run_query(instance, "CREATE TABLE {} (name String, id UInt32) ENGINE = S3('http://{}:{}/{}/{}', 'CSV', 'gzip')".format(
+            name, cluster.minio_host, cluster.minio_port, bucket, filename))
+
+        run_query(instance, "INSERT INTO {} VALUES ({})".format(name, "),(".join(data)))
+
+        buf = StringIO.StringIO(get_s3_file_content(cluster, bucket, filename))
+        f = gzip.GzipFile(fileobj=buf, mode="rb")
+        uncompressed_content = f.read()
+        assert sum([ int(i.split(',')[1]) for i in uncompressed_content.splitlines() ]) == 708
+    finally:
+        run_query(instance, "DROP TABLE {}".format(name))