# insertFromCSVPerformance.py
###################################################################
#           Copyright (c) 2016 by TAOS Technologies, Inc.
#                     All rights reserved.
#
#  This file is proprietary and confidential to TAOS Technologies.
#  No part of this file may be reproduced, stored, transmitted,
#  disclosed or used in any form or by any means other than as
#  expressly provided by the written permission from Jianhui Tao
#
###################################################################

# -*- coding: utf-8 -*-

import sys
import taos
import time
import datetime
import csv
import random
import pandas as pd
import argparse
import os.path

class insertFromCSVPerformace:
    """Time TDengine ``insert ... file`` CSV imports and record the averages.

    Two connections are opened on construction: ``conn`` to the server under
    test on 127.0.0.1, and ``conn2`` to the server at ``host2`` that receives
    the measured numbers together with the commit id, branch and build type.
    """

    # How many times each CSV file is imported; the reported time is the mean.
    ROUNDS = 10

    def __init__(self, commitID, dbName, tbName, branchName, buildType):
        """Store the run metadata and open both database connections.

        Args:
            commitID: Commit id written to the results table.
            dbName: Database created/used on both servers.
            tbName: Name of the results table on the second server.
            branchName: Branch name written to the results table.
            buildType: Build type written to the results table.
        """
        self.commitID = commitID
        self.dbName = dbName
        self.tbName = tbName
        self.branchName = branchName
        self.type = buildType
        # Base epoch in milliseconds that writeCSV() offsets randomly.
        self.ts = 1500074556514
        self.host = "127.0.0.1"
        self.user = "root"
        self.password = "taosdata"
        self.config = "/etc/perf"
        # Keyword arguments, consistent with the second connection below.
        # NOTE(review): the original passed these positionally; the connector
        # appears to read only keyword arguments - confirm against its docs.
        self.conn = taos.connect(
            host=self.host,
            user=self.user,
            password=self.password,
            config=self.config)
        self.host2 = "192.168.1.179"
        self.conn2 = taos.connect(
            host=self.host2,
            user=self.user,
            password=self.password,
            config=self.config)

    def writeCSV(self, fileName='test3.csv', rowCount=1000000):
        """Write ``rowCount`` random rows to the CSV file ``fileName``.

        Each row holds a single-quoted timestamp string (``self.ts`` plus a
        random millisecond offset, so rows are not in time order), followed
        by an int, a float and two more ints drawn from the range 1..100.

        Args:
            fileName: Path of the CSV file to (over)write.
            rowCount: Number of rows to generate.
        """
        with open(fileName, 'w', encoding='utf-8', newline='') as csvFile:
            writer = csv.writer(csvFile, dialect='excel')
            for _ in range(rowCount):
                newTimestamp = (self.ts
                                + random.randint(10000000, 10000000000)
                                + random.randint(1000, 10000000)
                                + random.randint(1, 1000))
                # self.ts is in milliseconds; fromtimestamp() takes seconds.
                moment = datetime.datetime.fromtimestamp(newTimestamp / 1000)
                stamp = moment.strftime("%Y-%m-%d %H:%M:%S.%f")
                writer.writerow([
                    "'%s'" % stamp,
                    random.randint(1, 100),
                    random.uniform(1, 100),
                    random.randint(1, 100),
                    random.randint(1, 100),
                ])

    def removCSVHeader(self, fileName="ordered.csv"):
        """Rewrite ``fileName`` in place without its header line.

        The file is read with its first line taken as the header, the row
        labelled 0 (the first data row) is dropped as well, and the remainder
        is written back with neither header nor index column.

        Args:
            fileName: Path of the CSV file to rewrite.
        """
        data = pd.read_csv(fileName)
        data = data.drop([0])
        data.to_csv(fileName, header=False, index=False)

    def _averageInsertTime(self, cursor, tableName, csvFile):
        """Return the mean seconds taken to import ``csvFile`` into a fresh table.

        The table is dropped and recreated before every round so each import
        starts from an empty table; only the insert statement is timed.
        """
        totalTime = 0.0
        for _ in range(self.ROUNDS):
            cursor.execute("drop table if exists %s" % tableName)
            cursor.execute(
                "create table if not exists %s(ts timestamp, c1 int, c2 float, c3 int, c4 int)"
                % tableName)
            # perf_counter() is monotonic, so wall-clock adjustments cannot
            # distort the measured interval.
            startTime = time.perf_counter()
            cursor.execute("insert into %s file '%s'" % (tableName, csvFile))
            totalTime += time.perf_counter() - startTime
        return totalTime / self.ROUNDS

    def run(self):
        """Run both import benchmarks and store the averages via ``conn2``.

        'outoforder.csv' is imported into table t1 and 'ordered.csv' into
        table t2, ``ROUNDS`` times each. The two mean times are printed and
        then inserted, with the run metadata, into the results table. Both
        cursors are closed even when a statement raises.
        """
        cursor = self.conn.cursor()
        try:
            cursor.execute("create database if not exists %s" % self.dbName)
            cursor.execute("use %s" % self.dbName)
            print("==================== CSV insert performance ====================")

            out_of_order_time = self._averageInsertTime(cursor, "t1", "outoforder.csv")
            print("Out of Order - Insert time: %f" % out_of_order_time)

            in_order_time = self._averageInsertTime(cursor, "t2", "ordered.csv")
            print("In order - Insert time: %f" % in_order_time)
        finally:
            cursor.close()

        cursor2 = self.conn2.cursor()
        try:
            cursor2.execute("create database if not exists %s" % self.dbName)
            cursor2.execute("use %s" % self.dbName)
            cursor2.execute(
                "create table if not exists %s(ts timestamp, in_order_time float, out_of_order_time float, commit_id binary(50), branch binary(50), type binary(20))"
                % self.tbName)
            cursor2.execute(
                "insert into %s values(now, %f, %f, '%s', '%s', '%s')"
                % (self.tbName, in_order_time, out_of_order_time,
                   self.commitID, self.branchName, self.type))
        finally:
            cursor2.close()
        
# ------------------------- command-line entry point -------------------------
def buildArgParser():
    """Return the argument parser for the CSV insert benchmark script.

    Options (all optional strings):
        -c/--commit-id      git commit id, default 'null'
        -d/--database-name  database to create, default 'perf'
        -t/--table-name     table to create, default 'csv_insert'
        -b/--branch-name    branch name, default 'develop'
        -T/--build-type     build type, default 'glibc'
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-c',
        '--commit-id',
        default='null',
        type=str,
        help='git commit id (default: null)')
    parser.add_argument(
        '-d',
        '--database-name',
        default='perf',
        type=str,
        help='Database name to be created (default: perf)')
    parser.add_argument(
        '-t',
        '--table-name',
        default='csv_insert',
        type=str,
        # Previously this help text said "Database name", copied from -d.
        help='Table name to be created (default: csv_insert)')
    parser.add_argument(
        '-b',
        '--branch-name',
        default='develop',
        type=str,
        help='branch name (default: develop)')
    parser.add_argument(
        '-T',
        '--build-type',
        default='glibc',
        type=str,
        help='build type (default: glibc)')
    return parser


if __name__ == '__main__':
    args = buildArgParser().parse_args()
    perftest = insertFromCSVPerformace(
        args.commit_id,
        args.database_name,
        args.table_name,
        args.branch_name,
        args.build_type)
    perftest.run()