# insertFromCSVPerformance.py
###################################################################
#           Copyright (c) 2016 by TAOS Technologies, Inc.
#                     All rights reserved.
#
#  This file is proprietary and confidential to TAOS Technologies.
#  No part of this file may be reproduced, stored, transmitted,
#  disclosed or used in any form or by any means other than as
#  expressly provided by the written permission from Jianhui Tao
#
###################################################################

# -*- coding: utf-8 -*-

import sys
import taos
import time
import datetime
import csv
import random
import pandas as pd
import argparse
import os.path

class insertFromCSVPerformace:
    """Measure how long ``insert into <table> file '<csv>'`` takes.

    ``run`` times the import of an out-of-order CSV file and an ordered CSV
    file on the first connection, then stores both averages, together with
    the commit id, branch name and build type, in a results table reached
    through the second connection.
    """

    def __init__(self, commitID, dbName, tbName, branchName, buildType):
        """Store the run metadata and open both database connections.

        Args:
            commitID: Commit id written to the results row.
            dbName: Database created/used on both connections.
            tbName: Name of the results table on the second connection.
            branchName: Branch name written to the results row.
            buildType: Build type written to the results row.
        """
        self.commitID = commitID
        self.dbName = dbName
        self.tbName = tbName
        self.branchName = branchName
        self.type = buildType
        # Base epoch value in milliseconds; writeCSV adds random offsets to it.
        self.ts = 1500000000000
        self.host = "127.0.0.1"
        self.user = "root"
        self.password = "taosdata"
        self.config = "/etc/perf"
        # Pass the settings by keyword, exactly like the second connection
        # below, so every value is bound to the parameter it is meant for.
        self.conn = taos.connect(
            host=self.host,
            user=self.user,
            password=self.password,
            config=self.config)
        # Second server: receives the benchmark results in run().
        self.host2 = "192.168.1.179"
        self.conn2 = taos.connect(
            host=self.host2,
            user=self.user,
            password=self.password,
            config=self.config)

    def writeCSV(self, fileName='test4.csv', rowCount=2000000):
        """Write ``rowCount`` rows with unique random timestamps to a CSV file.

        Each row is ``'<timestamp>', int, float, int, int``; the timestamp is
        quoted with single quotes and formatted from local time.

        Args:
            fileName: Path of the CSV file to (over)write.
            rowCount: Number of rows to generate.
        """
        usedTimestamps = set()
        with open(fileName, 'w', encoding='utf-8', newline='') as csvFile:
            writer = csv.writer(csvFile, dialect='excel')
            while len(usedTimestamps) < rowCount:
                # One random factor in 1..10 per power of ten, from 10**10
                # down to 10**0, added to the base timestamp.
                offset = sum(
                    random.randint(1, 10) * 10 ** exponent
                    for exponent in range(10, -1, -1))
                newTimestamp = self.ts + offset
                if newTimestamp in usedTimestamps:
                    # Duplicate timestamp: draw again, nothing is written.
                    continue
                usedTimestamps.add(newTimestamp)
                # self.ts is in milliseconds, fromtimestamp() expects seconds.
                moment = datetime.datetime.fromtimestamp(newTimestamp / 1000)
                formatted = moment.strftime("%Y-%m-%d %H:%M:%S.%f")
                writer.writerow([
                    "'%s'" % formatted,
                    random.randint(1, 100),
                    random.uniform(1, 100),
                    random.randint(1, 100),
                    random.randint(1, 100),
                ])

    def removCSVHeader(self, fileName="ordered.csv"):
        """Rewrite ``fileName`` in place without its leading lines.

        Args:
            fileName: Path of the CSV file to rewrite.
        """
        # NOTE(review): read_csv consumes the first line as the header and
        # drop([0]) then removes the first data row too, so the rewritten
        # file loses its first TWO lines. Behaviour kept as-is; confirm that
        # this is what the input file needs.
        data = pd.read_csv(fileName)
        data = data.drop([0])
        data.to_csv(fileName, header=False, index=False)

    def _averageInsertTime(self, cursor, tableName, csvFile, rounds=10):
        """Return the mean seconds taken to import ``csvFile`` into a table.

        The table is dropped and recreated before every round so each import
        starts from an empty table; only the insert statement is timed.
        """
        totalTime = 0.0
        for _ in range(rounds):
            cursor.execute("drop table if exists %s" % tableName)
            cursor.execute(
                "create table if not exists %s(ts timestamp, c1 int, c2 float, c3 int, c4 int)"
                % tableName)
            # perf_counter() is monotonic, so the measurement cannot be
            # distorted by system clock adjustments.
            startTime = time.perf_counter()
            cursor.execute("insert into %s file '%s'" % (tableName, csvFile))
            totalTime += time.perf_counter() - startTime
            # Pause between rounds, as the original loop did.
            time.sleep(1)
        return totalTime / rounds

    def run(self):
        """Run both import benchmarks and record the averaged timings.

        Imports 'outoforder.csv' into t1 and 'ordered.csv' into t2 ten times
        each, prints both averages, and inserts one result row into the
        results table through the second connection.
        """
        cursor = self.conn.cursor()
        try:
            cursor.execute("create database if not exists %s" % self.dbName)
            cursor.execute("use %s" % self.dbName)
            print("==================== CSV insert performance ====================")

            out_of_order_time = self._averageInsertTime(cursor, "t1", "outoforder.csv")
            print("Out of Order - Insert time: %f" % out_of_order_time)

            in_order_time = self._averageInsertTime(cursor, "t2", "ordered.csv")
            print("In order - Insert time: %f" % in_order_time)
        finally:
            # Release the cursor even when a statement fails.
            cursor.close()

        cursor2 = self.conn2.cursor()
        try:
            cursor2.execute("create database if not exists %s" % self.dbName)
            cursor2.execute("use %s" % self.dbName)
            cursor2.execute(
                "create table if not exists %s(ts timestamp, in_order_time float, out_of_order_time float, commit_id binary(50), branch binary(50), type binary(20))"
                % self.tbName)
            cursor2.execute(
                "insert into %s values(now, %f, %f, '%s', '%s', '%s')"
                % (self.tbName, in_order_time, out_of_order_time,
                   self.commitID, self.branchName, self.type))
        finally:
            cursor2.close()
def parseArguments(argv=None):
    """Parse the command-line options of the benchmark script.

    Args:
        argv: Argument list to parse; ``None`` makes argparse read
            ``sys.argv[1:]``.

    Returns:
        The argparse namespace with ``commit_id``, ``database_name``,
        ``table_name``, ``branch_name`` and ``build_type``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-c',
        '--commit-id',
        action='store',
        default='null',
        type=str,
        help='git commit id (default: null)')
    parser.add_argument(
        '-d',
        '--database-name',
        action='store',
        default='perf',
        type=str,
        help='Database name to be created (default: perf)')
    parser.add_argument(
        '-t',
        '--table-name',
        action='store',
        default='csv_insert',
        type=str,
        # The original help text described this option as a database name.
        help='Table name to be created (default: csv_insert)')
    parser.add_argument(
        '-b',
        '--branch-name',
        action='store',
        default='develop',
        type=str,
        help='branch name (default: develop)')
    parser.add_argument(
        '-T',
        '--build-type',
        action='store',
        default='glibc',
        type=str,
        help='build type (default: glibc)')
    return parser.parse_args(argv)


if __name__ == '__main__':
    # Script entry point: parse the options, then run the benchmark once.
    args = parseArguments()
    perftest = insertFromCSVPerformace(
        args.commit_id,
        args.database_name,
        args.table_name,
        args.branch_name,
        args.build_type)
    perftest.run()