# insertFromCSVPerformance.py
###################################################################
#           Copyright (c) 2016 by TAOS Technologies, Inc.
#                     All rights reserved.
#
#  This file is proprietary and confidential to TAOS Technologies.
#  No part of this file may be reproduced, stored, transmitted,
#  disclosed or used in any form or by any means other than as
#  expressly provided by the written permission from Jianhui Tao
#
###################################################################

# -*- coding: utf-8 -*-

import sys
import taos
import time
import datetime
import csv
import random
import pandas as pd
import argparse
import os.path

class insertFromCSVPerformace:
    """Benchmark TDengine's ``insert into ... file`` CSV import and record the result.

    ``run()`` times the import of ``outoforder.csv`` and ``ordered.csv`` on the
    local server (``self.conn``) and stores the two averaged timings, tagged
    with the commit id, branch name and build type, in ``tbName`` of ``dbName``
    on the reporting server (``self.conn2``).
    """

    def __init__(self, commitID, dbName, tbName, branchName, buildType):
        """Store the run metadata and open both database connections.

        Args:
            commitID: git commit id stored with the result row.
            dbName: database used for the benchmark tables and the result table.
            tbName: table on the reporting server that receives the result row.
            branchName: branch name; stored with the result and used to build
                the client configuration directory ``/etc/<branchName>``.
            buildType: build type stored with the result row.
        """
        self.commitID = commitID
        self.dbName = dbName
        self.tbName = tbName
        self.branchName = branchName
        self.type = buildType
        # Base epoch value (milliseconds) that writeCSV() adds random offsets to.
        self.ts = 1500000000000
        self.host = "127.0.0.1"
        self.user = "root"
        self.password = "taosdata"
        self.config = "/etc/%s" % self.branchName
        # Keyword arguments, matching the reporting connection below.
        # NOTE(review): the connector's connect() is keyword-based, so the
        # previous positional form appears to have been ignored -- confirm
        # against the installed connector version.
        self.conn = taos.connect(
            host=self.host,
            user=self.user,
            password=self.password,
            config=self.config)
        # Server that run() writes the benchmark results to.
        self.host2 = "192.168.1.179"
        self.conn2 = taos.connect(
            host=self.host2,
            user=self.user,
            password=self.password,
            config=self.config)

    def writeCSV(self, fileName='test4.csv', rowCount=2000000):
        """Write ``rowCount`` rows with unique random timestamps to ``fileName``.

        Each row is ``'<timestamp>', int, float, int, int``; the timestamp is
        written inside single quotes. Rows are emitted in generation order, so
        the timestamps are not sorted.

        Args:
            fileName: path of the CSV file to create (overwritten if present).
            rowCount: number of rows to write; 0 produces an empty file.
        """
        usedTimestamps = set()
        with open(fileName, 'w', encoding='utf-8', newline='') as csvFile:
            writer = csv.writer(csvFile, dialect='excel')
            while len(usedTimestamps) < rowCount:
                # One random digit weight (1..10) per decimal position,
                # from 10**10 down to 10**0, added to the base timestamp.
                offset = sum(
                    random.randint(1, 10) * 10 ** exponent
                    for exponent in range(10, -1, -1))
                newTimestamp = self.ts + offset
                if newTimestamp in usedTimestamps:
                    continue  # duplicate timestamp: draw again
                usedTimestamps.add(newTimestamp)
                # self.ts is in milliseconds; fromtimestamp() expects seconds.
                d = datetime.datetime.fromtimestamp(newTimestamp / 1000)
                dt = d.strftime("%Y-%m-%d %H:%M:%S.%f")
                writer.writerow([
                    "'%s'" % dt,
                    random.randint(1, 100),
                    random.uniform(1, 100),
                    random.randint(1, 100),
                    random.randint(1, 100)])

    def removCSVHeader(self, fileName="ordered.csv"):
        """Rewrite ``fileName`` in place without a header line.

        NOTE(review): ``read_csv`` consumes the first line as the header and
        ``drop([0])`` then removes the first remaining row, so two lines of the
        original file are omitted -- confirm this is what the input needs.

        Args:
            fileName: path of the CSV file to rewrite.
        """
        data = pd.read_csv(fileName)
        data = data.drop([0])
        data.to_csv(fileName, header=False, index=False)

    def _averageInsertTime(self, cursor, tableName, csvFile, rounds=10):
        """Return the mean time in seconds of importing ``csvFile`` into ``tableName``.

        The table is dropped and recreated before every round so that each
        import starts from an empty table; only the ``insert ... file``
        statement itself is timed.
        """
        totalTime = 0.0
        for _ in range(rounds):
            cursor.execute("drop table if exists %s" % tableName)
            cursor.execute(
                "create table if not exists %s(ts timestamp, c1 int, c2 float, c3 int, c4 int)"
                % tableName)
            # perf_counter() is monotonic, so clock adjustments cannot skew the result.
            startTime = time.perf_counter()
            cursor.execute("insert into %s file '%s'" % (tableName, csvFile))
            totalTime += time.perf_counter() - startTime
            time.sleep(1)  # pause between rounds (not included in the timing)
        return totalTime / rounds

    def run(self):
        """Run both CSV import benchmarks and store the averaged timings.

        The benchmark database on the local server is dropped afterwards; the
        result row is inserted through the reporting connection.
        """
        cursor = self.conn.cursor()
        try:
            cursor.execute("create database if not exists %s" % self.dbName)
            cursor.execute("use %s" % self.dbName)
            print("==================== CSV insert performance ====================")

            out_of_order_time = self._averageInsertTime(cursor, "t1", "outoforder.csv")
            print("Out of Order - Insert time: %f" % out_of_order_time)

            in_order_time = self._averageInsertTime(cursor, "t2", "ordered.csv")
            print("In order - Insert time: %f" % in_order_time)

            cursor.execute("drop database if exists %s" % self.dbName)
        finally:
            # Release the cursor even when a statement above fails.
            cursor.close()

        cursor2 = self.conn2.cursor()
        try:
            cursor2.execute("create database if not exists %s" % self.dbName)
            cursor2.execute("use %s" % self.dbName)
            cursor2.execute("create table if not exists %s(ts timestamp, in_order_time float, out_of_order_time float, commit_id binary(50), branch binary(50), type binary(20))" % self.tbName)
            cursor2.execute("insert into %s values(now, %f, %f, '%s', '%s', '%s')" % (self.tbName, in_order_time, out_of_order_time, self.commitID, self.branchName, self.type))
        finally:
            cursor2.close()
        
def parseArguments(argv=None):
    """Parse the command-line options of the CSV insert benchmark.

    Args:
        argv: list of argument strings; ``None`` makes argparse read
            ``sys.argv[1:]``.

    Returns:
        The argparse namespace with ``commit_id``, ``database_name``,
        ``table_name``, ``branch_name`` and ``build_type``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-c',
        '--commit-id',
        action='store',
        default='null',
        type=str,
        help='git commit id (default: null)')
    parser.add_argument(
        '-d',
        '--database-name',
        action='store',
        default='perf',
        type=str,
        help='Database name to be created (default: perf)')
    parser.add_argument(
        '-t',
        '--table-name',
        action='store',
        default='csv_insert',
        type=str,
        help='Table name to be created (default: csv_insert)')
    parser.add_argument(
        '-b',
        '--branch-name',
        action='store',
        default='2.4',
        type=str,
        help='branch name (default: 2.4)')
    parser.add_argument(
        '-T',
        '--build-type',
        action='store',
        default='glibc',
        type=str,
        help='build type (default: glibc)')
    return parser.parse_args(argv)


if __name__ == '__main__':
    args = parseArguments()
    perftest = insertFromCSVPerformace(args.commit_id, args.database_name, args.table_name, args.branch_name, args.build_type)
    perftest.run()