insertFromCSVPerformance.py 5.0 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131
###################################################################
#           Copyright (c) 2016 by TAOS Technologies, Inc.
#                     All rights reserved.
#
#  This file is proprietary and confidential to TAOS Technologies.
#  No part of this file may be reproduced, stored, transmitted,
#  disclosed or used in any form or by any means other than as
#  expressly provided by the written permission from Jianhui Tao
#
###################################################################

# -*- coding: utf-8 -*-

import sys
import taos
import time
import datetime
import csv
import random
import pandas as pd
import argparse
import os.path

class insertFromCSVPerformace:
    """Benchmark ``insert into <table> file '<csv>'`` and record the timings.

    The benchmark loads 'outoforder.csv' into table ``t1`` and 'ordered.csv'
    into table ``t2`` several times each, averages the elapsed time of the
    insert statement, and stores both averages together with the commit id in
    the per-branch result table ``branchName`` (a child of ``stbName``).
    """

    def __init__(self, commitID, dbName, stbName, branchName):
        """Store the run parameters and open the database connection.

        Args:
            commitID: Commit identifier written alongside the measured times.
            dbName: Database that holds both the scratch and result tables.
            stbName: Tagged (super) table that collects results of all branches.
            branchName: Name of the result table and value of its ``branch`` tag.
        """
        self.commitID = commitID
        self.dbName = dbName
        self.stbName = stbName
        self.branchName = branchName
        # Base epoch value in milliseconds used by writeCSV() for timestamps.
        self.ts = 1500074556514
        self.host = "127.0.0.1"
        self.user = "root"
        self.password = "taosdata"
        self.config = "/etc/taosperf"
        self.conn = taos.connect(
            self.host,
            self.user,
            self.password,
            self.config)

    def writeCSV(self, fileName='test3.csv', rowCount=1000000):
        """Write ``rowCount`` rows of random sample data to ``fileName``.

        Each row is: a single-quoted local-time timestamp with microsecond
        precision, a random int in [1, 100], a random float in [1, 100], and
        two more random ints in [1, 100]. Timestamps are ``self.ts`` plus a
        random offset, so the rows are not in time order.

        Args:
            fileName: Path of the CSV file to create (overwritten if present).
            rowCount: Number of rows to generate.
        """
        with open(fileName, 'w', encoding='utf-8', newline='') as csvFile:
            writer = csv.writer(csvFile, dialect='excel')
            for _ in range(rowCount):
                # Three random offsets of decreasing magnitude are summed to
                # spread the timestamps (in milliseconds) past self.ts.
                newTimestamp = (
                    self.ts
                    + random.randint(10000000, 10000000000)
                    + random.randint(1000, 10000000)
                    + random.randint(1, 1000)
                )
                d = datetime.datetime.fromtimestamp(newTimestamp / 1000)
                dt = d.strftime("%Y-%m-%d %H:%M:%S.%f")
                writer.writerow([
                    "'%s'" % dt,
                    random.randint(1, 100),
                    random.uniform(1, 100),
                    random.randint(1, 100),
                    random.randint(1, 100),
                ])

    def removCSVHeader(self):
        """Rewrite 'ordered.csv' in place without its leading line(s).

        NOTE(review): ``pd.read_csv`` consumes the first line as the header and
        ``drop([0])`` then removes the first data row too, so two lines of the
        original file are dropped - confirm that this is intended.
        """
        data = pd.read_csv("ordered.csv")
        data = data.drop([0])
        data.to_csv("ordered.csv", header = False, index = False)

    def createTables(self):
        """Create the database, the result tables and the scratch tables.

        All statements use ``if not exists``, so calling this repeatedly is
        accepted by the server-side DDL as written.
        """
        cursor = self.conn.cursor()
        try:
            cursor.execute("create database if not exists %s" % self.dbName)
            cursor.execute("use %s" % self.dbName)
            cursor.execute("create table if not exists %s(ts timestamp, in_order_time float, out_of_order_time float, commit_id binary(50)) tags(branch binary(50))" % self.stbName)
            cursor.execute("create table if not exists %s using %s tags('%s')" % (self.branchName, self.stbName, self.branchName))

            cursor.execute("create table if not exists t1(ts timestamp, c1 int, c2 float, c3 int, c4 int)")
            cursor.execute("create table if not exists t2(ts timestamp, c1 int, c2 float, c3 int, c4 int)")
        finally:
            # Release the cursor even when one of the statements fails.
            cursor.close()

    def _timeCSVInsert(self, cursor, tableName, csvFile, rounds=10):
        """Return the mean seconds taken to load ``csvFile`` into ``tableName``.

        The table is (re)created before and dropped after every round so each
        insert starts from an empty table; only the insert statement is timed.
        """
        totalTime = 0.0
        for _ in range(rounds):
            cursor.execute("create table if not exists %s(ts timestamp, c1 int, c2 float, c3 int, c4 int)" % tableName)
            # perf_counter() is monotonic, so the interval cannot be distorted
            # by system clock adjustments the way time.time() can.
            startTime = time.perf_counter()
            cursor.execute("insert into %s file '%s'" % (tableName, csvFile))
            totalTime += time.perf_counter() - startTime
            cursor.execute("drop table if exists %s" % tableName)
        return totalTime / rounds

    def run(self):
        """Run both benchmarks, print the averages and store them.

        Expects 'outoforder.csv' and 'ordered.csv' to be resolvable by the
        ``insert ... file`` statement; this method does not create them.
        """
        cursor = self.conn.cursor()
        try:
            cursor.execute("use %s" % self.dbName)
            print("==================== CSV insert performance ====================")

            out_of_order_time = self._timeCSVInsert(cursor, "t1", "outoforder.csv")
            print("Out of Order - Insert time: %f" % out_of_order_time)

            in_order_time = self._timeCSVInsert(cursor, "t2", "ordered.csv")
            print("In order - Insert time: %f" % in_order_time)

            cursor.execute("insert into %s values(now, %f, %f, '%s')" % (self.branchName, in_order_time, out_of_order_time, self.commitID))
        finally:
            # The original never closed this cursor.
            cursor.close()
        

if __name__ == '__main__':
    # Command-line entry point: parse the run parameters, make sure the
    # database objects exist, then run the benchmark and store its result.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-c',
        '--commit-id',
        action='store',
        default='null',
        type=str,
        help='git commit id (default: null)')
    parser.add_argument(
        '-d',
        '--database-name',
        action='store',
        default='perf',
        type=str,
        help='Database name to be created (default: perf)')
    parser.add_argument(
        '-t',
        '--stable-name',
        action='store',
        default='csv_insert',
        type=str,
        # This help text used to be a copy of the --database-name one.
        help='Super table name to be created (default: csv_insert)')
    parser.add_argument(
        '-b',
        '--branch-name',
        action='store',
        default='develop',
        type=str,
        help='branch name (default: develop)')

    args = parser.parse_args()
    perftest = insertFromCSVPerformace(args.commit_id, args.database_name, args.stable_name, args.branch_name)

    try:
        perftest.createTables()
        perftest.run()
    finally:
        # Close the connection opened by the constructor, even on failure.
        perftest.conn.close()