From 4e56c7b9bbb4f0851c5eff6d52f85b321cff74d2 Mon Sep 17 00:00:00 2001
From: hanxinke
Date: Thu, 23 Jul 2020 19:16:04 +0800
Subject: [PATCH] atune: add header to dataset and add new identification
 scenarios

---
 analysis/atuned/collector.py                  | 32 +++++---
 analysis/engine/classification.py             |  2 +-
 analysis/engine/utils/utils.py                |  3 +-
 .../optimizer/workload_characterization.py    | 32 ++++++--
 common/profile/service.go                     | 51 ++++++++++++
 database/init.sql                             | 22 +++++
 modules/client/profile/profile_collection.go  | 12 +--
 modules/server/profile/profile.go             | 43 ++++------
 profiles/big-data/hadoop-hdfs/dfsio-hdd.conf  | 22 +----
 profiles/big-data/hadoop-hdfs/dfsio-ssd.conf  |  2 +-
 profiles/database/mysql/2p-sysbench-hdd.conf  | 48 ++---------
 profiles/database/mysql/2p-sysbench-ssd.conf  |  2 +-
 .../database/postgresql/2p-sysbench-hdd.conf  | 35 ++------
 .../database/postgresql/2p-sysbench-ssd.conf  |  2 +-
 profiles/include/ceph-vdbench.conf            | 55 +++++++++++++
 profiles/include/hdfs-dfsio.conf              | 61 ++++++++++++++
 profiles/include/mysql-2p-sysbench.conf       | 81 +++++++++++++++++++
 profiles/include/postgresql-2p-sysbench.conf  | 69 ++++++++++++++++
 profiles/storage/ceph/vdbench-hdd.conf        | 14 +---
 profiles/storage/ceph/vdbench-ssd.conf        |  2 +-
 20 files changed, 437 insertions(+), 153 deletions(-)
 create mode 100644 profiles/include/ceph-vdbench.conf
 create mode 100644 profiles/include/hdfs-dfsio.conf
 create mode 100644 profiles/include/mysql-2p-sysbench.conf
 create mode 100644 profiles/include/postgresql-2p-sysbench.conf

diff --git a/analysis/atuned/collector.py b/analysis/atuned/collector.py
index ea21669..6c67d6f 100755
--- a/analysis/atuned/collector.py
+++ b/analysis/atuned/collector.py
@@ -38,14 +38,22 @@ class Collector(Resource):
         args = COLLECTOR_POST_PARSER.parse_args()
         current_app.logger.info(args)
         n_pipe = get_npipe(args.get("pipe"))
-        if n_pipe is None:
-            abort(404)
-
         monitors = []
         mpis = []
+        field_name = []
         for monitor in args.get(self.monitors):
             monitors.append([monitor["module"], monitor["purpose"], monitor["field"]])
             mpis.append(MPI.get_monitor(monitor["module"], monitor["purpose"]))
+            opts = monitor["field"].split(";")[1].split()
+            for opt in opts:
+                if opt.split("=")[0] in "--fields":
+                    field_name.append("%s.%s.%s" % (monitor["module"], monitor["purpose"],
+                                                    opt.split("=")[1]))
+        data_type = args.get("data_type")
+        if data_type != "":
+            field_name.append("workload.type")
+            field_name.append("workload.appname")
+
         collect_num = args.get("sample_num")
         if int(collect_num) < 1:
             abort("sample_num must be greater than 0")
@@ -53,7 +61,6 @@ class Collector(Resource):
         current_app.logger.info(monitors)
 
         data = []
-        data_type = args.get("data_type")
         for _ in range(collect_num):
             raw_data = MPI.get_monitors_data(monitors, mpis)
             current_app.logger.info(raw_data)
@@ -63,25 +70,28 @@ class Collector(Resource):
                 float_data.append(float(num))
 
             str_data = [str(round(data, 3)) for data in float_data]
-            n_pipe.write(" ".join(str_data) + "\n")
+            if n_pipe is not None:
+                n_pipe.write(" ".join(str_data) + "\n")
 
             if data_type != "":
-                float_data.append(data_type)
+                for type_name in data_type.split(":"):
+                    float_data.append(type_name)
             data.append(float_data)
 
-        n_pipe.close()
+        if n_pipe is not None:
+            n_pipe.close()
         path = args.get("file")
-        save_file(path, data)
+        save_file(path, data, field_name)
         result = {}
         result["path"] = path
         return result, 200
 
 
-def save_file(file_name, datas):
+def save_file(file_name, datas, field):
     """save file"""
     path = os.path.dirname(file_name.strip())
     if not os.path.exists(path):
         os.makedirs(path, 0o750)
-    writer = pd.DataFrame(columns=None, data=datas)
-    writer.to_csv(file_name, encoding='utf-8', header=0, index=False)
+    writer = pd.DataFrame(columns=field, data=datas)
+    writer.to_csv(file_name, encoding='utf-8', index=False)
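Note: the hunk above derives the CSV header from each monitor's field string, turning every `--fields=<name>` option into a `<module>.<purpose>.<name>` column, and appends two label columns when a data type is supplied. A minimal sketch of that derivation follows; the field string is a hypothetical example, since the exact format comes from the monitor modules:

```python
# Standalone sketch of the header derivation above; the field string below
# is an illustrative assumption, not taken from a real monitor module.
def header_names(module, purpose, field):
    names = []
    opts = field.split(";")[1].split()  # options after the first ';'
    for opt in opts:
        # Note: `x in "--fields"` is a substring test, so a short fragment
        # such as "-f" would also match; an exact comparison
        # (opt.split("=")[0] == "--fields") would be stricter.
        if opt.split("=")[0] in "--fields":
            names.append("%s.%s.%s" % (module, purpose, opt.split("=")[1]))
    return names

print(header_names("CPU", "STAT", "CPU;--interval=1 --fields=usr --fields=sys"))
# ['CPU.STAT.usr', 'CPU.STAT.sys']
```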
diff --git a/analysis/engine/classification.py b/analysis/engine/classification.py
index 2b2af33..15e2a1d 100644
--- a/analysis/engine/classification.py
+++ b/analysis/engine/classification.py
@@ -47,7 +47,7 @@ class Classification(Resource):
         model = args.get(self.model, None)
         data = utils.read_from_csv(data_path)
         os.remove(data_path)
-        if not data:
+        if data.empty:
             abort("data may be not exist")
 
         classification = WorkloadCharacterization(model_path)
diff --git a/analysis/engine/utils/utils.py b/analysis/engine/utils/utils.py
index 5e101b7..21e8cc2 100644
--- a/analysis/engine/utils/utils.py
+++ b/analysis/engine/utils/utils.py
@@ -30,8 +30,7 @@ def read_from_csv(path):
         return None
 
     with open(path, 'r') as file:
-        data = pd.read_csv(file, header=None)
-        data = np.array(data).tolist()
+        data = pd.read_csv(file, header=0)
     return data
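Note: `read_from_csv` now returns a headed DataFrame instead of a list, so the truthiness test had to change: `not data` on a DataFrame raises `ValueError: The truth value of a DataFrame is ambiguous`, hence `data.empty`. One gap this hunk leaves open is that `read_from_csv` still returns `None` for a missing file, and `None` has no `.empty` attribute. A defensive caller might guard both cases; a sketch only, with a made-up file name:

```python
import pandas as pd

def read_from_csv(path):
    # mirrors the patched helper: None when missing, headed DataFrame otherwise
    try:
        with open(path, 'r') as file:
            return pd.read_csv(file, header=0)
    except FileNotFoundError:
        return None

data = read_from_csv("collected.csv")      # hypothetical file name
if data is None or data.empty:             # covers both missing file and empty data
    raise SystemExit("data may not exist")
```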
diff --git a/analysis/optimizer/workload_characterization.py b/analysis/optimizer/workload_characterization.py
index 44194ef..4fec556 100644
--- a/analysis/optimizer/workload_characterization.py
+++ b/analysis/optimizer/workload_characterization.py
@@ -40,6 +40,24 @@ class WorkloadCharacterization:
         self.tencoder = LabelEncoder()
         self.aencoder = LabelEncoder()
         self.dataset = None
+        self.data_features = ['CPU.STAT.usr', 'CPU.STAT.nice', 'CPU.STAT.sys', 'CPU.STAT.iowait',
+                              'CPU.STAT.irq', 'CPU.STAT.soft', 'CPU.STAT.steal', 'CPU.STAT.guest',
+                              'CPU.STAT.util', 'CPU.STAT.cutil', 'STORAGE.STAT.rs',
+                              'STORAGE.STAT.ws', 'STORAGE.STAT.rMBs', 'STORAGE.STAT.wMBs',
+                              'STORAGE.STAT.rrqm', 'STORAGE.STAT.wrqm', 'STORAGE.STAT.rareq-sz',
+                              'STORAGE.STAT.wareq-sz', 'STORAGE.STAT.r_await',
+                              'STORAGE.STAT.w_await', 'STORAGE.STAT.util', 'STORAGE.STAT.aqu-sz',
+                              'NET.STAT.rxkBs', 'NET.STAT.txkBs', 'NET.STAT.rxpcks',
+                              'NET.STAT.txpcks', 'NET.STAT.ifutil', 'NET.ESTAT.errs',
+                              'NET.ESTAT.util', 'MEM.BANDWIDTH.Total_Util', 'PERF.STAT.IPC',
+                              'PERF.STAT.CACHE-MISS-RATIO', 'PERF.STAT.MPKI',
+                              'PERF.STAT.ITLB-LOAD-MISS-RATIO', 'PERF.STAT.DTLB-LOAD-MISS-RATIO',
+                              'PERF.STAT.SBPI', 'PERF.STAT.SBPC', 'MEM.VMSTAT.procs.b',
+                              'MEM.VMSTAT.io.bo', 'MEM.VMSTAT.system.in', 'MEM.VMSTAT.system.cs',
+                              'MEM.VMSTAT.util.swap', 'MEM.VMSTAT.util.cpu', 'MEM.VMSTAT.procs.r',
+                              'SYS.TASKS.procs', 'SYS.TASKS.cswchs', 'SYS.LDAVG.runq-sz',
+                              'SYS.LDAVG.plist-sz', 'SYS.LDAVG.ldavg-1', 'SYS.LDAVG.ldavg-5',
+                              'SYS.FDUTIL.fd-util']
 
     def parsing(self, data_path, header=0, analysis=False):
         """
@@ -49,8 +67,12 @@ class WorkloadCharacterization:
         """
         df_content = []
         csvfiles = glob.glob(data_path)
+        selected_cols = list(self.data_features)  # copy, so the shared feature list is not mutated
+        selected_cols.append('workload.type')
+        selected_cols.append('workload.appname')
+
         for csv in csvfiles:
-            data = pd.read_csv(csv, index_col=None, header=header)
+            data = pd.read_csv(csv, index_col=None, header=header, usecols=selected_cols)
             df_content.append(data)
         dataset = pd.concat(df_content, sort=False)
         self.dataset = dataset
@@ -134,7 +156,7 @@ class WorkloadCharacterization:
         w_array = np.ones(y_train.shape[0], dtype='float')
         for i, val in enumerate(y_train):
             w_array[i] = class_weights[val]
-        model = RandomForestClassifier(n_estimators=200, oob_score=True, random_state=0)
+        model = RandomForestClassifier(n_estimators=150, oob_score=True, random_state=0)
         model.fit(x_train, y_train, sample_weight=w_array)
         y_pred = model.predict(x_test)
         print("the accuracy of random forest classifier is %f" % accuracy_score(y_test, y_pred))
@@ -216,7 +238,7 @@ class WorkloadCharacterization:
         identify the workload_type according to input data
         :param data: input data
         """
-        data = pd.DataFrame(data)
+        data = data[self.data_features]
         tencoder_path = os.path.join(self.model_path, "tencoder.pkl")
         aencoder_path = os.path.join(self.model_path, "aencoder.pkl")
         scaler_path = os.path.join(self.model_path, "scaler.pkl")
@@ -285,8 +307,8 @@ class WorkloadCharacterization:
         encodername = modelname + '_encoder.pkl'
 
         data_path = os.path.join(data_path, "*.csv")
-        self.parsing(data_path, header=None)
-        x_axis = self.scaler.fit_transform(self.dataset.iloc[:, :-1])
+        self.parsing(data_path)
+        x_axis = self.scaler.fit_transform(self.dataset.iloc[:, :-2])
         y_axis = self.aencoder.fit_transform(self.dataset.iloc[:, -1])
         joblib.dump(self.scaler, os.path.join(dirname, scalername))
         joblib.dump(self.aencoder, os.path.join(dirname, encodername))
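Note: with the header in place, identification selects the feature columns by name (`data[self.data_features]`) instead of trusting positional order, and training slices off the two trailing label columns with `iloc[:, :-2]`. Selecting by name makes the pipeline robust to column reordering between collection runs. A small self-contained illustration, where the feature names are a subset of `data_features` and the values are invented:

```python
import pandas as pd

# The same sample, stored with columns in a different order, still yields an
# identical feature matrix when columns are selected by header name.
features = ['CPU.STAT.usr', 'CPU.STAT.sys', 'MEM.VMSTAT.util.cpu']
csv_a = pd.DataFrame({'CPU.STAT.usr': [5.0], 'CPU.STAT.sys': [1.2],
                      'MEM.VMSTAT.util.cpu': [6.3],
                      'workload.type': ['mysql-2p-sysbench'],
                      'workload.appname': ['mysql']})
csv_b = csv_a[['workload.appname', 'MEM.VMSTAT.util.cpu', 'CPU.STAT.usr',
               'CPU.STAT.sys', 'workload.type']]  # shuffled column order
assert csv_a[features].equals(csv_b[features])    # name-based selection is order-independent
```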
diff --git a/common/profile/service.go b/common/profile/service.go
index 4146d95..32c3a9f 100644
--- a/common/profile/service.go
+++ b/common/profile/service.go
@@ -14,8 +14,10 @@
 package profile
 
 import (
+	"fmt"
 	"gitee.com/openeuler/A-Tune/common/config"
 	"gitee.com/openeuler/A-Tune/common/utils"
+	"github.com/go-ini/ini"
 	"io/ioutil"
 	"os"
 	"path"
@@ -104,3 +106,52 @@ func UpdateProfile(profileName string, data string) error {
 
 	return nil
 }
+
+// GetProfileInclude gets the include info of the named profile
+func GetProfileInclude(name string) (string, error) {
+	var file *ini.File
+	err := filepath.Walk(config.DefaultProfilePath, func(absPath string, info os.FileInfo, err error) error {
+		if !info.IsDir() {
+			absFilename := absPath[len(config.DefaultProfilePath)+1:]
+			filenameOnly := strings.TrimSuffix(strings.ReplaceAll(absFilename, "/", "-"),
+				path.Ext(info.Name()))
+			if filenameOnly == name {
+				file, err = ini.Load(absPath)
+				if err != nil {
+					return err
+				}
+				return nil
+			}
+		}
+		return nil
+	})
+
+	if err != nil {
+		return "", err
+	}
+
+	if file == nil {
+		return "", fmt.Errorf("%s profile is not found", name)
+	}
+
+	for _, section := range file.Sections() {
+		if section.Name() == "main" {
+			if section.HasKey("include") {
+				key, _ := section.GetKey("include")
+				values := make([]string, 0)
+				for _, includeValue := range strings.Split(key.Value(), ",") {
+					segValue := strings.SplitN(includeValue, "-", 2)
+					value := segValue[0]
+					if len(segValue) >= 2 {
+						value = segValue[1]
+					}
+					values = append(values, value)
+				}
+
+				return strings.Join(values, "-"), nil
+			}
+		}
+	}
+
+	return "", nil
+}
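Note: `GetProfileInclude` walks the profile directory, finds the conf file whose flattened name matches the class's profile type, and returns its `[main] include` value with the leading directory segment stripped, so `include-ceph-vdbench` becomes `ceph-vdbench`. That normalised string later labels collected samples as `workload.type`. The same normalisation in Python, for illustration only (the server does this in Go):

```python
# Python mirror of the include-name normalisation in GetProfileInclude.
def include_value(raw):
    """raw is the [main] include field, possibly comma-separated."""
    parts = []
    for item in raw.split(","):
        seg = item.split("-", 1)          # drop the leading directory segment
        parts.append(seg[1] if len(seg) >= 2 else seg[0])
    return "-".join(parts)

assert include_value("include-ceph-vdbench") == "ceph-vdbench"
assert include_value("default-default") == "default"
```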
class_profile(class, profile_type, active) VALUES("mariadb_docker", "docker-mariadb-2p-tpcc-c3", 0); -- Performance Point INSERT INTO tuned_item(property, item) VALUES("check_environment", "Check"); diff --git a/modules/client/profile/profile_collection.go b/modules/client/profile/profile_collection.go index a3ee9a1..182236c 100644 --- a/modules/client/profile/profile_collection.go +++ b/modules/client/profile/profile_collection.go @@ -62,8 +62,8 @@ var collectionCommand = cli.Command{ Value: "", }, cli.StringFlag{ - Name: "workload_type,t", - Usage: "the workload type of the collected data", + Name: "app_type,t", + Usage: "the app type of the collected data", Value: "", }, }, @@ -71,7 +71,7 @@ var collectionCommand = cli.Command{ desc := ` collect data for train machine learning model, you must set the command options which has no default value, the output_path must be a absolute path. - example: atune-adm collection -f mysql -i 5 -d 1200 -o /home -b sda -n eth0 -t idle` + example: atune-adm collection -f mysql -i 5 -d 1200 -o /home -b sda -n eth0 -t mysql` return desc }(), Action: collection, @@ -112,9 +112,9 @@ func checkCollectionCtx(ctx *cli.Context) error { return fmt.Errorf("error: network must be specified") } - if ctx.String("workload_type") == "" { + if ctx.String("app_type") == "" { _ = cli.ShowCommandHelp(ctx, "collection") - return fmt.Errorf("error: workload type must be specified") + return fmt.Errorf("error: app type must be specified") } if ctx.String("output_path") == "" { @@ -160,7 +160,7 @@ func collection(ctx *cli.Context) error { OutputPath: outputPath, Block: ctx.String("disk"), Network: ctx.String("network"), - Type: ctx.String("workload_type"), + Type: ctx.String("app_type"), } svc := PB.NewProfileMgrClient(c.Connection()) diff --git a/modules/server/profile/profile.go b/modules/server/profile/profile.go index 21bde6f..d8049c0 100644 --- a/modules/server/profile/profile.go +++ b/modules/server/profile/profile.go @@ -843,12 +843,18 @@ func (s *ProfileServer) Collection(message *PB.CollectFlag, stream PB.ProfileMgr return fmt.Errorf("input:%s is invalid", message.GetNetwork()) } - classApps := &sqlstore.GetClassApp{Class: message.GetType()} - if err = sqlstore.GetClassApps(classApps); err != nil { + classProfile := &sqlstore.GetClass{Class: message.GetType()} + if err = sqlstore.GetClasses(classProfile); err != nil { return err } - if len(classApps.Result) == 0 { - return fmt.Errorf("workload type %s is not exist, please use define command first", message.GetType()) + if len(classProfile.Result) == 0 { + return fmt.Errorf("app type %s is not exist, use define command first", message.GetType()) + } + + profileType := classProfile.Result[0].ProfileType + include, err := profile.GetProfileInclude(profileType) + if err != nil { + return err } exist, err := utils.PathExist(message.GetOutputPath()) @@ -867,25 +873,6 @@ func (s *ProfileServer) Collection(message *PB.CollectFlag, stream PB.ProfileMgr return err } - npipe, err := utils.CreateNamedPipe() - if err != nil { - return fmt.Errorf("create named pipe failed") - } - - defer os.Remove(npipe) - - go func() { - file, _ := os.OpenFile(npipe, os.O_RDONLY, os.ModeNamedPipe) - reader := bufio.NewReader(file) - - scanner := bufio.NewScanner(reader) - - for scanner.Scan() { - line := scanner.Text() - _ = stream.Send(&PB.AckCheck{Name: line, Status: utils.INFO}) - } - }() - collections, err := sqlstore.GetCollections() if err != nil { log.Errorf("inquery collection tables error: %v", err) @@ -925,11 +912,15 @@ func (s 
diff --git a/modules/server/profile/profile.go b/modules/server/profile/profile.go
index 21bde6f..d8049c0 100644
--- a/modules/server/profile/profile.go
+++ b/modules/server/profile/profile.go
@@ -843,12 +843,18 @@ func (s *ProfileServer) Collection(message *PB.CollectFlag, stream PB.ProfileMgr
 		return fmt.Errorf("input:%s is invalid", message.GetNetwork())
 	}
 
-	classApps := &sqlstore.GetClassApp{Class: message.GetType()}
-	if err = sqlstore.GetClassApps(classApps); err != nil {
+	classProfile := &sqlstore.GetClass{Class: message.GetType()}
+	if err = sqlstore.GetClasses(classProfile); err != nil {
 		return err
 	}
-	if len(classApps.Result) == 0 {
-		return fmt.Errorf("workload type %s is not exist, please use define command first", message.GetType())
+	if len(classProfile.Result) == 0 {
+		return fmt.Errorf("app type %s does not exist, please use the define command first", message.GetType())
+	}
+
+	profileType := classProfile.Result[0].ProfileType
+	include, err := profile.GetProfileInclude(profileType)
+	if err != nil {
+		return err
 	}
 
 	exist, err := utils.PathExist(message.GetOutputPath())
@@ -867,25 +873,6 @@ func (s *ProfileServer) Collection(message *PB.CollectFlag, stream PB.ProfileMgr
 		return err
 	}
 
-	npipe, err := utils.CreateNamedPipe()
-	if err != nil {
-		return fmt.Errorf("create named pipe failed")
-	}
-
-	defer os.Remove(npipe)
-
-	go func() {
-		file, _ := os.OpenFile(npipe, os.O_RDONLY, os.ModeNamedPipe)
-		reader := bufio.NewReader(file)
-
-		scanner := bufio.NewScanner(reader)
-
-		for scanner.Scan() {
-			line := scanner.Text()
-			_ = stream.Send(&PB.AckCheck{Name: line, Status: utils.INFO})
-		}
-	}()
-
 	collections, err := sqlstore.GetCollections()
 	if err != nil {
 		log.Errorf("inquery collection tables error: %v", err)
@@ -925,11 +912,15 @@ func (s *ProfileServer) Collection(message *PB.CollectFlag, stream PB.ProfileMgr
 	collectorBody := new(CollectorPost)
 	collectorBody.SampleNum = int(message.GetDuration() / message.GetInterval())
 	collectorBody.Monitors = monitors
-	collectorBody.Pipe = npipe
-	nowTime := time.Now().Format("20200721-194550")
+	nowTime := time.Now().Format("20060102-150405")
 	fileName := fmt.Sprintf("%s-%s.csv", message.GetWorkload(), nowTime)
 	collectorBody.File = path.Join(message.GetOutputPath(), fileName)
-	collectorBody.DataType = message.GetType()
+	if include == "" {
+		include = "default"
+	}
+	collectorBody.DataType = fmt.Sprintf("%s:%s", include, message.GetType())
+
+	_ = stream.Send(&PB.AckCheck{Name: "start to collect data"})
 
 	_, err = collectorBody.Post()
 	if err != nil {
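Note: the removed line hard-coded a literal timestamp as the layout, which Go would reproduce verbatim for every file; the replacement uses the reference time `20060102-150405` so each collection gets a real timestamp. `DataType` now packs two labels into one string, `<include>:<app_type>`, which the collector splits on `:` and appends as the `workload.type` and `workload.appname` columns. For example, with illustrative values:

```python
# How a DataType such as "mysql-2p-sysbench:mysql" (include name : app type)
# becomes the two label columns of the CSV; the numbers are made up.
data_type = "mysql-2p-sysbench:mysql"
float_data = [5.0, 1.2, 6.3]           # one collected sample
if data_type != "":
    for type_name in data_type.split(":"):
        float_data.append(type_name)
print(float_data)                      # [5.0, 1.2, 6.3, 'mysql-2p-sysbench', 'mysql']
# matching the header tail: [..., 'workload.type', 'workload.appname']
```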
diff --git a/profiles/big-data/hadoop-hdfs/dfsio-hdd.conf b/profiles/big-data/hadoop-hdfs/dfsio-hdd.conf
index ba95fbc..2261ebd 100644
--- a/profiles/big-data/hadoop-hdfs/dfsio-hdd.conf
+++ b/profiles/big-data/hadoop-hdfs/dfsio-hdd.conf
@@ -12,7 +12,7 @@
 # hdfs dfsio hdd A-Tune configuration
 #
 [main]
-include = default-default
+include = include-hdfs-dfsio
 
 [kernel_config]
 #TODO CONFIG
@@ -24,24 +24,17 @@
 #TODO CONFIG
 
 [sysfs]
-block/{disk}/queue/scheduler = bfq
-block/{disk}/queue/nr_requests = 2048
 block/{disk}/queue/nomerges = 2
-block/{disk}/queue/iosched/slice_idle = 12
 block/{disk}/queue/iosched/low_latency = 0
 
 [systemctl]
 #TODO CONFIG
 
 [sysctl]
-vm.dirty_ratio = 90
-vm.dirty_background_ratio = 30
-vm.dirty_expire_centisecs = 3000
-vm.dirty_writeback_centisecs = 1000
+#TODO CONFIG
 
 [script]
-ethtool = -K {network} gro on | -K {network} gso on | -K {network} tso on | -G {network} rx 4096 | -G {network} tx 4096
-ifconfig = {network} mtu 9000
+#TODO CONFIG
 
 [ulimit]
 #TODO CONFIG
@@ -53,11 +46,4 @@
 #TODO CONFIG
 
 [tip]
-The checksum can ensure the integrity and accuracy of data. Disabling checksum will improve the performance but data integrity can not be ensure. = filesystem
-set the block size to 65536 and set reserved blocks percentage to 0 = filesystem
-use largefile4 and write-back mode = filesystem
-disable lazy init and the barrier function = filesystem
-enable noatime or nodirname to prevent the inode access records from being updated = filesystem
-disable cgroup files and blocks = filesystem
-set the TieredStopAtLevel to 1 = application
-set the dfs.block.size to 32M in hdfs-site.xml = application
+#TODO CONFIG
diff --git a/profiles/big-data/hadoop-hdfs/dfsio-ssd.conf b/profiles/big-data/hadoop-hdfs/dfsio-ssd.conf
index c1a975b..cd2b723 100644
--- a/profiles/big-data/hadoop-hdfs/dfsio-ssd.conf
+++ b/profiles/big-data/hadoop-hdfs/dfsio-ssd.conf
@@ -12,7 +12,7 @@
 # hdfs dfsio ssd A-Tune configuration
 #
 [main]
-include = big-data-hadoop-hdfs-dfsio-hdd
+include = include-hdfs-dfsio
 
 [kernel_config]
 #TODO CONFIG
diff --git a/profiles/database/mysql/2p-sysbench-hdd.conf b/profiles/database/mysql/2p-sysbench-hdd.conf
index 384923a..ecdcd95 100644
--- a/profiles/database/mysql/2p-sysbench-hdd.conf
+++ b/profiles/database/mysql/2p-sysbench-hdd.conf
@@ -12,7 +12,7 @@
 # mysql 2p sysbench hdd A-Tune configuration
 #
 [main]
-include = default-default
+include = include-mysql-2p-sysbench
 
 [kernel_config]
 #TODO CONFIG
@@ -21,53 +21,22 @@
 #TODO CONFIG
 
 [bootloader.grub2]
-iommu.passthrough = 1
-iommu.strict = 0
+#TODO CONFIG
 
 [sysfs]
-block/{disk}/queue/read_ahead_kb = 64
 block/{disk}/queue/scheduler = mq-deadline
-block/{disk}/queue/nr_requests = 2048
-block/{disk}/device/queue_depth = 256
-kernel/mm/transparent_hugepage/defrag = never
-kernel/mm/transparent_hugepage/enabled = never
 
 [systemctl]
-sysmonitor = stop
-irqbalance = stop
-firewalld = stop
+#TODO CONFIG
 
 [sysctl]
-vm.swappiness = 1
-vm.dirty_ratio = 5
-# schedule
-kernel.sched_cfs_bandwidth_slice_us = 21000
-kernel.sched_migration_cost_ns = 1381000
-kernel.sched_latency_ns = 16110000
-kernel.sched_min_granularity_ns = 8250000
-kernel.sched_nr_migrate = 53
-kernel.sched_wakeup_granularity_ns = 50410000
-
-# network core
-net.core.rmem_default = 21299200
-net.core.rmem_max = 21299200
-net.core.wmem_default = 21299200
-net.core.wmem_max = 21299200
-net.ipv4.tcp_rmem = 40960 8738000 62914560
-net.ipv4.tcp_wmem = 40960 8738000 62914560
-net.core.dev_weight = 97
-
-# support more connections for mysql
-net.ipv4.tcp_max_syn_backlog = 20480
-net.core.somaxconn = 1280
-net.ipv4.tcp_max_tw_buckets = 360000
+#TODO CONFIG
 
 [script]
-prefetch = off
+#TODO CONFIG
 
 [ulimit]
-{user}.hard.nofile = 102400
-{user}.soft.nofile = 102400
+#TODO CONFIG
 
 [schedule_policy]
 #TODO CONFIG
@@ -76,7 +45,4 @@
 #TODO CONFIG
 
 [tip]
-use xfs and enable noatime or nodirname to prevent the inode access records from being updated = filesystem
-set innodb_spin_wait_delay to 130, set innodb_sync_spin_loops to 70 and set innodb_numa_interleave to ON = application
-Setting innodb_flush_log_at_trx_commit to 2 and Setting sync_binlog to 1000 will improve performance but can not ensure data reliability. = application
-add -O3 compilation parameter in cmake = application
+#TODO CONFIG
diff --git a/profiles/database/mysql/2p-sysbench-ssd.conf b/profiles/database/mysql/2p-sysbench-ssd.conf
index c71e6cc..91eea92 100644
--- a/profiles/database/mysql/2p-sysbench-ssd.conf
+++ b/profiles/database/mysql/2p-sysbench-ssd.conf
@@ -12,7 +12,7 @@
 # mysql 2p sysbench ssd A-Tune configuration
 #
 [main]
-include = database-mysql-2p-sysbench-hdd
+include = include-mysql-2p-sysbench
 
 [kernel_config]
 #TODO CONFIG
diff --git a/profiles/database/postgresql/2p-sysbench-hdd.conf b/profiles/database/postgresql/2p-sysbench-hdd.conf
index 01b8c29..6e74787 100644
--- a/profiles/database/postgresql/2p-sysbench-hdd.conf
+++ b/profiles/database/postgresql/2p-sysbench-hdd.conf
@@ -12,7 +12,7 @@
 # postgresql 2p sysbench hdd A-Tune configuration
 #
 [main]
-include = default-default
+include = include-postgresql-2p-sysbench
 
 [kernel_config]
 #TODO CONFIG
@@ -21,43 +21,22 @@
 #TODO CONFIG
 
 [bootloader.grub2]
-iommu.passthrough = 1
+#TODO CONFIG
 
 [sysfs]
 block/{disk}/queue/scheduler = mq-deadline
 
 [systemctl]
-sysmonitor = stop
-irqbalance = stop
+#TODO CONFIG
 
 [sysctl]
-# schedule
-kernel.sched_cfs_bandwidth_slice_us = 21000
-kernel.sched_migration_cost_ns = 1381000
-kernel.sched_latency_ns = 16110000
-kernel.sched_min_granularity_ns = 8250000
-kernel.sched_nr_migrate = 53
-kernel.sched_wakeup_granularity_ns = 50410000
-
-# network core
-net.core.rmem_default = 21299200
-net.core.rmem_max = 21299200
-net.core.wmem_default = 21299200
-net.core.wmem_max = 21299200
-net.ipv4.tcp_rmem = 40960 8738000 62914560
-net.ipv4.tcp_wmem = 40960 8738000 62914560
-net.core.dev_weight = 97
-
-# support more connections for mysql
-net.ipv4.tcp_max_syn_backlog = 20480
-net.core.somaxconn = 1280
+#TODO CONFIG
 
 [script]
-prefetch = off
+#TODO CONFIG
 
 [ulimit]
-{user}.hard.nofile = 102400
-{user}.soft.nofile = 102400
+#TODO CONFIG
 
 [schedule_policy]
 #TODO CONFIG
@@ -66,4 +45,4 @@
 #TODO CONFIG
 
 [tip]
-use xfs and enable noatime or nodirname to prevent the inode access records from being updated = filesystem
+#TODO CONFIG
diff --git a/profiles/database/postgresql/2p-sysbench-ssd.conf b/profiles/database/postgresql/2p-sysbench-ssd.conf
index 4f20e6f..27cf03b 100644
--- a/profiles/database/postgresql/2p-sysbench-ssd.conf
+++ b/profiles/database/postgresql/2p-sysbench-ssd.conf
@@ -12,7 +12,7 @@
 # postgresql 2p sysbench ssd A-Tune configuration
 #
 [main]
-include = database-postgresql-2p-sysbench-hdd
+include = include-postgresql-2p-sysbench
 
 [kernel_config]
 #TODO CONFIG
diff --git a/profiles/include/ceph-vdbench.conf b/profiles/include/ceph-vdbench.conf
new file mode 100644
index 0000000..f5ecdd3
--- /dev/null
+++ b/profiles/include/ceph-vdbench.conf
@@ -0,0 +1,55 @@
+# Copyright (c) 2020 Huawei Technologies Co., Ltd.
+# A-Tune is licensed under the Mulan PSL v2.
+# You can use this software according to the terms and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#     http://license.coscl.org.cn/MulanPSL2
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
+# PURPOSE.
+# See the Mulan PSL v2 for more details.
+# Create: 2020-07-23
+#
+# ceph vdbench A-Tune configuration
+#
+[main]
+include = default-default
+
+[kernel_config]
+#TODO CONFIG
+
+[bios]
+#TODO CONFIG
+
+[bootloader.grub2]
+#TODO CONFIG
+
+[sysfs]
+block/{disk}/queue/write_cache = write through
+
+[systemctl]
+sysmonitor = stop
+irqbalance = stop
+
+[sysctl]
+#TODO CONFIG
+
+[script]
+#TODO CONFIG
+
+[ulimit]
+#TODO CONFIG
+
+[schedule_policy]
+#TODO CONFIG
+
+[check]
+#TODO CONFIG
+
+[tip]
+set congested_write_threshold_us to 60ms for bcache = application
+optimize bcache mechanism, change synchronous write to asynchronous write and delete journal write process = block
+set stripe unit to 128M = block
+set bluestore_max_blob_size_ssd to 128K and set osd_disk_threads to 2 in ceph.conf = application
+set TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES to 128M = application
+ceph osd set noscrub and nodeep-scrub = application
+adjust the weight of each osd in ceph cluster to balance the number of osd pgs = application
diff --git a/profiles/include/hdfs-dfsio.conf b/profiles/include/hdfs-dfsio.conf
new file mode 100644
index 0000000..d923edc
--- /dev/null
+++ b/profiles/include/hdfs-dfsio.conf
@@ -0,0 +1,61 @@
+# Copyright (c) 2020 Huawei Technologies Co., Ltd.
+# A-Tune is licensed under the Mulan PSL v2.
+# You can use this software according to the terms and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#     http://license.coscl.org.cn/MulanPSL2
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
+# PURPOSE.
+# See the Mulan PSL v2 for more details.
+# Create: 2020-07-23
+#
+# hdfs dfsio A-Tune configuration
+#
+[main]
+include = default-default
+
+[kernel_config]
+#TODO CONFIG
+
+[bios]
+#TODO CONFIG
+
+[bootloader.grub2]
+#TODO CONFIG
+
+[sysfs]
+block/{disk}/queue/scheduler = bfq
+block/{disk}/queue/nr_requests = 2048
+block/{disk}/queue/iosched/slice_idle = 12
+
+[systemctl]
+#TODO CONFIG
+
+[sysctl]
+vm.dirty_ratio = 90
+vm.dirty_background_ratio = 30
+vm.dirty_expire_centisecs = 3000
+vm.dirty_writeback_centisecs = 1000
+
+[script]
+ethtool = -K {network} gro on | -K {network} gso on | -K {network} tso on | -G {network} rx 4096 | -G {network} tx 4096
+ifconfig = {network} mtu 9000
+
+[ulimit]
+#TODO CONFIG
+
+[schedule_policy]
+#TODO CONFIG
+
+[check]
+#TODO CONFIG
+
+[tip]
+The checksum can ensure the integrity and accuracy of data. Disabling checksum will improve the performance but data integrity cannot be ensured. = filesystem
+set the block size to 65536 and set reserved blocks percentage to 0 = filesystem
+use largefile4 and write-back mode = filesystem
+disable lazy init and the barrier function = filesystem
+enable noatime or nodirname to prevent the inode access records from being updated = filesystem
+disable cgroup files and blocks = filesystem
+set the TieredStopAtLevel to 1 = application
+set the dfs.block.size to 32M in hdfs-site.xml = application
diff --git a/profiles/include/mysql-2p-sysbench.conf b/profiles/include/mysql-2p-sysbench.conf
new file mode 100644
index 0000000..b490512
--- /dev/null
+++ b/profiles/include/mysql-2p-sysbench.conf
@@ -0,0 +1,81 @@
+# Copyright (c) 2020 Huawei Technologies Co., Ltd.
+# A-Tune is licensed under the Mulan PSL v2.
+# You can use this software according to the terms and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#     http://license.coscl.org.cn/MulanPSL2
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
+# PURPOSE.
+# See the Mulan PSL v2 for more details.
+# Create: 2020-07-23
+#
+# mysql 2p sysbench A-Tune configuration
+#
+[main]
+include = default-default
+
+[kernel_config]
+#TODO CONFIG
+
+[bios]
+#TODO CONFIG
+
+[bootloader.grub2]
+iommu.passthrough = 1
+iommu.strict = 0
+
+[sysfs]
+block/{disk}/queue/read_ahead_kb = 64
+block/{disk}/queue/nr_requests = 2048
+block/{disk}/device/queue_depth = 256
+kernel/mm/transparent_hugepage/defrag = never
+kernel/mm/transparent_hugepage/enabled = never
+
+[systemctl]
+sysmonitor = stop
+irqbalance = stop
+firewalld = stop
+
+[sysctl]
+vm.swappiness = 1
+vm.dirty_ratio = 5
+# schedule
+kernel.sched_cfs_bandwidth_slice_us = 21000
+kernel.sched_migration_cost_ns = 1381000
+kernel.sched_latency_ns = 16110000
+kernel.sched_min_granularity_ns = 8250000
+kernel.sched_nr_migrate = 53
+kernel.sched_wakeup_granularity_ns = 50410000
+
+# network core
+net.core.rmem_default = 21299200
+net.core.rmem_max = 21299200
+net.core.wmem_default = 21299200
+net.core.wmem_max = 21299200
+net.ipv4.tcp_rmem = 40960 8738000 62914560
+net.ipv4.tcp_wmem = 40960 8738000 62914560
+net.core.dev_weight = 97
+
+# support more connections for mysql
+net.ipv4.tcp_max_syn_backlog = 20480
+net.core.somaxconn = 1280
+net.ipv4.tcp_max_tw_buckets = 360000
+
+[script]
+prefetch = off
+
+[ulimit]
+{user}.hard.nofile = 102400
+{user}.soft.nofile = 102400
+
+[schedule_policy]
+#TODO CONFIG
+
+[check]
+#TODO CONFIG
+
+[tip]
+use xfs and enable noatime or nodirname to prevent the inode access records from being updated = filesystem
+set innodb_spin_wait_delay to 130, set innodb_sync_spin_loops to 70 and set innodb_numa_interleave to ON = application
+setting innodb_flush_log_at_trx_commit to 2 and setting sync_binlog to 1000 will improve performance but cannot ensure data reliability = application
+add -O3 compilation parameter in cmake = application
diff --git a/profiles/include/postgresql-2p-sysbench.conf b/profiles/include/postgresql-2p-sysbench.conf
new file mode 100644
index 0000000..7f05fad
--- /dev/null
+++ b/profiles/include/postgresql-2p-sysbench.conf
@@ -0,0 +1,69 @@
+# Copyright (c) 2020 Huawei Technologies Co., Ltd.
+# A-Tune is licensed under the Mulan PSL v2.
+# You can use this software according to the terms and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#     http://license.coscl.org.cn/MulanPSL2
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
+# PURPOSE.
+# See the Mulan PSL v2 for more details.
+# Create: 2020-07-23
+#
+# postgresql 2p sysbench A-Tune configuration
+#
+[main]
+include = default-default
+
+[kernel_config]
+#TODO CONFIG
+
+[bios]
+#TODO CONFIG
+
+[bootloader.grub2]
+iommu.passthrough = 1
+
+[sysfs]
+#TODO CONFIG
+
+[systemctl]
+sysmonitor = stop
+irqbalance = stop
+
+[sysctl]
+# schedule
+kernel.sched_cfs_bandwidth_slice_us = 21000
+kernel.sched_migration_cost_ns = 1381000
+kernel.sched_latency_ns = 16110000
+kernel.sched_min_granularity_ns = 8250000
+kernel.sched_nr_migrate = 53
+kernel.sched_wakeup_granularity_ns = 50410000
+
+# network core
+net.core.rmem_default = 21299200
+net.core.rmem_max = 21299200
+net.core.wmem_default = 21299200
+net.core.wmem_max = 21299200
+net.ipv4.tcp_rmem = 40960 8738000 62914560
+net.ipv4.tcp_wmem = 40960 8738000 62914560
+net.core.dev_weight = 97
+
+# support more connections for postgresql
+net.ipv4.tcp_max_syn_backlog = 20480
+net.core.somaxconn = 1280
+
+[script]
+prefetch = off
+
+[ulimit]
+{user}.hard.nofile = 102400
+{user}.soft.nofile = 102400
+
+[schedule_policy]
+#TODO CONFIG
+
+[check]
+#TODO CONFIG
+
+[tip]
+use xfs and enable noatime or nodirname to prevent the inode access records from being updated = filesystem
diff --git a/profiles/storage/ceph/vdbench-hdd.conf b/profiles/storage/ceph/vdbench-hdd.conf
index f80c8a2..22d67f9 100644
--- a/profiles/storage/ceph/vdbench-hdd.conf
+++ b/profiles/storage/ceph/vdbench-hdd.conf
@@ -12,7 +12,7 @@
 # ceph vdbench hdd A-Tune configuration
 #
 [main]
-include = default-default
+include = include-ceph-vdbench
 
 [kernel_config]
 #TODO CONFIG
@@ -24,12 +24,10 @@
 #TODO CONFIG
 
 [sysfs]
-block/{disk}/queue/write_cache = write through
 block/{disk}/queue/scheduler = mq-deadline
 
 [systemctl]
-sysmonitor = stop
-irqbalance = stop
+#TODO CONFIG
 
 [sysctl]
 #TODO CONFIG
@@ -47,10 +45,4 @@
 #TODO CONFIG
 
 [tip]
-set congested_write_threshold_us to 60ms for bcache = application
-optimize bcache mechanism, change synchronous write to asynchronous write and delete journal write process = block
-set stripe unit to 128M = block
-set bluestore_max_blob_size_ssd to 128K and set osd_disk_threads to 2 in ceph.conf = application
-set TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES to 128M = application
-ceph osd set noscrub and nodeep-scrub = application
-adjust the weight of each osd in ceph cluster to balance the number of osd pgs = application
+#TODO CONFIG
diff --git a/profiles/storage/ceph/vdbench-ssd.conf b/profiles/storage/ceph/vdbench-ssd.conf
index 592c656..9c8a209 100644
--- a/profiles/storage/ceph/vdbench-ssd.conf
+++ b/profiles/storage/ceph/vdbench-ssd.conf
@@ -12,7 +12,7 @@
 # ceph vdbench ssd A-Tune configuration
 #
 [main]
-include = include-ceph-vdbench
+include = include-ceph-vdbench
 
 [kernel_config]
 #TODO CONFIG
--
GitLab
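
Note: with the shared `profiles/include/*.conf` files in place, each scenario conf reduces to its scenario-specific deltas plus an include chain such as `storage-ceph-vdbench-hdd -> include-ceph-vdbench -> default-default`. A rough sketch of walking that chain with `configparser`, assuming the repository layout above (for example `profiles/include/ceph-vdbench.conf`); the base `default-default` profile is left unresolved here:

```python
import configparser
import os

def include_chain(conf_relpath, root="profiles"):
    """Follow [main] include links, e.g. storage/ceph/vdbench-hdd.conf ->
    include/ceph-vdbench.conf. Only the new include-* layout is handled;
    default-default (the base profile) terminates the chain."""
    chain = [conf_relpath]
    parser = configparser.ConfigParser(strict=False, interpolation=None)
    parser.read(os.path.join(root, conf_relpath))   # silently skips missing files
    parent = parser.get("main", "include", fallback=None)
    if parent and parent != "default-default":
        first, rest = parent.split("-", 1)          # "include" / "ceph-vdbench"
        chain += include_chain(os.path.join(first, rest + ".conf"), root)
    return chain

# e.g. include_chain("storage/ceph/vdbench-hdd.conf")
# -> ['storage/ceph/vdbench-hdd.conf', 'include/ceph-vdbench.conf']
```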