!1249 Add GNN dataset processing API

Merge pull request !1249 from heleiwang/hlw_gnn_data

!1249 Add GNN dataset processing API
Merge pull request !1249 from heleiwang/hlw_gnn_data
3363d4e8 · mindspore-ci-bot · Gitee · 49d81260 · 599a449e · 3363d4e8
42 changed file
--- a/example/graph_to_mindrecord/README.md
+++ b/example/graph_to_mindrecord/README.md
+# Guideline to Efficiently Generating MindRecord
+
+<!-- TOC -->
+
+- [What does the example do](#what-does-the-example-do)
+- [Example test for Cora](#example-test-for-cora)
+- [How to use the example for other dataset](#how-to-use-the-example-for-other-dataset)
+    - [Create work space](#create-work-space)
+    - [Implement data generator](#implement-data-generator)
+    - [Run data generator](#run-data-generator)
+
+
+<!-- /TOC -->
+
+## What does the example do
+
+This example provides an efficient way to generate MindRecord. Users only need to define the parallel granularity of training data reading and the data reading function of a single task. That is, they can efficiently convert the user's training data into MindRecord.
+
+1.  write_cora.sh: entry script, users need to modify parameters according to their own training data.
+2.  writer.py: main script, called by write_cora.sh, it mainly reads user training data in parallel and generates MindRecord.
+3.  cora/mr_api.py: uers define their own parallel granularity of training data reading and single task reading function through the cora.
+
+## Example test for Cora
+
+1. Download and prepare the Cora dataset as required.
+
+    > [Cora dataset download address](https://github.com/jzaldi/datasets/tree/master/cora)
+
+
+2. Edit write_cora.sh and modify the parameters
+    ```
+    --mindrecord_file: output MindRecord file.
+    --mindrecord_partitions: the partitions for MindRecord.
+    ```
+
+3. Run the bash script
+    ```bash  
+    bash write_cora.sh
+    ```  
+
+## How to use the example for other dataset
+
+### Create work space
+
+Assume the dataset name is 'xyz'
+* Create work space from cora
+    ```shell
+    cd ${your_mindspore_home}/example/graph_to_mindrecord
+    cp -r cora xyz
+    ```
+
+### Implement data generator
+
+Edit dictionary data generator.
+* Edit file 
+    ```shell
+    cd ${your_mindspore_home}/example/graph_to_mindrecord
+    vi xyz/mr_api.py
+    ```
+
+Two API, 'mindrecord_task_number' and 'mindrecord_dict_data', must be implemented.
+- 'mindrecord_task_number()' returns number of tasks. Return 1 if data row is generated serially. Return N if generator can be split into N parallel-run tasks.
+- 'mindrecord_dict_data(task_id)' yields dictionary data row by row. 'task_id' is 0..N-1, if N is return value of mindrecord_task_number()
+
+### Run data generator
+
+* run python script 
+    ```shell
+    cd ${your_mindspore_home}/example/graph_to_mindrecord
+    python writer.py --mindrecord_script xyz [...]
+    ```
+    > You can put this command in script **write_xyz.sh** for easy execution
+
--- a/example/graph_to_mindrecord/citeseer/__init__.py
+++ b/example/graph_to_mindrecord/citeseer/__init__.py
--- a/example/graph_to_mindrecord/citeseer/mr_api.py
+++ b/example/graph_to_mindrecord/citeseer/mr_api.py
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""
+User-defined API for MindRecord GNN writer.
+"""
+import csv
+import os
+
+import numpy as np
+import scipy.sparse as sp
+
+# parse args from command line parameter 'graph_api_args'
+#     args delimiter is ':'
+args = os.environ['graph_api_args'].split(':')
+CITESEER_CONTENT_FILE = args[0]
+CITESEER_CITES_FILE = args[1]
+CITESEER_MINDRECRD_LABEL_FILE = CITESEER_CONTENT_FILE + "_label_mindrecord"
+CITESEER_MINDRECRD_ID_MAP_FILE = CITESEER_CONTENT_FILE + "_id_mindrecord"
+
+node_id_map = {}
+
+# profile:  (num_features, feature_data_types, feature_shapes)
+node_profile = (2, ["float32", "int64"], [[-1], [-1]])
+edge_profile = (0, [], [])
+
+
+def _normalize_citeseer_features(features):
+    features = np.array(features)
+    row_sum = np.array(features.sum(1))
+    r_inv = np.power(row_sum * 1.0, -1).flatten()
+    r_inv[np.isinf(r_inv)] = 0.
+    r_mat_inv = sp.diags(r_inv)
+    features = r_mat_inv.dot(features)
+    return features
+
+
+def yield_nodes(task_id=0):
+    """
+    Generate node data
+
+    Yields:
+        data (dict): data row which is dict.
+    """
+    print("Node task is {}".format(task_id))
+    label_types = {}
+    label_size = 0
+    node_num = 0
+    with open(CITESEER_CONTENT_FILE) as content_file:
+        content_reader = csv.reader(content_file, delimiter='\t')
+        line_count = 0
+        for row in content_reader:
+            if not row[-1] in label_types:
+                label_types[row[-1]] = label_size
+                label_size += 1
+            if not row[0] in node_id_map:
+                node_id_map[row[0]] = node_num
+                node_num += 1
+            raw_features = [[int(x) for x in row[1:-1]]]
+            node = {'id': node_id_map[row[0]], 'type': 0, 'feature_1': _normalize_citeseer_features(raw_features),
+                    'feature_2': [label_types[row[-1]]]}
+            yield node
+            line_count += 1
+    print('Processed {} lines for nodes.'.format(line_count))
+    # print('label types {}.'.format(label_types))
+    with open(CITESEER_MINDRECRD_LABEL_FILE, 'w') as f:
+        for k in label_types:
+            print(k + ',' + str(label_types[k]), file=f)
+
+
+def yield_edges(task_id=0):
+    """
+    Generate edge data
+
+    Yields:
+        data (dict): data row which is dict.
+    """
+    print("Edge task is {}".format(task_id))
+    # print(map_string_int)
+    with open(CITESEER_CITES_FILE) as cites_file:
+        cites_reader = csv.reader(cites_file, delimiter='\t')
+        line_count = 0
+        for row in cites_reader:
+            if not row[0] in node_id_map:
+                print('Source node {} does not exist.'.format(row[0]))
+                continue
+            if not row[1] in node_id_map:
+                print('Destination node {} does not exist.'.format(row[1]))
+                continue
+            line_count += 1
+            edge = {'id': line_count,
+                    'src_id': node_id_map[row[0]], 'dst_id': node_id_map[row[1]], 'type': 0}
+            yield edge
+
+        with open(CITESEER_MINDRECRD_ID_MAP_FILE, 'w') as f:
+            for k in node_id_map:
+                print(k + ',' + str(node_id_map[k]), file=f)
+        print('Processed {} lines for edges.'.format(line_count))
--- a/example/graph_to_mindrecord/cora/__init__.py
+++ b/example/graph_to_mindrecord/cora/__init__.py
--- a/example/graph_to_mindrecord/cora/mr_api.py
+++ b/example/graph_to_mindrecord/cora/mr_api.py
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""
+User-defined API for MindRecord GNN writer.
+"""
+import csv
+import os
+
+import numpy as np
+import scipy.sparse as sp
+
+# parse args from command line parameter 'graph_api_args'
+#     args delimiter is ':'
+args = os.environ['graph_api_args'].split(':')
+CORA_CONTENT_FILE = args[0]
+CORA_CITES_FILE = args[1]
+CORA_MINDRECRD_LABEL_FILE = CORA_CONTENT_FILE + "_label_mindrecord"
+CORA_CONTENT_ID_MAP_FILE = CORA_CONTENT_FILE + "_id_mindrecord"
+
+node_id_map = {}
+
+# profile:  (num_features, feature_data_types, feature_shapes)
+node_profile = (2, ["float32", "int64"], [[-1], [-1]])
+edge_profile = (0, [], [])
+
+
+def _normalize_cora_features(features):
+    features = np.array(features)
+    row_sum = np.array(features.sum(1))
+    r_inv = np.power(row_sum * 1.0, -1).flatten()
+    r_inv[np.isinf(r_inv)] = 0.
+    r_mat_inv = sp.diags(r_inv)
+    features = r_mat_inv.dot(features)
+    return features
+
+
+def yield_nodes(task_id=0):
+    """
+    Generate node data
+
+    Yields:
+        data (dict): data row which is dict.
+    """
+    print("Node task is {}".format(task_id))
+    label_types = {}
+    label_size = 0
+    node_num = 0
+    with open(CORA_CONTENT_FILE) as content_file:
+        content_reader = csv.reader(content_file, delimiter=',')
+        line_count = 0
+        for row in content_reader:
+            if line_count == 0:
+                line_count += 1
+                continue
+            if not row[0] in node_id_map:
+                node_id_map[row[0]] = node_num
+                node_num += 1
+            if not row[-1] in label_types:
+                label_types[row[-1]] = label_size
+                label_size += 1
+            raw_features = [[int(x) for x in row[1:-1]]]
+            node = {'id': node_id_map[row[0]], 'type': 0, 'feature_1': _normalize_cora_features(raw_features),
+                    'feature_2': [label_types[row[-1]]]}
+            yield node
+            line_count += 1
+    print('Processed {} lines for nodes.'.format(line_count))
+    print('label types {}.'.format(label_types))
+    with open(CORA_MINDRECRD_LABEL_FILE, 'w') as f:
+        for k in label_types:
+            print(k + ',' + str(label_types[k]), file=f)
+
+
+def yield_edges(task_id=0):
+    """
+    Generate edge data
+
+    Yields:
+        data (dict): data row which is dict.
+    """
+    print("Edge task is {}".format(task_id))
+    with open(CORA_CITES_FILE) as cites_file:
+        cites_reader = csv.reader(cites_file, delimiter=',')
+        line_count = 0
+        for row in cites_reader:
+            if line_count == 0:
+                line_count += 1
+                continue
+            if not row[0] in node_id_map:
+                print('Source node {} does not exist.'.format(row[0]))
+                continue
+            if not row[1] in node_id_map:
+                print('Destination node {} does not exist.'.format(row[1]))
+                continue
+            edge = {'id': line_count,
+                    'src_id': node_id_map[row[0]], 'dst_id': node_id_map[row[1]], 'type': 0}
+            yield edge
+            line_count += 1
+        print('Processed {} lines for edges.'.format(line_count))
+    with open(CORA_CONTENT_ID_MAP_FILE, 'w') as f:
+        for k in node_id_map:
+            print(k + ',' + str(node_id_map[k]), file=f)
--- a/example/graph_to_mindrecord/read_citeseer.sh
+++ b/example/graph_to_mindrecord/read_citeseer.sh
+#!/bin/bash
+python reader.py --path "/tmp/citeseer/mindrecord/citeseer_mr"
--- a/example/graph_to_mindrecord/read_cora.sh
+++ b/example/graph_to_mindrecord/read_cora.sh
+#!/bin/bash
+python reader.py --path "/tmp/cora/mindrecord/cora_mr"
--- a/example/graph_to_mindrecord/reader.py
+++ b/example/graph_to_mindrecord/reader.py
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""
+######################## write mindrecord example ########################
+Write mindrecord by data dictionary:
+python writer.py --mindrecord_script /YourScriptPath ...
+"""
+import argparse
+
+import mindspore.dataset as ds
+
+parser = argparse.ArgumentParser(description='Mind record reader')
+parser.add_argument('--path', type=str, default="/tmp/cora/mindrecord/cora_mr",
+                    help='data file')
+args = parser.parse_args()
+
+data_set = ds.MindDataset(args.path)
+num_iter = 0
+for item in data_set.create_dict_iterator():
+    print(item)
+    num_iter += 1
+print("Total items # is {}".format(num_iter))
--- a/example/graph_to_mindrecord/write_citeseer.sh
+++ b/example/graph_to_mindrecord/write_citeseer.sh
+#!/bin/bash
+rm /tmp/citeseer/mindrecord/*
+
+python writer.py --mindrecord_script citeseer \
+--mindrecord_file "/tmp/citeseer/mindrecord/citeseer_mr" \
+--mindrecord_partitions 1 \
+--mindrecord_header_size_by_bit 18 \
+--mindrecord_page_size_by_bit 20 \
+--graph_api_args "/tmp/citeseer/dataset/citeseer.content:/tmp/citeseer/dataset/citeseer.cites"
--- a/example/graph_to_mindrecord/write_cora.sh
+++ b/example/graph_to_mindrecord/write_cora.sh
+#!/bin/bash
+rm /tmp/cora/mindrecord/*
+
+python writer.py --mindrecord_script cora \
+--mindrecord_file "/tmp/cora/mindrecord/cora_mr" \
+--mindrecord_partitions 1 \
+--mindrecord_header_size_by_bit 18 \
+--mindrecord_page_size_by_bit 20 \
+--graph_api_args "/tmp/cora/dataset/cora_content.csv:/tmp/cora/dataset/cora_cites.csv"
--- a/example/graph_to_mindrecord/writer.py
+++ b/example/graph_to_mindrecord/writer.py
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""
+######################## write mindrecord example ########################
+Write mindrecord by data dictionary:
+python writer.py --mindrecord_script /YourScriptPath ...
+"""
+import argparse
+import os
+import time
+from importlib import import_module
+from multiprocessing import Pool
+
+from mindspore.mindrecord import FileWriter
+from mindspore.mindrecord import GraphMapSchema
+
+
+def exec_task(task_id, parallel_writer=True):
+    """
+    Execute task with specified task id
+    """
+    print("exec task {}, parallel: {} ...".format(task_id, parallel_writer))
+    imagenet_iter = mindrecord_dict_data(task_id)
+    batch_size = 512
+    transform_count = 0
+    while True:
+        data_list = []
+        try:
+            for _ in range(batch_size):
+                data = imagenet_iter.__next__()
+                if 'dst_id' in data:
+                    data = graph_map_schema.transform_edge(data)
+                else:
+                    data = graph_map_schema.transform_node(data)
+                data_list.append(data)
+                transform_count += 1
+            writer.write_raw_data(data_list, parallel_writer=parallel_writer)
+            print("transformed {} record...".format(transform_count))
+        except StopIteration:
+            if data_list:
+                writer.write_raw_data(data_list, parallel_writer=parallel_writer)
+                print("transformed {} record...".format(transform_count))
+            break
+
+
+def read_args():
+    """
+    read args
+    """
+    parser = argparse.ArgumentParser(description='Mind record writer')
+    parser.add_argument('--mindrecord_script', type=str, default="template",
+                        help='path where script is saved')
+
+    parser.add_argument('--mindrecord_file', type=str, default="/tmp/mindrecord/xyz",
+                        help='written file name prefix')
+
+    parser.add_argument('--mindrecord_partitions', type=int, default=1,
+                        help='number of written files')
+
+    parser.add_argument('--mindrecord_header_size_by_bit', type=int, default=24,
+                        help='mindrecord file header size')
+
+    parser.add_argument('--mindrecord_page_size_by_bit', type=int, default=25,
+                        help='mindrecord file page size')
+
+    parser.add_argument('--mindrecord_workers', type=int, default=8,
+                        help='number of parallel workers')
+
+    parser.add_argument('--num_node_tasks', type=int, default=1,
+                        help='number of node tasks')
+
+    parser.add_argument('--num_edge_tasks', type=int, default=1,
+                        help='number of node tasks')
+
+    parser.add_argument('--graph_api_args', type=str, default="/tmp/nodes.csv:/tmp/edges.csv",
+                        help='nodes and edges data file, csv format with header.')
+
+    ret_args = parser.parse_args()
+
+    return ret_args
+
+
+def init_writer(mr_schema):
+    """
+    init writer
+    """
+    print("Init writer  ...")
+    mr_writer = FileWriter(args.mindrecord_file, args.mindrecord_partitions)
+
+    # set the header size
+    if args.mindrecord_header_size_by_bit != 24:
+        header_size = 1 << args.mindrecord_header_size_by_bit
+        mr_writer.set_header_size(header_size)
+
+    # set the page size
+    if args.mindrecord_page_size_by_bit != 25:
+        page_size = 1 << args.mindrecord_page_size_by_bit
+        mr_writer.set_page_size(page_size)
+
+    # create the schema
+    mr_writer.add_schema(mr_schema, "mindrecord_graph_schema")
+
+    # open file and set header
+    mr_writer.open_and_set_header()
+
+    return mr_writer
+
+
+def run_parallel_workers(num_tasks):
+    """
+    run parallel workers
+    """
+    # set number of workers
+    num_workers = args.mindrecord_workers
+
+    task_list = list(range(num_tasks))
+
+    if num_workers > num_tasks:
+        num_workers = num_tasks
+
+    if os.name == 'nt':
+        for window_task_id in task_list:
+            exec_task(window_task_id, False)
+    elif num_tasks > 1:
+        with Pool(num_workers) as p:
+            p.map(exec_task, task_list)
+    else:
+        exec_task(0, False)
+
+
+if __name__ == "__main__":
+    args = read_args()
+    print(args)
+
+    start_time = time.time()
+
+    # pass mr_api arguments
+    os.environ['graph_api_args'] = args.graph_api_args
+
+    # import mr_api
+    try:
+        mr_api = import_module(args.mindrecord_script + '.mr_api')
+    except ModuleNotFoundError:
+        raise RuntimeError("Unknown module path: {}".format(args.mindrecord_script + '.mr_api'))
+
+    # init graph schema
+    graph_map_schema = GraphMapSchema()
+
+    num_features, feature_data_types, feature_shapes = mr_api.node_profile
+    graph_map_schema.set_node_feature_profile(num_features, feature_data_types, feature_shapes)
+
+    num_features, feature_data_types, feature_shapes = mr_api.edge_profile
+    graph_map_schema.set_edge_feature_profile(num_features, feature_data_types, feature_shapes)
+
+    graph_schema = graph_map_schema.get_schema()
+
+    # init writer
+    writer = init_writer(graph_schema)
+
+    # write nodes data
+    mindrecord_dict_data = mr_api.yield_nodes
+    run_parallel_workers(args.num_node_tasks)
+
+    # write edges data
+    mindrecord_dict_data = mr_api.yield_edges
+    run_parallel_workers(args.num_edge_tasks)
+
+    # writer wrap up
+    ret = writer.commit()
+
+    end_time = time.time()
+    print("--------------------------------------------")
+    print("END. Total time: {}".format(end_time - start_time))
+    print("--------------------------------------------")
--- a/mindspore/ccsrc/dataset/CMakeLists.txt
+++ b/mindspore/ccsrc/dataset/CMakeLists.txt
@@ -66,6 +66,7 @@ set(submodules
    $<TARGET_OBJECTS:APItoPython>
    $<TARGET_OBJECTS:engine-datasetops-source>
    $<TARGET_OBJECTS:engine-datasetops-source-sampler>
+    $<TARGET_OBJECTS:engine-gnn>
    $<TARGET_OBJECTS:engine-datasetops>
    $<TARGET_OBJECTS:engine-opt>
    $<TARGET_OBJECTS:engine>

--- a/mindspore/ccsrc/dataset/api/python_bindings.cc
+++ b/mindspore/ccsrc/dataset/api/python_bindings.cc
@@ -61,6 +61,7 @@
 #include "dataset/engine/jagged_connector.h"
 #include "dataset/engine/datasetops/source/text_file_op.h"
 #include "dataset/engine/datasetops/source/voc_op.h"
+#include "dataset/engine/gnn/graph.h"
 #include "dataset/kernels/data/to_float16_op.h"
 #include "dataset/util/random.h"
 #include "mindrecord/include/shard_operator.h"
@@ -513,6 +514,33 @@ void bindVocabObjects(py::module *m) {
    });
 }

+void bindGraphData(py::module *m) {
+  (void)py::class_<gnn::Graph, std::shared_ptr<gnn::Graph>>(*m, "Graph")
+    .def(py::init([](std::string dataset_file, int32_t num_workers) {
+      std::shared_ptr<gnn::Graph> g_out = std::make_shared<gnn::Graph>(dataset_file, num_workers);
+      THROW_IF_ERROR(g_out->Init());
+      return g_out;
+    }))
+    .def("get_nodes",
+         [](gnn::Graph &g, gnn::NodeType node_type, gnn::NodeIdType node_num) {
+           std::shared_ptr<Tensor> out;
+           THROW_IF_ERROR(g.GetNodes(node_type, node_num, &out));
+           return out;
+         })
+    .def("get_all_neighbors",
+         [](gnn::Graph &g, std::vector<gnn::NodeIdType> node_list, gnn::NodeType neighbor_type) {
+           std::shared_ptr<Tensor> out;
+           THROW_IF_ERROR(g.GetAllNeighbors(node_list, neighbor_type, &out));
+           return out;
+         })
+    .def("get_node_feature",
+         [](gnn::Graph &g, std::shared_ptr<Tensor> node_list, std::vector<gnn::FeatureType> feature_types) {
+           TensorRow out;
+           THROW_IF_ERROR(g.GetNodeFeature(node_list, feature_types, &out));
+           return out;
+         });
+}
+
 // This is where we externalize the C logic as python modules
 PYBIND11_MODULE(_c_dataengine, m) {
  m.doc() = "pybind11 for _c_dataengine";
@@ -578,6 +606,7 @@ PYBIND11_MODULE(_c_dataengine, m) {
  bindDatasetOps(&m);
  bindInfoObjects(&m);
  bindVocabObjects(&m);
+  bindGraphData(&m);
 }
 }  // namespace dataset
 }  // namespace mindspore
--- a/mindspore/ccsrc/dataset/engine/CMakeLists.txt
+++ b/mindspore/ccsrc/dataset/engine/CMakeLists.txt
 add_subdirectory(datasetops)
 add_subdirectory(opt)
+add_subdirectory(gnn)
 if (ENABLE_TDTQUE)
  add_subdirectory(tdt)
 endif ()
@@ -15,7 +16,7 @@ add_library(engine OBJECT
 target_include_directories(engine PRIVATE ${pybind11_INCLUDE_DIRS})

 if (ENABLE_TDTQUE)
-  add_dependencies(engine engine-datasetops engine-datasetops-source engine-tdt engine-opt)
+  add_dependencies(engine engine-datasetops engine-datasetops-source engine-tdt engine-opt engine-gnn)
 else()
-  add_dependencies(engine engine-datasetops engine-datasetops-source engine-opt)
+  add_dependencies(engine engine-datasetops engine-datasetops-source engine-opt engine-gnn)
 endif ()
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.cc
@@ -112,10 +112,10 @@ Status MindRecordOp::Init() {

  data_schema_ = std::make_unique<DataSchema>();

-  std::vector<std::string> col_names = shard_reader_->get_shard_column()->GetColumnName();
+  std::vector<std::string> col_names = shard_reader_->GetShardColumn()->GetColumnName();
  CHECK_FAIL_RETURN_UNEXPECTED(!col_names.empty(), "No schema found");
-  std::vector<mindrecord::ColumnDataType> col_data_types = shard_reader_->get_shard_column()->GeColumnDataType();
-  std::vector<std::vector<int64_t>> col_shapes = shard_reader_->get_shard_column()->GetColumnShape();
+  std::vector<mindrecord::ColumnDataType> col_data_types = shard_reader_->GetShardColumn()->GeColumnDataType();
+  std::vector<std::vector<int64_t>> col_shapes = shard_reader_->GetShardColumn()->GetColumnShape();

  bool load_all_cols = columns_to_load_.empty();  // if columns_to_load_ is empty it means load everything
  std::map<std::string, int32_t> colname_to_ind;
@@ -296,8 +296,7 @@ Status MindRecordOp::LoadTensorRow(TensorRow *tensor_row, const std::vector<uint
    std::vector<int64_t> column_shape;

    // Get column data
-
-    auto has_column = shard_reader_->get_shard_column()->GetColumnValueByName(
+    auto has_column = shard_reader_->GetShardColumn()->GetColumnValueByName(
      column_name, columns_blob, columns_json, &data, &data_ptr, &n_bytes, &column_data_type, &column_data_type_size,
      &column_shape);
    if (has_column == MSRStatus::FAILED) {

--- a/mindspore/ccsrc/dataset/engine/gnn/CMakeLists.txt
+++ b/mindspore/ccsrc/dataset/engine/gnn/CMakeLists.txt
+add_library(engine-gnn OBJECT
+    graph.cc
+    graph_loader.cc
+    local_node.cc
+    local_edge.cc
+    feature.cc
+    )
--- a/mindspore/ccsrc/dataset/engine/gnn/edge.h
+++ b/mindspore/ccsrc/dataset/engine/gnn/edge.h
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef DATASET_ENGINE_GNN_EDGE_H_
+#define DATASET_ENGINE_GNN_EDGE_H_
+
+#include <memory>
+#include <unordered_map>
+#include <utility>
+
+#include "dataset/util/status.h"
+#include "dataset/engine/gnn/feature.h"
+#include "dataset/engine/gnn/node.h"
+
+namespace mindspore {
+namespace dataset {
+namespace gnn {
+using EdgeType = int8_t;
+using EdgeIdType = int32_t;
+
+class Edge {
+ public:
+  // Constructor
+  // @param EdgeIdType id - edge id
+  // @param EdgeType type - edge type
+  // @param std::shared_ptr<Node> src_node - source node
+  // @param std::shared_ptr<Node> dst_node - destination node
+  Edge(EdgeIdType id, EdgeType type, std::shared_ptr<Node> src_node, std::shared_ptr<Node> dst_node)
+      : id_(id), type_(type), src_node_(src_node), dst_node_(dst_node) {}
+
+  virtual ~Edge() = default;
+
+  // @return NodeIdType - Returned edge id
+  EdgeIdType id() const { return id_; }
+
+  // @return NodeIdType - Returned edge type
+  EdgeType type() const { return type_; }
+
+  // Get the feature of a edge
+  // @param FeatureType feature_type - type of feature
+  // @param std::shared_ptr<Feature> *out_feature - Returned feature
+  // @return Status - The error code return
+  virtual Status GetFeatures(FeatureType feature_type, std::shared_ptr<Feature> *out_feature) = 0;
+
+  // Get nodes on the edge
+  // @param std::pair<std::shared_ptr<Node>, std::shared_ptr<Node>> *out_node - Source and destination nodes returned
+  Status GetNode(std::pair<std::shared_ptr<Node>, std::shared_ptr<Node>> *out_node) {
+    *out_node = std::make_pair(src_node_, dst_node_);
+    return Status::OK();
+  }
+
+  // Set node to edge
+  // @param const std::pair<std::shared_ptr<Node>, std::shared_ptr<Node>> &in_node -
+  Status SetNode(const std::pair<std::shared_ptr<Node>, std::shared_ptr<Node>> &in_node) {
+    src_node_ = in_node.first;
+    dst_node_ = in_node.second;
+    return Status::OK();
+  }
+
+  // Update feature of edge
+  // @param std::shared_ptr<Feature> feature -
+  // @return Status - The error code return
+  virtual Status UpdateFeature(const std::shared_ptr<Feature> &feature) = 0;
+
+ protected:
+  EdgeIdType id_;
+  EdgeType type_;
+  std::shared_ptr<Node> src_node_;
+  std::shared_ptr<Node> dst_node_;
+};
+}  // namespace gnn
+}  // namespace dataset
+}  // namespace mindspore
+#endif  // DATASET_ENGINE_GNN_EDGE_H_
--- a/mindspore/ccsrc/dataset/engine/gnn/feature.cc
+++ b/mindspore/ccsrc/dataset/engine/gnn/feature.cc
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "dataset/engine/gnn/feature.h"
+
+namespace mindspore {
+namespace dataset {
+namespace gnn {
+
+Feature::Feature(FeatureType type_name, std::shared_ptr<Tensor> value) : type_name_(type_name), value_(value) {}
+
+}  // namespace gnn
+}  // namespace dataset
+}  // namespace mindspore
--- a/mindspore/ccsrc/dataset/engine/gnn/feature.h
+++ b/mindspore/ccsrc/dataset/engine/gnn/feature.h
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef DATASET_ENGINE_GNN_FEATURE_H_
+#define DATASET_ENGINE_GNN_FEATURE_H_
+
+#include <memory>
+
+#include "dataset/core/tensor.h"
+#include "dataset/util/status.h"
+
+namespace mindspore {
+namespace dataset {
+namespace gnn {
+using FeatureType = int16_t;
+
+class Feature {
+ public:
+  // Constructor
+  // @param FeatureType type_name - feature type
+  // @param std::shared_ptr<Tensor> value - feature value
+  Feature(FeatureType type_name, std::shared_ptr<Tensor> value);
+
+  // Get feature value
+  // @return std::shared_ptr<Tensor> *out_value - feature value
+  const std::shared_ptr<Tensor> Value() const { return value_; }
+
+  // @return NodeIdType - Returned feature type
+  FeatureType type() const { return type_name_; }
+
+ private:
+  FeatureType type_name_;
+  std::shared_ptr<Tensor> value_;
+};
+}  // namespace gnn
+}  // namespace dataset
+}  // namespace mindspore
+#endif  // DATASET_ENGINE_GNN_FEATURE_H_
--- a/mindspore/ccsrc/dataset/engine/gnn/graph.cc
+++ b/mindspore/ccsrc/dataset/engine/gnn/graph.cc
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "dataset/engine/gnn/graph.h"
+
+#include <algorithm>
+#include <functional>
+#include <numeric>
+#include <utility>
+
+#include "dataset/core/tensor_shape.h"
+
+namespace mindspore {
+namespace dataset {
+namespace gnn {
+
+Graph::Graph(std::string dataset_file, int32_t num_workers) : dataset_file_(dataset_file), num_workers_(num_workers) {
+  MS_LOG(INFO) << "num_workers:" << num_workers;
+}
+
+Status Graph::GetNodes(NodeType node_type, NodeIdType node_num, std::shared_ptr<Tensor> *out) {
+  auto itr = node_type_map_.find(node_type);
+  if (itr == node_type_map_.end()) {
+    std::string err_msg = "Invalid node type:" + std::to_string(node_type);
+    RETURN_STATUS_UNEXPECTED(err_msg);
+  } else {
+    if (node_num == -1) {
+      RETURN_IF_NOT_OK(CreateTensorByVector<NodeIdType>({itr->second}, DataType(DataType::DE_INT32), out));
+    } else {
+    }
+  }
+  return Status::OK();
+}
+
+template <typename T>
+Status Graph::CreateTensorByVector(const std::vector<std::vector<T>> &data, DataType type,
+                                   std::shared_ptr<Tensor> *out) {
+  if (!type.IsCompatible<T>()) {
+    RETURN_STATUS_UNEXPECTED("Data type not compatible");
+  }
+  if (data.empty()) {
+    RETURN_STATUS_UNEXPECTED("Input data is emply");
+  }
+  std::shared_ptr<Tensor> tensor;
+  size_t m = data.size();
+  size_t n = data[0].size();
+  RETURN_IF_NOT_OK(Tensor::CreateTensor(
+    &tensor, TensorImpl::kFlexible, TensorShape({static_cast<dsize_t>(m), static_cast<dsize_t>(n)}), type, nullptr));
+  T *ptr = reinterpret_cast<T *>(tensor->GetMutableBuffer());
+  for (auto id_m : data) {
+    CHECK_FAIL_RETURN_UNEXPECTED(id_m.size() == n, "Each member of the vector has a different size");
+    for (auto id_n : id_m) {
+      *ptr = id_n;
+      ptr++;
+    }
+  }
+  tensor->Squeeze();
+  *out = std::move(tensor);
+  return Status::OK();
+}
+
+template <typename T>
+Status Graph::ComplementVector(std::vector<std::vector<T>> *data, size_t max_size, T default_value) {
+  if (!data || data->empty()) {
+    RETURN_STATUS_UNEXPECTED("Input data is emply");
+  }
+  for (std::vector<T> &vec : *data) {
+    size_t size = vec.size();
+    if (size > max_size) {
+      RETURN_STATUS_UNEXPECTED("The max_size parameter is abnormal");
+    } else {
+      for (size_t i = 0; i < (max_size - size); ++i) {
+        vec.push_back(default_value);
+      }
+    }
+  }
+  return Status::OK();
+}
+
+Status Graph::GetEdges(EdgeType edge_type, EdgeIdType edge_num, std::shared_ptr<Tensor> *out) { return Status::OK(); }
+
+Status Graph::GetAllNeighbors(const std::vector<NodeIdType> &node_list, NodeType neighbor_type,
+                              std::shared_ptr<Tensor> *out) {
+  if (node_type_map_.find(neighbor_type) == node_type_map_.end()) {
+    std::string err_msg = "Invalid neighbor type:" + std::to_string(neighbor_type);
+    RETURN_STATUS_UNEXPECTED(err_msg);
+  }
+
+  std::vector<std::vector<NodeIdType>> neighbors;
+  size_t max_neighbor_num = 0;
+  neighbors.resize(node_list.size());
+  for (size_t i = 0; i < node_list.size(); ++i) {
+    auto itr = node_id_map_.find(node_list[i]);
+    if (itr != node_id_map_.end()) {
+      RETURN_IF_NOT_OK(itr->second->GetNeighbors(neighbor_type, -1, &neighbors[i]));
+      max_neighbor_num = max_neighbor_num > neighbors[i].size() ? max_neighbor_num : neighbors[i].size();
+    } else {
+      std::string err_msg = "Invalid node id:" + std::to_string(node_list[i]);
+      RETURN_STATUS_UNEXPECTED(err_msg);
+    }
+  }
+
+  RETURN_IF_NOT_OK(ComplementVector<NodeIdType>(&neighbors, max_neighbor_num, kDefaultNodeId));
+  RETURN_IF_NOT_OK(CreateTensorByVector<NodeIdType>(neighbors, DataType(DataType::DE_INT32), out));
+
+  return Status::OK();
+}
+
+Status Graph::GetSampledNeighbor(const std::vector<NodeIdType> &node_list, const std::vector<NodeIdType> &neighbor_nums,
+                                 const std::vector<NodeType> &neighbor_types, std::shared_ptr<Tensor> *out) {
+  return Status::OK();
+}
+
+Status Graph::GetNegSampledNeighbor(const std::vector<NodeIdType> &node_list, NodeIdType samples_num,
+                                    NodeType neg_neighbor_type, std::shared_ptr<Tensor> *out) {
+  return Status::OK();
+}
+
+Status Graph::RandomWalk(const std::vector<NodeIdType> &node_list, const std::vector<NodeType> &meta_path, float p,
+                         float q, NodeIdType default_node, std::shared_ptr<Tensor> *out) {
+  return Status::OK();
+}
+
+Status Graph::GetNodeDefaultFeature(FeatureType feature_type, std::shared_ptr<Feature> *out_feature) {
+  auto itr = default_feature_map_.find(feature_type);
+  if (itr == default_feature_map_.end()) {
+    std::string err_msg = "Invalid feature type:" + std::to_string(feature_type);
+    RETURN_STATUS_UNEXPECTED(err_msg);
+  } else {
+    *out_feature = itr->second;
+  }
+  return Status::OK();
+}
+
+Status Graph::GetNodeFeature(const std::shared_ptr<Tensor> &nodes, const std::vector<FeatureType> &feature_types,
+                             TensorRow *out) {
+  if (!nodes || nodes->Size() == 0) {
+    RETURN_STATUS_UNEXPECTED("Inpude nodes is empty");
+  }
+  TensorRow tensors;
+  for (auto f_type : feature_types) {
+    std::shared_ptr<Feature> default_feature;
+    // If no feature can be obtained, fill in the default value
+    RETURN_IF_NOT_OK(GetNodeDefaultFeature(f_type, &default_feature));
+
+    TensorShape shape(default_feature->Value()->shape());
+    auto shape_vec = nodes->shape().AsVector();
+    dsize_t size = std::accumulate(shape_vec.begin(), shape_vec.end(), 1, std::multiplies<dsize_t>());
+    shape = shape.PrependDim(size);
+    std::shared_ptr<Tensor> fea_tensor;
+    RETURN_IF_NOT_OK(
+      Tensor::CreateTensor(&fea_tensor, TensorImpl::kFlexible, shape, default_feature->Value()->type(), nullptr));
+
+    dsize_t index = 0;
+    for (auto node_itr = nodes->begin<NodeIdType>(); node_itr != nodes->end<NodeIdType>(); ++node_itr) {
+      auto itr = node_id_map_.find(*node_itr);
+      std::shared_ptr<Feature> feature;
+      if (itr != node_id_map_.end()) {
+        if (!itr->second->GetFeatures(f_type, &feature).IsOk()) {
+          feature = default_feature;
+        }
+      } else {
+        if (*node_itr == kDefaultNodeId) {
+          feature = default_feature;
+        } else {
+          std::string err_msg = "Invalid node id:" + std::to_string(*node_itr);
+          RETURN_STATUS_UNEXPECTED(err_msg);
+        }
+      }
+      RETURN_IF_NOT_OK(fea_tensor->InsertTensor({index}, feature->Value()));
+      index++;
+    }
+
+    TensorShape reshape(nodes->shape());
+    for (auto s : default_feature->Value()->shape().AsVector()) {
+      reshape = reshape.AppendDim(s);
+    }
+    RETURN_IF_NOT_OK(fea_tensor->Reshape(reshape));
+    fea_tensor->Squeeze();
+    tensors.push_back(fea_tensor);
+  }
+  *out = std::move(tensors);
+  return Status::OK();
+}
+
+Status Graph::GetEdgeFeature(const std::shared_ptr<Tensor> &edges, const std::vector<FeatureType> &feature_types,
+                             TensorRow *out) {
+  return Status::OK();
+}
+
+Status Graph::Init() {
+  RETURN_IF_NOT_OK(LoadNodeAndEdge());
+  return Status::OK();
+}
+
+Status Graph::GetMetaInfo(std::vector<NodeMetaInfo> *node_info, std::vector<EdgeMetaInfo> *edge_info) {
+  node_info->reserve(node_type_map_.size());
+  for (auto node : node_type_map_) {
+    NodeMetaInfo n_info;
+    n_info.type = node.first;
+    n_info.num = node.second.size();
+    auto itr = node_feature_map_.find(node.first);
+    if (itr != node_feature_map_.end()) {
+      for (auto f_type : itr->second) {
+        n_info.feature_type.push_back(f_type);
+      }
+      std::sort(n_info.feature_type.begin(), n_info.feature_type.end());
+    }
+    node_info->push_back(n_info);
+  }
+
+  edge_info->reserve(edge_type_map_.size());
+  for (auto edge : edge_type_map_) {
+    EdgeMetaInfo e_info;
+    e_info.type = edge.first;
+    e_info.num = edge.second.size();
+    auto itr = edge_feature_map_.find(edge.first);
+    if (itr != edge_feature_map_.end()) {
+      for (auto f_type : itr->second) {
+        e_info.feature_type.push_back(f_type);
+      }
+    }
+    edge_info->push_back(e_info);
+  }
+  return Status::OK();
+}
+
+Status Graph::LoadNodeAndEdge() {
+  GraphLoader gl(dataset_file_, num_workers_);
+  // ask graph_loader to load everything into memory
+  RETURN_IF_NOT_OK(gl.InitAndLoad());
+  // get all maps
+  RETURN_IF_NOT_OK(gl.GetNodesAndEdges(&node_id_map_, &edge_id_map_, &node_type_map_, &edge_type_map_,
+                                       &node_feature_map_, &edge_feature_map_, &default_feature_map_));
+  return Status::OK();
+}
+}  // namespace gnn
+}  // namespace dataset
+}  // namespace mindspore
--- a/mindspore/ccsrc/dataset/engine/gnn/graph.h
+++ b/mindspore/ccsrc/dataset/engine/gnn/graph.h
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef DATASET_ENGINE_GNN_GRAPH_H_
+#define DATASET_ENGINE_GNN_GRAPH_H_
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+#include "dataset/core/tensor.h"
+#include "dataset/engine/gnn/graph_loader.h"
+#include "dataset/engine/gnn/feature.h"
+#include "dataset/engine/gnn/node.h"
+#include "dataset/engine/gnn/edge.h"
+#include "dataset/util/status.h"
+
+namespace mindspore {
+namespace dataset {
+namespace gnn {
+
+struct NodeMetaInfo {
+  NodeType type;
+  NodeIdType num;
+  std::vector<FeatureType> feature_type;
+  NodeMetaInfo() {
+    type = 0;
+    num = 0;
+  }
+};
+
+struct EdgeMetaInfo {
+  EdgeType type;
+  EdgeIdType num;
+  std::vector<FeatureType> feature_type;
+  EdgeMetaInfo() {
+    type = 0;
+    num = 0;
+  }
+};
+
+class Graph {
+ public:
+  // Constructor
+  // @param std::string dataset_file -
+  // @param int32_t num_workers - number of parallel threads
+  Graph(std::string dataset_file, int32_t num_workers);
+
+  ~Graph() = default;
+
+  // Get the nodes from the graph.
+  // @param NodeType node_type - type of node
+  // @param NodeIdType node_num - Number of nodes to be acquired, if -1 means all nodes are acquired
+  // @param std::shared_ptr<Tensor> *out - Returned nodes id
+  // @return Status - The error code return
+  Status GetNodes(NodeType node_type, NodeIdType node_num, std::shared_ptr<Tensor> *out);
+
+  // Get the edges from the graph.
+  // @param NodeType edge_type - type of edge
+  // @param NodeIdType edge_num - Number of edges to be acquired, if -1 means all edges are acquired
+  // @param std::shared_ptr<Tensor> *out - Returned edge ids
+  // @return Status - The error code return
+  Status GetEdges(EdgeType edge_type, EdgeIdType edge_num, std::shared_ptr<Tensor> *out);
+
+  // All neighbors of the acquisition node.
+  // @param std::vector<NodeType> node_list - List of nodes
+  // @param NodeType neighbor_type - The type of neighbor. If the type does not exist, an error will be reported
+  // @param std::shared_ptr<Tensor> *out - Returned neighbor's id. Because the number of neighbors at different nodes is
+  // different, the returned tensor is output according to the maximum number of neighbors. If the number of neighbors
+  // is not enough, fill in tensor as -1.
+  // @return Status - The error code return
+  Status GetAllNeighbors(const std::vector<NodeIdType> &node_list, NodeType neighbor_type,
+                         std::shared_ptr<Tensor> *out);
+
+  Status GetSampledNeighbor(const std::vector<NodeIdType> &node_list, const std::vector<NodeIdType> &neighbor_nums,
+                            const std::vector<NodeType> &neighbor_types, std::shared_ptr<Tensor> *out);
+  Status GetNegSampledNeighbor(const std::vector<NodeIdType> &node_list, NodeIdType samples_num,
+                               NodeType neg_neighbor_type, std::shared_ptr<Tensor> *out);
+  Status RandomWalk(const std::vector<NodeIdType> &node_list, const std::vector<NodeType> &meta_path, float p, float q,
+                    NodeIdType default_node, std::shared_ptr<Tensor> *out);
+
+  // Get the feature of a node
+  // @param std::shared_ptr<Tensor> nodes - List of nodes
+  // @param std::vector<FeatureType> feature_types - Types of features, An error will be reported if the feature type
+  // does not exist.
+  // @param TensorRow *out - Returned features
+  // @return Status - The error code return
+  Status GetNodeFeature(const std::shared_ptr<Tensor> &nodes, const std::vector<FeatureType> &feature_types,
+                        TensorRow *out);
+
+  // Get the feature of a edge
+  // @param std::shared_ptr<Tensor> edget - List of edges
+  // @param std::vector<FeatureType> feature_types - Types of features, An error will be reported if the feature type
+  // does not exist.
+  // @param Tensor *out - Returned features
+  // @return Status - The error code return
+  Status GetEdgeFeature(const std::shared_ptr<Tensor> &edget, const std::vector<FeatureType> &feature_types,
+                        TensorRow *out);
+
+  // Get meta information of graph
+  // @param std::vector<NodeMetaInfo> *node_info - Returned meta information of node
+  // @param std::vector<NodeMetaInfo> *node_info - Returned meta information of edge
+  // @return Status - The error code return
+  Status GetMetaInfo(std::vector<NodeMetaInfo> *node_info, std::vector<EdgeMetaInfo> *edge_info);
+
+  Status Init();
+
+ private:
+  // Load graph data from mindrecord file
+  // @return Status - The error code return
+  Status LoadNodeAndEdge();
+
+  // Create Tensor By Vector
+  // @param std::vector<std::vector<T>> &data -
+  // @param DataType type -
+  // @param std::shared_ptr<Tensor> *out -
+  // @return Status - The error code return
+  template <typename T>
+  Status CreateTensorByVector(const std::vector<std::vector<T>> &data, DataType type, std::shared_ptr<Tensor> *out);
+
+  // Complete vector
+  // @param std::vector<std::vector<T>> *data - To be completed vector
+  // @param size_t max_size - The size of the completed vector
+  // @param T default_value - Filled default
+  // @return Status - The error code return
+  template <typename T>
+  Status ComplementVector(std::vector<std::vector<T>> *data, size_t max_size, T default_value);
+
+  // Get the default feature of a node
+  // @param FeatureType feature_type -
+  // @param std::shared_ptr<Feature> *out_feature - Returned feature
+  // @return Status - The error code return
+  Status GetNodeDefaultFeature(FeatureType feature_type, std::shared_ptr<Feature> *out_feature);
+
+  std::string dataset_file_;
+  int32_t num_workers_;  // The number of worker threads
+
+  std::unordered_map<NodeType, std::vector<NodeIdType>> node_type_map_;
+  std::unordered_map<NodeIdType, std::shared_ptr<Node>> node_id_map_;
+
+  std::unordered_map<EdgeType, std::vector<EdgeIdType>> edge_type_map_;
+  std::unordered_map<EdgeIdType, std::shared_ptr<Edge>> edge_id_map_;
+
+  std::unordered_map<NodeType, std::unordered_set<FeatureType>> node_feature_map_;
+  std::unordered_map<NodeType, std::unordered_set<FeatureType>> edge_feature_map_;
+
+  std::unordered_map<FeatureType, std::shared_ptr<Feature>> default_feature_map_;
+};
+}  // namespace gnn
+}  // namespace dataset
+}  // namespace mindspore
+#endif  // DATASET_ENGINE_GNN_GRAPH_H_
--- a/mindspore/ccsrc/dataset/engine/gnn/graph_loader.cc
+++ b/mindspore/ccsrc/dataset/engine/gnn/graph_loader.cc
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <future>
+#include <tuple>
+#include <utility>
+
+#include "dataset/engine/gnn/graph_loader.h"
+#include "mindspore/ccsrc/mindrecord/include/shard_error.h"
+#include "dataset/engine/gnn/local_edge.h"
+#include "dataset/engine/gnn/local_node.h"
+
+using ShardTuple = std::vector<std::tuple<std::vector<uint8_t>, mindspore::mindrecord::json>>;
+
+namespace mindspore {
+namespace dataset {
+namespace gnn {
+
+using mindrecord::MSRStatus;
+
+GraphLoader::GraphLoader(std::string mr_filepath, int32_t num_workers)
+    : mr_path_(mr_filepath),
+      num_workers_(num_workers),
+      row_id_(0),
+      keys_({"first_id", "second_id", "third_id", "attribute", "type", "node_feature_index", "edge_feature_index"}) {}
+
+Status GraphLoader::GetNodesAndEdges(NodeIdMap *n_id_map, EdgeIdMap *e_id_map, NodeTypeMap *n_type_map,
+                                     EdgeTypeMap *e_type_map, NodeFeatureMap *n_feature_map,
+                                     EdgeFeatureMap *e_feature_map, DefaultFeatureMap *default_feature_map) {
+  for (std::deque<std::shared_ptr<Node>> &dq : n_deques_) {
+    while (dq.empty() == false) {
+      std::shared_ptr<Node> node_ptr = dq.front();
+      n_id_map->insert({node_ptr->id(), node_ptr});
+      (*n_type_map)[node_ptr->type()].push_back(node_ptr->id());
+      dq.pop_front();
+    }
+  }
+
+  for (std::deque<std::shared_ptr<Edge>> &dq : e_deques_) {
+    while (dq.empty() == false) {
+      std::shared_ptr<Edge> edge_ptr = dq.front();
+      std::pair<std::shared_ptr<Node>, std::shared_ptr<Node>> p;
+      RETURN_IF_NOT_OK(edge_ptr->GetNode(&p));
+      auto src_itr = n_id_map->find(p.first->id()), dst_itr = n_id_map->find(p.second->id());
+      CHECK_FAIL_RETURN_UNEXPECTED(src_itr != n_id_map->end(), "invalid src_id:" + std::to_string(src_itr->first));
+      CHECK_FAIL_RETURN_UNEXPECTED(dst_itr != n_id_map->end(), "invalid src_id:" + std::to_string(dst_itr->first));
+      RETURN_IF_NOT_OK(edge_ptr->SetNode({src_itr->second, dst_itr->second}));
+      RETURN_IF_NOT_OK(src_itr->second->AddNeighbor(dst_itr->second));
+      e_id_map->insert({edge_ptr->id(), edge_ptr});  // add edge to edge_id_map_
+      (*e_type_map)[edge_ptr->type()].push_back(edge_ptr->id());
+      dq.pop_front();
+    }
+  }
+
+  for (auto &itr : *n_type_map) itr.second.shrink_to_fit();
+  for (auto &itr : *e_type_map) itr.second.shrink_to_fit();
+
+  MergeFeatureMaps(n_feature_map, e_feature_map, default_feature_map);
+  return Status::OK();
+}
+
+Status GraphLoader::InitAndLoad() {
+  CHECK_FAIL_RETURN_UNEXPECTED(num_workers_ > 0, "num_reader can't be < 1\n");
+  CHECK_FAIL_RETURN_UNEXPECTED(row_id_ == 0, "InitAndLoad Can only be called once!\n");
+  n_deques_.resize(num_workers_);
+  e_deques_.resize(num_workers_);
+  n_feature_maps_.resize(num_workers_);
+  e_feature_maps_.resize(num_workers_);
+  default_feature_maps_.resize(num_workers_);
+  std::vector<std::future<Status>> r_codes(num_workers_);
+
+  shard_reader_ = std::make_unique<ShardReader>();
+  CHECK_FAIL_RETURN_UNEXPECTED(shard_reader_->Open({mr_path_}, true, num_workers_) == MSRStatus::SUCCESS,
+                               "Fail to open" + mr_path_);
+  CHECK_FAIL_RETURN_UNEXPECTED(shard_reader_->GetShardHeader()->GetSchemaCount() > 0, "No schema found!");
+  CHECK_FAIL_RETURN_UNEXPECTED(shard_reader_->Launch(true) == MSRStatus::SUCCESS, "fail to launch mr");
+
+  mindrecord::json schema = (shard_reader_->GetShardHeader()->GetSchemas()[0]->GetSchema())["schema"];
+  for (const std::string &key : keys_) {
+    if (schema.find(key) == schema.end()) {
+      RETURN_STATUS_UNEXPECTED(key + ":doesn't exist in schema:" + schema.dump());
+    }
+  }
+
+  // launching worker threads
+  for (int wkr_id = 0; wkr_id < num_workers_; ++wkr_id) {
+    r_codes[wkr_id] = std::async(std::launch::async, &GraphLoader::WorkerEntry, this, wkr_id);
+  }
+  // wait for threads to finish and check its return code
+  for (int wkr_id = 0; wkr_id < num_workers_; ++wkr_id) {
+    RETURN_IF_NOT_OK(r_codes[wkr_id].get());
+  }
+  return Status::OK();
+}
+
+Status GraphLoader::LoadNode(const std::vector<uint8_t> &col_blob, const mindrecord::json &col_jsn,
+                             std::shared_ptr<Node> *node, NodeFeatureMap *feature_map,
+                             DefaultFeatureMap *default_feature) {
+  NodeIdType node_id = col_jsn["first_id"];
+  NodeType node_type = static_cast<NodeType>(col_jsn["type"]);
+  (*node) = std::make_shared<LocalNode>(node_id, node_type);
+  std::vector<int32_t> indices;
+  RETURN_IF_NOT_OK(LoadFeatureIndex("node_feature_index", col_blob, col_jsn, &indices));
+
+  for (int32_t ind : indices) {
+    std::shared_ptr<Tensor> tensor;
+    RETURN_IF_NOT_OK(LoadFeatureTensor("node_feature_" + std::to_string(ind), col_blob, col_jsn, &tensor));
+    RETURN_IF_NOT_OK((*node)->UpdateFeature(std::make_shared<Feature>(ind, tensor)));
+    (*feature_map)[node_type].insert(ind);
+    if ((*default_feature)[ind] == nullptr) {
+      std::shared_ptr<Tensor> zero_tensor;
+      RETURN_IF_NOT_OK(Tensor::CreateTensor(&zero_tensor, TensorImpl::kFlexible, tensor->shape(), tensor->type()));
+      RETURN_IF_NOT_OK(zero_tensor->Zero());
+      (*default_feature)[ind] = std::make_shared<Feature>(ind, zero_tensor);
+    }
+  }
+  return Status::OK();
+}
+
+Status GraphLoader::LoadEdge(const std::vector<uint8_t> &col_blob, const mindrecord::json &col_jsn,
+                             std::shared_ptr<Edge> *edge, EdgeFeatureMap *feature_map,
+                             DefaultFeatureMap *default_feature) {
+  EdgeIdType edge_id = col_jsn["first_id"];
+  EdgeType edge_type = static_cast<EdgeType>(col_jsn["type"]);
+  NodeIdType src_id = col_jsn["second_id"], dst_id = col_jsn["third_id"];
+  std::shared_ptr<Node> src = std::make_shared<LocalNode>(src_id, -1);
+  std::shared_ptr<Node> dst = std::make_shared<LocalNode>(dst_id, -1);
+  (*edge) = std::make_shared<LocalEdge>(edge_id, edge_type, src, dst);
+  std::vector<int32_t> indices;
+  RETURN_IF_NOT_OK(LoadFeatureIndex("edge_feature_index", col_blob, col_jsn, &indices));
+  for (int32_t ind : indices) {
+    std::shared_ptr<Tensor> tensor;
+    RETURN_IF_NOT_OK(LoadFeatureTensor("edge_feature_" + std::to_string(ind), col_blob, col_jsn, &tensor));
+    RETURN_IF_NOT_OK((*edge)->UpdateFeature(std::make_shared<Feature>(ind, tensor)));
+    (*feature_map)[edge_type].insert(ind);
+    if ((*default_feature)[ind] == nullptr) {
+      std::shared_ptr<Tensor> zero_tensor;
+      RETURN_IF_NOT_OK(Tensor::CreateTensor(&zero_tensor, TensorImpl::kFlexible, tensor->shape(), tensor->type()));
+      RETURN_IF_NOT_OK(zero_tensor->Zero());
+      (*default_feature)[ind] = std::make_shared<Feature>(ind, zero_tensor);
+    }
+  }
+  return Status::OK();
+}
+
+Status GraphLoader::LoadFeatureTensor(const std::string &key, const std::vector<uint8_t> &col_blob,
+                                      const mindrecord::json &col_jsn, std::shared_ptr<Tensor> *tensor) {
+  const unsigned char *data = nullptr;
+  std::unique_ptr<unsigned char[]> data_ptr;
+  uint64_t n_bytes = 0, col_type_size = 1;
+  mindrecord::ColumnDataType col_type = mindrecord::ColumnNoDataType;
+  std::vector<int64_t> column_shape;
+  MSRStatus rs = shard_reader_->GetShardColumn()->GetColumnValueByName(
+    key, col_blob, col_jsn, &data, &data_ptr, &n_bytes, &col_type, &col_type_size, &column_shape);
+  CHECK_FAIL_RETURN_UNEXPECTED(rs == mindrecord::SUCCESS, "fail to load column" + key);
+  if (data == nullptr) data = reinterpret_cast<const unsigned char *>(&data_ptr[0]);
+  RETURN_IF_NOT_OK(Tensor::CreateTensor(tensor, TensorImpl::kFlexible,
+                                        std::move(TensorShape({static_cast<dsize_t>(n_bytes / col_type_size)})),
+                                        std::move(DataType(mindrecord::ColumnDataTypeNameNormalized[col_type])), data));
+  return Status::OK();
+}
+
+Status GraphLoader::LoadFeatureIndex(const std::string &key, const std::vector<uint8_t> &col_blob,
+                                     const mindrecord::json &col_jsn, std::vector<int32_t> *indices) {
+  const unsigned char *data = nullptr;
+  std::unique_ptr<unsigned char[]> data_ptr;
+  uint64_t n_bytes = 0, col_type_size = 1;
+  mindrecord::ColumnDataType col_type = mindrecord::ColumnNoDataType;
+  std::vector<int64_t> column_shape;
+  MSRStatus rs = shard_reader_->GetShardColumn()->GetColumnValueByName(
+    key, col_blob, col_jsn, &data, &data_ptr, &n_bytes, &col_type, &col_type_size, &column_shape);
+  CHECK_FAIL_RETURN_UNEXPECTED(rs == mindrecord::SUCCESS, "fail to load column:" + key);
+
+  if (data == nullptr) data = reinterpret_cast<const unsigned char *>(&data_ptr[0]);
+
+  for (int i = 0; i < n_bytes; i += col_type_size) {
+    int32_t feature_ind = -1;
+    if (col_type == mindrecord::ColumnInt32) {
+      feature_ind = *(reinterpret_cast<const int32_t *>(data + i));
+    } else if (col_type == mindrecord::ColumnInt64) {
+      feature_ind = *(reinterpret_cast<const int64_t *>(data + i));
+    } else {
+      RETURN_STATUS_UNEXPECTED("Feature Index needs to be int32/int64 type!");
+    }
+    if (feature_ind >= 0) indices->push_back(feature_ind);
+  }
+  return Status::OK();
+}
+
+Status GraphLoader::WorkerEntry(int32_t worker_id) {
+  ShardTuple rows = shard_reader_->GetNextById(row_id_++, worker_id);
+  while (rows.empty() == false) {
+    for (const auto &tupled_row : rows) {
+      std::vector<uint8_t> col_blob = std::get<0>(tupled_row);
+      mindrecord::json col_jsn = std::get<1>(tupled_row);
+      std::string attr = col_jsn["attribute"];
+      if (attr == "n") {
+        std::shared_ptr<Node> node_ptr;
+        RETURN_IF_NOT_OK(
+          LoadNode(col_blob, col_jsn, &node_ptr, &(n_feature_maps_[worker_id]), &default_feature_maps_[worker_id]));
+        n_deques_[worker_id].emplace_back(node_ptr);
+      } else if (attr == "e") {
+        std::shared_ptr<Edge> edge_ptr;
+        RETURN_IF_NOT_OK(
+          LoadEdge(col_blob, col_jsn, &edge_ptr, &(e_feature_maps_[worker_id]), &default_feature_maps_[worker_id]));
+        e_deques_[worker_id].emplace_back(edge_ptr);
+      } else {
+        MS_LOG(WARNING) << "attribute:" << attr << " is neither edge nor node.";
+      }
+    }
+    rows = shard_reader_->GetNextById(row_id_++, worker_id);
+  }
+  return Status::OK();
+}
+
+void GraphLoader::MergeFeatureMaps(NodeFeatureMap *n_feature_map, EdgeFeatureMap *e_feature_map,
+                                   DefaultFeatureMap *default_feature_map) {
+  for (int wkr_id = 0; wkr_id < num_workers_; wkr_id++) {
+    for (auto &m : n_feature_maps_[wkr_id]) {
+      for (auto &n : m.second) (*n_feature_map)[m.first].insert(n);
+    }
+    for (auto &m : e_feature_maps_[wkr_id]) {
+      for (auto &n : m.second) (*e_feature_map)[m.first].insert(n);
+    }
+    for (auto &m : default_feature_maps_[wkr_id]) {
+      (*default_feature_map)[m.first] = m.second;
+    }
+  }
+  n_feature_maps_.clear();
+  e_feature_maps_.clear();
+}
+
+}  // namespace gnn
+}  // namespace dataset
+}  // namespace mindspore
--- a/mindspore/ccsrc/dataset/engine/gnn/graph_loader.h
+++ b/mindspore/ccsrc/dataset/engine/gnn/graph_loader.h
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef DATASET_ENGINE_GNN_GRAPH_LOADER_H_
+#define DATASET_ENGINE_GNN_GRAPH_LOADER_H_
+
+#include <deque>
+#include <memory>
+#include <queue>
+#include <string>
+#include <vector>
+#include <unordered_map>
+#include <unordered_set>
+
+#include "dataset/core/data_type.h"
+#include "dataset/core/tensor.h"
+#include "dataset/engine/gnn/feature.h"
+#include "dataset/engine/gnn/graph.h"
+#include "dataset/engine/gnn/node.h"
+#include "dataset/engine/gnn/edge.h"
+#include "dataset/util/status.h"
+#include "mindrecord/include/shard_reader.h"
+namespace mindspore {
+namespace dataset {
+namespace gnn {
+
+using mindrecord::ShardReader;
+using NodeIdMap = std::unordered_map<NodeIdType, std::shared_ptr<Node>>;
+using EdgeIdMap = std::unordered_map<EdgeIdType, std::shared_ptr<Edge>>;
+using NodeTypeMap = std::unordered_map<NodeType, std::vector<NodeIdType>>;
+using EdgeTypeMap = std::unordered_map<EdgeType, std::vector<EdgeIdType>>;
+using NodeFeatureMap = std::unordered_map<NodeType, std::unordered_set<FeatureType>>;
+using EdgeFeatureMap = std::unordered_map<EdgeType, std::unordered_set<FeatureType>>;
+using DefaultFeatureMap = std::unordered_map<FeatureType, std::shared_ptr<Feature>>;
+
+// this class interfaces with the underlying storage format (mindrecord)
+// it returns raw nodes and edges via GetNodesAndEdges
+// it is then the responsibility of graph to construct itself based on the nodes and edges
+// if needed, this class could become a base where each derived class handles a specific storage format
+class GraphLoader {
+ public:
+  explicit GraphLoader(std::string mr_filepath, int32_t num_workers = 4);
+
+  // Init mindrecord and load everything into memory multi-threaded
+  // @return Status - the status code
+  Status InitAndLoad();
+
+  // this function will query mindrecord and construct all nodes and edges
+  // nodes and edges are added to map without any connection. That's because there nodes and edges are read in
+  // random order. src_node and dst_node in Edge are node_id only with -1 as type.
+  // features attached to each node and edge are expected to be filled correctly
+  Status GetNodesAndEdges(NodeIdMap *, EdgeIdMap *, NodeTypeMap *, EdgeTypeMap *, NodeFeatureMap *, EdgeFeatureMap *,
+                          DefaultFeatureMap *);
+
+ private:
+  //
+  // worker thread that reads mindrecord file
+  // @param int32_t worker_id - id of each worker
+  // @return Status - the status code
+  Status WorkerEntry(int32_t worker_id);
+
+  // Load a node based on 1 row of mindrecord, returns a shared_ptr<Node>
+  // @param std::vector<uint8_t> &blob - contains data in blob field in mindrecord
+  // @param mindrecord::json &jsn - contains raw data
+  // @param std::shared_ptr<Node> *node - return value
+  // @param NodeFeatureMap *feature_map -
+  // @param DefaultFeatureMap *default_feature -
+  // @return Status - the status code
+  Status LoadNode(const std::vector<uint8_t> &blob, const mindrecord::json &jsn, std::shared_ptr<Node> *node,
+                  NodeFeatureMap *feature_map, DefaultFeatureMap *default_feature);
+
+  // @param std::vector<uint8_t> &blob - contains data in blob field in mindrecord
+  // @param mindrecord::json &jsn - contains raw data
+  // @param std::shared_ptr<Edge> *edge - return value, the edge ptr, edge is not yet connected
+  // @param FeatureMap *feature_map
+  // @param DefaultFeatureMap *default_feature -
+  // @return Status - the status code
+  Status LoadEdge(const std::vector<uint8_t> &blob, const mindrecord::json &jsn, std::shared_ptr<Edge> *edge,
+                  EdgeFeatureMap *feature_map, DefaultFeatureMap *default_feature);
+
+  // @param std::string key - column name
+  // @param std::vector<uint8_t> &blob - contains data in blob field in mindrecord
+  // @param mindrecord::json &jsn - contains raw data
+  // @param std::vector<int32_t> *ind - return value, list of feature index in int32_t
+  // @return Status - the status code
+  Status LoadFeatureIndex(const std::string &key, const std::vector<uint8_t> &blob, const mindrecord::json &jsn,
+                          std::vector<int32_t> *ind);
+
+  // @param std::string &key - column name
+  // @param std::vector<uint8_t> &blob - contains data in blob field in mindrecord
+  // @param mindrecord::json &jsn - contains raw data
+  // @param std::shared_ptr<Tensor> *tensor - return value feature tensor
+  // @return Status - the status code
+  Status LoadFeatureTensor(const std::string &key, const std::vector<uint8_t> &blob, const mindrecord::json &jsn,
+                           std::shared_ptr<Tensor> *tensor);
+
+  // merge NodeFeatureMap and EdgeFeatureMap of each worker into 1
+  void MergeFeatureMaps(NodeFeatureMap *, EdgeFeatureMap *, DefaultFeatureMap *);
+
+  const int32_t num_workers_;
+  std::atomic_int row_id_;
+  std::string mr_path_;
+  std::unique_ptr<ShardReader> shard_reader_;
+  std::vector<std::deque<std::shared_ptr<Node>>> n_deques_;
+  std::vector<std::deque<std::shared_ptr<Edge>>> e_deques_;
+  std::vector<NodeFeatureMap> n_feature_maps_;
+  std::vector<EdgeFeatureMap> e_feature_maps_;
+  std::vector<DefaultFeatureMap> default_feature_maps_;
+  const std::vector<std::string> keys_;
+};
+}  // namespace gnn
+}  // namespace dataset
+}  // namespace mindspore
+#endif  // DATASET_ENGINE_GNN_GRAPH_LOADER_H_
--- a/mindspore/ccsrc/dataset/engine/gnn/local_edge.cc
+++ b/mindspore/ccsrc/dataset/engine/gnn/local_edge.cc
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "dataset/engine/gnn/local_edge.h"
+
+#include <string>
+
+namespace mindspore {
+namespace dataset {
+namespace gnn {
+
+LocalEdge::LocalEdge(EdgeIdType id, EdgeType type, std::shared_ptr<Node> src_node, std::shared_ptr<Node> dst_node)
+    : Edge(id, type, src_node, dst_node) {}
+
+Status LocalEdge::GetFeatures(FeatureType feature_type, std::shared_ptr<Feature> *out_feature) {
+  auto itr = features_.find(feature_type);
+  if (itr != features_.end()) {
+    *out_feature = itr->second;
+    return Status::OK();
+  } else {
+    std::string err_msg = "Invalid feature type:" + std::to_string(feature_type);
+    RETURN_STATUS_UNEXPECTED(err_msg);
+  }
+}
+
+Status LocalEdge::UpdateFeature(const std::shared_ptr<Feature> &feature) {
+  auto itr = features_.find(feature->type());
+  if (itr != features_.end()) {
+    RETURN_STATUS_UNEXPECTED("Feature already exists");
+  } else {
+    features_[feature->type()] = feature;
+    return Status::OK();
+  }
+}
+}  // namespace gnn
+}  // namespace dataset
+}  // namespace mindspore
--- a/mindspore/ccsrc/dataset/engine/gnn/local_edge.h
+++ b/mindspore/ccsrc/dataset/engine/gnn/local_edge.h
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef DATASET_ENGINE_GNN_LOCAL_EDGE_H_
+#define DATASET_ENGINE_GNN_LOCAL_EDGE_H_
+
+#include <memory>
+#include <unordered_map>
+#include <utility>
+
+#include "dataset/util/status.h"
+#include "dataset/engine/gnn/edge.h"
+#include "dataset/engine/gnn/feature.h"
+#include "dataset/engine/gnn/node.h"
+
+namespace mindspore {
+namespace dataset {
+namespace gnn {
+
+class LocalEdge : public Edge {
+ public:
+  // Constructor
+  // @param EdgeIdType id - edge id
+  // @param EdgeType type - edge type
+  // @param std::shared_ptr<Node> src_node - source node
+  // @param std::shared_ptr<Node> dst_node - destination node
+  LocalEdge(EdgeIdType id, EdgeType type, std::shared_ptr<Node> src_node, std::shared_ptr<Node> dst_node);
+
+  ~LocalEdge() = default;
+
+  // Get the feature of a edge
+  // @param FeatureType feature_type - type of feature
+  // @param std::shared_ptr<Feature> *out_feature - Returned feature
+  // @return Status - The error code return
+  Status GetFeatures(FeatureType feature_type, std::shared_ptr<Feature> *out_feature) override;
+
+  // Update feature of edge
+  // @param std::shared_ptr<Feature> feature -
+  // @return Status - The error code return
+  Status UpdateFeature(const std::shared_ptr<Feature> &feature) override;
+
+ private:
+  std::unordered_map<FeatureType, std::shared_ptr<Feature>> features_;
+};
+}  // namespace gnn
+}  // namespace dataset
+}  // namespace mindspore
+#endif  // DATASET_ENGINE_GNN_LOCAL_EDGE_H_
--- a/mindspore/ccsrc/dataset/engine/gnn/local_node.cc
+++ b/mindspore/ccsrc/dataset/engine/gnn/local_node.cc
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "dataset/engine/gnn/local_node.h"
+
+#include <algorithm>
+#include <string>
+#include <utility>
+
+#include "dataset/engine/gnn/edge.h"
+
+namespace mindspore {
+namespace dataset {
+namespace gnn {
+
+LocalNode::LocalNode(NodeIdType id, NodeType type) : Node(id, type) {}
+
+Status LocalNode::GetFeatures(FeatureType feature_type, std::shared_ptr<Feature> *out_feature) {
+  auto itr = features_.find(feature_type);
+  if (itr != features_.end()) {
+    *out_feature = itr->second;
+    return Status::OK();
+  } else {
+    std::string err_msg = "Invalid feature type:" + std::to_string(feature_type);
+    RETURN_STATUS_UNEXPECTED(err_msg);
+  }
+}
+
+Status LocalNode::GetNeighbors(NodeType neighbor_type, int32_t samples_num, std::vector<NodeIdType> *out_neighbors) {
+  std::vector<NodeIdType> neighbors;
+  auto itr = neighbor_nodes_.find(neighbor_type);
+  if (itr != neighbor_nodes_.end()) {
+    if (samples_num == -1) {
+      // Return all neighbors
+      neighbors.resize(itr->second.size() + 1);
+      neighbors[0] = id_;
+      std::transform(itr->second.begin(), itr->second.end(), neighbors.begin() + 1,
+                     [](const std::shared_ptr<Node> node) { return node->id(); });
+    } else {
+    }
+  } else {
+    neighbors.push_back(id_);
+    MS_LOG(DEBUG) << "No neighbors. node_id:" << id_ << " neighbor_type:" << neighbor_type;
+  }
+  *out_neighbors = std::move(neighbors);
+  return Status::OK();
+}
+
+Status LocalNode::AddNeighbor(const std::shared_ptr<Node> &node) {
+  auto itr = neighbor_nodes_.find(node->type());
+  if (itr != neighbor_nodes_.end()) {
+    itr->second.push_back(node);
+  } else {
+    neighbor_nodes_[node->type()] = {node};
+  }
+  return Status::OK();
+}
+
+Status LocalNode::UpdateFeature(const std::shared_ptr<Feature> &feature) {
+  auto itr = features_.find(feature->type());
+  if (itr != features_.end()) {
+    RETURN_STATUS_UNEXPECTED("Feature already exists");
+  } else {
+    features_[feature->type()] = feature;
+    return Status::OK();
+  }
+}
+
+}  // namespace gnn
+}  // namespace dataset
+}  // namespace mindspore
--- a/mindspore/ccsrc/dataset/engine/gnn/local_node.h
+++ b/mindspore/ccsrc/dataset/engine/gnn/local_node.h
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef DATASET_ENGINE_GNN_LOCAL_NODE_H_
+#define DATASET_ENGINE_GNN_LOCAL_NODE_H_
+
+#include <memory>
+#include <unordered_map>
+#include <vector>
+
+#include "dataset/util/status.h"
+#include "dataset/engine/gnn/node.h"
+#include "dataset/engine/gnn/feature.h"
+
+namespace mindspore {
+namespace dataset {
+namespace gnn {
+
+class LocalNode : public Node {
+ public:
+  // Constructor
+  // @param NodeIdType id - node id
+  // @param NodeType type - node type
+  LocalNode(NodeIdType id, NodeType type);
+
+  ~LocalNode() = default;
+
+  // Get the feature of a node
+  // @param FeatureType feature_type - type of feature
+  // @param std::shared_ptr<Feature> *out_feature - Returned feature
+  // @return Status - The error code return
+  Status GetFeatures(FeatureType feature_type, std::shared_ptr<Feature> *out_feature) override;
+
+  // Get the neighbors of a node
+  // @param NodeType neighbor_type - type of neighbor
+  // @param int32_t samples_num - Number of neighbors to be acquired, if -1 means all neighbors are acquired
+  // @param std::vector<NodeIdType> *out_neighbors - Returned neighbors id
+  // @return Status - The error code return
+  Status GetNeighbors(NodeType neighbor_type, int32_t samples_num, std::vector<NodeIdType> *out_neighbors) override;
+
+  // Add neighbor of node
+  // @param std::shared_ptr<Node> node -
+  // @return Status - The error code return
+  Status AddNeighbor(const std::shared_ptr<Node> &node) override;
+
+  // Update feature of node
+  // @param std::shared_ptr<Feature> feature -
+  // @return Status - The error code return
+  Status UpdateFeature(const std::shared_ptr<Feature> &feature) override;
+
+ private:
+  std::unordered_map<FeatureType, std::shared_ptr<Feature>> features_;
+  std::unordered_map<NodeType, std::vector<std::shared_ptr<Node>>> neighbor_nodes_;
+};
+}  // namespace gnn
+}  // namespace dataset
+}  // namespace mindspore
+#endif  // DATASET_ENGINE_GNN_LOCAL_NODE_H_
--- a/mindspore/ccsrc/dataset/engine/gnn/node.h
+++ b/mindspore/ccsrc/dataset/engine/gnn/node.h
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef DATASET_ENGINE_GNN_NODE_H_
+#define DATASET_ENGINE_GNN_NODE_H_
+
+#include <memory>
+#include <unordered_map>
+#include <vector>
+
+#include "dataset/util/status.h"
+#include "dataset/engine/gnn/feature.h"
+
+namespace mindspore {
+namespace dataset {
+namespace gnn {
+using NodeType = int8_t;
+using NodeIdType = int32_t;
+
+constexpr NodeIdType kDefaultNodeId = -1;
+
+class Node {
+ public:
+  // Constructor
+  // @param NodeIdType id - node id
+  // @param NodeType type - node type
+  Node(NodeIdType id, NodeType type) : id_(id), type_(type) {}
+
+  virtual ~Node() = default;
+
+  // @return NodeIdType - Returned node id
+  NodeIdType id() const { return id_; }
+
+  // @return NodeIdType - Returned node type
+  NodeType type() const { return type_; }
+
+  // Get the feature of a node
+  // @param FeatureType feature_type - type of feature
+  // @param std::shared_ptr<Feature> *out_feature - Returned feature
+  // @return Status - The error code return
+  virtual Status GetFeatures(FeatureType feature_type, std::shared_ptr<Feature> *out_feature) = 0;
+
+  // Get the neighbors of a node
+  // @param NodeType neighbor_type - type of neighbor
+  // @param int32_t samples_num - Number of neighbors to be acquired, if -1 means all neighbors are acquired
+  // @param std::vector<NodeIdType> *out_neighbors - Returned neighbors id
+  // @return Status - The error code return
+  virtual Status GetNeighbors(NodeType neighbor_type, int32_t samples_num, std::vector<NodeIdType> *out_neighbors) = 0;
+
+  // Add neighbor of node
+  // @param std::shared_ptr<Node> node -
+  // @return Status - The error code return
+  virtual Status AddNeighbor(const std::shared_ptr<Node> &node) = 0;
+
+  // Update feature of node
+  // @param std::shared_ptr<Feature> feature -
+  // @return Status - The error code return
+  virtual Status UpdateFeature(const std::shared_ptr<Feature> &feature) = 0;
+
+ protected:
+  NodeIdType id_;
+  NodeType type_;
+};
+}  // namespace gnn
+}  // namespace dataset
+}  // namespace mindspore
+#endif  // DATASET_ENGINE_GNN_NODE_H_
--- a/mindspore/ccsrc/mindrecord/include/shard_reader.h
+++ b/mindspore/ccsrc/mindrecord/include/shard_reader.h
@@ -114,7 +114,7 @@ class ShardReader {

  /// \brief aim to get columns context
  /// \return the columns
-  std::shared_ptr<ShardColumn> get_shard_column() const;
+  std::shared_ptr<ShardColumn> GetShardColumn() const;

  /// \brief get the number of shards
  /// \return # of shards

--- a/mindspore/ccsrc/mindrecord/io/shard_reader.cc
+++ b/mindspore/ccsrc/mindrecord/io/shard_reader.cc
@@ -232,7 +232,7 @@ void ShardReader::Close() {

 std::shared_ptr<ShardHeader> ShardReader::GetShardHeader() const { return shard_header_; }

-std::shared_ptr<ShardColumn> ShardReader::get_shard_column() const { return shard_column_; }
+std::shared_ptr<ShardColumn> ShardReader::GetShardColumn() const { return shard_column_; }

 int ShardReader::GetShardCount() const { return shard_header_->GetShardCount(); }


--- a/mindspore/dataset/__init__.py
+++ b/mindspore/dataset/__init__.py
@@ -25,9 +25,10 @@ from .engine.datasets import StorageDataset, TFRecordDataset, ImageFolderDataset
 from .engine.samplers import DistributedSampler, PKSampler, RandomSampler, SequentialSampler, SubsetRandomSampler, \
    WeightedRandomSampler, Sampler
 from .engine.serializer_deserializer import serialize, deserialize, show
+from .engine.graphdata import GraphData

 __all__ = ["config", "ImageFolderDatasetV2", "MnistDataset", "StorageDataset",
           "MindDataset", "GeneratorDataset", "TFRecordDataset",
           "ManifestDataset", "Cifar10Dataset", "Cifar100Dataset", "CelebADataset",
           "VOCDataset", "TextFileDataset", "Schema", "DistributedSampler", "PKSampler", "RandomSampler",
-           "SequentialSampler", "SubsetRandomSampler", "WeightedRandomSampler", "zip"]
+           "SequentialSampler", "SubsetRandomSampler", "WeightedRandomSampler", "zip", "GraphData"]
--- a/mindspore/dataset/engine/graphdata.py
+++ b/mindspore/dataset/engine/graphdata.py
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""
+graphdata.py supports loading graph dataset for GNN network training,
+and provides operations related to graph data.
+"""
+import numpy as np
+from mindspore._c_dataengine import Graph
+from mindspore._c_dataengine import Tensor
+
+from .validators import check_gnn_get_all_nodes, check_gnn_get_all_neighbors, check_gnn_get_node_feature
+
+
+class GraphData:
+    """
+    Reads th graph dataset used for GNN training from the shared file and database.
+
+    Args:
+        dataset_file (str): One of file names in dataset.
+        num_parallel_workers (int, optional): Number of workers to process the Dataset in parallel
+            (default=None).
+    """
+
+    def __init__(self, dataset_file, num_parallel_workers=None):
+        self._dataset_file = dataset_file
+        if num_parallel_workers is None:
+            num_parallel_workers = 1
+        self._graph = Graph(dataset_file, num_parallel_workers)
+
+    @check_gnn_get_all_nodes
+    def get_all_nodes(self, node_type):
+        """
+        Get all nodes in the graph.
+
+        Args:
+            node_type (int): Specify the tpye of node.
+
+        Returns:
+            numpy.ndarray: array of nodes.
+
+        Examples:
+            >>> import mindspore.dataset as ds
+            >>> data_graph = ds.GraphData('dataset_file', 2)
+            >>> nodes = data_graph.get_all_nodes(0)
+
+        Raises:
+            TypeError: If `node_type` is not integer.
+        """
+        return self._graph.get_nodes(node_type, -1).as_array()
+
+    @check_gnn_get_all_neighbors
+    def get_all_neighbors(self, node_list, neighbor_type):
+        """
+        Get `neighbor_type` neighbors of the nodes in `node_list`.
+
+        Args:
+            node_list (list or numpy.ndarray): The given list of nodes.
+            neighbor_type (int): Specify the tpye of neighbor.
+
+        Returns:
+            numpy.ndarray: array of nodes.
+
+        Examples:
+            >>> import mindspore.dataset as ds
+            >>> data_graph = ds.GraphData('dataset_file', 2)
+            >>> nodes = data_graph.get_all_nodes(0)
+            >>> neighbors = data_graph.get_all_neighbors(nodes[0], 0)
+
+        Raises:
+            TypeError: If `node_list` is not list or ndarray.
+            TypeError: If `neighbor_type` is not integer.
+        """
+        return self._graph.get_all_neighbors(node_list, neighbor_type).as_array()
+
+    @check_gnn_get_node_feature
+    def get_node_feature(self, node_list, feature_types):
+        """
+        Get `feature_types` feature of the nodes in `node_list`.
+
+        Args:
+            node_list (list or numpy.ndarray): The given list of nodes.
+            feature_types (list or ndarray): The given list of feature types.
+
+        Returns:
+            numpy.ndarray: array of features.
+
+        Examples:
+            >>> import mindspore.dataset as ds
+            >>> data_graph = ds.GraphData('dataset_file', 2)
+            >>> nodes = data_graph.get_all_nodes(0)
+            >>> features = data_graph.get_node_feature(nodes[0], [1])
+
+        Raises:
+            TypeError: If `node_list` is not list or ndarray.
+            TypeError: If `feature_types` is not list or ndarray.
+        """
+        if isinstance(node_list, list):
+            node_list = np.array(node_list, dtype=np.int32)
+        return [t.as_array() for t in self._graph.get_node_feature(Tensor(node_list), feature_types)]
--- a/mindspore/dataset/engine/validators.py
+++ b/mindspore/dataset/engine/validators.py
@@ -1032,6 +1032,7 @@ def check_textfiledataset(method):

    return new_method

+
 def check_split(method):
    """check the input arguments of split."""

@@ -1072,3 +1073,85 @@ def check_split(method):
        return method(*args, **kwargs)

    return new_method
+
+
+def check_list_or_ndarray(param, param_name):
+    if (not isinstance(param, list)) and (not hasattr(param, 'tolist')):
+        raise TypeError("Wrong input type for {0}, should be list, got {1}".format(
+            param_name, type(param)))
+
+
+def check_gnn_get_all_nodes(method):
+    """A wrapper that wrap a parameter checker to the GNN `get_all_nodes` function."""
+    @wraps(method)
+    def new_method(*args, **kwargs):
+        param_dict = make_param_dict(method, args, kwargs)
+
+        # check node_type; required argument
+        check_type(param_dict.get("node_type"), 'node_type', int)
+
+        return method(*args, **kwargs)
+
+    return new_method
+
+
+def check_gnn_get_all_neighbors(method):
+    """A wrapper that wrap a parameter checker to the GNN `get_all_neighbors` function."""
+
+    @wraps(method)
+    def new_method(*args, **kwargs):
+        param_dict = make_param_dict(method, args, kwargs)
+
+        # check node_list; required argument
+        check_list_or_ndarray(param_dict.get("node_list"), 'node_list')
+
+        # check neighbor_type; required argument
+        check_type(param_dict.get("neighbor_type"), 'neighbor_type', int)
+
+        return method(*args, **kwargs)
+
+    return new_method
+
+
+def check_aligned_list(param, param_name):
+    """Check whether the structure of each member of the list is the same."""
+    if not isinstance(param, list):
+        raise TypeError("Parameter {0} is not a list".format(param_name))
+    membor_have_list = None
+    list_len = None
+    for membor in param:
+        if isinstance(membor, list):
+            check_aligned_list(membor, param_name)
+            if membor_have_list not in (None, True):
+                raise TypeError("The type of each member of the parameter {0} is inconsistent".format(
+                    param_name))
+            if list_len is not None and len(membor) != list_len:
+                raise TypeError("The size of each member of parameter {0} is inconsistent".format(
+                    param_name))
+            membor_have_list = True
+            list_len = len(membor)
+        else:
+            if membor_have_list not in (None, False):
+                raise TypeError("The type of each member of the parameter {0} is inconsistent".format(
+                    param_name))
+            membor_have_list = False
+
+
+def check_gnn_get_node_feature(method):
+    """A wrapper that wrap a parameter checker to the GNN `get_node_feature` function."""
+    @wraps(method)
+    def new_method(*args, **kwargs):
+        param_dict = make_param_dict(method, args, kwargs)
+
+        # check node_list; required argument
+        node_list = param_dict.get("node_list")
+        check_list_or_ndarray(node_list, 'node_list')
+        if isinstance(node_list, list):
+            check_aligned_list(node_list, 'node_list')
+
+        # check feature_types; required argument
+        check_list_or_ndarray(param_dict.get("feature_types"), 'feature_types')
+
+        return method(*args, **kwargs)
+
+    return new_method
--- a/mindspore/mindrecord/__init__.inter
+++ b/mindspore/mindrecord/__init__.inter
@@ -32,6 +32,7 @@ from .mindpage import MindPage
 from .shardutils import SUCCESS, FAILED
 from .tools.cifar10_to_mr import Cifar10ToMR
 from .tools.cifar100_to_mr import Cifar100ToMR
+from .tools.graph_map_schema import GraphMapSchema
 from .tools.imagenet_to_mr import ImageNetToMR
 from .tools.mnist_to_mr import MnistToMR


--- a/mindspore/mindrecord/__init__.py
+++ b/mindspore/mindrecord/__init__.py
@@ -29,9 +29,10 @@ from .common.exceptions import *
 from .shardutils import SUCCESS, FAILED
 from .tools.cifar10_to_mr import Cifar10ToMR
 from .tools.cifar100_to_mr import Cifar100ToMR
+from .tools.graph_map_schema import GraphMapSchema
 from .tools.imagenet_to_mr import ImageNetToMR
 from .tools.mnist_to_mr import MnistToMR

-__all__ = ['FileWriter', 'FileReader', 'MindPage',
+__all__ = ['FileWriter', 'FileReader', 'MindPage', 'GraphMapSchema',
           'Cifar10ToMR', 'Cifar100ToMR', 'ImageNetToMR', 'MnistToMR',
           'SUCCESS', 'FAILED']
--- a/mindspore/mindrecord/tools/graph_map_schema.py
+++ b/mindspore/mindrecord/tools/graph_map_schema.py
+# Copyright 2019 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""
+Graph data convert tool for MindRecord.
+"""
+import numpy as np
+
+__all__ = ['GraphMapSchema']
+
+
+class GraphMapSchema:
+    """
+    Class is for transformation from graph data to MindRecord.
+    """
+
+    def __init__(self):
+        """
+        init
+        """
+        self.num_node_features = 0
+        self.num_edge_features = 0
+        self.union_schema_in_mindrecord = {
+            "first_id": {"type": "int64"},
+            "second_id": {"type": "int64"},
+            "third_id": {"type": "int64"},
+            "type": {"type": "int32"},
+            "attribute": {"type": "string"},  # 'n' for ndoe, 'e' for edge
+            "node_feature_index": {"type": "int32", "shape": [-1]},
+            "edge_feature_index": {"type": "int32", "shape": [-1]}
+        }
+
+    def get_schema(self):
+        """
+        Get schema
+        """
+        return self.union_schema_in_mindrecord
+
+    def set_node_feature_profile(self, num_features, features_data_type, features_shape):
+        """
+        Set node features profile
+        """
+        if num_features != len(features_data_type) or num_features != len(features_shape):
+            raise ValueError("Node feature profile is not match.")
+
+        self.num_node_features = num_features
+        for i in range(num_features):
+            k = i + 1
+            field_key = 'node_feature_' + str(k)
+            field_value = {"type": features_data_type[i], "shape": features_shape[i]}
+            self.union_schema_in_mindrecord[field_key] = field_value
+
+    def set_edge_feature_profile(self, num_features, features_data_type, features_shape):
+        """
+        Set edge features profile
+        """
+        if num_features != len(features_data_type) or num_features != len(features_shape):
+            raise ValueError("Edge feature profile is not match.")
+
+        self.num_edge_features = num_features
+        for i in range(num_features):
+            k = i + 1
+            field_key = 'edge_feature_' + str(k)
+            field_value = {"type": features_data_type[i], "shape": features_shape[i]}
+            self.union_schema_in_mindrecord[field_key] = field_value
+
+    def transform_node(self, node):
+        """
+        Executes transformation from node data to union format.
+        Args:
+            node(schema): node's data
+        Returns:
+            graph data with union schema
+        """
+        node_graph = {"first_id": node["id"], "second_id": 0, "third_id": 0, "attribute": 'n', "type": node["type"],
+                      "node_feature_index": []}
+        for i in range(self.num_node_features):
+            k = i + 1
+            node_field_key = 'feature_' + str(k)
+            graph_field_key = 'node_feature_' + str(k)
+            graph_field_type = self.union_schema_in_mindrecord[graph_field_key]["type"]
+            if node_field_key in node:
+                node_graph["node_feature_index"].append(k)
+                node_graph[graph_field_key] = np.reshape(np.array(node[node_field_key], dtype=graph_field_type), [-1])
+            else:
+                node_graph[graph_field_key] = np.reshape(np.array([0], dtype=graph_field_type), [-1])
+
+        if node_graph["node_feature_index"]:
+            node_graph["node_feature_index"] = np.array(node_graph["node_feature_index"], dtype="int32")
+        else:
+            node_graph["node_feature_index"] = np.array([-1], dtype="int32")
+
+        node_graph["edge_feature_index"] = np.array([-1], dtype="int32")
+        for i in range(self.num_edge_features):
+            k = i + 1
+            graph_field_key = 'edge_feature_' + str(k)
+            graph_field_type = self.union_schema_in_mindrecord[graph_field_key]["type"]
+            node_graph[graph_field_key] = np.reshape(np.array([0], dtype=graph_field_type), [-1])
+        return node_graph
+
+    def transform_edge(self, edge):
+        """
+        Executes transformation from edge data to union format.
+        Args:
+            edge(schema): edge's data
+        Returns:
+            graph data with union schema
+        """
+        edge_graph = {"first_id": edge["id"], "second_id": edge["src_id"], "third_id": edge["dst_id"], "attribute": 'e',
+                      "type": edge["type"], "edge_feature_index": []}
+
+        for i in range(self.num_edge_features):
+            k = i + 1
+            edge_field_key = 'feature_' + str(k)
+            graph_field_key = 'edge_feature_' + str(k)
+            graph_field_type = self.union_schema_in_mindrecord[graph_field_key]["type"]
+            if edge_field_key in edge:
+                edge_graph["edge_feature_index"].append(k)
+                edge_graph[graph_field_key] = np.reshape(np.array(edge[edge_field_key], dtype=graph_field_type), [-1])
+            else:
+                edge_graph[graph_field_key] = np.reshape(np.array([0], dtype=graph_field_type), [-1])
+
+        if edge_graph["edge_feature_index"]:
+            edge_graph["edge_feature_index"] = np.array(edge_graph["edge_feature_index"], dtype="int32")
+        else:
+            edge_graph["edge_feature_index"] = np.array([-1], dtype="int32")
+
+        edge_graph["node_feature_index"] = np.array([-1], dtype="int32")
+        for i in range(self.num_node_features):
+            k = i + 1
+            graph_field_key = 'node_feature_' + str(k)
+            graph_field_type = self.union_schema_in_mindrecord[graph_field_key]["type"]
+            edge_graph[graph_field_key] = np.array([0], dtype=graph_field_type)
+        return edge_graph
--- a/tests/ut/cpp/dataset/CMakeLists.txt
+++ b/tests/ut/cpp/dataset/CMakeLists.txt
@@ -70,6 +70,7 @@ SET(DE_UT_SRCS
    concat_op_test.cc
    jieba_tokenizer_op_test.cc
    tokenizer_op_test.cc
+    gnn_graph_test.cc
    )

 add_executable(de_ut_tests ${DE_UT_SRCS})

--- a/tests/ut/cpp/dataset/gnn_graph_test.cc
+++ b/tests/ut/cpp/dataset/gnn_graph_test.cc
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <string>
+#include <memory>
+
+#include "common/common.h"
+#include "gtest/gtest.h"
+#include "dataset/util/status.h"
+#include "dataset/engine/gnn/node.h"
+#include "dataset/engine/gnn/graph_loader.h"
+
+using namespace mindspore::dataset;
+using namespace mindspore::dataset::gnn;
+
+class MindDataTestGNNGraph : public UT::Common {
+ protected:
+  MindDataTestGNNGraph() = default;
+};
+
+TEST_F(MindDataTestGNNGraph, TestGraphLoader) {
+  std::string path = "data/mindrecord/testGraphData/testdata";
+  GraphLoader gl(path, 4);
+  EXPECT_TRUE(gl.InitAndLoad().IsOk());
+  NodeIdMap n_id_map;
+  EdgeIdMap e_id_map;
+  NodeTypeMap n_type_map;
+  EdgeTypeMap e_type_map;
+  NodeFeatureMap n_feature_map;
+  EdgeFeatureMap e_feature_map;
+  DefaultFeatureMap default_feature_map;
+  EXPECT_TRUE(gl.GetNodesAndEdges(&n_id_map, &e_id_map, &n_type_map, &e_type_map, &n_feature_map, &e_feature_map,
+                                  &default_feature_map)
+                .IsOk());
+  EXPECT_EQ(n_id_map.size(), 20);
+  EXPECT_EQ(e_id_map.size(), 20);
+  EXPECT_EQ(n_type_map[2].size(), 10);
+  EXPECT_EQ(n_type_map[1].size(), 10);
+}
+
+TEST_F(MindDataTestGNNGraph, TestGetAllNeighbors) {
+  std::string path = "data/mindrecord/testGraphData/testdata";
+  Graph graph(path, 1);
+  Status s = graph.Init();
+  EXPECT_TRUE(s.IsOk());
+
+  std::vector<NodeMetaInfo> node_info;
+  std::vector<EdgeMetaInfo> edge_info;
+  s = graph.GetMetaInfo(&node_info, &edge_info);
+  EXPECT_TRUE(s.IsOk());
+  EXPECT_TRUE(node_info.size() == 2);
+
+  std::shared_ptr<Tensor> nodes;
+  s = graph.GetNodes(node_info[1].type, -1, &nodes);
+  EXPECT_TRUE(s.IsOk());
+  std::vector<NodeIdType> node_list;
+  for (auto itr = nodes->begin<NodeIdType>(); itr != nodes->end<NodeIdType>(); ++itr) {
+    node_list.push_back(*itr);
+    if (node_list.size() >= 10) {
+      break;
+    }
+  }
+  std::shared_ptr<Tensor> neighbors;
+  s = graph.GetAllNeighbors(node_list, node_info[0].type, &neighbors);
+  EXPECT_TRUE(s.IsOk());
+  EXPECT_TRUE(neighbors->shape().ToString() == "<10,6>");
+  TensorRow features;
+  s = graph.GetNodeFeature(nodes, node_info[1].feature_type, &features);
+  EXPECT_TRUE(s.IsOk());
+  EXPECT_TRUE(features.size() == 3);
+  EXPECT_TRUE(features[0]->shape().ToString() == "<10,5>");
+  EXPECT_TRUE(features[0]->ToString() ==
+              "Tensor (shape: <10,5>, Type: int32)\n"
+              "[[0,1,0,0,0],[1,0,0,0,1],[0,0,1,1,0],[0,0,0,0,0],[1,1,0,1,0],[0,0,0,0,1],[0,1,0,0,0],[0,0,0,1,1],[0,1,1,"
+              "0,0],[0,1,0,1,0]]");
+  EXPECT_TRUE(features[1]->shape().ToString() == "<10>");
+  EXPECT_TRUE(features[1]->ToString() ==
+              "Tensor (shape: <10>, Type: float32)\n[0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1]");
+  EXPECT_TRUE(features[2]->shape().ToString() == "<10>");
+  EXPECT_TRUE(features[2]->ToString() == "Tensor (shape: <10>, Type: int32)\n[1,2,3,1,4,3,5,3,5,4]");
+}
--- a/tests/ut/data/mindrecord/testGraphData/testdata
+++ b/tests/ut/data/mindrecord/testGraphData/testdata
--- a/tests/ut/data/mindrecord/testGraphData/testdata.db
+++ b/tests/ut/data/mindrecord/testGraphData/testdata.db
--- a/tests/ut/python/dataset/test_graphdata.py
+++ b/tests/ut/python/dataset/test_graphdata.py
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+import pytest
+import mindspore.dataset as ds
+from mindspore import log as logger
+
+DATASET_FILE = "../data/mindrecord/testGraphData/testdata"
+
+
+def test_graphdata_getfullneighbor():
+    g = ds.GraphData(DATASET_FILE, 2)
+    nodes = g.get_all_nodes(1)
+    assert len(nodes) is 10
+    nodes_list = nodes.tolist()
+    neighbor = g.get_all_neighbors(nodes_list, 2)
+    assert neighbor.shape == (10, 6)
+    row_tensor = g.get_node_feature(neighbor.tolist(), [2, 3])
+    assert row_tensor[0].shape == (10, 6)
+
+
+def test_graphdata_getnodefeature_input_check():
+    g = ds.GraphData(DATASET_FILE)
+    with pytest.raises(TypeError):
+        input_list = [1, [1, 1]]
+        g.get_node_feature(input_list, [1])
+
+    with pytest.raises(TypeError):
+        input_list = [[1, 1], 1]
+        g.get_node_feature(input_list, [1])
+
+    with pytest.raises(TypeError):
+        input_list = [[1, 1], [1, 1, 1]]
+        g.get_node_feature(input_list, [1])
+
+    with pytest.raises(TypeError):
+        input_list = [[1, 1, 1], [1, 1]]
+        g.get_node_feature(input_list, [1])
+
+    with pytest.raises(TypeError):
+        input_list = [[1, 1], [1, [1, 1]]]
+        g.get_node_feature(input_list, [1])
+
+    with pytest.raises(TypeError):
+        input_list = [[1, 1], [[1, 1], 1]]
+        g.get_node_feature(input_list, [1])
+
+    with pytest.raises(TypeError):
+        input_list = [[1, 1], [1, 1]]
+        g.get_node_feature(input_list, 1)
+
+    with pytest.raises(TypeError):
+        input_list = [[1, 1], [1, 1]]
+        g.get_node_feature(input_list, ["a"])
+
+    with pytest.raises(TypeError):
+        input_list = [[1, 1], [1, 1]]
+        g.get_node_feature(input_list, [1, "a"])
+
+
+if __name__ == '__main__':
+    test_graphdata_getfullneighbor()
+    logger.info('test_graphdata_getfullneighbor Ended.\n')
+    test_graphdata_getnodefeature_input_check()
+    logger.info('test_graphdata_getnodefeature_input_check Ended.\n')
--- a/tests/ut/python/dataset/test_minddataset_sampler.py
+++ b/tests/ut/python/dataset/test_minddataset_sampler.py
@@ -89,6 +89,8 @@ def test_cv_minddataset_pk_sample_basic(add_and_remove_cv_file):
    num_iter = 0
    for item in data_set.create_dict_iterator():
        logger.info("-------------- cv reader basic: {} ------------------------".format(num_iter))
+        logger.info("-------------- item[data]: \
+                {}------------------------".format(item["data"][:10]))
        logger.info("-------------- item[file_name]: \
                {}------------------------".format("".join([chr(x) for x in item["file_name"]])))
        logger.info("-------------- item[label]: {} ----------------------------".format(item["label"]))