add comment for channel

1fe73723 · barrierye · e9efd633 · 1fe73723 · 1fe73723 · 1fe73723
Showing 3 changed files with 159 additions and 107 deletions

python/pipeline/channel.py python/pipeline/channel.py +144 -97

python/pipeline/operator.py python/pipeline/operator.py +0 -3

python/pipeline/pipeline_server.py python/pipeline/pipeline_server.py +15 -7

未找到文件。
--- a/python/pipeline/channel.py
+++ b/python/pipeline/channel.py
@@ -143,6 +143,17 @@ class ProcessChannel(multiprocessing.queues.Queue):
    1. The ID of the data in the channel must be different.
    2. The function add_producer() and add_consumer() are not thread safe,
       and can only be called during initialization.
+
+    There are two buffers and one queue in Channel:
+
+        op_A \                                           / op_D
+        op_B - a. input_buf -> b. queue -> c. output_buf - op_E
+        op_C /                                           \ op_F
+    
+    a. In input_buf, the input of multiple predecessor Ops is packed by data ID.
+    b. The packed data will be stored in queue.
+    c. In order to support multiple successor Ops to retrieve data, output_buf
+        maintains the data obtained from queue.
    """

    def __init__(self, manager, name=None, maxsize=0, timeout=None):
@@ -162,19 +173,19 @@ class ProcessChannel(multiprocessing.queues.Queue):
        self._cv = multiprocessing.Condition()

        self._producers = []
-        self._producer_res_count = manager.dict()  # {data_id: count}
-        self._push_res = manager.dict()  # {data_id: {op_name: data}}
+        self.pushed_producer_count = manager.dict()  # {data_id: count}
+        self._input_buf = manager.dict()  # {data_id: {op_name: data}}

-        self._consumers = manager.dict()  # {op_name: idx}
-        self._idx_consumer_num = manager.dict()  # {idx: num}
-        self._consumer_base_idx = manager.Value('i', 0)
-        self._front_res = manager.list()
+        self._consumer_cursors = manager.dict()  # {op_name: cursor}
+        self._cursor_count = manager.dict()  # {cursor: count}
+        self._base_cursor = manager.Value('i', 0)
+        self._output_buf = manager.list()

    def get_producers(self):
        return self._producers

    def get_consumers(self):
-        return self._consumers.keys()
+        return self._consumer_cursors.keys()

    def _log(self, info_str):
        return "[{}] {}".format(self.name, info_str)
@@ -192,14 +203,14 @@ class ProcessChannel(multiprocessing.queues.Queue):

    def add_consumer(self, op_name):
        """ not thread safe, and can only be called during initialization. """
-        if op_name in self._consumers:
+        if op_name in self._consumer_cursors:
            raise ValueError(
                self._log("consumer({}) is already in channel".format(op_name)))
-        self._consumers[op_name] = 0
+        self._consumer_cursors[op_name] = 0

-        if self._idx_consumer_num.get(0) is None:
-            self._idx_consumer_num[0] = 0
-        self._idx_consumer_num[0] += 1
+        if self._cursor_count.get(0) is None:
+            self._cursor_count[0] = 0
+        self._cursor_count[0] += 1

    def push(self, channeldata, op_name=None):
        _LOGGER.debug(
@@ -235,24 +246,24 @@ class ProcessChannel(multiprocessing.queues.Queue):
        put_data = None
        with self._cv:
            _LOGGER.debug(self._log("{} get lock".format(op_name)))
-            if data_id not in self._push_res:
-                self._push_res[data_id] = {
+            if data_id not in self._input_buf:
+                self._input_buf[data_id] = {
                    name: None
                    for name in self._producers
                }
-                self._producer_res_count[data_id] = 0
+                self.pushed_producer_count[data_id] = 0
            # see: https://docs.python.org/3.6/library/multiprocessing.html?highlight=multiprocess#proxy-objects
-            # self._push_res[data_id][op_name] = channeldata
-            tmp_push_res = self._push_res[data_id]
-            tmp_push_res[op_name] = channeldata
-            self._push_res[data_id] = tmp_push_res
-
-            if self._producer_res_count[data_id] + 1 == producer_num:
-                put_data = self._push_res[data_id]
-                self._push_res.pop(data_id)
-                self._producer_res_count.pop(data_id)
+            # self._input_buf[data_id][op_name] = channeldata
+            tmp_input_buf = self._input_buf[data_id]
+            tmp_input_buf[op_name] = channeldata
+            self._input_buf[data_id] = tmp_input_buf
+
+            if self.pushed_producer_count[data_id] + 1 == producer_num:
+                put_data = self._input_buf[data_id]
+                self._input_buf.pop(data_id)
+                self.pushed_producer_count.pop(data_id)
            else:
-                self._producer_res_count[data_id] += 1
+                self.pushed_producer_count[data_id] += 1

            if put_data is None:
                _LOGGER.debug(
@@ -276,12 +287,12 @@ class ProcessChannel(multiprocessing.queues.Queue):

    def front(self, op_name=None):
        _LOGGER.debug(self._log("{} try to get data...".format(op_name)))
-        if len(self._consumers) == 0:
+        if len(self._consumer_cursors) == 0:
            raise Exception(
                self._log(
                    "expected number of consumers to be greater than 0, but the it is 0."
                ))
-        elif len(self._consumers) == 1:
+        elif len(self._consumer_cursors) == 1:
            resp = None
            with self._cv:
                while self._stop is False and resp is None:
@@ -312,16 +323,26 @@ class ProcessChannel(multiprocessing.queues.Queue):
                self._log(
                    "There are multiple consumers, so op_name cannot be None."))

+        # In output_buf, different Ops (according to op_name) have different
+        # cursors. In addition, there is a base_cursor. Their difference is
+        # the data_idx to be taken by the corresponding Op at the current
+        # time:    data_idx = consumer_cursor - base_cursor
+        # 
+        #            base_cursor    consumer_B_cursor (data_idx: 3)
+        #                 |                       |
+        # output_buf: | data0 | data1 | data2 | data3 |
+        #                 |
+        #   consumer_A_cursor (data_idx: 0)
        with self._cv:
-            # data_idx = consumer_idx - base_idx
-            while self._stop is False and self._consumers[
-                    op_name] - self._consumer_base_idx.value >= len(
-                        self._front_res):
+            # When the data required by the current Op is not yet in output_buf,
+            # fetch one item from the queue and append it to output_buf.
+            while self._stop is False and self._consumer_cursors[
+                    op_name] - self._base_cursor.value >= len(self._output_buf):
                _LOGGER.debug(
                    self._log(
-                        "({}) B self._consumers: {}, self._consumer_base_idx: {}, len(self._front_res): {}".
-                        format(op_name, self._consumers, self.
-                               _consumer_base_idx.value, len(self._front_res))))
+                        "({}) B self._consumer_cursors: {}, self._base_cursor: {}, len(self._output_buf): {}".
+                        format(op_name, self._consumer_cursors,
+                               self._base_cursor.value, len(self._output_buf))))
                try:
                    _LOGGER.debug(
                        self._log("{} try to get(with channel size: {})".format(
@@ -333,7 +354,7 @@ class ProcessChannel(multiprocessing.queues.Queue):
                    # - https://bugs.python.org/issue18277
                    # - https://hg.python.org/cpython/rev/860fc6a2bd21
                    channeldata = self.get(timeout=1e-3)
-                    self._front_res.append(channeldata)
+                    self._output_buf.append(channeldata)
                    break
                except Queue.Empty:
                    _LOGGER.debug(
@@ -342,29 +363,31 @@ class ProcessChannel(multiprocessing.queues.Queue):
                            format(op_name, self.qsize())))
                    self._cv.wait()

-            consumer_idx = self._consumers[op_name]
-            base_idx = self._consumer_base_idx.value
-            data_idx = consumer_idx - base_idx
-            resp = self._front_res[data_idx]
+            consumer_cursor = self._consumer_cursors[op_name]
+            base_cursor = self._base_cursor.value
+            data_idx = consumer_cursor - base_cursor
+            resp = self._output_buf[data_idx]
            _LOGGER.debug(self._log("{} get data: {}".format(op_name, resp)))

-            self._idx_consumer_num[consumer_idx] -= 1
-            if consumer_idx == base_idx and self._idx_consumer_num[
-                    consumer_idx] == 0:
-                self._idx_consumer_num.pop(consumer_idx)
-                self._front_res.pop(0)
-                self._consumer_base_idx.value += 1
-
-            self._consumers[op_name] += 1
-            new_consumer_idx = self._consumers[op_name]
-            if self._idx_consumer_num.get(new_consumer_idx) is None:
-                self._idx_consumer_num[new_consumer_idx] = 0
-            self._idx_consumer_num[new_consumer_idx] += 1
+            self._cursor_count[consumer_cursor] -= 1
+            if consumer_cursor == base_cursor and self._cursor_count[
+                    consumer_cursor] == 0:
+                # When all the different Ops get the data that data_idx points
+                # to, pop the data from output_buf.
+                self._cursor_count.pop(consumer_cursor)
+                self._output_buf.pop(0)
+                self._base_cursor.value += 1
+
+            self._consumer_cursors[op_name] += 1
+            new_consumer_cursor = self._consumer_cursors[op_name]
+            if self._cursor_count.get(new_consumer_cursor) is None:
+                self._cursor_count[new_consumer_cursor] = 0
+            self._cursor_count[new_consumer_cursor] += 1
            _LOGGER.debug(
                self._log(
-                    "({}) A self._consumers: {}, self._consumer_base_idx: {}, len(self._front_res): {}".
-                    format(op_name, self._consumers, self._consumer_base_idx.
-                           value, len(self._front_res))))
+                    "({}) A self._consumer_cursors: {}, self._base_cursor: {}, len(self._output_buf): {}".
+                    format(op_name, self._consumer_cursors,
+                           self._base_cursor.value, len(self._output_buf))))
            _LOGGER.debug(self._log("{} notify all".format(op_name)))
            self._cv.notify_all()

@@ -394,6 +417,17 @@ class ThreadChannel(Queue.Queue):
    1. The ID of the data in the channel must be different.
    2. The function add_producer() and add_consumer() are not thread safe,
       and can only be called during initialization.
+
+    There are two buffers and one queue in Channel:
+
+        op_A \                                           / op_D
+        op_B - a. input_buf -> b. queue -> c. output_buf - op_E
+        op_C /                                           \ op_F
+    
+    a. In input_buf, the input of multiple predecessor Ops is packed by data ID.
+    b. The packed data will be stored in queue.
+    c. In order to support multiple successor Ops to retrieve data, output_buf
+        maintains the data obtained from queue.
    """

    def __init__(self, name=None, maxsize=-1, timeout=None):
@@ -406,19 +440,19 @@ class ThreadChannel(Queue.Queue):
        self._cv = threading.Condition()

        self._producers = []
-        self._producer_res_count = {}  # {data_id: count}
-        self._push_res = {}  # {data_id: {op_name: data}}
+        self.pushed_producer_count = {}  # {data_id: count}
+        self._input_buf = {}  # {data_id: {op_name: data}}

-        self._consumers = {}  # {op_name: idx}
-        self._idx_consumer_num = {}  # {idx: num}
-        self._consumer_base_idx = 0
-        self._front_res = []
+        self._consumer_cursors = {}  # {op_name: cursor}
+        self._cursor_count = {}  # {cursor: count}
+        self._base_cursor = 0
+        self._output_buf = []

    def get_producers(self):
        return self._producers

    def get_consumers(self):
-        return self._consumers.keys()
+        return self._consumer_cursors.keys()

    def _log(self, info_str):
        return "[{}] {}".format(self.name, info_str)
@@ -436,14 +470,14 @@ class ThreadChannel(Queue.Queue):

    def add_consumer(self, op_name):
        """ not thread safe, and can only be called during initialization. """
-        if op_name in self._consumers:
+        if op_name in self._consumer_cursors:
            raise ValueError(
                self._log("consumer({}) is already in channel".format(op_name)))
-        self._consumers[op_name] = 0
+        self._consumer_cursors[op_name] = 0

-        if self._idx_consumer_num.get(0) is None:
-            self._idx_consumer_num[0] = 0
-        self._idx_consumer_num[0] += 1
+        if self._cursor_count.get(0) is None:
+            self._cursor_count[0] = 0
+        self._cursor_count[0] += 1

    def push(self, channeldata, op_name=None):
        _LOGGER.debug(
@@ -475,19 +509,19 @@ class ThreadChannel(Queue.Queue):
        put_data = None
        with self._cv:
            _LOGGER.debug(self._log("{} get lock".format(op_name)))
-            if data_id not in self._push_res:
-                self._push_res[data_id] = {
+            if data_id not in self._input_buf:
+                self._input_buf[data_id] = {
                    name: None
                    for name in self._producers
                }
-                self._producer_res_count[data_id] = 0
-            self._push_res[data_id][op_name] = channeldata
-            if self._producer_res_count[data_id] + 1 == producer_num:
-                put_data = self._push_res[data_id]
-                self._push_res.pop(data_id)
-                self._producer_res_count.pop(data_id)
+                self.pushed_producer_count[data_id] = 0
+            self._input_buf[data_id][op_name] = channeldata
+            if self.pushed_producer_count[data_id] + 1 == producer_num:
+                put_data = self._input_buf[data_id]
+                self._input_buf.pop(data_id)
+                self.pushed_producer_count.pop(data_id)
            else:
-                self._producer_res_count[data_id] += 1
+                self.pushed_producer_count[data_id] += 1

            if put_data is None:
                _LOGGER.debug(
@@ -508,12 +542,12 @@ class ThreadChannel(Queue.Queue):

    def front(self, op_name=None):
        _LOGGER.debug(self._log("{} try to get data".format(op_name)))
-        if len(self._consumers) == 0:
+        if len(self._consumer_cursors) == 0:
            raise Exception(
                self._log(
                    "expected number of consumers to be greater than 0, but the it is 0."
                ))
-        elif len(self._consumers) == 1:
+        elif len(self._consumer_cursors) == 1:
            resp = None
            with self._cv:
                while self._stop is False and resp is None:
@@ -531,35 +565,48 @@ class ThreadChannel(Queue.Queue):
                self._log(
                    "There are multiple consumers, so op_name cannot be None."))

+        # In output_buf, different Ops (according to op_name) have different
+        # cursors. In addition, there is a base_cursor. Their difference is
+        # the data_idx to be taken by the corresponding Op at the current
+        # time:    data_idx = consumer_cursor - base_cursor
+        # 
+        #            base_cursor    consumer_B_cursor (data_idx: 3)
+        #                 |                       |
+        # output_buf: | data0 | data1 | data2 | data3 |
+        #                 |
+        #   consumer_A_cursor (data_idx: 0)
        with self._cv:
-            # data_idx = consumer_idx - base_idx
-            while self._stop is False and self._consumers[
-                    op_name] - self._consumer_base_idx >= len(self._front_res):
+            # When the data required by the current Op is not yet in output_buf,
+            # fetch one item from the queue and append it to output_buf.
+            while self._stop is False and self._consumer_cursors[
+                    op_name] - self._base_cursor >= len(self._output_buf):
                try:
                    channeldata = self.get(timeout=0)
-                    self._front_res.append(channeldata)
+                    self._output_buf.append(channeldata)
                    break
                except Queue.Empty:
                    self._cv.wait()

-            consumer_idx = self._consumers[op_name]
-            base_idx = self._consumer_base_idx
-            data_idx = consumer_idx - base_idx
-            resp = self._front_res[data_idx]
+            consumer_cursor = self._consumer_cursors[op_name]
+            base_cursor = self._base_cursor
+            data_idx = consumer_cursor - base_cursor
+            resp = self._output_buf[data_idx]
            _LOGGER.debug(self._log("{} get data: {}".format(op_name, resp)))

-            self._idx_consumer_num[consumer_idx] -= 1
-            if consumer_idx == base_idx and self._idx_consumer_num[
-                    consumer_idx] == 0:
-                self._idx_consumer_num.pop(consumer_idx)
-                self._front_res.pop(0)
-                self._consumer_base_idx += 1
-
-            self._consumers[op_name] += 1
-            new_consumer_idx = self._consumers[op_name]
-            if self._idx_consumer_num.get(new_consumer_idx) is None:
-                self._idx_consumer_num[new_consumer_idx] = 0
-            self._idx_consumer_num[new_consumer_idx] += 1
+            self._cursor_count[consumer_cursor] -= 1
+            if consumer_cursor == base_cursor and self._cursor_count[
+                    consumer_cursor] == 0:
+                # When all the different Ops get the data that data_idx points
+                # to, pop the data from output_buf.
+                self._cursor_count.pop(consumer_cursor)
+                self._output_buf.pop(0)
+                self._base_cursor += 1
+
+            self._consumer_cursors[op_name] += 1
+            new_consumer_cursor = self._consumer_cursors[op_name]
+            if self._cursor_count.get(new_consumer_cursor) is None:
+                self._cursor_count[new_consumer_cursor] = 0
+            self._cursor_count[new_consumer_cursor] += 1

            self._cv.notify_all()


--- a/python/pipeline/operator.py
+++ b/python/pipeline/operator.py
@@ -146,9 +146,6 @@ class Op(object):
        return fetch_dict

    def stop(self):
-        self._input.stop()
-        for channel in self._outputs:
-            channel.stop()
        self._is_run = False

    def _parse_channeldata(self, channeldata_dict):

--- a/python/pipeline/pipeline_server.py
+++ b/python/pipeline/pipeline_server.py
@@ -66,6 +66,7 @@ class PipelineService(pipeline_service_pb2_grpc.PipelineServiceServicer):
        self._globel_resp_dict = {}
        self._id_counter = 0
        self._retry = retry
+        self._is_run = True
        self._pack_func = pack_func
        self._unpack_func = unpack_func
        self._recive_func = threading.Thread(
@@ -91,8 +92,11 @@ class PipelineService(pipeline_service_pb2_grpc.PipelineServiceServicer):
        out_channel.add_consumer(self.name)
        self._out_channel = out_channel

+    def stop(self):
+        self._is_run = False
+
    def _recive_out_channel_func(self):
-        while True:
+        while self._is_run:
            channeldata_dict = self._out_channel.front(self.name)
            if len(channeldata_dict) != 1:
                raise Exception("out_channel cannot have multiple input ops")
@@ -416,22 +420,26 @@ class PipelineServer(object):
                    op.start_with_process(self._client_type))
        return threads_or_proces

-    def _stop_ops(self):
+    def _stop_all(self, service):
+        service.stop()
        for op in self._actual_ops:
            op.stop()
+        for chl in self._channels:
+            chl.stop()

    def run_server(self):
        op_threads_or_proces = self._run_ops()
+        service = PipelineService(self._in_channel, self._out_channel,
+                                  self._unpack_func, self._pack_func,
+                                  self._retry)
        server = grpc.server(
            futures.ThreadPoolExecutor(max_workers=self._worker_num))
-        pipeline_service_pb2_grpc.add_PipelineServiceServicer_to_server(
-            PipelineService(self._in_channel, self._out_channel,
-                            self._unpack_func, self._pack_func, self._retry),
-            server)
+        pipeline_service_pb2_grpc.add_PipelineServiceServicer_to_server(service,
+                                                                        server)
        server.add_insecure_port('[::]:{}'.format(self._port))
        server.start()
        server.wait_for_termination()
-        self._stop_ops()  # TODO
+        self._stop_all(service)  # TODO
        for x in op_threads_or_proces:
            x.join()