#!/usr/bin/env python
# -*- coding: UTF-8 -*-

import json
import paddle.fluid as fluid
import paddle
import numpy as np
import collections
import math
import sys as sys
import os
import struct

#常量控制
#抽样打印数据数量
logDataCount = 50

# 输入模型所在目录
modelDir = "humanseg/"
# 输入模型名
modelName = "model"
# 输入参数名，当且仅当所有模型参数被保存在一个单独的二进制文件中，它才需要被指定，若为分片模型，请设置为None
paramsName = None
# 模型feed shape
inputShape = (1, 3, 192, 192)
# 输入数据
inputData = np.full(inputShape, 1, "float32")
# 输出模型目录
outputDir = "../dist/model/humanseg/"
# 权重分片扩展名
extensionName = ".dat"
# 输出各var数据
outputVarData = False

# 确认fluid的版本号
print(paddle.__version__)

# 采样输出list数据，采样的个数logDataCount为常量
def stridePrint1(data):
    dataCount = len(data)
    stride = math.floor(dataCount / logDataCount)
    if stride == 0:
        stride = 1
    nums = []
    # outputCount = logDataCount
    # if dataCount < logDataCount:
    #     outputCount = dataCount
    # for i in range(outputCount):
    #     # nums.append(str(i) + ": " + str(data[i]))
    #     nums.append(data[i])
    
    for i in range(0, logDataCount):
        item = data[i * stride]
        nums.append(str(i * stride) + ": " + str(item))
    print(nums)

def stridePrint(tensor):
    length = len(tensor)
#    if length < 3000:
#        print(tensor)
#        return
    size = 20
    stride = math.floor(length / size)
    if stride == 0:
        stride = 1
    size = math.floor(length / stride)
    nums = []
    for i in range(0, size):
        item = tensor[i * stride]
        nums.append(str(i * stride) + ": " + str(item))
    print(nums)


# 对字典进行排序，返回有序字典，默认升序
def sortDict(oldDict, reverse = False):
    # 获得排序后的key list
    keys = sorted(oldDict.keys(), reverse = reverse)
    orderDict = collections.OrderedDict()
    # 遍历 key 列表
    for key in keys:
        orderDict[key] = oldDict[key]
    return orderDict


# 将权重数据分片输出到文件，默认分片策略为按4M分片
def sliceDataToBinaryFile(weightValueList, sliceMethod = 0):
    # TODO: 分片这里不太对，待修改
    totalWeightCount = len(weightValueList)
    countPerSlice = 0
    # sliceCount = 0
    if sliceMethod == 0:
        # 分片策略 0:按4M分片
        countPerSlice = int(4 * 1024 * 1024 / 4)
        # sliceCount = math.ceil(totalWeightCount / countPerSlice)
    else:
        # 分片策略 1:按<=4M等分
        # TODO: 待实现
        countPerSlice = 0
        # sliceCount = 0

    if not os.path.exists(outputDir):
        os.makedirs(outputDir)
    currentChunkIndex = 0
    currentWeightIndex = 0

    while currentWeightIndex < totalWeightCount - 1:
        remainCount = totalWeightCount - currentWeightIndex
        if remainCount < countPerSlice:
            countPerSlice = remainCount
        chunkPath = outputDir + 'chunk_%s' % (currentChunkIndex + 1) + extensionName
        file = open(chunkPath, 'wb')
        for i in weightValueList[currentWeightIndex : currentWeightIndex + countPerSlice]:
            byte = struct.pack('f', float(i))
            file.write(byte)
        file.close()
        currentWeightIndex = currentWeightIndex + countPerSlice
        currentChunkIndex = currentChunkIndex + 1
        # for debug
        print("第" + str(currentChunkIndex + 1) + "片权重输出完毕，输出个数：" + str(countPerSlice) + " 剩余个数:" + str(totalWeightCount - currentWeightIndex))

    # for debug
    print("========权重输出完毕，共" + str(currentWeightIndex) + "个数据，" + str(currentChunkIndex) + "个分片文件" + "========")

# 处理fluid的OP type与PaddleJS的OP type不对应情况
def mapToPaddleJSTypeName(fluidOPName):
    if fluidOPName == "batch_norm":
        return "batchnorm"
    return fluidOPName


# 将shape扩充为4维
def padToFourDimShape(shape):
    fourDimShape = []
    if len(shape) == 4:
        fourDimShape = shape
    elif len(shape) < 4:
        for i in range(0, 4 - len(shape)):
            fourDimShape.append(1)
        fourDimShape = fourDimShape + shape
    else:
        return []
    return fourDimShape


# for debug，将NCHW排布的数据转为NHWC排布的数据
def convertNCHW2NHWC(data, shape):
    fourDimShape = padToFourDimShape(shape)
    N = fourDimShape[0]
    C = fourDimShape[1]
    H = fourDimShape[2]
    W = fourDimShape[3]
    print(fourDimShape)
    HXW = H * W
    CXHXW = C * H * W
    index = 0
    nhwcData = []
    for n in range(0, N):
        for h in range(0, H):
            for w in range(0, W):
                for c in range(0, C):
                    nhwcData.append(data[n * CXHXW + c * HXW + h * W + w])
                    index = index + 1
    return nhwcData

# for debug 输出特定varName对应的数据
def writeTempOutputData(name):
    # FIXME:待完善
    return
    dataList = np.array(fluid.global_scope().find_var(name).get_tensor()).flatten().tolist()
    path = '/Users/bluebird/baidu/fluid_tools/check-temp/filter.txt'
    if os.path.exists(path):
        os.remove()
        file = open(path,'a')
        for a in range(0, len(dataList)):
            file.write(str(dataList[a]))
            file.write(",")
        file.close()

def convertToPaddleJSModel():
    # 1. 初始化fluid运行环境和配置
    exe = fluid.Executor(fluid.CPUPlace())
    [prog, feed_target_names, fetch_targets] = fluid.io.load_inference_model(dirname=modelDir, executor=exe, model_filename=modelName, params_filename=paramsName)
    out = exe.run(prog, feed={feed_target_names[0]: inputData}, fetch_list=fetch_targets, return_numpy=False)
    print(out)
    
    index = 0
    # 存放模型结构
    modelInfo = {"vars": [], "ops": []}
    # 存放var信息（未排序）
    varInfoDict = {}
    # 存放权重数值（未排序）
    weightValueDict = {}

    # 2. 获取program中所有的var，遍历并获取所有未排序的var信息和权重数值
    vars = list(prog.list_vars())
    for v in vars:
        # 跳过feed和fetch
        if "feed" == v.name:
            continue
        if "fetch" == v.name:
            continue

        print("Var index:" + str(index) + " name:" + v.name)
        print(v)
        index += 1

        varShape = list(v.shape)

        # FIXME:start paddlejs 不支持shape中为-1，这里需要手动过滤一下，支持了以后可以删除
        varShapeExcludeNegativeOne = []
        for s in varShape:
            if s == -1:
                continue
            varShapeExcludeNegativeOne.append(s)
        varShape = varShapeExcludeNegativeOne
        # FIXME:end

        # 存放variable信息，在dump成json时排序
        varInfo = {}
        varInfo["shape"] = varShape
        # 数据是否是持久化数据，如weight为持久化数据，op的output不是持久化数据
        # 只输出持久化数据，paddlejs中也仅读取持久化数据
        varInfo["persistable"] = v.persistable
        varInfoDict[v.name] = varInfo
        
        # for debug，输出var变量
        if outputVarData:
            writeTempOutputData(v.name)

        # persistable数据存入weightDict，等待排序
        if v.persistable:
            data = np.array(fluid.global_scope().find_var(v.name).get_tensor()).flatten().tolist()
            weightValueDict[v.name] = data

    # 3. 对var信息dict，按照key（var名）进行字母顺序排序
    varInfoOrderDict = sortDict(varInfoDict)

    # 4. 将var信息按照顺序，添加到model info的vars中
    for key, value in varInfoOrderDict.items():
        value["name"] = key
        modelInfo["vars"].append(value)
    
    # 5. 对权重数值dict，按照key（权重名）进行字母顺序排序，并组合到一起
    weightValueOrderDict = sortDict(weightValueDict)
    weightValues = []
    for key, value in weightValueOrderDict.items():
        weightValues += value
    
    # 6. 分片输出权重
    sliceDataToBinaryFile(weightValues)

    # 7. 获取program中所有的op，按op顺序加入到model info
    ops = prog.current_block().ops
    feedOutputName = None
    for op in ops:
        opInfo = {}

        # 获取OP type，需要映射到PaddleJS的名字
        opInfo["type"] = mapToPaddleJSTypeName(op.type)
        
        # 获取OP input
        inputs = {}
        for name in op.input_names:
            value = op.input(name)
            if len(value) <= 0:
                continue
            if value[0] == feedOutputName:
                # FIXME:workaround,PaddleJSfeed 输入必须是image，且为单输入
                # 这里修改feed后面的OP的input为image，建立前后关联
                # 这里可能会有问题
                inputs[name] = ["image"]
            else:
                inputs[name] = value
        opInfo["inputs"] = inputs
        
        # 获取OP output
        outputs = {}
        for name in op.output_names:
            value = op.output(name)
            if len(value) <= 0:
                continue
            if op.type == "feed":
                # FIXME:workaround,PaddleJSfeed 输入必须是image，且为单输入
                # 这里可能会有问题
                feedOutputName = value[0]
                outputs[name] = ["image"]
            else:
                outputs[name] = value
        opInfo["outputs"] = outputs

        # 获取OP attribute    
        attrs = {}
        for name in op.attr_names:
            # 过滤不需要的参数
            if name in ["op_callstack", 'col', 'op_role', 'op_namescope', 'op_role_var']:
                continue
            value = op.attr(name)
            attrs[name] = value
        opInfo["attrs"] = attrs

        # 存入modelInfo 
        modelInfo["ops"].append(opInfo)

    # 8. 模型信息按照key字母顺序导出到json
    outputModelPath = outputDir + "model.json"
    with open(outputModelPath, 'w') as outputFile:
        json.dump(modelInfo, outputFile, indent = 4, separators=(", ", ": "), sort_keys = True)

    print("========模型结构输出完毕========")

convertToPaddleJSModel()