lib.py 5.0 KB
Newer Older
J
Jeff Wang 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
# Copyright (c) 2017 VisualDL Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =======================================================================

O
Oraoto 已提交
16
from __future__ import absolute_import
Y
Yan Chunwei 已提交
17
import sys
18
import time
S
superjom 已提交
19
import numpy as np
20 21
from visualdl.server.log import logger
from visualdl.utils.string_util import encode_tag, decode_tag
22

S
superjom 已提交
23

24 25
def get_components(log_reader):
    """Return whatever `log_reader.components()` reports."""
    components = log_reader.components()
    return components
S
superjom 已提交
26

S
superjom 已提交
27

28 29
def get_runs(log_reader):
    """Return whatever `log_reader.runs()` reports."""
    runs = log_reader.runs()
    return runs
30 31


32 33
def get_tags(log_reader):
    """Return whatever `log_reader.tags()` reports."""
    tags = log_reader.tags()
    return tags
S
superjom 已提交
34 35


36 37 38 39 40 41 42 43 44
def get_logs(log_reader, component):
    """
    Group the reservoir keys of `component` by run.

    Every key of the component's reservoir is split at its last '/': the
    text before it is the run, the text after it is passed through
    `encode_tag` and collected under that run.

    Returns:
        dict mapping each run to the list of its encoded tags, in the order
        the keys were iterated.
    """
    grouped = {}
    for key in log_reader.data_manager.get_reservoir(component).keys:
        # NOTE: when the key has no '/', rfind returns -1, so the run becomes
        # the key minus its last character and the tag is the whole key.
        split_at = key.rfind('/')
        run_name = key[:split_at]
        encoded = encode_tag(key[split_at + 1:])
        grouped.setdefault(run_name, []).append(encoded)
    return grouped
48 49


50 51
def get_scalar_tags(log_reader):
    """Return the result of `get_logs` for the "scalar" component."""
    return get_logs(log_reader, component="scalar")
52 53


54 55 56 57 58 59
def get_scalar(log_reader, run, tag):
    """
    Return the scalar records stored for `run` and `tag`.

    New data is loaded first; `tag` is passed through `decode_tag` before
    the lookup in the "scalar" reservoir.

    Returns:
        list of `[timestamp, id, value]` lists, one per record.
    """
    log_reader.load_new_data()
    reservoir = log_reader.data_manager.get_reservoir("scalar")
    points = []
    for record in reservoir.get_items(run, decode_tag(tag)):
        points.append([record.timestamp, record.id, record.value])
    return points
60 61


62 63
def get_image_tags(log_reader):
    """Return the result of `get_logs` for the "image" component."""
    return get_logs(log_reader, component="image")
64 65


66 67 68 69 70 71 72 73 74
def get_image_tag_steps(log_reader, run, tag):
    """
    List the steps recorded for an image tag.

    New data is loaded first; `tag` is passed through `decode_tag` before
    the lookup in the "image" reservoir.

    Returns:
        list of dicts with keys "step" (the record id) and "wallTime" (the
        record timestamp), one per record.
    """
    log_reader.load_new_data()
    reservoir = log_reader.data_manager.get_reservoir("image")
    steps = []
    for record in reservoir.get_items(run, decode_tag(tag)):
        steps.append({"step": record.id, "wallTime": record.timestamp})
    return steps
75 76


77 78 79 80 81
def get_individual_image(log_reader, run, tag, step_index):
    """
    Return the encoded image stored at position `step_index` for a tag.

    New data is loaded first; `tag` is passed through `decode_tag` before
    the lookup in the "image" reservoir. `step_index` indexes directly into
    the returned records.
    """
    log_reader.load_new_data()
    reservoir = log_reader.data_manager.get_reservoir("image")
    records = reservoir.get_items(run, decode_tag(tag))
    record = records[step_index]
    return record.image.encoded_image_string
82 83


84 85
def get_audio_tags(log_reader):
    """Return the result of `get_logs` for the "audio" component."""
    return get_logs(log_reader, component="audio")
86 87


88 89 90 91 92 93 94 95 96
def get_audio_tag_steps(log_reader, run, tag):
    """
    List the steps recorded for an audio tag.

    New data is loaded first; `tag` is passed through `decode_tag` before
    the lookup in the "audio" reservoir.

    Returns:
        list of dicts with keys "step" (the record id) and "wallTime" (the
        record timestamp), one per record.
    """
    log_reader.load_new_data()
    reservoir = log_reader.data_manager.get_reservoir("audio")
    steps = []
    for record in reservoir.get_items(run, decode_tag(tag)):
        steps.append({"step": record.id, "wallTime": record.timestamp})
    return steps
97 98


99 100 101 102 103 104
def get_individual_audio(log_reader, run, tag, step_index):
    """
    Return the encoded payload stored at position `step_index` for a tag.

    New data is loaded first; `tag` is passed through `decode_tag` before
    the lookup in the "audio" reservoir. `step_index` indexes directly into
    the returned records.
    """
    log_reader.load_new_data()
    reservoir = log_reader.data_manager.get_reservoir("audio")
    records = reservoir.get_items(run, decode_tag(tag))
    record = records[step_index]
    # NOTE(review): the audio record is read through a field named
    # `encoded_image_string`; confirm against the record schema that this is
    # the intended field and not a copy-paste of the image accessor.
    return record.audio.encoded_image_string
105 106


107 108 109 110 111
def get_embeddings_tags(log_reader):
    """Return the result of `get_logs` for the "embeddings" component."""
    return get_logs(log_reader, component="embeddings")


def get_embeddings(log_reader, run, tag, reduction, dimension=2):
    """
    Return the embeddings of a tag reduced to `dimension` dimensions.

    Args:
        log_reader: reader whose "embeddings" reservoir is queried.
        run: run to look up.
        tag: encoded tag; passed through `decode_tag` before the lookup.
        reduction: reduction method, either 'tsne' or 'pca'.
        dimension: number of output dimensions (default 2).

    Returns:
        dict with "embedding" (the reduced vectors as nested lists) and
        "labels" (the label of every embedding, in the same order).

    Raises:
        ValueError: if `reduction` is neither 'tsne' nor 'pca'.
    """
    # Reject unknown methods before doing any work; previously this surfaced
    # as an UnboundLocalError on the return statement after loading the data.
    if reduction not in ('tsne', 'pca'):
        raise ValueError(
            "Unsupported reduction %r, expected 'tsne' or 'pca'" %
            (reduction, ))

    log_reader.load_new_data()
    records = log_reader.data_manager.get_reservoir("embeddings").get_items(
        run, decode_tag(tag))

    # Only the first record is used as the source of labels and vectors.
    labels = []
    vectors = []
    for item in records[0].embeddings.embeddings:
        labels.append(item.label)
        vectors.append(item.vectors)
    vectors = np.array(vectors)

    if reduction == 'tsne':
        # Imported lazily so the t-SNE module is only loaded when requested.
        import visualdl.server.tsne as tsne
        low_dim_embs = tsne.tsne(
            vectors, dimension, initial_dims=50, perplexity=30.0)
    else:
        low_dim_embs = simple_pca(vectors, dimension)

    return {"embedding": low_dim_embs.tolist(), "labels": labels}
132 133


134 135 136 137 138
def retry(ntimes, function, time2sleep, *args, **kwargs):
    """
    Call `function(*args, **kwargs)` up to `ntimes` times.

    The first successful call's result is returned immediately. When a call
    raises an `Exception`, the error is logged and the thread sleeps
    `time2sleep` seconds before the next attempt.

    Returns:
        The result of `function`, or None when every attempt failed (or
        `ntimes` is not positive).
    """
    for attempt in range(ntimes):
        try:
            return function(*args, **kwargs)
        except Exception:
            error_info = '\n'.join(map(str, sys.exc_info()))
            logger.error("Unexpected error: %s" % error_info)
            # Sleeping only makes sense when another attempt follows; do not
            # delay the caller after the final failure.
            if attempt < ntimes - 1:
                time.sleep(time2sleep)
    return None
146

T
Thuan Nguyen 已提交
147

148 149 150 151 152 153 154 155 156
def cache_get(cache):
    """
    Build a read-through lookup function on top of `cache`.

    The returned handler takes `(key, func, *args, **kwargs)`. If
    `cache.get(key)` is not None, that value is returned. Otherwise a
    warning is logged, `func(*args, **kwargs)` is called, and its result is
    stored with `cache.set(key, ...)` and returned.
    """

    def _handler(key, func, *args, **kwargs):
        cached = cache.get(key)
        if cached is not None:
            return cached

        # Cache miss: compute the value and remember it.
        logger.warning('update cache %s' % key)
        fresh = func(*args, **kwargs)
        cache.set(key, fresh)
        return fresh

    return _handler
J
Jeff Wang 已提交
159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181


def simple_pca(x, dimension):
    """
    A simple PCA implementation to do the dimension reduction.

    Args:
        x: 2-D array-like with one sample per row (columns are treated as
            the variables). It is not modified.
        dimension: number of principal components to keep.

    Returns:
        numpy array holding the centered rows of `x` projected onto the
        `dimension` eigenvectors of the covariance matrix with the largest
        eigenvalues. The sign of each component is not defined.
    """
    # Center a float copy: an in-place `x -= mean` would alter the caller's
    # array and raises for integer input.
    data = np.asarray(x, dtype=np.float64)
    centered = data - np.mean(data, axis=0)

    # Covariance matrix; atleast_2d covers the single-feature case where
    # np.cov returns a 0-d array.
    cov = np.atleast_2d(np.cov(centered, rowvar=False))

    # The covariance matrix is symmetric, so eigh applies and always yields
    # real eigenvalues/eigenvectors.
    eigvals, eigvecs = np.linalg.eigh(cov)

    # Sort the eigenvalues from high to low.
    order = np.argsort(eigvals)[::-1]

    # Keep only the eigenvectors with the largest eigenvalues.
    components = eigvecs[:, order[:dimension]]

    return np.dot(centered, components)