Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
VisualDL
提交
a8cadfb7
V
VisualDL
项目概览
PaddlePaddle
/
VisualDL
1 年多 前同步成功
通知
88
Star
4655
Fork
642
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
10
列表
看板
标记
里程碑
合并请求
2
Wiki
5
Wiki
分析
仓库
DevOps
项目成员
Pages
V
VisualDL
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
10
Issue
10
列表
看板
标记
里程碑
合并请求
2
合并请求
2
Pages
分析
分析
仓库分析
DevOps
Wiki
5
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
a8cadfb7
编写于
3月 29, 2018
作者:
J
Jeff Wang
提交者:
daminglu
3月 29, 2018
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Embedding visualization (#351)
上级
4e6e1949
变更
7
隐藏空白更改
内联
并排
Showing
7 changed files
with
162 additions
and
13 deletions
+162
-13
visualdl/logic/histogram.h
visualdl/logic/histogram.h
+4
-10
visualdl/logic/pybind.cc
visualdl/logic/pybind.cc
+26
-3
visualdl/logic/sdk.cc
visualdl/logic/sdk.cc
+73
-0
visualdl/logic/sdk.h
visualdl/logic/sdk.h
+46
-0
visualdl/python/storage.py
visualdl/python/storage.py
+8
-0
visualdl/storage/storage.proto
visualdl/storage/storage.proto
+1
-0
visualdl/storage/tablet.h
visualdl/storage/tablet.h
+4
-0
未找到文件。
visualdl/logic/histogram.h
浏览文件 @
a8cadfb7
...
...
@@ -81,14 +81,6 @@ struct HistogramBuilder {
T
right_boundary
{
std
::
numeric_limits
<
T
>::
min
()};
std
::
vector
<
int
>
buckets
;
void
Get
(
size_t
n
,
T
*
left
,
T
*
right
,
int
*
frequency
)
{
CHECK
(
!
buckets
.
empty
())
<<
"need to CreateBuckets first."
;
CHECK_LT
(
n
,
num_buckets_
)
<<
"n out of range."
;
*
left
=
left_boundary
+
span_
*
n
;
*
right
=
*
left
+
span_
;
*
frequency
=
buckets
[
n
];
}
private:
// Get the left and right boundaries.
void
UpdateBoundary
(
const
std
::
vector
<
T
>&
data
)
{
...
...
@@ -106,9 +98,11 @@ private:
(
float
)
left_boundary
/
num_buckets_
;
buckets
.
resize
(
num_buckets_
);
// Go through the data, increase the item count in a bucket.
for
(
auto
v
:
data
)
{
int
offset
=
std
::
min
(
int
((
v
-
left_boundary
)
/
span_
),
num_buckets_
-
1
);
buckets
[
offset
]
++
;
int
bucket_group_index
=
std
::
min
(
int
((
v
-
left_boundary
)
/
span_
),
num_buckets_
-
1
);
buckets
[
bucket_group_index
]
++
;
}
}
...
...
visualdl/logic/pybind.cc
浏览文件 @
a8cadfb7
...
...
@@ -84,9 +84,14 @@ PYBIND11_MODULE(core, m) {
auto
tablet
=
self
.
tablet
(
tag
);
return
vs
::
components
::
TextReader
(
tablet
);
})
.
def
(
"get_audio"
,
[](
vs
::
LogReader
&
self
,
const
std
::
string
&
tag
)
{
.
def
(
"get_audio"
,
[](
vs
::
LogReader
&
self
,
const
std
::
string
&
tag
)
{
auto
tablet
=
self
.
tablet
(
tag
);
return
vs
::
components
::
AudioReader
(
self
.
mode
(),
tablet
);
})
.
def
(
"get_embedding"
,
[](
vs
::
LogReader
&
self
,
const
std
::
string
&
tag
)
{
auto
tablet
=
self
.
tablet
(
tag
);
return
vs
::
components
::
AudioReader
(
self
.
mode
(),
tablet
);
return
vs
::
components
::
EmbeddingReader
(
tablet
);
});
// clang-format on
...
...
@@ -136,7 +141,11 @@ PYBIND11_MODULE(core, m) {
int
step_cycle
)
{
auto
tablet
=
self
.
AddTablet
(
tag
);
return
vs
::
components
::
Audio
(
tablet
,
num_samples
,
step_cycle
);
});
})
.
def
(
"new_embedding"
,
[](
vs
::
LogWriter
&
self
,
const
std
::
string
&
tag
)
{
auto
tablet
=
self
.
AddTablet
(
tag
);
return
vs
::
components
::
Embedding
(
tablet
);
});
//------------------- components --------------------
#define ADD_SCALAR_READER(T) \
...
...
@@ -233,6 +242,20 @@ PYBIND11_MODULE(core, m) {
.
def
(
"total_records"
,
&
cp
::
TextReader
::
total_records
)
.
def
(
"size"
,
&
cp
::
TextReader
::
size
);
py
::
class_
<
cp
::
Embedding
>
(
m
,
"EmbeddingWriter"
)
.
def
(
"set_caption"
,
&
cp
::
Embedding
::
SetCaption
)
.
def
(
"add_embeddings_with_word_list"
,
&
cp
::
Embedding
::
AddEmbeddingsWithWordList
);
py
::
class_
<
cp
::
EmbeddingReader
>
(
m
,
"EmbeddingReader"
)
.
def
(
"get_all_labels"
,
&
cp
::
EmbeddingReader
::
get_all_labels
)
.
def
(
"get_all_embeddings"
,
&
cp
::
EmbeddingReader
::
get_all_embeddings
)
.
def
(
"ids"
,
&
cp
::
EmbeddingReader
::
ids
)
.
def
(
"timestamps"
,
&
cp
::
EmbeddingReader
::
timestamps
)
.
def
(
"caption"
,
&
cp
::
EmbeddingReader
::
caption
)
.
def
(
"total_records"
,
&
cp
::
EmbeddingReader
::
total_records
)
.
def
(
"size"
,
&
cp
::
EmbeddingReader
::
size
);
py
::
class_
<
cp
::
Audio
>
(
m
,
"AudioWriter"
,
R"pbdoc(
PyBind class. Must instantiate through the LogWriter.
)pbdoc"
)
...
...
visualdl/logic/sdk.cc
浏览文件 @
a8cadfb7
...
...
@@ -347,6 +347,79 @@ std::string TextReader::caption() const {
size_t
TextReader
::
size
()
const
{
return
reader_
.
total_records
();
}
/*
* Embedding functions
*/
void
Embedding
::
AddEmbeddingsWithWordList
(
const
std
::
vector
<
std
::
vector
<
float
>>&
word_embeddings
,
std
::
vector
<
std
::
string
>&
labels
)
{
for
(
int
i
=
0
;
i
<
word_embeddings
.
size
();
i
++
)
{
AddEmbedding
(
i
,
word_embeddings
[
i
],
labels
[
i
]);
}
}
// Appends a single record to the tablet. The record is given `item_id` and
// the current time as its timestamp; its one data entry stores
// `one_hot_vector` as the float payload and `label` as the raw payload.
void Embedding::AddEmbedding(int item_id,
                             const std::vector<float>& one_hot_vector,
                             std::string& label) {
  auto new_record = tablet_.AddRecord();
  new_record.SetId(item_id);

  time_t now = std::time(nullptr);
  new_record.SetTimeStamp(now);

  auto payload = new_record.AddData();
  payload.SetMulti<float>(one_hot_vector);
  payload.SetRaw(label);
}
/*
* EmbeddingReader functions
*/
// Collects the raw payload of the first data entry of every record, in
// record order.
std::vector<std::string> EmbeddingReader::get_all_labels() const {
  std::vector<std::string> labels;
  for (int idx = 0; idx < total_records(); ++idx) {
    auto rec = reader_.record(idx);
    auto first_entry = rec.data(0);
    labels.push_back(first_entry.GetRaw());
  }
  return labels;
}
// Collects the float payload of the first data entry of every record, in
// record order.
std::vector<std::vector<float>> EmbeddingReader::get_all_embeddings() const {
  std::vector<std::vector<float>> embeddings;
  for (int idx = 0; idx < total_records(); ++idx) {
    auto rec = reader_.record(idx);
    auto first_entry = rec.data(0);
    embeddings.push_back(first_entry.GetMulti<float>());
  }
  return embeddings;
}
// Returns the id of every record, in record order.
std::vector<int> EmbeddingReader::ids() const {
  std::vector<int> record_ids;
  for (int idx = 0; idx < reader_.total_records(); ++idx) {
    auto rec = reader_.record(idx);
    record_ids.push_back(rec.id());
  }
  return record_ids;
}
// Returns the timestamp of every record, in record order.
std::vector<time_t> EmbeddingReader::timestamps() const {
  std::vector<time_t> stamps;
  for (int idx = 0; idx < reader_.total_records(); ++idx) {
    auto rec = reader_.record(idx);
    stamps.push_back(rec.timestamp());
  }
  return stamps;
}
// Returns the first caption of the underlying reader. The CHECK fails with
// "no caption" when the reader has no captions at all.
std::string EmbeddingReader::caption() const {
  // const auto& binds to a returned reference, or keeps a returned temporary
  // alive for the rest of this scope.
  const auto& all_captions = reader_.captions();
  CHECK(!all_captions.empty()) << "no caption";
  return all_captions.front();
}
// Returns the number of records reported by the underlying reader.
size_t EmbeddingReader::size() const {
  const size_t record_count = reader_.total_records();
  return record_count;
}
void
Audio
::
StartSampling
()
{
if
(
!
ToSampleThisStep
())
return
;
...
...
visualdl/logic/sdk.h
浏览文件 @
a8cadfb7
...
...
@@ -327,6 +327,52 @@ private:
TabletReader
reader_
;
};
/*
 * Writer for the embedding component. Construction marks the wrapped tablet
 * as Tablet::Type::kEmbedding.
 */
struct Embedding {
  Embedding(Tablet tablet) : tablet_(tablet) {
    tablet_.SetType(Tablet::Type::kEmbedding);
  }

  // Hands the tablet a one-element caption list containing `cap`.
  void SetCaption(const std::string cap) {
    tablet_.SetCaptions(std::vector<std::string>{cap});
  }

  // Adds every word vector together with its label.
  // Labels and vectors are paired by index: with labels ["Apple", "Orange"],
  // the first item of word_embeddings represents "Apple".
  void AddEmbeddingsWithWordList(
      const std::vector<std::vector<float>>& word_embeddings,
      std::vector<std::string>& labels);

  // TODO: Create another function that takes 'word_embeddings' and 'word_dict'

private:
  // Writes one record carrying `item_id`, `one_hot_vector` and `label`.
  void AddEmbedding(int item_id,
                    const std::vector<float>& one_hot_vector,
                    std::string& label);

  Tablet tablet_;
};
/*
 * Reader for the embedding component; wraps a TabletReader.
 */
struct EmbeddingReader {
  EmbeddingReader(TabletReader reader) : reader_(reader) {}

  // Id of every record, in record order.
  std::vector<int> ids() const;

  // Raw payload (the label) of each record's first data entry.
  std::vector<std::string> get_all_labels() const;

  // Float payload (the embedding vector) of each record's first data entry.
  std::vector<std::vector<float>> get_all_embeddings() const;

  // Timestamp of every record, in record order.
  std::vector<time_t> timestamps() const;

  // First caption of the underlying reader.
  std::string caption() const;

  // Number of records reported by the underlying TabletReader.
  size_t total_records() const { return reader_.total_records(); }

  // Same count as total_records().
  size_t size() const;

private:
  TabletReader reader_;
};
/*
* Image component writer.
*/
...
...
visualdl/python/storage.py
浏览文件 @
a8cadfb7
...
...
@@ -119,6 +119,10 @@ class LogReader(object):
check_tag_name_valid
(
tag
)
return
self
.
reader
.
get_text
(
tag
)
def embedding(self, tag):
    """
    Get an embedding reader with tag.

    The tag is passed through check_tag_name_valid before the lookup.
    """
    check_tag_name_valid(tag)
    embedding_reader = self.reader.get_embedding(tag)
    return embedding_reader
def
audio
(
self
,
tag
):
"""
Get an audio reader with tag
...
...
@@ -256,6 +260,10 @@ class LogWriter(object):
check_tag_name_valid
(
tag
)
return
self
.
writer
.
new_text
(
tag
)
def embedding(self, tag):
    """
    Create an embedding writer with tag.

    The tag is passed through check_tag_name_valid before creation.
    """
    check_tag_name_valid(tag)
    embedding_writer = self.writer.new_embedding(tag)
    return embedding_writer
def
save
(
self
):
self
.
writer
.
save
()
...
...
visualdl/storage/storage.proto
浏览文件 @
a8cadfb7
...
...
@@ -109,6 +109,7 @@ message Tablet {
kImage
=
2
;
kText
=
3
;
kAudio
=
4
;
kEmbedding
=
5
;
}
// The unique identification for this `Tablet`. VisualDL has no concept of
// a FileWriter as TensorBoard (TB) does. It will store all the tablets in a single
...
...
visualdl/storage/tablet.h
浏览文件 @
a8cadfb7
...
...
@@ -35,6 +35,7 @@ struct Tablet {
kImage
=
2
,
kText
=
3
,
kAudio
=
4
,
kEmbedding
=
5
,
kUnknown
=
-
1
};
...
...
@@ -59,6 +60,9 @@ struct Tablet {
if
(
name
==
"audio"
)
{
return
kAudio
;
}
if
(
name
==
"embedding"
)
{
return
kEmbedding
;
}
LOG
(
ERROR
)
<<
"unknown component: "
<<
name
;
return
kUnknown
;
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录