未验证 提交 c7e3879c 编写于 作者: M Matt Pharr 提交者: GitHub

Unicode normalization (#225)

Normalize Unicode strings for user-supplied names (objects, materials,
media, etc.). Note that there is no need to normalize strings for things
like the name of the selected sampler, light source types, or the
parameters provided to pbrt objects, as all of the valid ones are plain old
ASCII text. We also intentionally do not normalize pathnames, as doing so
can cause all sorts of trouble.
上级 6b634163
......@@ -27,3 +27,6 @@
[submodule "src/ext/lodepng"]
path = src/ext/lodepng
url = https://github.com/lvandeve/lodepng.git
[submodule "src/ext/utf8proc"]
path = src/ext/utf8proc
url = https://github.com/JuliaStrings/utf8proc.git
......@@ -65,6 +65,7 @@ check_ext ("filesystem" "filesystem/filesystem" c5f9de30142453eb3c6fe991e82dfc25
check_ext ("libdeflate" "libdeflate/common" 1fd0bea6ca2073c68493632dafc4b1ddda1bcbc3)
check_ext ("lodepng" "lodepng/examples" 8c6a9e30576f07bf470ad6f09458a2dcd7a6a84a)
check_ext ("stb" "stb/tools" af1a5bc352164740c1cc1354942b1c6b72eacb8a)
check_ext ("utf8proc" "utf8proc/bench" 2484e2ed5e1d9c19edcccf392a7d9920ad90dfaf)
check_ext ("zlib" "zlib/doc" 54d591eabf9fe0e84c725638f8d5d8d202a093fa)
add_compile_definitions ("$<$<CONFIG:DEBUG>:PBRT_DEBUG_BUILD>")
......@@ -861,6 +862,7 @@ set (ALL_PBRT_LIBS
${LIBDEFLATE_LIBRARIES}
double-conversion
${PBRT_CUDA_LIB}
utf8proc
)
if (PBRT_CUDA_ENABLED)
......@@ -1024,6 +1026,7 @@ set (PBRT_TEST_SOURCE
src/pbrt/util/sampling_test.cpp
src/pbrt/util/spectrum_test.cpp
src/pbrt/util/splines_test.cpp
src/pbrt/util/string_test.cpp
src/pbrt/util/taggedptr_test.cpp
src/pbrt/util/transform_test.cpp
src/pbrt/util/vecmath_test.cpp
......
......@@ -139,3 +139,10 @@ set (FLIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/flip PARENT_SCOPE)
add_library (flip_lib STATIC ${CMAKE_CURRENT_SOURCE_DIR}/flip/flip.cpp)
set_property (TARGET flip_lib PROPERTY FOLDER "ext")
###########################################################################
# utf8proc
set (UTF8PROC_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/utf8proc PARENT_SCOPE)
add_subdirectory (utf8proc)
Subproject commit 2484e2ed5e1d9c19edcccf392a7d9920ad90dfaf
......@@ -23,6 +23,7 @@
#include <pbrt/util/parallel.h>
#include <pbrt/util/print.h>
#include <pbrt/util/spectrum.h>
#include <pbrt/util/string.h>
#include <pbrt/util/transform.h>
#include <iostream>
......@@ -122,11 +123,13 @@ void BasicSceneBuilder::Translate(Float dx, Float dy, Float dz, FileLoc loc) {
[=](auto t) { return t * pbrt::Translate(Vector3f(dx, dy, dz)); });
}
void BasicSceneBuilder::CoordinateSystem(const std::string &name, FileLoc loc) {
// Records the current transformation matrix under the given user-supplied
// name. The name is UTF-8 normalized first so that differently encoded but
// equivalent spellings refer to the same map entry.
void BasicSceneBuilder::CoordinateSystem(const std::string &origName, FileLoc loc) {
    const std::string normalizedName = NormalizeUTF8(origName);
    namedCoordinateSystems[normalizedName] = graphicsState.ctm;
}
void BasicSceneBuilder::CoordSysTransform(const std::string &name, FileLoc loc) {
void BasicSceneBuilder::CoordSysTransform(const std::string &origName, FileLoc loc) {
std::string name = NormalizeUTF8(origName);
if (namedCoordinateSystems.find(name) != namedCoordinateSystems.end())
graphicsState.ctm = namedCoordinateSystems[name];
else
......@@ -230,8 +233,9 @@ void BasicSceneBuilder::WorldBegin(FileLoc loc) {
scene->SetOptions(filter, film, camera, sampler, integrator, accelerator);
}
void BasicSceneBuilder::MakeNamedMedium(const std::string &name,
void BasicSceneBuilder::MakeNamedMedium(const std::string &origName,
ParsedParameterVector params, FileLoc loc) {
std::string name = NormalizeUTF8(origName);
// Issue error if medium _name_ is multiply defined
if (mediumNames.find(name) != mediumNames.end()) {
ErrorExitDeferred(&loc, "Named medium \"%s\" redefined.", name);
......@@ -302,7 +306,9 @@ void BasicSceneBuilder::Shape(const std::string &name, ParsedParameterVector par
}
}
void BasicSceneBuilder::ObjectBegin(const std::string &name, FileLoc loc) {
void BasicSceneBuilder::ObjectBegin(const std::string &origName, FileLoc loc) {
std::string name = NormalizeUTF8(origName);
VERIFY_WORLD("ObjectBegin");
pushedGraphicsStates.push_back(graphicsState);
......@@ -356,7 +362,8 @@ void BasicSceneBuilder::ObjectEnd(FileLoc loc) {
activeInstanceDefinition = nullptr;
}
void BasicSceneBuilder::ObjectInstance(const std::string &name, FileLoc loc) {
void BasicSceneBuilder::ObjectInstance(const std::string &origName, FileLoc loc) {
std::string name = NormalizeUTF8(origName);
VERIFY_WORLD("ObjectInstance");
if (activeInstanceDefinition) {
......@@ -643,15 +650,19 @@ void BasicSceneBuilder::Integrator(const std::string &name, ParsedParameterVecto
integrator = SceneEntity(name, std::move(dict), loc);
}
void BasicSceneBuilder::MediumInterface(const std::string &insideName,
const std::string &outsideName, FileLoc loc) {
// Sets the inside and outside medium names in the current graphics state.
// Both user-supplied names are UTF-8 normalized before being stored; the two
// normalizations are performed up front, ahead of either assignment.
void BasicSceneBuilder::MediumInterface(const std::string &origInsideName,
                                        const std::string &origOutsideName, FileLoc loc) {
    std::string inside = NormalizeUTF8(origInsideName);
    std::string outside = NormalizeUTF8(origOutsideName);

    graphicsState.currentInsideMedium = std::move(inside);
    graphicsState.currentOutsideMedium = std::move(outside);
}
void BasicSceneBuilder::Texture(const std::string &name, const std::string &type,
void BasicSceneBuilder::Texture(const std::string &origName, const std::string &type,
const std::string &texname, ParsedParameterVector params,
FileLoc loc) {
std::string name = NormalizeUTF8(origName);
VERIFY_WORLD("Texture");
ParameterDictionary dict(std::move(params), graphicsState.textureAttributes,
......@@ -691,8 +702,9 @@ void BasicSceneBuilder::Material(const std::string &name, ParsedParameterVector
graphicsState.currentMaterialName.clear();
}
void BasicSceneBuilder::MakeNamedMaterial(const std::string &name,
void BasicSceneBuilder::MakeNamedMaterial(const std::string &origName,
ParsedParameterVector params, FileLoc loc) {
std::string name = NormalizeUTF8(origName);
VERIFY_WORLD("MakeNamedMaterial");
ParameterDictionary dict(std::move(params), graphicsState.materialAttributes,
......@@ -707,7 +719,8 @@ void BasicSceneBuilder::MakeNamedMaterial(const std::string &name,
scene->AddNamedMaterial(name, SceneEntity("", std::move(dict), loc));
}
void BasicSceneBuilder::NamedMaterial(const std::string &name, FileLoc loc) {
void BasicSceneBuilder::NamedMaterial(const std::string &origName, FileLoc loc) {
std::string name = NormalizeUTF8(origName);
VERIFY_WORLD("NamedMaterial");
graphicsState.currentMaterialName = name;
graphicsState.currentMaterialIndex = -1;
......
......@@ -9,6 +9,10 @@
#include <pbrt/util/string.h>
#include <pbrt/util/check.h>
#include <pbrt/util/error.h>
#define UTF8PROC_STATIC
#include <utf8proc/utf8proc.h>
#include <ctype.h>
#include <codecvt>
......@@ -185,4 +189,18 @@ std::u16string UTF16FromUTF8(std::string str) {
return utf16;
}
std::string NormalizeUTF8(std::string str) {
utf8proc_option_t options = UTF8PROC_COMPOSE;
utf8proc_uint8_t *result;
utf8proc_ssize_t length = utf8proc_map((const unsigned char *)str.data(), str.size(),
&result, options);
if (length < 0)
ErrorExit("Unicode normalization error: %s: \"%s\"", utf8proc_errmsg(length), str);
str = std::string(result, result + length);
free(result);
return str;
}
} // namespace pbrt
......@@ -36,6 +36,8 @@ std::wstring WStringFromUTF8(std::string str);
std::string UTF8FromWString(std::wstring str);
#endif // PBRT_IS_WINDOWS
// Returns the result of applying utf8proc's UTF8PROC_COMPOSE mapping to the
// given UTF-8 string, so that equivalent decomposed and precomposed spellings
// compare equal. Calls ErrorExit() if utf8proc reports an error.
std::string NormalizeUTF8(std::string str);
// InternedString Definition
class InternedString {
public:
......
// pbrt is Copyright(c) 1998-2020 Matt Pharr, Wenzel Jakob, and Greg Humphreys.
// The pbrt source code is licensed under the Apache License, Version 2.0.
// SPDX: Apache-2.0
#include <gtest/gtest.h>
#include <pbrt/pbrt.h>
#include <pbrt/util/string.h>
#include <string>
using namespace pbrt;
// Checks that NormalizeUTF8() leaves a precomposed string unchanged and maps
// the equivalent decomposed string to the precomposed one.
TEST(Unicode, BasicNormalization) {
    // "Amélie" spelled two ways (see
    // https://en.wikipedia.org/wiki/Unicode_equivalence): once with the
    // single code point U+00E9, once with 'e' followed by combining U+0301.
    const std::u16string composed16 = u"\u0041\u006d\u00e9\u006c\u0069\u0065";
    const std::u16string decomposed16 = u"\u0041\u006d\u0065\u0301\u006c\u0069\u0065";
    EXPECT_NE(composed16, decomposed16);

    // The two spellings must still differ after conversion to UTF-8.
    const std::string composed8 = UTF8FromUTF16(composed16);
    const std::string decomposed8 = UTF8FromUTF16(decomposed16);
    EXPECT_NE(composed8, decomposed8);

    // Already-composed input is a fixed point of normalization.
    EXPECT_EQ(composed8, NormalizeUTF8(composed8));
    // Decomposed input normalizes to the composed form.
    EXPECT_EQ(composed8, NormalizeUTF8(decomposed8));
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册