diff --git a/.gitmodules b/.gitmodules index b299bd0ff23006c7b4379ff0ff425dfea858b2f6..a908a29b5f4b6ebf8a6ef1f7bf587dfa120b1997 100644 --- a/.gitmodules +++ b/.gitmodules @@ -27,3 +27,6 @@ [submodule "src/ext/lodepng"] path = src/ext/lodepng url = https://github.com/lvandeve/lodepng.git +[submodule "src/ext/utf8proc"] + path = src/ext/utf8proc + url = https://github.com/JuliaStrings/utf8proc.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 4465ed1c548f424c88a4c195523ea384d1d1206c..1619858f81aa7413c1eb47052444f4a915bb694d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -65,6 +65,7 @@ check_ext ("filesystem" "filesystem/filesystem" c5f9de30142453eb3c6fe991e82dfc25 check_ext ("libdeflate" "libdeflate/common" 1fd0bea6ca2073c68493632dafc4b1ddda1bcbc3) check_ext ("lodepng" "lodepng/examples" 8c6a9e30576f07bf470ad6f09458a2dcd7a6a84a) check_ext ("stb" "stb/tools" af1a5bc352164740c1cc1354942b1c6b72eacb8a) +check_ext ("utf8proc" "utf8proc/bench" 2484e2ed5e1d9c19edcccf392a7d9920ad90dfaf) check_ext ("zlib" "zlib/doc" 54d591eabf9fe0e84c725638f8d5d8d202a093fa) add_compile_definitions ("$<$:PBRT_DEBUG_BUILD>") @@ -861,6 +862,7 @@ set (ALL_PBRT_LIBS ${LIBDEFLATE_LIBRARIES} double-conversion ${PBRT_CUDA_LIB} + utf8proc ) if (PBRT_CUDA_ENABLED) @@ -1024,6 +1026,7 @@ set (PBRT_TEST_SOURCE src/pbrt/util/sampling_test.cpp src/pbrt/util/spectrum_test.cpp src/pbrt/util/splines_test.cpp + src/pbrt/util/string_test.cpp src/pbrt/util/taggedptr_test.cpp src/pbrt/util/transform_test.cpp src/pbrt/util/vecmath_test.cpp diff --git a/src/ext/CMakeLists.txt b/src/ext/CMakeLists.txt index d7f47f673effee9387565e53f8bc8c4c864bbcd7..3968d18f3cc05fa8f0d7db8be130e9e8654653e9 100644 --- a/src/ext/CMakeLists.txt +++ b/src/ext/CMakeLists.txt @@ -139,3 +139,10 @@ set (FLIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/flip PARENT_SCOPE) add_library (flip_lib STATIC ${CMAKE_CURRENT_SOURCE_DIR}/flip/flip.cpp) set_property (TARGET flip_lib PROPERTY FOLDER "ext") + +########################################################################### +# utf8proc + +set (UTF8PROC_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/utf8proc PARENT_SCOPE) + +add_subdirectory (utf8proc) diff --git a/src/ext/utf8proc b/src/ext/utf8proc new file mode 160000 index 0000000000000000000000000000000000000000..2484e2ed5e1d9c19edcccf392a7d9920ad90dfaf --- /dev/null +++ b/src/ext/utf8proc @@ -0,0 +1 @@ +Subproject commit 2484e2ed5e1d9c19edcccf392a7d9920ad90dfaf diff --git a/src/pbrt/scene.cpp b/src/pbrt/scene.cpp index 8f99579bc03200e8f64d5dd290178ced1824f7d9..91ec02642ac89f20892f96a78432e1800217a739 100644 --- a/src/pbrt/scene.cpp +++ b/src/pbrt/scene.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -122,11 +123,13 @@ void BasicSceneBuilder::Translate(Float dx, Float dy, Float dz, FileLoc loc) { [=](auto t) { return t * pbrt::Translate(Vector3f(dx, dy, dz)); }); } -void BasicSceneBuilder::CoordinateSystem(const std::string &name, FileLoc loc) { +void BasicSceneBuilder::CoordinateSystem(const std::string &origName, FileLoc loc) { + std::string name = NormalizeUTF8(origName); namedCoordinateSystems[name] = graphicsState.ctm; } -void BasicSceneBuilder::CoordSysTransform(const std::string &name, FileLoc loc) { +void BasicSceneBuilder::CoordSysTransform(const std::string &origName, FileLoc loc) { + std::string name = NormalizeUTF8(origName); if (namedCoordinateSystems.find(name) != namedCoordinateSystems.end()) graphicsState.ctm = namedCoordinateSystems[name]; else @@ -230,8 +233,9 @@ void BasicSceneBuilder::WorldBegin(FileLoc loc) { scene->SetOptions(filter, film, camera, sampler, integrator, accelerator); } -void BasicSceneBuilder::MakeNamedMedium(const std::string &name, +void BasicSceneBuilder::MakeNamedMedium(const std::string &origName, ParsedParameterVector params, FileLoc loc) { + std::string name = NormalizeUTF8(origName); // Issue error if medium _name_ is multiply defined if (mediumNames.find(name) != mediumNames.end()) { ErrorExitDeferred(&loc, "Named medium \"%s\" redefined.", name); @@ -302,7 +306,9 @@ void BasicSceneBuilder::Shape(const std::string &name, ParsedParameterVector par } } -void BasicSceneBuilder::ObjectBegin(const std::string &name, FileLoc loc) { +void BasicSceneBuilder::ObjectBegin(const std::string &origName, FileLoc loc) { + std::string name = NormalizeUTF8(origName); + VERIFY_WORLD("ObjectBegin"); pushedGraphicsStates.push_back(graphicsState); @@ -356,7 +362,8 @@ void BasicSceneBuilder::ObjectEnd(FileLoc loc) { activeInstanceDefinition = nullptr; } -void BasicSceneBuilder::ObjectInstance(const std::string &name, FileLoc loc) { +void BasicSceneBuilder::ObjectInstance(const std::string &origName, FileLoc loc) { + std::string name = NormalizeUTF8(origName); VERIFY_WORLD("ObjectInstance"); if (activeInstanceDefinition) { @@ -643,15 +650,19 @@ void BasicSceneBuilder::Integrator(const std::string &name, ParsedParameterVecto integrator = SceneEntity(name, std::move(dict), loc); } -void BasicSceneBuilder::MediumInterface(const std::string &insideName, - const std::string &outsideName, FileLoc loc) { +void BasicSceneBuilder::MediumInterface(const std::string &origInsideName, + const std::string &origOutsideName, FileLoc loc) { + std::string insideName = NormalizeUTF8(origInsideName); + std::string outsideName = NormalizeUTF8(origOutsideName); + graphicsState.currentInsideMedium = insideName; graphicsState.currentOutsideMedium = outsideName; } -void BasicSceneBuilder::Texture(const std::string &name, const std::string &type, +void BasicSceneBuilder::Texture(const std::string &origName, const std::string &type, const std::string &texname, ParsedParameterVector params, FileLoc loc) { + std::string name = NormalizeUTF8(origName); VERIFY_WORLD("Texture"); ParameterDictionary dict(std::move(params), graphicsState.textureAttributes, @@ -691,8 +702,9 @@ void BasicSceneBuilder::Material(const std::string &name, ParsedParameterVector graphicsState.currentMaterialName.clear(); } -void BasicSceneBuilder::MakeNamedMaterial(const std::string &name, +void BasicSceneBuilder::MakeNamedMaterial(const std::string &origName, ParsedParameterVector params, FileLoc loc) { + std::string name = NormalizeUTF8(origName); VERIFY_WORLD("MakeNamedMaterial"); ParameterDictionary dict(std::move(params), graphicsState.materialAttributes, @@ -707,7 +719,8 @@ void BasicSceneBuilder::MakeNamedMaterial(const std::string &name, scene->AddNamedMaterial(name, SceneEntity("", std::move(dict), loc)); } -void BasicSceneBuilder::NamedMaterial(const std::string &name, FileLoc loc) { +void BasicSceneBuilder::NamedMaterial(const std::string &origName, FileLoc loc) { + std::string name = NormalizeUTF8(origName); VERIFY_WORLD("NamedMaterial"); graphicsState.currentMaterialName = name; graphicsState.currentMaterialIndex = -1; diff --git a/src/pbrt/util/string.cpp b/src/pbrt/util/string.cpp index 4d108759542b01e755e3a2c8e8bcff35ecc66bce..6a24862973d6bd8be7eb572e75756ab8853fd52d 100644 --- a/src/pbrt/util/string.cpp +++ b/src/pbrt/util/string.cpp @@ -9,6 +9,10 @@ #include #include +#include + +#define UTF8PROC_STATIC +#include #include #include @@ -185,4 +189,18 @@ std::u16string UTF16FromUTF8(std::string str) { return utf16; } +std::string NormalizeUTF8(std::string str) { + utf8proc_option_t options = UTF8PROC_COMPOSE; + + utf8proc_uint8_t *result; + utf8proc_ssize_t length = utf8proc_map((const unsigned char *)str.data(), str.size(), + &result, options); + if (length < 0) + ErrorExit("Unicode normalization error: %s: \"%s\"", utf8proc_errmsg(length), str); + + str = std::string(result, result + length); + free(result); + return str; +} + } // namespace pbrt diff --git a/src/pbrt/util/string.h b/src/pbrt/util/string.h index 8db24f285e1521454c31af480ff21e8c3748230e..319e80ece2cba93a1ed9607b2c1aabec0a3722c0 100644 --- a/src/pbrt/util/string.h +++ b/src/pbrt/util/string.h @@ -36,6 +36,8 @@ std::wstring WStringFromUTF8(std::string str); std::string UTF8FromWString(std::wstring str); #endif // PBRT_IS_WINDOWS +std::string NormalizeUTF8(std::string str); + // InternedString Definition class InternedString { public: diff --git a/src/pbrt/util/string_test.cpp b/src/pbrt/util/string_test.cpp new file mode 100644 index 0000000000000000000000000000000000000000..11fc0ccd8d3d2ad13d287105de60f6717c9c6f5d --- /dev/null +++ b/src/pbrt/util/string_test.cpp @@ -0,0 +1,26 @@ +// pbrt is Copyright(c) 1998-2020 Matt Pharr, Wenzel Jakob, and Greg Humphreys. +// The pbrt source code is licensed under the Apache License, Version 2.0. +// SPDX: Apache-2.0 + +#include + +#include +#include + +#include + +using namespace pbrt; + +TEST(Unicode, BasicNormalization) { + // "Amélie" two ways, via https://en.wikipedia.org/wiki/Unicode_equivalence + std::u16string nfc16(u"\u0041\u006d\u00e9\u006c\u0069\u0065"); + std::u16string nfd16(u"\u0041\u006d\u0065\u0301\u006c\u0069\u0065"); + EXPECT_NE(nfc16, nfd16); + + std::string nfc8 = UTF8FromUTF16(nfc16); + std::string nfd8 = UTF8FromUTF16(nfd16); + EXPECT_NE(nfc8, nfd8); + + EXPECT_EQ(nfc8, NormalizeUTF8(nfc8)); // nfc is already normalized + EXPECT_EQ(nfc8, NormalizeUTF8(nfd8)); // normalizing nfd should make it equal nfc +}