ConfigProcessor.cpp 15.7 KB
Newer Older
A
Alexey Milovidov 已提交
1 2 3 4 5
#include <DB/Common/ConfigProcessor.h>
#include <sys/utsname.h>
#include <cerrno>
#include <cstring>
#include <iostream>
6
#include <functional>
Y
Yuri Dyachenko 已提交
7

A
Alexey Milovidov 已提交
8 9 10 11 12
#include <Poco/DOM/Text.h>
#include <Poco/DOM/Attr.h>
#include <Poco/DOM/Comment.h>
#include <Poco/Util/XMLConfiguration.h>

13 14
#include <zkutil/ZooKeeperNodeCache.h>

A
Alexey Milovidov 已提交
15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42
using namespace Poco::XML;


/// Returns true if `s` ends with `suffix`. An empty suffix matches any string.
static bool endsWith(const std::string & s, const std::string & suffix)
{
	if (suffix.size() > s.size())
		return false;

	/// Compare the tail of `s` in place.
	const size_t tail_offset = s.size() - suffix.size();
	return s.compare(tail_offset, suffix.size(), suffix) == 0;
}

/// Extracts the first run of at least two consecutive decimal digits from the string
/// and returns it with leading zeros stripped.
/// Returns an empty string if there is no such run. A run that consists only of zeros
/// also yields an empty string, because all of its digits are stripped.
static std::string numberFromHost(const std::string & s)
{
	/// Explicit ASCII range check: unlike isdigit(), it is well defined for negative char values
	/// (bytes >= 0x80 where char is signed) and does not depend on the locale.
	const auto is_ascii_digit = [](char c) { return c >= '0' && c <= '9'; };

	size_t pos = 0;
	while (pos < s.size())
	{
		if (!is_ascii_digit(s[pos]))
		{
			++pos;
			continue;
		}

		/// [pos, run_end) is a maximal run of digits.
		size_t run_end = pos;
		while (run_end < s.size() && is_ascii_digit(s[run_end]))
			++run_end;

		if (run_end - pos >= 2)
		{
			size_t first_significant = pos;
			while (first_significant < run_end && s[first_significant] == '0')
				++first_significant;
			return s.substr(first_significant, run_end - first_significant);
		}

		/// A single digit does not qualify; continue after it.
		pos = run_end;
	}
	return "";
}

/// Creates a processor.
///  throw_on_bad_incl_ - stored as is; consulted when an include cannot be resolved.
///  log_to_console - if set and no "ConfigProcessor" logger exists yet, a logger writing to a new console channel is created.
///  substitutions_ - stored as is; applied to text nodes while processing includes.
ConfigProcessor::ConfigProcessor(bool throw_on_bad_incl_, bool log_to_console, const Substitutions & substitutions_)
	: throw_on_bad_incl(throw_on_bad_incl_)
	, substitutions(substitutions_)
	/// We need larger name pool to allow to support vast amount of users in users.xml files for ClickHouse.
	/// Size is prime because Poco::XML::NamePool uses bad (inefficient, low quality)
	///  hash function internally, and its size was prime by default.
	, name_pool(new Poco::XML::NamePool(65521))
	, dom_parser(name_pool)
{
	const bool create_console_logger = log_to_console && Logger::has("ConfigProcessor") == nullptr;

	if (!create_console_logger)
	{
		log = &Logger::get("ConfigProcessor");
		return;
	}

	/// channel_ptr being set is what later tells the destructor that the logger was created here.
	channel_ptr = new Poco::ConsoleChannel;
	log = &Logger::create("ConfigProcessor", channel_ptr.get(), Poco::Message::PRIO_TRACE);
}

62 63 64 65 66 67 68
/// Destroys the "ConfigProcessor" logger if this object created it.
ConfigProcessor::~ConfigProcessor()
{
	/// A non-null channel_ptr means the constructor created a new console logger.
	if (channel_ptr)
	{
		Logger::destroy("ConfigProcessor");
	}
}


A
Alexey Milovidov 已提交
69 70
/// A vector made of the element name followed by the sorted list of its attribute names and values
///  (the replace, remove, incl and from_zk attributes are excluded).
/// It uniquely identifies an element name together with its attribute list. Needed to compare elements.
71
using ElementIdentifier = std::vector<std::string>;
A
Alexey Milovidov 已提交
72

73
using NamedNodeMapPtr = Poco::AutoPtr<Poco::XML::NamedNodeMap>;
A
Alexey Milovidov 已提交
74 75
/// NOTE It is possible to get rid of Node.childNodes() and of iterating over the returned list by index,
///  because access to the i-th element of that list takes O(i).
76
using NodeListPtr = Poco::AutoPtr<Poco::XML::NodeList>;
A
Alexey Milovidov 已提交
77 78 79 80 81 82 83 84 85

/// Builds the identifier of an element: its name followed by the (name, value) pairs of its attributes
/// in sorted order. The attributes replace, remove, incl and from_zk are not part of the identifier.
static ElementIdentifier getElementIdentifier(Node * element)
{
	using NameAndValue = std::pair<std::string, std::string>;

	std::vector<NameAndValue> sorted_attrs;
	NamedNodeMapPtr attr_map = element->attributes();
	for (size_t idx = 0; idx < attr_map->length(); ++idx)
	{
		Node * attr = attr_map->item(idx);
		const std::string attr_name = attr->nodeName();

		const bool excluded =
			attr_name == "replace" || attr_name == "remove" || attr_name == "incl" || attr_name == "from_zk";
		if (!excluded)
			sorted_attrs.push_back(NameAndValue(attr_name, attr->nodeValue()));
	}
	std::sort(sorted_attrs.begin(), sorted_attrs.end());

	ElementIdentifier identifier;
	identifier.push_back(element->nodeName());
	for (const NameAndValue & name_and_value : sorted_attrs)
	{
		identifier.push_back(name_and_value.first);
		identifier.push_back(name_and_value.second);
	}

	return identifier;
}

/// Returns the root element of the document, i.e. its first top-level child that is an element node.
/// Throws Poco::Exception if none of the top-level children is an element.
static Node * getRootNode(Document * document)
{
	/// Walk the sibling chain directly instead of indexing into childNodes():
	///  access to the i-th element of that list takes O(i).
	for (Node * child = document->firstChild(); child; child = child->nextSibling())
	{
		/// Besides the root element, the top level may contain comments. Skip them.
		if (child->nodeType() == Node::ELEMENT_NODE)
			return child;
	}

	throw Poco::Exception("No root node in document");
}

/// Returns true if the string contains nothing but spaces, tabs, line feeds and carriage returns.
/// The empty string qualifies.
static bool allWhitespace(const std::string & s)
{
	for (const char c : s)
	{
		const bool is_space = (c == ' ' || c == '\t' || c == '\n' || c == '\r');
		if (!is_space)
			return false;
	}
	return true;
}

123 124 125 126 127 128 129 130
static std::string preprocessedConfigPath(const std::string & path)
{
	Poco::Path preprocessed_path(path);
	preprocessed_path.setBaseName(preprocessed_path.getBaseName() + "-preprocessed");
	return preprocessed_path.toString();
}

/// Merges the children of `with_root` into `config_root` (a node of the `config` document), recursively.
///
/// Non-whitespace text located directly inside config_root is dropped first.
/// Then, for every child of with_root:
///  - an element for which config_root has a child with the same identifier (see getElementIdentifier):
///     - with the `remove` attribute: the matching child is removed from config_root;
///     - with the `replace` attribute: the matching child is replaced by a copy of the element
///       (the `replace` attribute itself is stripped from it);
///     - otherwise the two elements are merged recursively;
///  - an element without a match is copied and appended, unless it has the `remove` attribute;
///  - any non-element node (text, comment) is copied and appended.
/// Each child of config_root can be matched at most once.
///
/// Throws Poco::Exception if an element has both `remove` and `replace` attributes.
void ConfigProcessor::mergeRecursive(XMLDocumentPtr config, Node * config_root, Node * with_root)
{
	using ElementsByIdentifier = std::multimap<ElementIdentifier, Node *>;
	ElementsByIdentifier config_element_by_id;
	for (Node * node = config_root->firstChild(); node;)
	{
		/// Remember the successor first: the current node may be removed below.
		Node * next_node = node->nextSibling();
		/// Remove the original text from the part being merged.
		if (node->nodeType() == Node::TEXT_NODE && !allWhitespace(node->getNodeValue()))
		{
			config_root->removeChild(node);
		}
		else if (node->nodeType() == Node::ELEMENT_NODE)
		{
			config_element_by_id.insert(ElementsByIdentifier::value_type(getElementIdentifier(node), node));
		}
		node = next_node;
	}

	/// Walk the sibling chain instead of indexing into childNodes(): access to the i-th element of that list
	///  takes O(i). The children of with_root are never added or removed below, so the chain stays intact.
	for (Node * with_node = with_root->firstChild(); with_node; with_node = with_node->nextSibling())
	{
		bool merged = false;
		bool remove = false;
		if (with_node->nodeType() == Node::ELEMENT_NODE)
		{
			Element * with_element = dynamic_cast<Element *>(with_node);
			remove = with_element->hasAttribute("remove");
			bool replace = with_element->hasAttribute("replace");

			if (remove && replace)
				throw Poco::Exception("both remove and replace attributes set for element <" + with_node->nodeName() + ">");

			ElementsByIdentifier::iterator it = config_element_by_id.find(getElementIdentifier(with_node));

			if (it != config_element_by_id.end())
			{
				Node * config_node = it->second;
				/// Each element of the original config takes part in at most one merge.
				config_element_by_id.erase(it);

				if (remove)
				{
					config_root->removeChild(config_node);
				}
				else if (replace)
				{
					with_element->removeAttribute("replace");
					NodePtr new_node = config->importNode(with_node, true);
					config_root->replaceChild(new_node, config_node);
				}
				else
				{
					mergeRecursive(config, config_node, with_node);
				}
				merged = true;
			}
		}
		if (!merged && !remove)
		{
			NodePtr new_node = config->importNode(with_node, true);
			config_root->appendChild(new_node);
		}
	}
}

197
/// Merges the document `with` into the document `config`, starting from their root elements.
void ConfigProcessor::merge(XMLDocumentPtr config, XMLDocumentPtr with)
{
	Node * config_root = getRootNode(config.get());
	Node * with_root = getRootNode(with.get());
	mergeRecursive(config, config_root, with_root);
}

/// Derives the layer number from the node name reported by uname(), using numberFromHost().
/// Throws Poco::Exception if uname() fails or if no number is found in the host name.
std::string ConfigProcessor::layerFromHost()
{
	utsname system_info;
	const int rc = uname(&system_info);
	if (rc != 0)
		throw Poco::Exception(std::string("uname failed: ") + std::strerror(errno));

	const std::string host_name = system_info.nodename;
	std::string layer = numberFromHost(host_name);
	if (layer.empty())
		throw Poco::Exception(std::string("no layer in host name: ") + host_name);

	return layer;
}

215 216 217 218 219 220
/// Recursively processes the subtree of the `config` document rooted at `node`:
///  - in text nodes, applies every configured substitution (each occurrence is replaced once, left to right);
///  - an empty <layer> element without attributes gets the number from the host name as its text;
///  - an element with the `incl` attribute receives the children and attributes of the node "yandex/<name>"
///    from `include_from` (if that document is set);
///  - an element with the `from_zk` attribute receives the contents of the ZooKeeper node with that path,
///    obtained through `zk_node_cache` (if it is set); the path is added to `contributing_zk_paths` in any case;
///  - with the `replace` attribute, the existing children of the element are dropped before including.
/// If the included node cannot be found: an element with the `optional` attribute is removed from its parent,
/// otherwise an exception is thrown when throw_on_bad_incl is set, or a warning is logged.
///
/// Throws Poco::Exception if an element has both `incl` and `from_zk` attributes.
void ConfigProcessor::doIncludesRecursive(
		XMLDocumentPtr config,
		XMLDocumentPtr include_from,
		Node * node,
		zkutil::ZooKeeperNodeCache * zk_node_cache,
		std::unordered_set<std::string> & contributing_zk_paths)
{
	if (node->nodeType() == Node::TEXT_NODE)
	{
		for (const auto & substitution : substitutions)
		{
			/// An empty pattern is found at every position, so there is nothing meaningful to replace.
			if (substitution.first.empty())
				continue;

			std::string value = node->nodeValue();

			bool replace_occured = false;
			size_t pos = 0;
			while ((pos = value.find(substitution.first, pos)) != std::string::npos)
			{
				value.replace(pos, substitution.first.length(), substitution.second);
				/// Resume after the inserted text: otherwise a replacement that itself contains the pattern
				///  would be matched again and again, and the loop would never terminate.
				pos += substitution.second.length();
				replace_occured = true;
			}

			if (replace_occured)
				node->setNodeValue(value);
		}
	}

	if (node->nodeType() != Node::ELEMENT_NODE)
		return;

	/// Replace <layer> with the number from the host name only if the input file contains the <layer> tag,
	///  and it is empty, and it has no attributes.
	if ( node->nodeName() == "layer" &&
		!node->hasAttributes() &&
		!node->hasChildNodes() &&
		 node->nodeValue().empty())
	{
		NodePtr new_node = config->createTextNode(layerFromHost());
		node->appendChild(new_node);
		return;
	}

	NamedNodeMapPtr attributes = node->attributes();
	Node * incl_attribute = attributes->getNamedItem("incl");
	Node * from_zk_attribute = attributes->getNamedItem("from_zk");

	if (incl_attribute && from_zk_attribute)
		throw Poco::Exception("both incl and from_zk attributes set for element <" + node->nodeName() + ">");

	/// Replace the existing value instead of appending to it.
	bool replace = attributes->getNamedItem("replace") != nullptr;

	/// Common part of both include kinds. `get_node` maps the attribute value to the node to include
	///  (or nullptr if there is none); `error_msg` is the prefix of the error reported in that case.
	auto process_include = [&](const Node * include_attr, const std::function<Node * (const std::string &)> & get_node, const char * error_msg)
	{
		std::string name = include_attr->getNodeValue();
		Node * node_to_include = get_node(name);
		if (!node_to_include)
		{
			if (attributes->getNamedItem("optional"))
				node->parentNode()->removeChild(node);
			else if (throw_on_bad_incl)
				throw Poco::Exception(error_msg + name);
			else
				LOG_WARNING(log, error_msg << name);
		}
		else
		{
			Element * element = dynamic_cast<Element *>(node);

			element->removeAttribute("incl");
			element->removeAttribute("from_zk");

			if (replace)
			{
				while (Node * child = node->firstChild())
					node->removeChild(child);

				element->removeAttribute("replace");
			}

			NodeListPtr children = node_to_include->childNodes();
			for (size_t i = 0; i < children->length(); ++i)
			{
				NodePtr new_node = config->importNode(children->item(i), true);
				node->appendChild(new_node);
			}

			NamedNodeMapPtr from_attrs = node_to_include->attributes();
			for (size_t i = 0; i < from_attrs->length(); ++i)
			{
				/// Hold the imported node in a smart pointer, like every other importNode() result here,
				///  so that the reference returned by importNode() is released and the node is not leaked.
				NodePtr new_attr = config->importNode(from_attrs->item(i), true);
				element->setAttributeNode(dynamic_cast<Attr *>(new_attr.get()));
			}
		}
	};

	auto get_incl_node = [&](const std::string & name)
	{
		return include_from ? include_from->getNodeByPath("yandex/" + name) : nullptr;
	};
	if (incl_attribute)
		process_include(incl_attribute, get_incl_node, "Include not found: ");

	if (from_zk_attribute)
	{
		/// The path is recorded even when there is no ZooKeeper cache to read it from.
		contributing_zk_paths.insert(from_zk_attribute->getNodeValue());

		if (zk_node_cache)
		{
			/// Declared outside of the lambda: it owns the node that the lambda returns.
			XMLDocumentPtr zk_document;
			auto get_zk_node = [&](const std::string & name) -> Node *
			{
				std::experimental::optional<std::string> contents = zk_node_cache->get(name);
				if (!contents)
					return nullptr;

				/// Enclose contents into a fake <from_zk> tag to allow pure text substitutions.
				zk_document = dom_parser.parseString("<from_zk>" + contents.value() + "</from_zk>");
				return getRootNode(zk_document.get());
			};

			process_include(from_zk_attribute, get_zk_node, "Could not get ZooKeeper node: ");
		}
	}

	/// Walk the sibling chain and fetch the successor before recursing: a child may remove itself from
	///  this node (a missing optional include), and with index-based iteration over the live child list
	///  the sibling following such a child would be skipped.
	for (Node * child = node->firstChild(); child;)
	{
		Node * next_child = child->nextSibling();
		doIncludesRecursive(config, include_from, child, zk_node_cache, contributing_zk_paths);
		child = next_child;
	}
}

344
/// Returns the files that have to be merged into the config located at `config_path`.
/// Two directories are examined: the config path with its extension replaced by "d", and then
/// the same path with the base name "conf" (i.e. "conf.d"), unless that is the same directory.
/// From each existing directory, regular files whose path ends with ".xml" or ".conf" are taken.
/// Within each directory the files are returned in sorted order of their paths.
ConfigProcessor::Files ConfigProcessor::getConfigMergeFiles(const std::string & config_path)
{
	Files res;

	Poco::Path merge_dir_path(config_path);
	merge_dir_path.setExtension("d");

	std::vector<std::string> merge_dirs;
	merge_dirs.push_back(merge_dir_path.toString());
	if (merge_dir_path.getBaseName() != "conf")
	{
		merge_dir_path.setBaseName("conf");
		merge_dirs.push_back(merge_dir_path.toString());
	}

	for (const std::string & merge_dir_name : merge_dirs)
	{
		Poco::File merge_dir(merge_dir_name);
		if (!merge_dir.exists() || !merge_dir.isDirectory())
			continue;

		std::vector<std::string> dir_files;
		for (Poco::DirectoryIterator it(merge_dir_name); it != Poco::DirectoryIterator(); ++it)
		{
			Poco::File & file = *it;
			if (file.isFile() && (endsWith(file.path(), ".xml") || endsWith(file.path(), ".conf")))
				dir_files.push_back(file.path());
		}

		/// The order in which a directory is iterated is not something to rely on, while the order of merging
		///  matters when several files touch the same element. Make it deterministic.
		std::sort(dir_files.begin(), dir_files.end());

		for (const std::string & file_path : dir_files)
			res.push_back(file_path);
	}

	return res;
}

376 377 378 379
/// Builds the final config document for the file `path_str`:
///  1. parses the file;
///  2. merges into it every file returned by getConfigMergeFiles();
///  3. processes substitutions and includes; the document to include from is taken from
///     the <include_from> element of the config or, if absent, from /etc/metrika.xml when that file exists;
///  4. prepends a comment that lists the files (and ZooKeeper nodes) used to generate the result.
///
/// If `has_zk_includes` is not null, it receives whether the config contains any from_zk includes.
/// `zk_node_cache` may be null; it is passed on to the include processing.
/// Failures of steps 2 and 3 are rethrown as Poco::Exception with the original exception nested.
XMLDocumentPtr ConfigProcessor::processConfig(
		const std::string & path_str,
		bool * has_zk_includes,
		zkutil::ZooKeeperNodeCache * zk_node_cache)
{
	XMLDocumentPtr config = dom_parser.parse(path_str);

	std::vector<std::string> contributing_files;
	contributing_files.push_back(path_str);

	for (const auto & merge_file : getConfigMergeFiles(path_str))
	{
		try
		{
			XMLDocumentPtr with = dom_parser.parse(merge_file);
			merge(config, with);
			contributing_files.push_back(merge_file);
		}
		catch (const Poco::Exception & e)
		{
			/// Keep the original exception as nested, the same way as for the include phase below.
			throw Poco::Exception("Failed to merge config with " + merge_file + ": " + e.displayText(), e);
		}
	}

	std::unordered_set<std::string> contributing_zk_paths;
	try
	{
		Node * node = config->getNodeByPath("yandex/include_from");
		XMLDocumentPtr include_from;
		std::string include_from_path;
		if (node)
		{
			include_from_path = node->innerText();
		}
		else
		{
			std::string default_path = "/etc/metrika.xml";
			if (Poco::File(default_path).exists())
				include_from_path = default_path;
		}
		if (!include_from_path.empty())
		{
			contributing_files.push_back(include_from_path);
			include_from = dom_parser.parse(include_from_path);
		}

		doIncludesRecursive(config, include_from, getRootNode(config.get()), zk_node_cache, contributing_zk_paths);
	}
	catch (const Poco::Exception & e)
	{
		/// The nested exception is inspected by loadConfigWithZooKeeperIncludes.
		throw Poco::Exception("Failed to preprocess config `" + path_str + "': " + e.displayText(), e);
	}

	if (has_zk_includes)
		*has_zk_includes = !contributing_zk_paths.empty();

	std::stringstream comment;
	comment <<     " This file was generated automatically.\n";
	comment << "     Do not edit it: it is likely to be discarded and generated again before it's read next time.\n";
	comment << "     Files used to generate this file:";
	for (const std::string & path : contributing_files)
	{
		comment << "\n       " << path;
	}
	if (zk_node_cache && !contributing_zk_paths.empty())
	{
		comment << "\n     ZooKeeper nodes used to generate this file:";
		for (const std::string & path : contributing_zk_paths)
			comment << "\n       " << path;
	}

	comment << "      ";
	/// Each node is inserted before the current first child, so the comment ends up first,
	///  followed by the blank lines.
	NodePtr new_node = config->createTextNode("\n\n");
	config->insertBefore(new_node, config->firstChild());
	new_node = config->createComment(comment.str());
	config->insertBefore(new_node, config->firstChild());

	return config;
}

456
/// Loads the config from `path` without access to ZooKeeper.
/// Throws Poco::Exception if the config contains from_zk includes and `allow_zk_includes` is false.
/// The preprocessed config is saved only if the config has no from_zk includes.
ConfigProcessor::LoadedConfig ConfigProcessor::loadConfig(const std::string & path, bool allow_zk_includes)
{
	bool has_zk_includes;
	XMLDocumentPtr config_xml = processConfig(path, &has_zk_includes);

	if (!allow_zk_includes && has_zk_includes)
		throw Poco::Exception("Error while loading config `" + path + "': from_zk includes are not allowed!");

	const bool preprocessed_written = !has_zk_includes;
	if (preprocessed_written)
		savePreprocessedConfig(config_xml, preprocessedConfigPath(path));

	ConfigurationPtr configuration(new Poco::Util::XMLConfiguration(config_xml));

	return LoadedConfig{configuration, has_zk_includes, /* loaded_from_preprocessed = */ false, preprocessed_written};
}

/// Loads the config from `path`, resolving from_zk includes through `zk_node_cache`.
/// If processing fails because of a ZooKeeper error (a zkutil::KeeperException nested in the thrown exception)
/// and `fallback_to_preprocessed` is set, the previously saved preprocessed file is loaded instead.
/// Any other failure is rethrown.
/// The preprocessed file is (re)written only after successful processing.
ConfigProcessor::LoadedConfig ConfigProcessor::loadConfigWithZooKeeperIncludes(
		const std::string & path,
		zkutil::ZooKeeperNodeCache & zk_node_cache,
		bool fallback_to_preprocessed)
{
	std::string preprocessed_path = preprocessedConfigPath(path);

	XMLDocumentPtr config_xml;
	/// Initialized explicitly: processConfig assigns it only when it completes without throwing.
	bool has_zk_includes = false;
	bool processed_successfully = false;
	try
	{
		config_xml = processConfig(path, &has_zk_includes, &zk_node_cache);
		processed_successfully = true;
	}
	catch (const Poco::Exception & ex)
	{
		if (!fallback_to_preprocessed)
			throw;

		const auto * zk_exception = dynamic_cast<const zkutil::KeeperException *>(ex.nested());
		if (!zk_exception)
			throw;

		LOG_WARNING(
				log,
				"Error while processing from_zk config includes: " + zk_exception->message() +
				". Config will be loaded from preprocessed file: " + preprocessed_path);

		config_xml = dom_parser.parse(preprocessed_path);

		/// The failure was a ZooKeeper error raised while processing includes, which means
		///  that the config refers to ZooKeeper nodes.
		has_zk_includes = true;
	}

	if (processed_successfully)
		savePreprocessedConfig(config_xml, preprocessed_path);

	ConfigurationPtr configuration(new Poco::Util::XMLConfiguration(config_xml));

	return LoadedConfig{configuration, has_zk_includes, !processed_successfully, processed_successfully};
}

void ConfigProcessor::savePreprocessedConfig(const XMLDocumentPtr & config, const std::string & preprocessed_path)
{
	try
	{
		DOMWriter().writeNode(preprocessed_path, config);
	}
	catch (Poco::Exception & e)
	{
		LOG_WARNING(log, "Couldn't save preprocessed config to " << preprocessed_path << ": " << e.displayText());
	}
A
Alexey Milovidov 已提交
526
}