From 103df26f7f10eef741dfd1724ff84e78eb9f1a52 Mon Sep 17 00:00:00 2001 From: Chris Hajas Date: Tue, 11 Apr 2017 09:43:35 -0700 Subject: [PATCH] Refactor schema filtering in gprestore_filter. (#2188) Move most of the process_schema logic into functions, clean up dead code, and cover the process_schema function with unit tests. Signed-off-by: Chris Hajas --- .../test/unit/test_unit_gprestore_filter.py | 999 ++++++++++++------ gpMgmt/bin/gprestore_filter.py | 411 +++---- 2 files changed, 908 insertions(+), 502 deletions(-) diff --git a/gpMgmt/bin/gppylib/test/unit/test_unit_gprestore_filter.py b/gpMgmt/bin/gppylib/test/unit/test_unit_gprestore_filter.py index f9fdf97904..bae4d340f0 100755 --- a/gpMgmt/bin/gppylib/test/unit/test_unit_gprestore_filter.py +++ b/gpMgmt/bin/gppylib/test/unit/test_unit_gprestore_filter.py @@ -8,11 +8,636 @@ from mock import patch from gppylib.mainUtils import ExceptionNoStackTraceNeeded from gprestore_filter import get_table_schema_set, extract_schema, extract_table, \ process_data, get_table_info, process_schema, check_valid_schema, check_valid_relname, \ - check_dropped_table, get_table_from_alter_table + check_dropped_table, get_table_from_alter_table, process_line, Arguments, ParserState logger = gplog.get_unittest_logger() +class GpRestoreFilterProcessLineTestCase(unittest.TestCase): + + def test_begin_block(self): + arguments = Arguments(set(['schema']), set([('schema', 'table')])) + state = ParserState() + input_line = "BEGIN" + + newState, line = process_line(state, input_line, arguments) + + self.assertTrue(newState.in_block) + self.assertTrue(newState.output) + self.assertEquals(line, input_line) + + def test_end_block(self): + arguments = Arguments(set(['schema']), set([('schema', 'table')])) + state = ParserState() + state.in_block = True + input_line = "END" + + newState, line = process_line(state, input_line, arguments) + + self.assertFalse(newState.in_block) + self.assertTrue(newState.output) + self.assertEquals(line, 
input_line) + + def test_within_block(self): + arguments = Arguments(set(['schema']), set([('schema', 'table')])) + state = ParserState() + state.in_block = True + input_line = "SOMETHING" + + newState, line = process_line(state, input_line, arguments) + + self.assertTrue(newState.in_block) + self.assertTrue(newState.output) + self.assertEquals(line, input_line) + + def test_set_search_path_for_pg_catalog(self): + arguments = Arguments(set(['schema']), set([('schema', 'table')])) + state = ParserState() + input_line = "SET search_path = pg_catalog;" + + newState, line = process_line(state, input_line, arguments) + + self.assertTrue(newState.output) + self.assertEquals(newState.schema, 'pg_catalog') + self.assertEquals(newState.cast_func_schema, None) + self.assertEquals(line, input_line) + + def test_set_search_path_schema_in_table_file(self): + arguments = Arguments(set(['schemaIcareAbout']), set([('schemaIcareAbout', 'table')])) + state = ParserState() + input_line = "SET search_path = schemaIcareAbout, pg_catalog;" + + newState, line = process_line(state, input_line, arguments) + + self.assertTrue(newState.output) + self.assertEquals(newState.schema, 'schemaIcareAbout') + self.assertEquals(newState.cast_func_schema, 'schemaIcareAbout') + self.assertEquals(line, input_line) + + def test_set_search_path_schema_in_schema_file(self): + arguments = Arguments() + arguments.schemas_in_schema_file = ['schemaIcareAbout'] + state = ParserState() + input_line = "SET search_path = schemaIcareAbout, pg_catalog;" + + newState, line = process_line(state, input_line, arguments) + + self.assertTrue(newState.output) + self.assertEquals(newState.schema, 'schemaIcareAbout') + self.assertEquals(newState.cast_func_schema, 'schemaIcareAbout') + self.assertEquals(line, input_line) + + def test_set_search_path_change_schema_in_table_file(self): + arguments = Arguments(set(['schemaIcareAbout']), set([('schemaIcareAbout', 'table')])) + arguments.change_schema_name = 'newSchema' + state = 
ParserState() + input_line = "SET search_path = schemaIcareAbout, pg_catalog;" + + newState, line = process_line(state, input_line, arguments) + + self.assertTrue(newState.output) + self.assertEquals(newState.schema, 'schemaIcareAbout') + self.assertEquals(newState.cast_func_schema, 'schemaIcareAbout') + self.assertEquals(line, 'SET search_path = "newSchema", pg_catalog;') + + def test_set_search_path_change_quoted_schema_in_table_file_(self): + arguments = Arguments(set(['schemaIcareAbout']), set([('schemaIcareAbout', 'table')])) + arguments.change_schema_name = 'newSchema' + state = ParserState() + input_line = 'SET search_path = "schemaIcareAbout", pg_catalog;' + + newState, line = process_line(state, input_line, arguments) + + self.assertTrue(newState.output) + self.assertEquals(newState.schema, 'schemaIcareAbout') + self.assertEquals(newState.cast_func_schema, 'schemaIcareAbout') + self.assertEquals(line, 'SET search_path = "newSchema", pg_catalog;') + + def test_set_search_path_ignores_unineresting_schemas(self): + arguments = Arguments(set(['schemaIcareAbout']), set([('schemaIcareAbout', 'table')])) + state = ParserState() + input_line = "SET search_path = someOtherSchema, pg_catalog;" + + newState, line = process_line(state, input_line, arguments) + + self.assertFalse(newState.output) + self.assertEquals(newState.schema, 'someOtherSchema') + self.assertEquals(newState.cast_func_schema, None) + self.assertEquals(line, input_line) + + def test_set_assignment_outputs(self): + arguments = Arguments(set(['schema']), set([('schema', 'table')])) + state = ParserState() + input_line = "SET SOMETHING=SOMETHING" + + newState, line = process_line(state, input_line, arguments) + + self.assertTrue(newState.output) + self.assertEquals(line, input_line) + + def test_drop_table_statement_drop_schema_section_passed(self): + arguments = Arguments(set(['schema']), set([('schema', 'table')])) + state = ParserState() + state.passedDropSchemaSection=True + input_line = "DROP 
TABLE mytable" + + newState, line = process_line(state, input_line, arguments) + + self.assertFalse(newState.output) + self.assertEquals(line, input_line) + + def test_drop_external_table_statement_drop_schema_section_passed(self): + arguments = Arguments(set(['schema']), set([('schema', 'table')])) + state = ParserState() + state.passedDropSchemaSection=True + input_line = "DROP EXTERNAL TABLE mytable" + + newState, line = process_line(state, input_line, arguments) + + self.assertFalse(newState.output) + self.assertEquals(line, input_line) + + def test_drop_table_statement_drop_schema_section_passed(self): + arguments = Arguments(set(['schema']), set([('schema', 'table')])) + state = ParserState() + state.passedDropSchemaSection=True + input_line = "DROP EXTERNAL TABLE mytable" + + newState, line = process_line(state, input_line, arguments) + + self.assertFalse(newState.output) + self.assertEquals(line, input_line) + + def test_drop_not_table(self): + arguments = Arguments(set(['schema']), set([('schema', 'table')])) + state = ParserState() + state.passedDropSchemaSection=True + input_line = "DROP SOME_RANDOM_THING" + + newState, line = process_line(state, input_line, arguments) + + self.assertFalse(newState.output) + self.assertEquals(line, input_line) + + def test_cast_function_schema_with_function(self): + arguments = Arguments() + arguments.change_schema_name = 'newSchema' + state = ParserState() + state.change_cast_func_schema = True + state.cast_func_schema = 'schemaToReplace' + input_line = "CREATE CAST castName WITH FUNCTION schemaToReplace.castToInt(text) AS ASSIGNMENT" + + newState, line = process_line(state, input_line, arguments) + + output_line = 'CREATE CAST castName WITH FUNCTION "newSchema".castToInt(text) AS ASSIGNMENT' + self.assertFalse(newState.output) + self.assertEquals(line, output_line) + self.assertFalse(newState.change_cast_func_schema) + self.assertEquals(newState.cast_func_schema, None) + + def 
test_cast_function_schema_with_function_with_quotes(self): + arguments = Arguments() + arguments.change_schema_name = 'newSchema' + state = ParserState() + state.change_cast_func_schema = True + state.cast_func_schema = 'schemaToReplace' + input_line = 'CREATE CAST castName WITH FUNCTION "schemaToReplace".castToInt(text) AS ASSIGNMENT' + + newState, line = process_line(state, input_line, arguments) + + output_line = 'CREATE CAST castName WITH FUNCTION "newSchema".castToInt(text) AS ASSIGNMENT' + self.assertFalse(newState.output) + self.assertEquals(line, output_line) + self.assertFalse(newState.change_cast_func_schema) + self.assertEquals(newState.cast_func_schema, None) + + def test_schema_expression_in_comments_exists_in_table_file(self): + arguments = Arguments(set(['schemaICareAbout']), set([('schemaICareAbout', 'table')])) + arguments.schemas_in_schema_file = None + state = ParserState() + input_line = '-- Name: schemaICareAbout; Type: SCHEMA; Schema: schemaICareAbout; Owner: user_role_b; Tablespace:' + + newState, line = process_line(state, input_line, arguments) + + self.assertTrue(newState.output) + self.assertEquals(line, input_line) + self.assertFalse(newState.function_ddl) + self.assertEqual(newState.schema, 'schemaICareAbout') + + def test_schema_expression_in_comments_exists_in_schema_file(self): + arguments = Arguments() + arguments.schemas_in_schema_file = 'schemaICareAbout' + state = ParserState() + input_line = '-- Name: schemaICareAbout; Type: SCHEMA; Schema: schemaICareAbout; Owner: user_role_b; Tablespace:' + + newState, line = process_line(state, input_line, arguments) + + self.assertTrue(newState.output) + self.assertEquals(line, input_line) + self.assertFalse(newState.function_ddl) + self.assertEqual(newState.schema, 'schemaICareAbout') + + def test_schema_expression_in_comments_does_not_exist(self): + arguments = Arguments() + arguments.schemas_in_schema_file = None + state = ParserState() + input_line = '-- Name: schemaICareAbout; Type: 
SCHEMA; Schema: schemaICareAbout; Owner: user_role_b; Tablespace:' + + newState, line = process_line(state, input_line, arguments) + + self.assertFalse(newState.output) + self.assertEquals(line, input_line) + self.assertFalse(newState.function_ddl) + self.assertEqual(newState.schema, 'schemaICareAbout') + + def test_non_existent_type_expression_in_comments(self): + arguments = Arguments() + arguments.schemas_in_schema_file = None + state = ParserState() + input_line = '-- Name: arbitrary; Type: RANDOM; Schema: schema; Owner: user_role_b; Tablespace:' + + newState, line = process_line(state, input_line, arguments) + + self.assertFalse(newState.output) + self.assertEquals(line, input_line) + self.assertFalse(newState.function_ddl) + self.assertEqual(newState.schema, 'schema') + + def test_table_expression_in_comments_exists_in_table_file(self): + arguments = Arguments(set(['schemaICareAbout']), set([('schemaICareAbout', 'some_table')])) + arguments.schemas_in_schema_file = None + state = ParserState() + input_line = '-- Name: some_table; Type: TABLE; Schema: schemaICareAbout; Owner: user_role_b; Tablespace:' + + newState, line = process_line(state, input_line, arguments) + + self.assertTrue(newState.output) + self.assertEquals(line, input_line) + self.assertFalse(newState.function_ddl) + self.assertFalse(newState.further_investigation_required) + self.assertEqual(newState.schema, 'schemaICareAbout') + + def test_table_expression_in_comments_does_not_exist_in_table_file(self): + arguments = Arguments(set(['schemaICareAbout']), set([('schemaICareAbout', 'some_other_table')])) + arguments.schemas_in_schema_file = None + state = ParserState() + input_line = '-- Name: some_table; Type: TABLE; Schema: schemaICareAbout; Owner: user_role_b; Tablespace:' + + newState, line = process_line(state, input_line, arguments) + + self.assertFalse(newState.output) + self.assertEquals(line, input_line) + self.assertFalse(newState.function_ddl) + 
self.assertFalse(newState.further_investigation_required) + self.assertEqual(newState.schema, 'schemaICareAbout') + + def test_external_table_expression_in_comments_exists_in_table_file(self): + arguments = Arguments(set(['schemaICareAbout']), set([('schemaICareAbout', 'some_table')])) + arguments.schemas_in_schema_file = None + state = ParserState() + input_line = '-- Name: some_table; Type: EXTERNAL TABLE; Schema: schemaICareAbout; Owner: user_role_b; Tablespace:' + + newState, line = process_line(state, input_line, arguments) + + self.assertTrue(newState.output) + self.assertEquals(line, input_line) + self.assertFalse(newState.function_ddl) + self.assertFalse(newState.further_investigation_required) + self.assertEqual(newState.schema, 'schemaICareAbout') + + def test_external_table_expression_in_comments_does_not_exist_in_table_file(self): + arguments = Arguments(set(['schemaICareAbout']), set([('schemaICareAbout', 'some_other_external_table')])) + arguments.schemas_in_schema_file = None + state = ParserState() + input_line = '-- Name: some_external_table; Type: EXTERNAL TABLE; Schema: schemaICareAbout; Owner: user_role_b; Tablespace:' + + newState, line = process_line(state, input_line, arguments) + + self.assertFalse(newState.output) + self.assertEquals(line, input_line) + self.assertFalse(newState.function_ddl) + self.assertFalse(newState.further_investigation_required) + self.assertEqual(newState.schema, 'schemaICareAbout') + + def test_view_expression_in_comments_exists_in_table_file(self): + arguments = Arguments(set(['schemaICareAbout']), set([('schemaICareAbout', 'some_table')])) + arguments.schemas_in_schema_file = None + state = ParserState() + input_line = '-- Name: some_table; Type: VIEW; Schema: schemaICareAbout; Owner: user_role_b; Tablespace:' + + newState, line = process_line(state, input_line, arguments) + + self.assertTrue(newState.output) + self.assertEquals(line, input_line) + self.assertFalse(newState.function_ddl) + 
self.assertFalse(newState.further_investigation_required) + self.assertEqual(newState.schema, 'schemaICareAbout') + + def test_view_expression_in_comments_does_not_exist_in_table_file(self): + arguments = Arguments(set(['schemaICareAbout']), set([('schemaICareAbout', 'some_other_view')])) + arguments.schemas_in_schema_file = None + state = ParserState() + input_line = '-- Name: some_view; Type: VIEW; Schema: schemaICareAbout; Owner: user_role_b; Tablespace:' + + newState, line = process_line(state, input_line, arguments) + + self.assertFalse(newState.output) + self.assertEquals(line, input_line) + self.assertFalse(newState.function_ddl) + self.assertFalse(newState.further_investigation_required) + self.assertEqual(newState.schema, 'schemaICareAbout') + + def test_sequence_expression_in_comments_exists_in_table_file(self): + arguments = Arguments(set(['schemaICareAbout']), set([('schemaICareAbout', 'some_table')])) + arguments.schemas_in_schema_file = None + state = ParserState() + input_line = '-- Name: some_table; Type: SEQUENCE; Schema: schemaICareAbout; Owner: user_role_b; Tablespace:' + + newState, line = process_line(state, input_line, arguments) + + self.assertTrue(newState.output) + self.assertEquals(line, input_line) + self.assertFalse(newState.function_ddl) + self.assertFalse(newState.further_investigation_required) + self.assertEqual(newState.schema, 'schemaICareAbout') + + def test_sequence_expression_in_comments_does_not_exist_in_table_file(self): + arguments = Arguments(set(['schemaICareAbout']), set([('schemaICareAbout', 'some_other_sequence')])) + arguments.schemas_in_schema_file = None + state = ParserState() + input_line = '-- Name: some_sequence; Type: SEQUENCE; Schema: schemaICareAbout; Owner: user_role_b; Tablespace:' + + newState, line = process_line(state, input_line, arguments) + + self.assertFalse(newState.output) + self.assertEquals(line, input_line) + self.assertFalse(newState.function_ddl) + 
self.assertFalse(newState.further_investigation_required) + self.assertEqual(newState.schema, 'schemaICareAbout') + + def test_constraint_expression_in_comments_exists_in_table_file(self): + arguments = Arguments(set(['schemaICareAbout']), set([('schemaICareAbout', 'table')])) + arguments.schemas_in_schema_file = None + state = ParserState() + input_line = '-- Name: some_constraint; Type: CONSTRAINT; Schema: schemaICareAbout; Owner: user_role_b; Tablespace:' + + newState, line = process_line(state, input_line, arguments) + + self.assertFalse(newState.output) + self.assertEquals(line, input_line) + self.assertEquals(state.line_buff, input_line) + self.assertFalse(newState.function_ddl) + self.assertTrue(newState.further_investigation_required) + self.assertEqual(newState.schema, 'schemaICareAbout') + + def test_constraint_expression_in_comments_schema_does_not_exist_in_table_file(self): + arguments = Arguments(set(['schemaICareAbout']), set([('schemaICareAbout', 'table')])) + arguments.schemas_in_schema_file = None + state = ParserState() + input_line = '-- Name: some_constraint; Type: CONSTRAINT; Schema: other_schema; Owner: user_role_b; Tablespace:' + + newState, line = process_line(state, input_line, arguments) + + self.assertFalse(newState.output) + self.assertEquals(line, input_line) + self.assertEquals(state.line_buff, '') + self.assertFalse(newState.function_ddl) + self.assertTrue(newState.further_investigation_required) + self.assertEqual(newState.schema, 'other_schema') + + def test_ACL_expression_in_comments_exists_in_table_file(self): + arguments = Arguments(set(['schemaICareAbout']), set([('schemaICareAbout', 'some_ACL')])) + arguments.schemas_in_schema_file = None + state = ParserState() + input_line = '-- Name: some_ACL; Type: ACL; Schema: schemaICareAbout; Owner: user_role_b; Tablespace:' + + newState, line = process_line(state, input_line, arguments) + + self.assertTrue(newState.output) + self.assertEquals(line, input_line) + 
self.assertFalse(newState.function_ddl) + self.assertEqual(newState.schema, 'schemaICareAbout') + + def test_ACL_expression_in_comments_schema_does_not_exist_in_table_file(self): + arguments = Arguments(set(['schemaICareAbout']), set([('schemaICareAbout', 'table')])) + arguments.schemas_in_schema_file = None + state = ParserState() + input_line = '-- Name: public; Type: ACL; Schema: other_schema; Owner: user_role_b; Tablespace:' + + newState, line = process_line(state, input_line, arguments) + + self.assertFalse(newState.output) + self.assertEquals(line, input_line) + self.assertFalse(newState.function_ddl) + self.assertEqual(newState.schema, 'other_schema') + + def test_function_expression_in_comments_exists_in_schema_file(self): + arguments = Arguments(set(['schemaICareAbout']), set([('schemaICareAbout', 'table')])) + arguments.schemas_in_schema_file = None + state = ParserState() + input_line = '-- Name: some_function; Type: FUNCTION; Schema: schemaICareAbout; Owner: user_role_b; Tablespace:' + + newState, line = process_line(state, input_line, arguments) + + self.assertTrue(newState.output) + self.assertEquals(line, input_line) + self.assertTrue(newState.function_ddl) + self.assertEqual(newState.schema, 'schemaICareAbout') + + def test_function_expression_in_comments_schema_does_not_exist_in_schema_file(self): + arguments = Arguments(set(['schemaICareAbout']), set([('schemaICareAbout', 'table')])) + arguments.schemas_in_schema_file = None + state = ParserState() + input_line = '-- Name: some_function; Type: FUNCTION; Schema: other_schema; Owner: user_role_b; Tablespace:' + + newState, line = process_line(state, input_line, arguments) + + self.assertFalse(newState.output) + self.assertEquals(line, input_line) + self.assertTrue(newState.function_ddl) + self.assertEqual(newState.schema, 'other_schema') + + def test_cast_expression_in_comments(self): + arguments = Arguments(set(['schemaICareAbout']), set([('schemaICareAbout', 'table')])) + 
arguments.schemas_in_schema_file = None + state = ParserState() + input_line = '-- Name: some_cast; Type: CAST; Schema: schemaICareAbout; Owner: user_role_b; Tablespace:' + + newState, line = process_line(state, input_line, arguments) + + self.assertTrue(newState.output) + self.assertEquals(line, input_line) + self.assertTrue(newState.change_cast_func_schema) + + def test_procedural_language_expression_in_comments(self): + arguments = Arguments(set(['schemaICareAbout']), set([('schemaICareAbout', 'table')])) + arguments.schemas_in_schema_file = None + state = ParserState() + input_line = '-- Name: some_proc_lang; Type: PROCEDURAL LANGUAGE; Schema: schemaICareAbout; Owner: user_role_b; Tablespace:' + + newState, line = process_line(state, input_line, arguments) + + self.assertTrue(newState.output) + self.assertEquals(line, input_line) + self.assertTrue(newState.change_cast_func_schema) + + def test_data_expression_in_comments_type_a_without_table_data(self): + arguments = Arguments(set(['schemaICareAbout']), set([('schemaICareAbout', 'table')])) + state = ParserState() + input_line = '-- Data: ao_part_table; Type: SOMETHING; Schema: some_other_schema; Owner: dcddev; Tablespace:' + + newState, line = process_line(state, input_line, arguments) + + self.assertFalse(newState.output) + self.assertEquals(line, input_line) + self.assertEquals(state.schema, 'some_other_schema') + self.assertFalse(newState.further_investigation_required) + + def test_data_expression_in_comments_type_b_without_table_data(self): + arguments = Arguments(set(['schemaICareAbout']), set([('schemaICareAbout', 'table')])) + state = ParserState() + input_line = '-- Data for Name: ao_table; Type: SOMETHING; Schema: some_other_schema; Owner: dcddev' + + newState, line = process_line(state, input_line, arguments) + + self.assertFalse(newState.output) + self.assertEquals(line, input_line) + self.assertEquals(state.schema, 'some_other_schema') + self.assertFalse(newState.further_investigation_required) + 
+ def test_data_expression_in_comments_type_a_with_table_data_in_schema_file(self): + arguments = Arguments() + arguments.schemas_in_schema_file = ['schemaICareAbout'] + state = ParserState() + input_line = '-- Data: ao_part_table; Type: TABLE DATA; Schema: schemaICareAbout; Owner: dcddev; Tablespace:' + + newState, line = process_line(state, input_line, arguments) + + self.assertTrue(newState.output) + self.assertEquals(line, input_line) + self.assertEquals(state.schema, 'schemaICareAbout') + self.assertFalse(newState.further_investigation_required) + + def test_data_expression_in_comments_type_a_with_table_data_not_in_schema_file(self): + arguments = Arguments() + arguments.schemas_in_schema_file = None + state = ParserState() + input_line = '-- Data: ao_part_table; Type: TABLE DATA; Schema: some_other_schema; Owner: dcddev; Tablespace:' + + newState, line = process_line(state, input_line, arguments) + + self.assertFalse(newState.output) + self.assertEquals(line, input_line) + self.assertEquals(state.schema, 'some_other_schema') + self.assertFalse(newState.further_investigation_required) + + def test_data_expression_in_comments_type_b_with_table_data_in_schema_file(self): + arguments = Arguments() + arguments.schemas_in_schema_file = ['schemaICareAbout'] + state = ParserState() + input_line = '-- Data for Name: ao_table; Type: TABLE DATA; Schema: schemaICareAbout; Owner: dcddev' + newState, line = process_line(state, input_line, arguments) + + self.assertTrue(newState.output) + self.assertEquals(line, input_line) + self.assertEquals(state.schema, 'schemaICareAbout') + self.assertFalse(newState.further_investigation_required) + + def test_data_expression_in_comments_type_b_with_table_data_not_in_schema_file(self): + arguments = Arguments() + arguments.schemas_in_schema_file = None + state = ParserState() + input_line = '-- Data for Name: ao_table; Type: TABLE DATA; Schema: some_other_schema; Owner: dcddev' + + newState, line = process_line(state, input_line, 
arguments) + + self.assertFalse(newState.output) + self.assertEquals(line, input_line) + self.assertEquals(state.schema, 'some_other_schema') + self.assertFalse(newState.further_investigation_required) + + def test_further_investigation_without_alter_expression(self): + arguments = Arguments() + state = ParserState() + state.further_investigation_required = True + input_line = 'RANDOM ARBITRARY EXPRESSION' + + newState, line = process_line(state, input_line, arguments) + + self.assertFalse(newState.output) + self.assertEquals(line, input_line) + self.assertTrue(newState.further_investigation_required) + + def test_further_investigation_with_alter_table_expression_not_in_schema_file(self): + arguments = Arguments() + arguments.schemas_in_schema_file = None + state = ParserState() + state.further_investigation_required = True + input_line = 'ALTER TABLE schema1.table1 OWNER TO gpadmin;' + + newState, line = process_line(state, input_line, arguments) + + self.assertFalse(newState.output) + self.assertEquals(line, input_line) + self.assertFalse(newState.further_investigation_required) + + def test_further_investigation_with_alter_table_only_expression_not_in_schema_file(self): + arguments = Arguments() + arguments.schemas_in_schema_file = None + state = ParserState() + state.further_investigation_required = True + state.schema = 'SchemaICareAbout' + input_line = 'ALTER TABLE ONLY schema.table1 OWNER TO gpadmin;' + + newState, line = process_line(state, input_line, arguments) + + self.assertFalse(newState.output) + self.assertEquals(line, input_line) + self.assertFalse(newState.further_investigation_required) + + def test_further_investigation_with_alter_table_expression_in_schema_file(self): + arguments = Arguments() + arguments.schemas_in_schema_file = ['SchemaICareAbout'] + state = ParserState() + state.schema = 'SchemaICareAbout' + state.further_investigation_required = True + input_line = 'ALTER TABLE SchemaICareAbout.table1 OWNER TO gpadmin;' + + newState, line = 
process_line(state, input_line, arguments) + + self.assertTrue(newState.output) + self.assertEquals(line, input_line) + self.assertFalse(newState.further_investigation_required) + + def test_further_investigation_with_alter_table_expression_in_schema_file_with_line_buff(self): + arguments = Arguments() + arguments.schemas_in_schema_file = ['SchemaICareAbout'] + state = ParserState() + state.schema = 'SchemaICareAbout' + state.line_buff = 'some previously saved text' + state.further_investigation_required = True + input_line = 'ALTER TABLE SchemaICareAbout.table1 OWNER TO gpadmin;' + + newState, line = process_line(state, input_line, arguments) + + self.assertTrue(newState.output) + self.assertEquals(line, 'some previously saved text' + input_line) + self.assertFalse(newState.further_investigation_required) + + def test_further_investigation_with_alter_table_expression_in_schema_file_with_quotes(self): + arguments = Arguments() + arguments.schemas_in_schema_file = ['SchemaICareAbout'] + state = ParserState() + state.schema = 'SchemaICareAbout' + state.further_investigation_required = True + input_line = 'ALTER TABLE SchemaICareAbout."""table1""" OWNER TO gpadmin;' + + newState, line = process_line(state, input_line, arguments) + + self.assertTrue(newState.output) + self.assertEquals(line, input_line) + self.assertFalse(newState.further_investigation_required) + class GpRestoreFilterTestCase(unittest.TestCase): def test_get_table_schema_set00(self): @@ -170,9 +795,11 @@ COPY ao_table (column1, column2, column3) FROM stdin; dump_schemas = set(['pepper']) dump_tables = set([('pepper', 'ao_table')]) + arguments = Arguments(dump_schemas, dump_tables) + with open(out_name, 'w') as fdout: with open(in_name, 'r') as fdin: - process_data(dump_schemas, dump_tables, fdin, fdout, None) + process_data(arguments, fdin, fdout) with open(out_name, 'r') as fd: results = fd.read() @@ -339,9 +966,10 @@ COPY ao_part_table_comp_1_prt_p1_2_prt_1 (column1, column2, column3) FROM stdin; 
dump_schemas = set(['public']) dump_tables = set([('public', 'ao_part_table_comp_1_prt_p1_2_prt_1'), ('public', 'ao_part_table_1_prt_p1_2_prt_1')]) + arguments = Arguments(dump_schemas, dump_tables) with open(out_name, 'w') as fdout: with open(in_name, 'r') as fdin: - process_data(dump_schemas, dump_tables, fdin, fdout, None) + process_data(arguments, fdin, fdout) with open(out_name, 'r') as fd: results = fd.read() @@ -372,9 +1000,10 @@ COPY ao_table (column1, column2, column3) FROM stdin; dump_schemas = set(['public']) dump_tables = set([('public', 'ao_table')]) + arguments = Arguments(dump_schemas, dump_tables) with open(out_name, 'w') as fdout: with open(in_name, 'r') as fdin: - process_data(dump_schemas, dump_tables, fdin, fdout, None) + process_data(arguments, fdin, fdout) with open(out_name, 'r') as fd: results = fd.read() @@ -422,6 +1051,7 @@ SET search_path = pepper, pg_catalog; dump_schemas = set(['pepper']) dump_tables = set([('pepper', 'ao_table')]) + arguments = Arguments(dump_schemas, dump_tables) with open(out_name, 'w') as fdout: with open(in_name, 'r') as fdin: process_data(dump_schemas, dump_tables, fdin, fdout) @@ -471,9 +1101,10 @@ SET search_path = pepper, pg_catalog; dump_schemas = set(['pepper']) dump_tables = set([('pepper', 'ao_table')]) + arguments = Arguments(dump_schemas, dump_tables) with open(out_name, 'w') as fdout: with open(in_name, 'r') as fdin: - process_data(dump_schemas, dump_tables, fdin, fdout, None) + process_data(arguments, fdin, fdout) with open(out_name, 'r') as fd: results = fd.read() @@ -516,9 +1147,10 @@ COPY "测试" (column1, column2, column3) FROM stdin; dump_schemas = set(['public']) dump_tables = set([('public', '测试')]) + arguments = Arguments(dump_schemas, dump_tables) with open(out_name, 'w') as fdout: with open(in_name, 'r') as fdin: - process_data(dump_schemas, dump_tables, fdin, fdout, None) + process_data(arguments, fdin, fdout) with open(out_name, 'r') as fd: results = fd.read() @@ -580,6 +1212,7 @@ CREATE TABLE 
heap_table1 ( dump_schemas = ['public'] dump_tables = [('public', 'heap_table1')] + arguments = Arguments(dump_schemas, dump_tables) infile = '/tmp/test_schema.in' outfile = '/tmp/test_schema.out' @@ -588,7 +1221,7 @@ CREATE TABLE heap_table1 ( with open(infile, 'r') as fdin: with open(outfile, 'w') as fdout: - process_schema(dump_schemas, dump_tables, fdin, fdout, None) + process_schema(arguments, fdin, fdout) expected_out = """SET statement_timeout = 0; SET client_encoding = 'UTF8'; @@ -659,6 +1292,7 @@ CREATE TABLE heap_table ( dump_schemas = ['public'] dump_tables = [('public', 'heap_table')] + arguments = Arguments(dump_schemas, dump_tables) infile = '/tmp/test_schema.in' outfile = '/tmp/test_schema.out' @@ -667,7 +1301,7 @@ CREATE TABLE heap_table ( with open(infile, 'r') as fdin: with open(outfile, 'w') as fdout: - process_schema(dump_schemas, dump_tables, fdin, fdout, None) + process_schema(arguments, fdin, fdout) expected_out = """SET statement_timeout = 0; SET client_encoding = 'UTF8'; @@ -740,6 +1374,7 @@ CREATE TABLE heap_table1 ( dump_schemas = ['public'] dump_tables = [('pepper', 'heap_table1')] + arguments = Arguments(dump_schemas, dump_tables) infile = '/tmp/test_schema.in' outfile = '/tmp/test_schema.out' @@ -748,7 +1383,7 @@ CREATE TABLE heap_table1 ( with open(infile, 'r') as fdin: with open(outfile, 'w') as fdout: - process_schema(dump_schemas, dump_tables, fdin, fdout, None) + process_schema(arguments, fdin, fdout) expected_out = """SET statement_timeout = 0; SET client_encoding = 'UTF8'; @@ -813,6 +1448,7 @@ CREATE TABLE heap_table1 ( dump_schemas = ['public'] dump_tables = [('public', 'heap_table1'), ('pepper','ao_part_table')] + arguments = Arguments(dump_schemas, dump_tables) infile = '/tmp/test_schema.in' outfile = '/tmp/test_schema.out' @@ -821,7 +1457,7 @@ CREATE TABLE heap_table1 ( with open(infile, 'r') as fdin: with open(outfile, 'w') as fdout: - process_schema(dump_schemas, dump_tables, fdin, fdout, None) + process_schema(arguments, 
fdin, fdout) expected_out = """SET statement_timeout = 0; SET client_encoding = 'UTF8'; @@ -894,6 +1530,7 @@ CREATE TABLE heap_table1 ( dump_schemas = ['public'] dump_tables = [('public', 'heap_table1'), ('pepper','ao_part_table')] + arguments = Arguments(dump_schemas, dump_tables) infile = '/tmp/test_schema.in' outfile = '/tmp/test_schema.out' @@ -902,7 +1539,7 @@ CREATE TABLE heap_table1 ( with open(infile, 'r') as fdin: with open(outfile, 'w') as fdout: - process_schema(dump_schemas, dump_tables, fdin, fdout, None) + process_schema(arguments, fdin, fdout) expected_out = """SET statement_timeout = 0; SET client_encoding = 'UTF8'; @@ -978,6 +1615,7 @@ CREATE TABLE heap_table1 ( dump_schemas = ['public'] dump_tables = [('public', 'heap_table1'), ('public','ao_part_table')] + arguments = Arguments(dump_schemas, dump_tables) infile = '/tmp/test_schema.in' outfile = '/tmp/test_schema.out' @@ -986,7 +1624,7 @@ CREATE TABLE heap_table1 ( with open(infile, 'r') as fdin: with open(outfile, 'w') as fdout: - process_schema(dump_schemas, dump_tables, fdin, fdout, None) + process_schema(arguments, fdin, fdout) expected_out = """SET statement_timeout = 0; SET client_encoding = 'UTF8'; @@ -1067,6 +1705,7 @@ CREATE TABLE heap_table1 ( dump_schemas = ['public'] dump_tables = [('public', 'heap_table1')] + arguments = Arguments(dump_schemas, dump_tables) infile = '/tmp/test_schema.in' outfile = '/tmp/test_schema.out' @@ -1075,7 +1714,7 @@ CREATE TABLE heap_table1 ( with open(infile, 'r') as fdin: with open(outfile, 'w') as fdout: - process_schema(dump_schemas, dump_tables, fdin, fdout, None) + process_schema(arguments, fdin, fdout) expected_out = """SET statement_timeout = 0; SET client_encoding = 'UTF8'; @@ -1155,6 +1794,7 @@ COPY ao_part_table from stdin; dump_schemas = ['public'] dump_tables = [('public', 'ao_part_table')] + arguments = Arguments(dump_schemas, dump_tables) infile = '/tmp/test_schema.in' outfile = '/tmp/test_schema.out' @@ -1163,7 +1803,7 @@ COPY ao_part_table 
from stdin; with open(infile, 'r') as fdin: with open(outfile, 'w') as fdout: - process_schema(dump_schemas, dump_tables, fdin, fdout, None) + process_schema(arguments, fdin, fdout) expected_out = """SET statement_timeout = 0; SET client_encoding = 'UTF8'; @@ -1252,6 +1892,7 @@ COPY ao_part_table from stdin; dump_schemas = ['public'] dump_tables = [('public', 'ao_part_table')] + arguments = Arguments(dump_schemas, dump_tables) infile = '/tmp/test_schema.in' outfile = '/tmp/test_schema.out' @@ -1260,7 +1901,7 @@ COPY ao_part_table from stdin; with open(infile, 'r') as fdin: with open(outfile, 'w') as fdout: - process_schema(dump_schemas, dump_tables, fdin, fdout, None) + process_schema(arguments, fdin, fdout) expected_out = """SET statement_timeout = 0; SET client_encoding = 'UTF8'; @@ -1345,6 +1986,7 @@ CREATE TABLE ao_part_table ( dump_schemas = ['public'] dump_tables = [('public', 'ao_part_table')] + arguments = Arguments(dump_schemas, dump_tables) infile = '/tmp/test_schema.in' outfile = '/tmp/test_schema.out' @@ -1353,7 +1995,7 @@ CREATE TABLE ao_part_table ( with open(infile, 'r') as fdin: with open(outfile, 'w') as fdout: - process_schema(dump_schemas, dump_tables, fdin, fdout, None) + process_schema(arguments, fdin, fdout) expected_out = """SET statement_timeout = 0; SET client_encoding = 'UTF8'; @@ -1486,13 +2128,16 @@ ALTER TABLE s1.id_seq OWNER TO gpadmin;""" in_name = '/tmp/infile' out_name = '/tmp/outfile' + arguments = Arguments() + arguments.schemas_in_schema_file=['s1'] + with open(in_name, 'w') as fd: fd.write(test_case_buf) - schema_level_restore_list=['s1'] + schemas_in_schema_file=['s1'] with open(out_name, 'w') as fdout: with open(in_name, 'r') as fdin: - process_schema(None, None, fdin, fdout, schema_level_restore_list=['s1']) + process_schema(arguments, fdin, fdout) with open(out_name, 'r') as fd: results = fd.read() @@ -1621,6 +2266,7 @@ CREATE TABLE ao_part_table ( dump_schemas = [] dump_tables = [] + arguments = Arguments(dump_schemas, 
dump_tables) infile = '/tmp/test_schema.in' outfile = '/tmp/test_schema.out' @@ -1629,7 +2275,7 @@ CREATE TABLE ao_part_table ( with open(infile, 'r') as fdin: with open(outfile, 'w') as fdout: - process_schema(dump_schemas, dump_tables, fdin, fdout, None) + process_schema(arguments, fdin, fdout) expected_out = """SET statement_timeout = 0; SET client_encoding = 'UTF8'; @@ -1696,6 +2342,7 @@ CREATE TABLE ao_part_table ( dump_schemas = ['no_match_schema'] dump_tables = [('no_match_schema', 'no_match_table')] + arguments = Arguments(dump_schemas, dump_tables) infile = '/tmp/test_schema.in' outfile = '/tmp/test_schema.out' @@ -1704,7 +2351,7 @@ CREATE TABLE ao_part_table ( with open(infile, 'r') as fdin: with open(outfile, 'w') as fdout: - process_schema(dump_schemas, dump_tables, fdin, fdout, None) + process_schema(arguments, fdin, fdout) expected_out = """SET statement_timeout = 0; SET client_encoding = 'UTF8'; @@ -1774,6 +2421,7 @@ CREATE TABLE ao_part_table ( dump_schemas = ['public'] dump_tables = [('public', 'ao_part_table')] + arguments = Arguments(dump_schemas, dump_tables) infile = '/tmp/test_schema.in' outfile = '/tmp/test_schema.out' @@ -1782,7 +2430,7 @@ CREATE TABLE ao_part_table ( with open(infile, 'r') as fdin: with open(outfile, 'w') as fdout: - process_schema(dump_schemas, dump_tables, fdin, fdout, None) + process_schema(arguments, fdin, fdout) expected_out = """SET statement_timeout = 0; SET client_encoding = 'UTF8'; @@ -1861,297 +2509,6 @@ CREATE TABLE ao_part_table ( output = check_valid_relname(schema, name, dump_tables) self.assertEquals(output, False) - def test_process_schema_function_drop_table(self): - test_case_buf = """-- --- Greenplum Database database dump --- - -SET statement_timeout = 0; -SET client_encoding = 'UTF8'; -SET standard_conforming_strings = off; -SET check_function_bodies = false; -SET client_min_messages = warning; -SET escape_string_warning = off; -DROP TABLE public.heap_table; -DROP TABLE public.ao_part_table; -DROP 
PROCEDURAL LANGUAGE plpgsql; -DROP SCHEMA public; - -SET default_with_oids = false; - --- --- Name: SCHEMA public; Type: COMMENT; Schema: -; Owner: dcddev --- - -COMMENT ON SCHEMA public IS 'Standard public schema'; - -SET search_path = some_schema, pg_catalog; - --- --- Name: ao_part_table_constraint; Type: CONSTRAINT; Schema: some_schema; Owner: dcddev; Tablespace: --- - -ALTER TABLE ONLY some_schema.ao_part_table - ADD CONSTRAINT constraint_name PRIMARY KEY (name); - - -SET search_path = public, pg_catalog; - -SET default_tablespace = ''; - --- --- Name: ao_part_table; Type: TABLE; Schema: public; Owner: dcddev; Tablespace: --- - -CREATE TABLE ao_part_table ( - column1 integer, - column2 character varying(20), - column3 date -) DISTRIBUTED BY (column1); with (appendonly=true)""" - - dump_schemas = ['public'] - dump_tables = [('public', 'ao_part_table')] - - infile = '/tmp/test_schema.in' - outfile = '/tmp/test_schema.out' - with open(infile, 'w') as fd: - fd.write(test_case_buf) - - with open(infile, 'r') as fdin: - with open(outfile, 'w') as fdout: - process_schema(dump_schemas, dump_tables, fdin, fdout, None) - - expected_out = """SET statement_timeout = 0; -SET client_encoding = 'UTF8'; -SET standard_conforming_strings = off; -SET check_function_bodies = false; -SET client_min_messages = warning; -SET escape_string_warning = off; -DROP TABLE public.ao_part_table; -SET default_with_oids = false; - --- -SET search_path = public, pg_catalog; - -SET default_tablespace = ''; - --- --- Name: ao_part_table; Type: TABLE; Schema: public; Owner: dcddev; Tablespace: --- - -CREATE TABLE ao_part_table ( - column1 integer, - column2 character varying(20), - column3 date -) DISTRIBUTED BY (column1); with (appendonly=true)""" - - - with open(outfile, 'r') as fd: - results = fd.read() - self.assertEquals(results, expected_out) - - - def test_process_schema_user_function_having_drop_table(self): - test_case_buf = """-- --- Greenplum Database database dump --- - -SET 
statement_timeout = 0; -SET client_encoding = 'UTF8'; -SET standard_conforming_strings = off; -SET check_function_bodies = false; -SET client_min_messages = warning; -SET escape_string_warning = off; -DROP TABLE public.heap_table; -DROP TABLE public.ao_part_table; -DROP PROCEDURAL LANGUAGE plpgsql; -DROP SCHEMA public; - -SET default_with_oids = false; - --- --- Name: SCHEMA public; Type: COMMENT; Schema: -; Owner: dcddev --- - -COMMENT ON SCHEMA public IS 'Standard public schema'; - -SET search_path = some_schema, pg_catalog; - --- --- Name: ao_part_table_constraint; Type: CONSTRAINT; Schema: some_schema; Owner: dcddev; Tablespace: --- - -ALTER TABLE ONLY some_schema.ao_part_table - ADD CONSTRAINT constraint_name PRIMARY KEY (name); - - -SET search_path = public, pg_catalog; - -SET default_tablespace = ''; - --- --- Name: ao_part_table; Type: TABLE; Schema: public; Owner: dcddev; Tablespace: --- - -CREATE TABLE ao_part_table ( - column1 integer, - column2 character varying(20), - column3 date -) DISTRIBUTED BY (column1); with (appendonly=true) - -SET search_path = foo, pg_catalog; - ---- ---- Name: foofunc(); Type: FUNCTION; Schema: foo; Owner: foo ---- - -CREATE OR REPLACE FUNCTION foofunc() -RETURNS TEXT AS $$ -DECLARE ver TEXT; -BEGIN -DROP TABLE IF EXISTS footab; -SELECT version() INTO ver; -RETURN ver; -END; -$$ LANGUAGE plpgsql;""" - - dump_schemas = ['public'] - dump_tables = [('public', 'ao_part_table')] - - infile = '/tmp/test_schema.in' - outfile = '/tmp/test_schema.out' - with open(infile, 'w') as fd: - fd.write(test_case_buf) - - with open(infile, 'r') as fdin: - with open(outfile, 'w') as fdout: - process_schema(dump_schemas, dump_tables, fdin, fdout, None) - - expected_out = """SET statement_timeout = 0; -SET client_encoding = 'UTF8'; -SET standard_conforming_strings = off; -SET check_function_bodies = false; -SET client_min_messages = warning; -SET escape_string_warning = off; -DROP TABLE public.ao_part_table; -SET default_with_oids = false; - --- 
-SET search_path = public, pg_catalog; - -SET default_tablespace = ''; - --- --- Name: ao_part_table; Type: TABLE; Schema: public; Owner: dcddev; Tablespace: --- - -CREATE TABLE ao_part_table ( - column1 integer, - column2 character varying(20), - column3 date -) DISTRIBUTED BY (column1); with (appendonly=true) - -BEGIN -DROP TABLE IF EXISTS footab; -SELECT version() INTO ver; -RETURN ver; -END; -$$ LANGUAGE plpgsql;""" - - with open(outfile, 'r') as fd: - results = fd.read().strip() - self.assertEquals(results, expected_out) - - def test_process_schema_function_drop_external_table(self): - test_case_buf = """-- --- Greenplum Database database dump --- - -SET statement_timeout = 0; -SET client_encoding = 'UTF8'; -SET standard_conforming_strings = off; -SET check_function_bodies = false; -SET client_min_messages = warning; -SET escape_string_warning = off; -DROP TABLE public.heap_table; -DROP EXTERNAL TABLE public.ao_part_table; -DROP PROCEDURAL LANGUAGE plpgsql; -DROP SCHEMA public; - -SET default_with_oids = false; - --- --- Name: SCHEMA public; Type: COMMENT; Schema: -; Owner: dcddev --- - -COMMENT ON SCHEMA public IS 'Standard public schema'; - -SET search_path = some_schema, pg_catalog; - --- --- Name: ao_part_table_constraint; Type: CONSTRAINT; Schema: some_schema; Owner: dcddev; Tablespace: --- - -ALTER TABLE ONLY some_schema.ao_part_table - ADD CONSTRAINT constraint_name PRIMARY KEY (name); - - -SET search_path = public, pg_catalog; - -SET default_tablespace = ''; - --- --- Name: ao_part_table; Type: EXTERNAL TABLE; Schema: public; Owner: dcddev; Tablespace: --- - -CREATE TABLE ao_part_table ( - column1 integer, - column2 character varying(20), - column3 date -) DISTRIBUTED BY (column1); with (appendonly=true)""" - - dump_schemas = ['public'] - dump_tables = [('public', 'ao_part_table')] - - infile = '/tmp/test_schema.in' - outfile = '/tmp/test_schema.out' - with open(infile, 'w') as fd: - fd.write(test_case_buf) - - with open(infile, 'r') as fdin: - with 
open(outfile, 'w') as fdout: - process_schema(dump_schemas, dump_tables, fdin, fdout, None) - - expected_out = """SET statement_timeout = 0; -SET client_encoding = 'UTF8'; -SET standard_conforming_strings = off; -SET check_function_bodies = false; -SET client_min_messages = warning; -SET escape_string_warning = off; -DROP EXTERNAL TABLE public.ao_part_table; -SET default_with_oids = false; - --- -SET search_path = public, pg_catalog; - -SET default_tablespace = ''; - --- --- Name: ao_part_table; Type: EXTERNAL TABLE; Schema: public; Owner: dcddev; Tablespace: --- - -CREATE TABLE ao_part_table ( - column1 integer, - column2 character varying(20), - column3 date -) DISTRIBUTED BY (column1); with (appendonly=true)""" - - - with open(outfile, 'r') as fd: - results = fd.read() - self.assertEquals(results, expected_out) - - def test_process_schema_single_table(self): test_case_buf = """-- -- Greenplum Database database dump @@ -2390,6 +2747,7 @@ GRANT ALL ON TABLE user_table TO user_role_b; dump_schemas = ['user_schema_a', 'user_schema_e'] dump_tables = [('user_schema_a', 'user_table'), ('user_schema_e', 'test_table')] + arguments = Arguments(dump_schemas, dump_tables) infile = '/tmp/test_schema.in' outfile = '/tmp/test_schema.out' @@ -2398,7 +2756,7 @@ GRANT ALL ON TABLE user_table TO user_role_b; with open(infile, 'r') as fdin: with open(outfile, 'w') as fdout: - process_schema(dump_schemas, dump_tables, fdin, fdout, None) + process_schema(arguments, fdin, fdout) expected_out = """SET statement_timeout = 0; SET client_encoding = 'UTF8'; @@ -2551,6 +2909,7 @@ CREATE FOREIGN TABLE ao_part_table ( dump_schemas = ['public'] dump_tables = [('public', 'ao_part_table')] + arguments = Arguments(dump_schemas, dump_tables) infile = '/tmp/test_schema.in' outfile = '/tmp/test_schema.out' @@ -2559,7 +2918,7 @@ CREATE FOREIGN TABLE ao_part_table ( with open(infile, 'r') as fdin: with open(outfile, 'w') as fdout: - process_schema(dump_schemas, dump_tables, fdin, fdout, None) + 
process_schema(arguments, fdin, fdout) expected_out = """SET search_path = public, pg_catalog; @@ -2715,6 +3074,7 @@ GRANT ALL ON TABLE user_table TO user_role_b; dump_schemas = ['user_schema_a'] dump_tables = [('user_schema_a', 'user_table')] + arguments = Arguments(dump_schemas, dump_tables) infile = '/tmp/test_schema.in' outfile = '/tmp/test_schema.out' @@ -2723,7 +3083,7 @@ GRANT ALL ON TABLE user_table TO user_role_b; with open(infile, 'r') as fdin: with open(outfile, 'w') as fdout: - process_schema(dump_schemas, dump_tables, fdin, fdout, None) + process_schema(arguments, fdin, fdout) expected_out = """SET statement_timeout = 0; SET client_encoding = 'UTF8'; @@ -2888,6 +3248,7 @@ GRANT ALL ON TABLE "测试" TO user_role_b; """ dump_schemas = ['测试_schema'] dump_tables = [('测试_schema', '测试')] + arguments = Arguments(dump_schemas, dump_tables) infile = '/tmp/test_schema.in' outfile = '/tmp/test_schema.out' @@ -2896,7 +3257,7 @@ GRANT ALL ON TABLE "测试" TO user_role_b; with open(infile, 'r') as fdin: with open(outfile, 'w') as fdout: - process_schema(dump_schemas, dump_tables, fdin, fdout, None) + process_schema(arguments, fdin, fdout) expected_out = """SET statement_timeout = 0; SET client_encoding = 'UTF8'; @@ -3058,6 +3419,7 @@ GRANT ALL ON TABLE "Áá" TO user_role_b; """ dump_schemas = ['Áá_schema'] dump_tables = [('Áá_schema', 'Áá')] + arguments = Arguments(dump_schemas, dump_tables) infile = '/tmp/test_schema.in' outfile = '/tmp/test_schema.out' @@ -3066,7 +3428,7 @@ GRANT ALL ON TABLE "Áá" TO user_role_b; with open(infile, 'r') as fdin: with open(outfile, 'w') as fdout: - process_schema(dump_schemas, dump_tables, fdin, fdout, None) + process_schema(arguments, fdin, fdout) expected_out = """SET statement_timeout = 0; SET client_encoding = 'UTF8'; @@ -3228,6 +3590,7 @@ GRANT ALL ON TABLE "Ж" TO user_role_b; """ dump_schemas = ['Ж_schema'] dump_tables = [('Ж_schema', 'Ж')] + arguments = Arguments(dump_schemas, dump_tables) infile = '/tmp/test_schema.in' outfile = 
'/tmp/test_schema.out' @@ -3236,7 +3599,7 @@ GRANT ALL ON TABLE "Ж" TO user_role_b; with open(infile, 'r') as fdin: with open(outfile, 'w') as fdout: - process_schema(dump_schemas, dump_tables, fdin, fdout, None) + process_schema(arguments, fdin, fdout) expected_out = """SET statement_timeout = 0; SET client_encoding = 'UTF8'; diff --git a/gpMgmt/bin/gprestore_filter.py b/gpMgmt/bin/gprestore_filter.py index 1d901111a3..64ac19ae55 100755 --- a/gpMgmt/bin/gprestore_filter.py +++ b/gpMgmt/bin/gprestore_filter.py @@ -56,9 +56,9 @@ def get_table_info(line, cur_comment_expr): if len(type_start) != 1 or len(schema_start) != 1 or len(owner_start) != 1: return (None, None, None) name = temp[len(cur_comment_expr) : type_start[0]] - type = temp[type_start[0] + len(type_expr) : schema_start[0]] + data_type = temp[type_start[0] + len(type_expr) : schema_start[0]] schema = temp[schema_start[0] + len(schema_expr) : owner_start[0]] - return (name, type, schema) + return (name, data_type, schema) def get_table_from_alter_table(line, alter_expr): """ @@ -99,179 +99,204 @@ def find_all_expr_start(line, expr): """ return [m.start() for m in re.finditer('(?=%s)' % expr, line)] -def process_schema(dump_schemas, dump_tables, fdin, fdout, change_schema=None, schema_level_restore_list=None): - """ - Filter the dump file line by line from restore - dump_schemas: set of schemas to restore - dump_tables: set of (schema, table) tuple to restore - fdin: stdin from dump file - fdout: to write filtered content to stdout - change_schema_name: different schema name to restore - schema_level_restore_list: list of schemas to restore all tables under them - """ - - schema, table = None, None - line_buff = '' - - # to help decide whether or not to filter out - output = False - - # to help exclude SET clause within a function's ddl statement - function_ddl = False - - further_investigation_required = False - # we need to set search_path to true after every ddl change due to the - # fact that the schema 
"set search_path" may change on the next ddl command - search_path = True - passedDropSchemaSection = False - - cast_func_schema = None - change_cast_func_schema = False - - in_block = False +class ParserState: + + def __init__(self): + self.output = False # to help decide whether or not to filter out + self.function_ddl = False # to help exclude SET clause within a function's ddl statement + self.further_investigation_required = False + # we need to set search_path to true after every ddl change due to the + # fact that the schema "set search_path" may change on the next ddl command + self.cast_func_schema = None + self.change_cast_func_schema = False + self.in_block = False + self.line_buff = '' + self.schema = None + +def _handle_begin_end_block(state, line, _): + + if (line[0] == begin_start) and line.startswith(begin_expr): + state.in_block = True + state.output = True + elif (line[0] == end_start) and line.startswith(end_expr): + state.in_block = False + state.output = True + elif state.in_block: + state.output = True + else: + return False, state, line + return True, state, line + +def _handle_change_schema(schema_to_replace, change_schema, line): + if change_schema and len(change_schema) > 0: + # change schema name can contain special chars including white space, double quote that. 
+ # if original schema name is already quoted, replaced it with quoted change schema name + quoted_schema = '"' + schema_to_replace + '"' + if quoted_schema in line: + line = line.replace(quoted_schema, escapeDoubleQuoteInSQLString(change_schema)) + else: + line = line.replace(schema_to_replace, escapeDoubleQuoteInSQLString(change_schema)) + return line + +def _handle_set_statement(state, line, arguments): + schemas_in_table_file = arguments.schemas + change_schema = arguments.change_schema_name + schemas_in_schema_file = arguments.schemas_in_schema_file + + if (line[0] == set_start) and line.startswith(search_path_expr): + + # NOTE: The goal is to output the correct mapping to the search path + # for the schema + + state.further_investigation_required = False + # schema in set state.search_path line is already escaped in dump file + state.schema = extract_schema(line) + schema_wo_escaping = removeEscapingDoubleQuoteInSQLString(state.schema, False) + if state.schema == "pg_catalog": + state.output = True + elif (schemas_in_table_file and schema_wo_escaping in schemas_in_table_file or + schemas_in_schema_file and schema_wo_escaping in schemas_in_schema_file): + line = _handle_change_schema(state.schema, change_schema, line) + state.cast_func_schema = state.schema # Save the schema in case we need to replace a cast's function's schema later + state.output = True + else: + state.output = False + return True, state, line + return False, state, line + +def _handle_set_assignment(state, line, _): + # set_assignment must be in the line to filter out dump line: SET SUBPARTITION TEMPLATE + if (line[0] == set_start) and line.startswith(set_expr) and set_assignment in line and not state.function_ddl: + state.output = True + return True, state, line + return False, state, line + +def _handle_expressions_in_comments(state, line, arguments): + schemas_in_table_file = arguments.schemas + tables_in_table_file = arguments.tables + schemas_in_schema_file = 
arguments.schemas_in_schema_file + + if line[:2] == comment_start_expr and line.startswith(comment_expr): + # Parse the line using get_table_info for SCHEMA relation type as well, + # if type is SCHEMA, then the value of name returned is schema's name, and returned schema is represented by '-' + name, data_type, state.schema = get_table_info(line, comment_expr) + state.output = False + state.function_ddl = False + + if data_type in ['SCHEMA']: + # Make sure that schemas are created before restoring the desired tables. + state.output = check_valid_schema(name, schemas_in_table_file, schemas_in_schema_file) + elif data_type in ['TABLE', 'EXTERNAL TABLE', 'VIEW', 'SEQUENCE']: + state.further_investigation_required = False + state.output = check_valid_relname(state.schema, name, tables_in_table_file, schemas_in_schema_file) + elif data_type in ['CONSTRAINT']: + state.further_investigation_required = True + if check_valid_schema(state.schema, schemas_in_table_file, schemas_in_schema_file): + state.line_buff = line + elif data_type in ['ACL']: + state.output = check_valid_relname(state.schema, name, tables_in_table_file, schemas_in_schema_file) + elif data_type in ['FUNCTION']: + state.function_ddl = True + state.output = check_valid_schema(state.schema, schemas_in_table_file, schemas_in_schema_file) + elif data_type in ['CAST', 'PROCEDURAL LANGUAGE']: # Restored to pg_catalog, so always filtered in + state.output = True + state.change_cast_func_schema = True # When changing schemas, we need to ensure that functions used in casts reference the new schema + return True, state, line + return False, state, line + +def _handle_data_expressions_in_comments(state, line, arguments): + tables_in_table_file = arguments.tables + schemas_in_schema_file = arguments.schemas_in_schema_file + + if (line[:2] == comment_start_expr) and (line.startswith(comment_data_expr_a) or line.startswith(comment_data_expr_b)): + state.further_investigation_required = False + if 
line.startswith(comment_data_expr_a): + name, data_type, state.schema = get_table_info(line, comment_data_expr_a) + else: + name, data_type, state.schema = get_table_info(line, comment_data_expr_b) + if data_type == 'TABLE DATA': + state.output = check_valid_relname(state.schema, name, tables_in_table_file, schemas_in_schema_file) + else: + state.output = False + return True, state, line + return False, state, line - for line in fdin: - # NOTE: We are checking the first character before actually verifying - # the line with "startswith" due to the performance gain. - if in_block: - output = True - elif (line[0] == begin_start) and line.startswith(begin_expr): - in_block = True - output = True - elif (line[0] == end_start) and line.startswith(end_expr): - in_block = False - output = True - elif search_path and (line[0] == set_start) and line.startswith(search_path_expr): - # NOTE: The goal is to output the correct mapping to the search path - # for the schema - - further_investigation_required = False - # schema in set search_path line is already escaped in dump file - schema = extract_schema(line) - schema_wo_escaping = removeEscapingDoubleQuoteInSQLString(schema, False) - if schema == "pg_catalog": - output = True - elif (dump_schemas and schema_wo_escaping in dump_schemas or - schema_level_restore_list and schema_wo_escaping in schema_level_restore_list): - if change_schema and len(change_schema) > 0: - # change schema name can contain special chars including white space, double quote that. 
- # if original schema name is already quoted, replaced it with quoted change schema name - quoted_schema = '"' + schema + '"' - if quoted_schema in line: - line = line.replace(quoted_schema, escapeDoubleQuoteInSQLString(change_schema)) - else: - line = line.replace(schema, escapeDoubleQuoteInSQLString(change_schema)) - cast_func_schema = schema # Save the schema in case we need to replace a cast's function's schema later - output = True - search_path = False - else: - output = False - # set_assignment must be in the line to filter out dump line: SET SUBPARTITION TEMPLATE - elif (line[0] == set_start) and line.startswith(set_expr) and set_assignment in line and not function_ddl: - output = True - elif (line[0] == drop_start) and line.startswith(drop_expr): - if line.startswith(drop_table_expr) or line.startswith(drop_external_table_expr): - if passedDropSchemaSection: - output = False - else: - if line.startswith(drop_table_expr): - output = check_dropped_table(line, dump_tables, schema_level_restore_list, drop_table_expr) - else: - output = check_dropped_table(line, dump_tables, schema_level_restore_list, - drop_external_table_expr) - else: - output = False - elif line[:2] == comment_start_expr and line.startswith(comment_expr): - # Parse the line using get_table_info for SCHEMA relation type as well, - # if type is SCHEMA, then the value of name returned is schema's name, and returned schema is represented by '-' - name, type, schema = get_table_info(line, comment_expr) - output = False - function_ddl = False - passedDropSchemaSection = True - - if type in ['SCHEMA']: - # Make sure that schemas are created before restoring the desired tables. 
- output = check_valid_schema(name, dump_schemas, schema_level_restore_list) - elif type in ['TABLE', 'EXTERNAL TABLE', 'VIEW', 'SEQUENCE']: - further_investigation_required = False - output = check_valid_relname(schema, name, dump_tables, schema_level_restore_list) - elif type in ['CONSTRAINT']: - further_investigation_required = True - if check_valid_schema(schema, dump_schemas, schema_level_restore_list): - line_buff = line - elif type in ['ACL']: - output = check_valid_relname(schema, name, dump_tables, schema_level_restore_list) - elif type in ['FUNCTION']: - function_ddl = True - output = check_valid_schema(schema, dump_schemas, schema_level_restore_list) - elif type in ['CAST', 'PROCEDURAL LANGUAGE']: # Restored to pg_catalog, so always filtered in - output = True - change_cast_func_schema = True # When changing schemas, we need to ensure that functions used in casts reference the new schema +def _handle_further_investigation(state, line, arguments): + tables_in_table_file = arguments.tables + schemas_in_schema_file = arguments.schemas_in_schema_file - if output: - search_path = True + if state.further_investigation_required: + if line.startswith(alter_table_expr): + state.further_investigation_required = False - elif (line[:2] == comment_start_expr) and (line.startswith(comment_data_expr_a) or line.startswith(comment_data_expr_b)): - passedDropSchemaSection = True - further_investigation_required = False - if line.startswith(comment_data_expr_a): - name, type, schema = get_table_info(line, comment_data_expr_a) - else: - name, type, schema = get_table_info(line, comment_data_expr_b) - if type == 'TABLE DATA': - output = check_valid_relname(schema, name, dump_tables, schema_level_restore_list) - if output: - search_path = True + # Get the full qualified table name with the correct split + if line.startswith(alter_table_only_expr): + tablename = get_table_from_alter_table(line, alter_table_only_expr) else: - output = False - elif 
further_investigation_required: - if line.startswith(alter_table_only_expr) or line.startswith(alter_table_expr): - further_investigation_required = False - - # Get the full qualified table name with the correct split - if line.startswith(alter_table_only_expr): - tablename = get_table_from_alter_table(line, alter_table_only_expr) - else: - tablename = get_table_from_alter_table(line, alter_table_expr) - - tablename = checkAndRemoveEnclosingDoubleQuote(tablename) - tablename = removeEscapingDoubleQuoteInSQLString(tablename, False) - output = check_valid_relname(schema, tablename, dump_tables, schema_level_restore_list) - - if output: - if line_buff: - fdout.write(line_buff) - line_buff = '' - search_path = True - elif change_cast_func_schema: - if "CREATE CAST" in line and "WITH FUNCTION" in line: - change_cast_func_schema = False - if change_schema and len(change_schema) > 0: - quoted_schema = '"' + cast_func_schema + '"' - if quoted_schema in line: - line = line.replace(quoted_schema, escapeDoubleQuoteInSQLString(change_schema)) - else: - line = line.replace(cast_func_schema, escapeDoubleQuoteInSQLString(change_schema)) - cast_func_schema = None - else: - further_investigation_required = False - - if output: - fdout.write(line) + tablename = get_table_from_alter_table(line, alter_table_expr) + + tablename = checkAndRemoveEnclosingDoubleQuote(tablename) + tablename = removeEscapingDoubleQuoteInSQLString(tablename, False) + state.output = check_valid_relname(state.schema, tablename, tables_in_table_file, schemas_in_schema_file) + if state.output: + if state.line_buff: + line = state.line_buff + line + state.line_buff = '' + return True, state, line + return False, state, line + + +def _handle_cast_function_schema(state, line, arguments): + change_schema = arguments.change_schema_name + + if state.change_cast_func_schema: + if "CREATE CAST" in line and "WITH FUNCTION" in line: + state.change_cast_func_schema = False + line = 
_handle_change_schema(state.cast_func_schema, change_schema, line) + state.cast_func_schema = None + return True, state, line + return False, state, line + +def process_line(state, line, arguments): + # NOTE: We are checking the first character before actually verifying + # the line with "startswith" due to the performance gain. + + fns = [ _handle_begin_end_block, + _handle_set_statement, + _handle_set_assignment, + _handle_expressions_in_comments, + _handle_data_expressions_in_comments, + _handle_further_investigation, + _handle_cast_function_schema ] + for fn in fns: + result, state , line= fn(state, line, arguments) + if result: + return state, line + + state.further_investigation_required = False + return state, line + + +def process_schema(arguments, fdin, fdout): + state = ParserState() + for line in fdin: + state, output_line = process_line(state, line, arguments) + if state.output: + fdout.write(output_line) -def check_valid_schema(schema, dump_schemas, schema_level_restore_list=None): - if ((schema_level_restore_list and schema in schema_level_restore_list) or - (dump_schemas and schema in dump_schemas)): +def check_valid_schema(schema, schemas_in_table_file, schemas_in_schema_file=None): + if ((schemas_in_schema_file and schema in schemas_in_schema_file) or + (schemas_in_table_file and schema in schemas_in_table_file)): return True return False -def check_valid_relname(schema, relname, dump_tables, schema_level_restore_list=None): +def check_valid_relname(schema, relname, tables_in_table_file, schemas_in_schema_file=None): """ check if relation is valid (can be from schema level restore) """ - if ((schema_level_restore_list and schema in schema_level_restore_list) or - (dump_tables and (schema, relname) in dump_tables)): + if ((schemas_in_schema_file and schema in schemas_in_schema_file) or + (tables_in_table_file and (schema, relname) in tables_in_table_file)): return True return False @@ -280,17 +305,17 @@ def get_table_schema_set(filename): filename: 
file with true schema and table name (none escaped), don't strip white space on schema and table name in case it's part of the name """ - dump_schemas = set() - dump_tables = set() + schemas_in_table_file = set() + tables_in_table_file = set() with open(filename) as fd: contents = fd.read() tables = contents.splitlines() for t in tables: schema, table = split_fqn(t) - dump_tables.add((schema, table)) - dump_schemas.add(schema) - return (dump_schemas, dump_tables) + tables_in_table_file.add((schema, table)) + schemas_in_table_file.add(schema) + return (schemas_in_table_file, tables_in_table_file) def extract_schema(line): """ @@ -333,7 +358,7 @@ def extract_table(line): else: raise Exception('Failed to extract table name from line %s' % line) -def check_dropped_table(line, dump_tables, schema_level_restore_list, drop_table_expr): +def check_dropped_table(line, tables_in_table_file, schemas_in_schema_file, drop_table_expr): """ check if table to drop is valid (can be dropped from schema level restore) """ @@ -341,11 +366,16 @@ def check_dropped_table(line, dump_tables, schema_level_restore_list, drop_table (schema, table) = split_fqn(temp) schema = removeEscapingDoubleQuoteInSQLString(checkAndRemoveEnclosingDoubleQuote(schema), False) table = removeEscapingDoubleQuoteInSQLString(checkAndRemoveEnclosingDoubleQuote(table), False) - if (schema_level_restore_list and schema in schema_level_restore_list) or ((schema, table) in dump_tables): + if (schemas_in_schema_file and schema in schemas_in_schema_file) or ((schema, table) in tables_in_table_file): return True return False -def process_data(dump_schemas, dump_tables, fdin, fdout, change_schema=None, schema_level_restore_list=None): +def process_data(arguments, fdin, fdout): + schemas_in_table_file = arguments.schemas + tables_in_table_file = arguments.tables + change_schema = arguments.change_schema_name + schemas_in_schema_file = arguments.schemas_in_schema_file + schema, table, schema_wo_escaping = None, None, None 
def get_schemas_in_schema_file(schema_level_restore_file=None):
    """
    Read the schema-level restore file and return a list of schema names,
    one per input line.

    Note: white space in schema and table names is supported now; entries
    are deliberately NOT stripped.

    Raises Exception if the given path does not exist.
    """
    if not os.path.exists(schema_level_restore_file):
        raise Exception('schema level restore file path %s does not exist' % schema_level_restore_file)
    # Removed dead "schemas_in_schema_file = []" initialization that was
    # unconditionally overwritten before use.
    with open(schema_level_restore_file) as fr:
        return fr.read().splitlines()


class Arguments:
    """
    Container for the restore filter options.

    Attributes:
      schemas: set of schema names (from the table file) to restore
      tables: set of (schema, table) tuples to restore
      change_schema_name: different schema name to restore into, or None
      schemas_in_schema_file: list of schemas to restore all tables under,
          or None
    """
    def __init__(self, schemas_in_table_file=None, tables_in_table_file=None,
                 change_schema_name=None, schemas_in_schema_file=None):
        # New keyword arguments default to None, so existing two-argument
        # callers keep their behavior unchanged.
        self.schemas = schemas_in_table_file
        self.tables = tables_in_table_file
        self.change_schema_name = change_schema_name
        self.schemas_in_schema_file = schemas_in_schema_file
sys.stdin, sys.stdout, change_schema_name, schema_level_restore_list) + process_data(args, sys.stdin, sys.stdout) -- GitLab