From 982832af3bd48c14d81a6ee0662d51f0ff51eded Mon Sep 17 00:00:00 2001
From: Shivram Mani
Date: Tue, 26 Jun 2018 10:40:28 -0700
Subject: [PATCH] Add gphdfs-mapr certification job (#5184)

Added a new test job to the pipeline to certify GPHDFS against the MapR
Hadoop distribution, and renamed the existing GPHDFS certification job to
indicate that it tests against generic Apache Hadoop.

The MapR cluster consists of a single node deployed into GCE by the CCP
scripts.
- MapR 5.2
- Parquet 1.8.1

Co-authored-by: Alexander Denissov
Co-authored-by: Shivram Mani
Co-authored-by: Francisco Guerrero
---
 concourse/pipelines/gpdb_master-generated.yml | 50 ++++++-
 concourse/pipelines/templates/gpdb-tpl.yml | 51 ++++++-
 concourse/scripts/gen_mapr.sh | 132 ++++++++++++++++++
 .../scripts/regression_tests_gphdfs.bash | 76 +++++++---
 concourse/tasks/gen_mapr.yml | 36 +++++
 .../tasks/regression_tests_gphdfs_mapr.yml | 23 +++
 .../gphdfs/regression/gphdfs_init_file | 2 +-
 .../gphdfs/regression/input/init_file | 2 +-
 .../regression/input/regression/init_file | 2 +-
 .../integrate/generate_gphdfs_data.sh | 15 +-
 .../legacy/javaclasses/UseMapred.java | 4 +-
 .../legacy/javaclasses/UseMapreduce.java | 4 +-
 .../regression/run_gphdfs_regression.sh | 20 ++-
 13 files changed, 365 insertions(+), 52 deletions(-)
 create mode 100755 concourse/scripts/gen_mapr.sh
 create mode 100644 concourse/tasks/gen_mapr.yml
 create mode 100644 concourse/tasks/regression_tests_gphdfs_mapr.yml

diff --git a/concourse/pipelines/gpdb_master-generated.yml b/concourse/pipelines/gpdb_master-generated.yml
index ddca3f64a7..a6bc84741c 100644
--- a/concourse/pipelines/gpdb_master-generated.yml
+++ b/concourse/pipelines/gpdb_master-generated.yml
@@ -12,7 +12,7 @@
 ## file (example: templates/gpdb-tpl.yml) and regenerate the pipeline
 ## using appropriate tool (example: gen_pipeline.py -t prod).
## ---------------------------------------------------------------------- -## Generated by gen_pipeline.py at: 2018-06-20 11:32:43.140762 +## Generated by gen_pipeline.py at: 2018-06-25 14:29:00.766030 ## Template file: gpdb-tpl.yml ## OS Types: ['centos6', 'centos7', 'sles', 'aix7', 'win', 'ubuntu16'] ## Test Sections: ['ICW', 'Replication', 'ResourceGroups', 'Interconnect', 'CLI', 'UD'] @@ -85,7 +85,8 @@ groups: ## -------------------------------------------------------------------- - gate_ud_start - regression_tests_pxf - - regression_tests_gphdfs_centos + - regression_tests_gphdfs_hadoop_centos + - regression_tests_gphdfs_mapr_centos ## ====================================================================== @@ -197,7 +198,8 @@ groups: jobs: - gate_ud_start - regression_tests_pxf - - regression_tests_gphdfs_centos + - regression_tests_gphdfs_hadoop_centos + - regression_tests_gphdfs_mapr_centos - compile_gpdb_centos6 ## ====================================================================== @@ -2077,7 +2079,7 @@ jobs: TARGET_OS: centos TARGET_OS_VERSION: 6 -- name: regression_tests_gphdfs_centos +- name: regression_tests_gphdfs_hadoop_centos plan: - aggregate: - get: gpdb_src @@ -2094,6 +2096,40 @@ jobs: TARGET_OS: centos TARGET_OS_VERSION: 6 +- name: regression_tests_gphdfs_mapr_centos + ensure: + <<: *set_failed + on_success: + <<: *ccp_destroy + plan: + - aggregate: + - get: gpdb_src + passed: [gate_ud_start] + - get: bin_gpdb + passed: [gate_ud_start] + trigger: true + resource: bin_gpdb_centos6 + - get: ccp_src + - get: centos-gpdb-dev-6 + - put: terraform + params: + <<: *ccp_default_params + vars: + <<: *ccp_default_vars + number_of_nodes: 1 + PLATFORM: centos7 + instance_type: n1-standard-4 + - task: gen_and_initialize_mapr + file: gpdb_src/concourse/tasks/gen_mapr.yml + params: + <<: *ccp_gen_cluster_default_params + - task: regression_tests_gphdfs_mapr + file: gpdb_src/concourse/tasks/regression_tests_gphdfs_mapr.yml + image: centos-gpdb-dev-6 + params: + TARGET_OS: centos + TARGET_OS_VERSION: 6 + ## ====================================================================== ## ____ _ ## | _ \ ___| | ___ __ _ ___ ___ @@ -2134,7 +2170,8 @@ jobs: - resource_group_sles12 - segwalrep_mirrorless_centos6 - walrep_2 - - regression_tests_gphdfs_centos + - regression_tests_gphdfs_hadoop_centos + - regression_tests_gphdfs_mapr_centos - regression_tests_pxf - gprecoverseg - gpcheckcat @@ -2164,7 +2201,8 @@ jobs: - resource_group_centos6 - segwalrep_mirrorless_centos6 - walrep_2 - - regression_tests_gphdfs_centos + - regression_tests_gphdfs_hadoop_centos + - regression_tests_gphdfs_mapr_centos - regression_tests_pxf - gprecoverseg - gpcheckcat diff --git a/concourse/pipelines/templates/gpdb-tpl.yml b/concourse/pipelines/templates/gpdb-tpl.yml index f51bd2c282..4117ced244 100644 --- a/concourse/pipelines/templates/gpdb-tpl.yml +++ b/concourse/pipelines/templates/gpdb-tpl.yml @@ -111,7 +111,8 @@ groups: ## -------------------------------------------------------------------- - gate_ud_start - regression_tests_pxf - - regression_tests_gphdfs_centos + - regression_tests_gphdfs_hadoop_centos + - regression_tests_gphdfs_mapr_centos {% endif %} {% if pipeline_type == "prod" %} @@ -263,7 +264,8 @@ groups: jobs: - gate_ud_start - regression_tests_pxf - - regression_tests_gphdfs_centos + - regression_tests_gphdfs_hadoop_centos + - regression_tests_gphdfs_mapr_centos - compile_gpdb_centos6 {% endif %} @@ -294,7 +296,8 @@ resources: "Replication" in test_sections or "ResourceGroups" in test_sections or 
"Interconnect" in test_sections or - "CLI" in test_sections %} + "CLI" in test_sections or + "UD" in test_sections %} - name: ccp_src type: git source: @@ -2106,7 +2109,7 @@ jobs: TARGET_OS: centos TARGET_OS_VERSION: 6 -- name: regression_tests_gphdfs_centos +- name: regression_tests_gphdfs_hadoop_centos plan: - aggregate: - get: gpdb_src @@ -2123,6 +2126,40 @@ jobs: TARGET_OS: centos TARGET_OS_VERSION: 6 +- name: regression_tests_gphdfs_mapr_centos + ensure: + <<: *set_failed + on_success: + <<: *ccp_destroy + plan: + - aggregate: + - get: gpdb_src + passed: [gate_ud_start] + - get: bin_gpdb + passed: [gate_ud_start] + trigger: [[ test_trigger ]] + resource: bin_gpdb_centos6 + - get: ccp_src + - get: centos-gpdb-dev-6 + - put: terraform + params: + <<: *ccp_default_params + vars: + <<: *ccp_default_vars + number_of_nodes: 1 + PLATFORM: centos7 + instance_type: n1-standard-4 + - task: gen_and_initialize_mapr + file: gpdb_src/concourse/tasks/gen_mapr.yml + params: + <<: *ccp_gen_cluster_default_params + - task: regression_tests_gphdfs_mapr + file: gpdb_src/concourse/tasks/regression_tests_gphdfs_mapr.yml + image: centos-gpdb-dev-6 + params: + TARGET_OS: centos + TARGET_OS_VERSION: 6 + {% endif %} {% if pipeline_type == "prod" %} ## ====================================================================== @@ -2165,7 +2202,8 @@ jobs: - resource_group_sles12 - segwalrep_mirrorless_centos6 - walrep_2 - - regression_tests_gphdfs_centos + - regression_tests_gphdfs_hadoop_centos + - regression_tests_gphdfs_mapr_centos - regression_tests_pxf {% for test_name in CLI_1_suites + CLI_2_suites + @@ -2191,7 +2229,8 @@ jobs: - resource_group_centos6 - segwalrep_mirrorless_centos6 - walrep_2 - - regression_tests_gphdfs_centos + - regression_tests_gphdfs_hadoop_centos + - regression_tests_gphdfs_mapr_centos - regression_tests_pxf {% for test_name in CLI_1_suites + CLI_2_suites + diff --git a/concourse/scripts/gen_mapr.sh b/concourse/scripts/gen_mapr.sh new file mode 100755 index 0000000000..4006df8577 --- /dev/null +++ b/concourse/scripts/gen_mapr.sh @@ -0,0 +1,132 @@ +#!/bin/bash + +set -xeuo pipefail + +MAPR_SSH_OPTS="-i cluster_env_files/private_key.pem" +node_hostname="ccp-$(cat ./terraform*/name)-0" + +get_device_name() { + local device_name="/dev/xvdb" + + # We check fdisk to see if there is an nvme disk because we cannot + # rename the device name to /dev/xvdb like EBS or other ephemeral + # disks. If we find an nvme disk, it will be at /dev/nvme0n1. + local nvme + nvme=$(ssh -tt "${node_hostname}" "sudo bash -c \"fdisk -l | grep /dev/nvme\"") + ssh -tt "${node_hostname}" "[ -L /dev/disk/by-id/google-disk-for-gpdata ]" + local google_disk_exit_code=$? 
+ + if [[ "$nvme" == *"/dev/nvme"* ]]; then + device_name="/dev/nvme0n1" + elif [ "$google_disk_exit_code" = "0" ]; then + device_name="/dev/disk/by-id/google-disk-for-gpdata" + fi + echo $(ssh -tt "${node_hostname}" "sudo bash -c \"readlink -f ${device_name}\"") | sed 's/\\r//g' +} + +# modify gpadmin userid and group to match +# that one of concourse test container +modify_groupid_userid() { + ssh -ttn "${node_hostname}" "sudo bash -c \"\ + usermod -u 500 gpadmin; \ + groupmod -g 501 gpadmin; \ + # find / -group 501 -exec chgrp -h foo {} \;; \ + # find / -user 500 -exec chown -h foo {} \;; \ + \"" +} + +install_java() { + ssh -ttn "${node_hostname}" "sudo bash -c \"\ + yum install -y java-1.7.0-openjdk; \ + \"" +} + +enable_root_ssh_login() { + ssh -ttn "${node_hostname}" "sudo bash -c \"\ + mkdir -p /root/.ssh/ + cp /home/centos/.ssh/authorized_keys /root/.ssh/; \ + sed -ri 's/PermitRootLogin no/PermitRootLogin yes/g' /etc/ssh/sshd_config; \ + service sshd restart; \ + \"" +} + +download_and_run_mapr_setup() { + ssh -ttn "${node_hostname}" "sudo bash -c \"\ + cd /root; \ + wget http://package.mapr.com/releases/v5.2.0/redhat/mapr-setup; \ + chmod 755 mapr-setup; \ + ./mapr-setup; \ + \"" +} + +# create cluster configuration file +create_config_file() { + local device_name=$1 + + cat > /tmp/singlenode_config <<-EOF +[Control_Nodes] +$node_hostname: $device_name +[Data_Nodes] +[Client_Nodes] +[Options] +MapReduce1 = false +YARN = true +HBase = false +MapR-DB = true +ControlNodesAsDataNodes = true +WirelevelSecurity = false +LocalRepo = false +[Defaults] +ClusterName = mapr +User = mapr +Group = mapr +Password = mapr +UID = 2000 +GID = 2000 +Disks = $device_name +StripeWidth = 3 +ForceFormat = false +CoreRepoURL = http://package.mapr.com/releases +EcoRepoURL = http://package.mapr.com/releases/ecosystem-5.x +Version = 5.2.0 +MetricsDBHost = +MetricsDBUser = +MetricsDBPassword = +MetricsDBSchema = +EOF + + scp ${MAPR_SSH_OPTS} cluster_env_files/private_key.pem centos@"${node_hostname}":/tmp + scp ${MAPR_SSH_OPTS} /tmp/singlenode_config centos@"${node_hostname}":/tmp + ssh -ttn "${node_hostname}" "sudo bash -c \"\ + mv /tmp/singlenode_config /opt/mapr-installer/bin/singlenode_config; \ + chown root:root /opt/mapr-installer/bin/singlenode_config; \ + \"" +} + +run_quick_installer() { + ssh -ttn "${node_hostname}" "sudo bash -c \"\ + /opt/mapr-installer/bin/install --user root --private-key /tmp/private_key.pem --quiet --cfg /opt/mapr-installer/bin/singlenode_config new; \ + \"" +} + +grant_top_level_write_permission() { + ssh -ttn "${node_hostname}" "sudo bash -c \"\ + hadoop fs -chmod 777 /;\ + \"" + +} +setup_node() { + local devicename + devicename=$(get_device_name) + + echo "Device name: $devicename" + + modify_groupid_userid + enable_root_ssh_login + download_and_run_mapr_setup + create_config_file "${devicename}" + run_quick_installer + grant_top_level_write_permission +} + +setup_node diff --git a/concourse/scripts/regression_tests_gphdfs.bash b/concourse/scripts/regression_tests_gphdfs.bash index 34580983c7..76fa7da55f 100755 --- a/concourse/scripts/regression_tests_gphdfs.bash +++ b/concourse/scripts/regression_tests_gphdfs.bash @@ -5,6 +5,8 @@ set -exo pipefail CWDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" source "${CWDIR}/common.bash" +HADOOP_TARGET_VERSION=${HADOOP_TARGET_VERSION:-hadoop} + function gen_env(){ cat > /home/gpadmin/run_regression_test.sh <<-EOF set -exo pipefail @@ -38,36 +40,58 @@ function gen_env(){ cd "\${1}/gpdb_src/gpAux" source gpdemo/gpdemo-env.sh 
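+	# generic Hadoop target: provision a local single-node HDFS from the
+	# Apache tarball; MapR target ("mpr"): reuse the Hadoop client that
+	# the MapR installer lays down under /opt/mapr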
- wget -P /tmp http://archive.apache.org/dist/hadoop/common/hadoop-2.7.3/hadoop-2.7.3.tar.gz - tar zxf /tmp/hadoop-2.7.3.tar.gz -C /tmp - export HADOOP_HOME=/tmp/hadoop-2.7.3 - - wget -O \${HADOOP_HOME}/share/hadoop/common/lib/parquet-hadoop-bundle-1.7.0.jar http://central.maven.org/maven2/org/apache/parquet/parquet-hadoop-bundle/1.7.0/parquet-hadoop-bundle-1.7.0.jar - cat > "\${HADOOP_HOME}/etc/hadoop/core-site.xml" <<-EOFF - - - fs.defaultFS - hdfs://localhost:9000/ - - - EOFF - - \${HADOOP_HOME}/bin/hdfs namenode -format -force - \${HADOOP_HOME}/sbin/start-dfs.sh + if [ "$HADOOP_TARGET_VERSION" != "mpr" ]; then + wget -P /tmp http://archive.apache.org/dist/hadoop/common/hadoop-2.7.3/hadoop-2.7.3.tar.gz + tar zxf /tmp/hadoop-2.7.3.tar.gz -C /tmp + export HADOOP_HOME=/tmp/hadoop-2.7.3 + + wget -O \${HADOOP_HOME}/share/hadoop/common/lib/parquet-hadoop-bundle-1.7.0.jar http://central.maven.org/maven2/org/apache/parquet/parquet-hadoop-bundle/1.7.0/parquet-hadoop-bundle-1.7.0.jar + cat > "\${HADOOP_HOME}/etc/hadoop/core-site.xml" <<-EOFF + + + fs.defaultFS + hdfs://localhost:9000/ + + + EOFF + + \${HADOOP_HOME}/bin/hdfs namenode -format -force + \${HADOOP_HOME}/sbin/start-dfs.sh + else + export HADOOP_HOME=/opt/mapr/hadoop/hadoop-2.7.0 + wget -O \${HADOOP_HOME}/share/hadoop/common/lib/parquet-hadoop-bundle-1.8.1.jar http://central.maven.org/maven2/org/apache/parquet/parquet-hadoop-bundle/1.8.1/parquet-hadoop-bundle-1.8.1.jar + fi cd "\${1}/gpdb_src/gpAux/extensions/gphdfs/regression/integrate" - HADOOP_HOST=localhost HADOOP_PORT=9000 ./generate_gphdfs_data.sh + ./generate_gphdfs_data.sh cd "\${1}/gpdb_src/gpAux/extensions/gphdfs/regression" - GP_HADOOP_TARGET_VERSION=hadoop HADOOP_HOST=localhost HADOOP_PORT=9000 ./run_gphdfs_regression.sh + GP_HADOOP_TARGET_VERSION=$HADOOP_TARGET_VERSION HADOOP_HOST=localhost HADOOP_PORT=9000 ./run_gphdfs_regression.sh exit 0 - EOF +EOF chown gpadmin:gpadmin /home/gpadmin/run_regression_test.sh chmod a+x /home/gpadmin/run_regression_test.sh } +function install_mapr_client() { + if [ "$HADOOP_TARGET_VERSION" == "mpr" ]; then + cat > "/etc/yum.repos.d/maprtech.repo" <<-EOFMAPR + [maprtech] + name=MapR Technologies + baseurl=http://package.mapr.com/releases/v5.2.0/redhat + enabled=1 + gpgcheck=0 + protect=1 + EOFMAPR + yum install -y mapr-client.x86_64 + /opt/mapr/server/configure.sh -N mapr -c -C $MAPR_HOST:7222 + chown -R gpadmin /opt/mapr + chown -R root /opt/mapr/conf/proxy + fi +} + function run_regression_test() { su - gpadmin -c "bash /home/gpadmin/run_regression_test.sh $(pwd)" } @@ -76,6 +100,15 @@ function setup_gpadmin_user() { ./gpdb_src/concourse/scripts/setup_gpadmin_user.bash "$TARGET_OS" } +function copy_jar_to_mapr_host() { + if [ "$HADOOP_TARGET_VERSION" == "mpr" ]; then + scp -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -i cluster_env_files/private_key.pem ${GPHOME}/lib/hadoop/mpr-gnet-1.2.0.0.jar centos@$MAPR_HOST:/tmp + ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -i cluster_env_files/private_key.pem -ttn centos@$MAPR_HOST "sudo bash -c \"\ + mv /tmp/mpr-gnet-1.2.0.0.jar /opt/mapr/hadoop/hadoop-2.7.0/share/hadoop/common/lib/; \ + \"" + fi +} + function _main() { if [ -z "$TARGET_OS" ]; then echo "FATAL: TARGET_OS is not set" @@ -88,13 +121,14 @@ function _main() { exit 1 fi + time setup_gpadmin_user + time install_mapr_client time configure sed -i s/1024/unlimited/ /etc/security/limits.d/90-nproc.conf time install_gpdb - time setup_gpadmin_user time make_cluster + time copy_jar_to_mapr_host time gen_env - 
time run_regression_test } diff --git a/concourse/tasks/gen_mapr.yml b/concourse/tasks/gen_mapr.yml new file mode 100644 index 0000000000..d1ff663add --- /dev/null +++ b/concourse/tasks/gen_mapr.yml @@ -0,0 +1,36 @@ +--- +platform: linux + +image_resource: + type: docker-image + source: + repository: pivotaldata/ccp + tag: "7" + +inputs: + - name: terraform + - name: ccp_src + - name: gpdb_src + +outputs: + - name: cluster_env_files + +run: + path: sh + args: + - -exc + - | + ccp_src/scripts/download_tf_state.sh + NUMBER_OF_NODES=$(jq -r .cluster_host_list[] ./terraform*/metadata | wc -l) + ccp_src/scripts/generate_env_files.sh $NUMBER_OF_NODES false + ccp_src/scripts/generate_ssh_files.sh $NUMBER_OF_NODES false + gpdb_src/concourse/scripts/gen_mapr.sh $NUMBER_OF_NODES + +params: + AWS_ACCESS_KEY_ID: + AWS_SECRET_ACCESS_KEY: + AWS_DEFAULT_REGION: + BUCKET_PATH: + BUCKET_NAME: + PLATFORM: centos7 + CLOUD_PROVIDER: google diff --git a/concourse/tasks/regression_tests_gphdfs_mapr.yml b/concourse/tasks/regression_tests_gphdfs_mapr.yml new file mode 100644 index 0000000000..7e55d140e7 --- /dev/null +++ b/concourse/tasks/regression_tests_gphdfs_mapr.yml @@ -0,0 +1,23 @@ +platform: linux +image_resource: + type: docker-image + source: + repository: pivotaldata/centos-gpdb-dev + username: + password: + +inputs: + - name: gpdb_src + - name: bin_gpdb + - name: terraform + - name: cluster_env_files + +run: + path: sh + args: + - -exc + - | + MAPR_HOST="ccp-$(cat ./terraform*/name)-0" gpdb_src/concourse/scripts/regression_tests_gphdfs.bash + +params: + HADOOP_TARGET_VERSION: mpr diff --git a/gpAux/extensions/gphdfs/regression/gphdfs_init_file b/gpAux/extensions/gphdfs/regression/gphdfs_init_file index f8182c76ba..dddb5f4045 100644 --- a/gpAux/extensions/gphdfs/regression/gphdfs_init_file +++ b/gpAux/extensions/gphdfs/regression/gphdfs_init_file @@ -67,7 +67,7 @@ m/\\\!hadoop_bin.*/ -- m/gphdfs:\/\/.*:\d+/ -- s/gphdfs:\/\/.*:\d+/gphdfs:\/\/:/ -- m/gphdfs:\/\/.*\// --- s/gphdfs:\/\/.*\//gphdfs:\/\// +-- s/gphdfs:\/\/.*\//gphdfs:\/\/mapr\/mapr/ -- m/gpfdist:\/\/.*:\d+/ -- s/gpfdist:\/\/.*:\d+/gpfdist:\/\/:/ -- m/\d+\/\d+\/\d+ \d+:\d+:\d+/ diff --git a/gpAux/extensions/gphdfs/regression/input/init_file b/gpAux/extensions/gphdfs/regression/input/init_file index 908a3b0b80..2fbf678ecd 100644 --- a/gpAux/extensions/gphdfs/regression/input/init_file +++ b/gpAux/extensions/gphdfs/regression/input/init_file @@ -47,7 +47,7 @@ m/INFO client.RMProxy/ -- m/gphdfs:\/\/.*:\d+/ -- s/gphdfs:\/\/.*:\d+/gphdfs:\/\/:/ -- m/gphdfs:\/\/.*\// --- s/gphdfs:\/\/.*\//gphdfs:\/\// +-- s/gphdfs:\/\/.*\//gphdfs:\/\/mapr\/mapr/ -- m/gpfdist:\/\/.*:\d+/ -- s/gpfdist:\/\/.*:\d+/gpfdist:\/\/:/ -- m/\d+\/\d+\/\d+ \d+:\d+:\d+/ diff --git a/gpAux/extensions/gphdfs/regression/input/regression/init_file b/gpAux/extensions/gphdfs/regression/input/regression/init_file index 2bc90c44ef..2a1e3e055b 100644 --- a/gpAux/extensions/gphdfs/regression/input/regression/init_file +++ b/gpAux/extensions/gphdfs/regression/input/regression/init_file @@ -2,5 +2,5 @@ -- m/gphdfs:\/\/.*:\d+/ -- s/gphdfs:\/\/.*:\d+/gphdfs:\/\/:/ -- m/gphdfs:\/\/.*\// --- s/gphdfs:\/\/.*\//gphdfs:\/\// +-- s/gphdfs:\/\/.*\//gphdfs:\/\/mapr\/mapr/ -- end_matchsubs diff --git a/gpAux/extensions/gphdfs/regression/integrate/generate_gphdfs_data.sh b/gpAux/extensions/gphdfs/regression/integrate/generate_gphdfs_data.sh index 90394a74f5..d3196fd0e6 100755 --- a/gpAux/extensions/gphdfs/regression/integrate/generate_gphdfs_data.sh +++ 
b/gpAux/extensions/gphdfs/regression/integrate/generate_gphdfs_data.sh @@ -1,6 +1,6 @@ #!/bin/bash -set -euo pipefail +set -xeuo pipefail _main() { local HADOOP=$HADOOP_HOME/bin/hadoop @@ -31,12 +31,13 @@ _main() { python ${CURDIR}/create_data.py 500000 all > ${DYNDATADIR}/all_100.txt sed 's/bigint/text/g' ${DYNDATADIR}/bigint.txt > ${DYNDATADIR}/bigint_text.txt - $HADOOP fs -mkdir -p hdfs://${HADOOP_HOST}:${HADOOP_PORT}/plaintext/ - $HADOOP fs -mkdir -p hdfs://${HADOOP_HOST}:${HADOOP_PORT}/extwrite/ - $HADOOP fs -mkdir -p hdfs://${HADOOP_HOST}:${HADOOP_PORT}/parquet/ - $UPLOADCMD $DATADIR/*.txt hdfs://${HADOOP_HOST}:${HADOOP_PORT}/plaintext/ - $UPLOADCMD $DATADIR/*.parquet hdfs://${HADOOP_HOST}:${HADOOP_PORT}/parquet/ - $UPLOADCMD $DYNDATADIR/* hdfs://${HADOOP_HOST}:${HADOOP_PORT}/plaintext/ + $HADOOP fs -mkdir -p /plaintext/ + $HADOOP fs -mkdir -p /extwrite/ + $HADOOP fs -mkdir -p /parquet/ + $UPLOADCMD $DATADIR/*.txt /plaintext/ + $UPLOADCMD $DATADIR/*.parquet /parquet/ + $UPLOADCMD $DYNDATADIR/* /plaintext/ + } _main "$@" diff --git a/gpAux/extensions/gphdfs/regression/legacy/javaclasses/UseMapred.java b/gpAux/extensions/gphdfs/regression/legacy/javaclasses/UseMapred.java index cd483c2092..2a626f70a9 100644 --- a/gpAux/extensions/gphdfs/regression/legacy/javaclasses/UseMapred.java +++ b/gpAux/extensions/gphdfs/regression/legacy/javaclasses/UseMapred.java @@ -29,8 +29,8 @@ public class UseMapred{ { //datanodestr = "maprfs:///" +hdfshostname; //jobtrackerstr = "maprfs:///" +hdfshostname; - datanodestr = "maprfs:///my.cluster.com"; - jobtrackerstr = "maprfs:///my.cluster.com"; + datanodestr = "maprfs:///mapr/mapr"; + jobtrackerstr = "maprfs:///mapr/mapr"; } else { diff --git a/gpAux/extensions/gphdfs/regression/legacy/javaclasses/UseMapreduce.java b/gpAux/extensions/gphdfs/regression/legacy/javaclasses/UseMapreduce.java index d304cf05ef..0957c44bc1 100644 --- a/gpAux/extensions/gphdfs/regression/legacy/javaclasses/UseMapreduce.java +++ b/gpAux/extensions/gphdfs/regression/legacy/javaclasses/UseMapreduce.java @@ -26,8 +26,8 @@ public class UseMapreduce { { //datanodestr = "maprfs:///" +hdfshostname; //jobtrackerstr = "maprfs:///" +hdfshostname; - datanodestr = "maprfs:///my.cluster.com"; - jobtrackerstr = "maprfs:///my.cluster.com"; + datanodestr = "maprfs:///mapr/mapr"; + jobtrackerstr = "maprfs:///mapr/mapr"; } else { diff --git a/gpAux/extensions/gphdfs/regression/run_gphdfs_regression.sh b/gpAux/extensions/gphdfs/regression/run_gphdfs_regression.sh index d29b56e5e1..2517248b62 100755 --- a/gpAux/extensions/gphdfs/regression/run_gphdfs_regression.sh +++ b/gpAux/extensions/gphdfs/regression/run_gphdfs_regression.sh @@ -54,11 +54,12 @@ create_runcmd() { _main() { allow_hadoop_user_to_connect - override_core_site + local CURDIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" local PGREGRESS=$GPHOME/lib/postgresql/pgxs/src/test/regress/pg_regress local HADOOPCMD=$HADOOP_HOME/bin/hadoop + local NAMENODE_URI=${HADOOP_HOST}:${HADOOP_PORT} build_test_jar @@ -69,16 +70,25 @@ _main() { cp $CURDIR/input/*.source $CURDIR/source_replaced/input/ cp $CURDIR/output/*.source $CURDIR/source_replaced/output/ - for f in $(ls $CURDIR/source_replaced/input);do + if [ "$GP_HADOOP_TARGET_VERSION" == "mpr" ]; then + NAMENODE_URI="/mapr/mapr" + # set Hadoop port to none that is expected by test harness for MAPR distro + export HADOOP_PORT="none" + else + override_core_site + fi + + for f in $(ls $CURDIR/source_replaced/input); do echo -e "--start_ignore\n\!%HADOOP_HOME%/bin/hadoop fs -rm -r 
/mapreduce/*\n\!%HADOOP_HOME%/bin/hadoop fs -rm -r /mapred/*\n--end_ignore" >> "$CURDIR/source_replaced/input/$f" - sed -i "s|gpfdist://%localhost%:%gpfdistPort%|gphdfs://${HADOOP_HOST}:${HADOOP_PORT}/plaintext|g" "$CURDIR/source_replaced/input/$f" + sed -i "s|gpfdist://%localhost%:%gpfdistPort%|gphdfs://${NAMENODE_URI}/plaintext|g" "$CURDIR/source_replaced/input/$f" sed -i "s|%cmdstr%|${CURDIR}/runcmd|g" "$CURDIR/source_replaced/input/$f" - sed -i "s|%HADOOP_HOST%|${HADOOP_HOST}:${HADOOP_PORT}|g" "$CURDIR/source_replaced/input/$f" - sed -i "s|%HDFSaddr%|${HADOOP_HOST}:${HADOOP_PORT}|g" "$CURDIR/source_replaced/input/$f" + sed -i "s|%HADOOP_HOST%|${NAMENODE_URI}|g" "$CURDIR/source_replaced/input/$f" + sed -i "s|%HDFSaddr%|${NAMENODE_URI}|g" "$CURDIR/source_replaced/input/$f" sed -i "s|%HADOOP_HOME%|${HADOOP_HOME}|g" "$CURDIR/source_replaced/input/$f" sed -i "s|%MYD%|${CURDIR}/source_replaced/input|g" "$CURDIR/source_replaced/input/$f" sed -i "s|%HADOOP_FS%|${HADOOPCMD}|g" "$CURDIR/source_replaced/input/$f" done + cp $CURDIR/input/parsefile.py $CURDIR/source_replaced/input/ -- GitLab
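
A minimal sketch of the two entry points this patch wires together, for
exercising it by hand. Assumptions: gen_pipeline.py lives alongside the
pipeline templates and accepts -t as in the header comment of
gpdb_master-generated.yml, and <cluster-name> is a placeholder for the
terraform cluster name:

  # regenerate the production pipeline after editing gpdb-tpl.yml
  cd concourse/pipelines && ./gen_pipeline.py -t prod

  # roughly what the new regression_tests_gphdfs_mapr.yml task runs:
  # MAPR_HOST is derived from the terraform cluster name, and
  # HADOOP_TARGET_VERSION=mpr selects the MapR code paths in the script
  MAPR_HOST="ccp-<cluster-name>-0" HADOOP_TARGET_VERSION=mpr \
    gpdb_src/concourse/scripts/regression_tests_gphdfs.bash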