提交 f2f86174 编写于 作者: N Ning Yu

resgroup: backward compatibility for memory auditor

Memory auditor was a new feature introduced to allow external components
(e.g. pl/container) managed by resource group.  This feature requires a
new gpdb dir to be created in cgroup memory controller, however on 5X
branch unless the users created this new dir manually then the upgrade
from a previous version would fail.

In this commit we provide backward compatibility by checking the release
version:

- on 6X and master branches the memory auditor feature is always enabled
  so the new gpdb dir is mandatory;
- on 5X branch only if the new gpdb dir is created with proper
  permissions the memory auditor feature could be enabled, when it's
  disabled, `CREATE RESOURCE GROUP WITH (memory_auditor='cgroup')` will fail
  with guidance on how to enable it;

Binary swap tests are also provided to verify backward compatibility in
future releases.  As cgroup needs to be configured to enable resgroup we
split the resgroup binary swap tests into two parts:

- resqueue mode only tests which can be triggered in the
  icw_gporca_centos6 pipeline job after the ICW tests, these parts have
  no requirements on cgroup;
- complete resqueue & resgroup modes tests which can be triggered in the
  mpp_resource_group_centos{6,7} pipeline jobs after the resgroup tests,
  these parts need cgroup to be properly configured;
上级 b5f09a7f
......@@ -1602,6 +1602,10 @@ jobs:
trigger: [[ test_trigger ]]
- get: ccp_src
- get: centos-gpdb-dev-6
- get: binary_swap_gpdb
passed: [compile_gpdb_binary_swap_centos6]
resource: binary_swap_gpdb_centos6
trigger: [[ test_trigger ]]
- put: terraform
params:
<<: *ccp_default_params
......@@ -1636,6 +1640,10 @@ jobs:
trigger: [[ test_trigger ]]
- get: ccp_src
- get: centos-gpdb-dev-6
- get: binary_swap_gpdb
passed: [compile_gpdb_binary_swap_centos6]
resource: binary_swap_gpdb_centos6
trigger: [[ test_trigger ]]
- put: terraform
params:
<<: *ccp_default_params
......
......@@ -90,8 +90,64 @@ EOF1
EOF
}
# Strip the cgroup tree back to the minimal layout expected by older
# gpdb releases: the memory/gpdb dir (and its per-group sub dirs) only
# exists for the new memory auditor feature, so it is removed before the
# binary swap test to verify backward compatibility.
# Arguments: $1 - ssh alias of the host to clean up
keep_minimal_cgroup_dirs() {
local gpdb_master_alias=$1
local basedir=$CGROUP_BASEDIR
# Run remotely as root; bash -e aborts on first failure, -x traces.
# NOTE: the heredoc is unquoted, so $basedir expands locally before
# being sent to the remote shell.
ssh -t $gpdb_master_alias sudo bash -ex <<EOF
rmdir $basedir/memory/gpdb/*/ || :
rmdir $basedir/memory/gpdb
EOF
}
# Deploy the binary-swap gpdb package onto a host under
# /tmp/local/greenplum-db-devel.  No-op unless TEST_BINARY_SWAP=true.
# Arguments: $1 - ssh alias of the host to deploy to
# Reads: binary_swap_gpdb/bin_gpdb.tar.gz from the local working dir
setup_binary_swap_test() {
local gpdb_master_alias=$1
if [ "${TEST_BINARY_SWAP}" != "true" ]; then
return 0
fi
ssh $gpdb_master_alias mkdir -p /tmp/local/greenplum-db-devel
# Stream the tarball over ssh and unpack it remotely.
ssh $gpdb_master_alias tar -zxf - -C /tmp/local/greenplum-db-devel \
< binary_swap_gpdb/bin_gpdb.tar.gz
# The packaged greenplum_path.sh hard-codes /usr/local; rewrite it to
# match the /tmp/local install prefix used above.
ssh $gpdb_master_alias sed -i -e "s@/usr/local@/tmp/local@" \
/tmp/local/greenplum-db-devel/greenplum_path.sh
}
# Run the binary swap test suite on the gpdb master host.
# No-op unless TEST_BINARY_SWAP=true.
# Arguments: $1 - ssh alias of the gpdb master host
# Assumes: setup_binary_swap_test() has already deployed the swap
# binaries, and a cluster is running on port 5432.
run_binary_swap_test() {
local gpdb_master_alias=$1
if [ "${TEST_BINARY_SWAP}" != "true" ]; then
return 0
fi
# Copy the binary_swap_gpdb artifacts from the concourse build dir to
# the master so the test script can find them under /home/gpadmin.
scp -r /tmp/build/*/binary_swap_gpdb/ $gpdb_master_alias:/home/gpadmin/
# BINARY_SWAP_VARIANT=_resgroup selects the resgroup-mode schedule in
# test_binary_swap_gpdb.bash; the heredoc is unquoted but contains no
# local variables to expand.
ssh $gpdb_master_alias bash -ex <<EOF
source /usr/local/greenplum-db-devel/greenplum_path.sh
export PGPORT=5432
export MASTER_DATA_DIRECTORY=/data/gpdata/master/gpseg-1
export BINARY_SWAP_VARIANT=_resgroup
cd /home/gpadmin
time ./gpdb_src/concourse/scripts/test_binary_swap_gpdb.bash
EOF
}
# --- main driver: prepare cgroups on both cluster hosts, run the
# --- resgroup tests, then run the backward-compatibility swap test.
mount_cgroups ccp-${CLUSTER_NAME}-0
mount_cgroups ccp-${CLUSTER_NAME}-1
make_cgroups_dir ccp-${CLUSTER_NAME}-0
make_cgroups_dir ccp-${CLUSTER_NAME}-1
run_resgroup_test mdw
#
# below is for binary swap test
#
# remove cgroup dirs for new features such as the memory auditor
# the purpose is to ensure the backward compatibilities
keep_minimal_cgroup_dirs ccp-${CLUSTER_NAME}-0
keep_minimal_cgroup_dirs ccp-${CLUSTER_NAME}-1
# deploy the binaries for binary swap test
setup_binary_swap_test sdw1
# run it
run_binary_swap_test mdw
......@@ -2,15 +2,37 @@
set -eox pipefail
if [ "$USER" = gpadmin ]; then
tmpdir=/tmp
else
tmpdir=/opt
fi
function gen_env() {
cat > /opt/run_test.sh <<-EOF
cat > $tmpdir/run_test.sh <<-EOF
source /usr/local/greenplum-db-devel/greenplum_path.sh
[ -e \${1}/gpdb_src/gpAux/gpdemo/gpdemo-env.sh ] && \
source \${1}/gpdb_src/gpAux/gpdemo/gpdemo-env.sh
cd "\${1}/gpdb_src/src/test/binary_swap"
./test_binary_swap.sh -b /tmp/local/greenplum-db-devel
./test_binary_swap.sh -b /tmp/local/greenplum-db-devel \
-v "${BINARY_SWAP_VARIANT}" || (
errcode=\$?
find . -name regression.diffs \
| while read diff; do
cat <<EOF1
======================================================================
DIFF FILE: \$diff
----------------------------------------------------------------------
EOF1
cat \$diff
done
exit \$errcode
)
EOF
chmod a+x /opt/run_test.sh
chmod a+x $tmpdir/run_test.sh
}
function install_gpdb_binary_swap() {
......@@ -21,7 +43,11 @@ function install_gpdb_binary_swap() {
}
function run_test() {
su - gpadmin -c "bash /opt/run_test.sh $(pwd)"
if [ "$USER" = gpadmin ]; then
bash $tmpdir/run_test.sh $(pwd)
else
su - gpadmin -c "bash $tmpdir/run_test.sh $(pwd)"
fi
}
function _main() {
......
......@@ -6,12 +6,14 @@ inputs:
- name: gpdb_src
- name: ccp_src
- name: cluster_env_files
- name: binary_swap_gpdb
outputs:
params:
MAKE_TEST_COMMAND: ""
BLDWRAP_POSTGRES_CONF_ADDONS: ""
TEST_OS: ""
TEST_BINARY_SWAP: false
CONFIGURE_FLAGS: ""
run:
......
......@@ -6,6 +6,8 @@
import os
import sys
import psutil
from gppylib.commands import gp
from gppylib import gpversion
class dummy(object):
......@@ -55,6 +57,16 @@ class cgroup(object):
self.validate_permission("memory/memory.limit_in_bytes", "r")
# resgroup memory auditor is introduced in 6.0 devel and backported
# to 5.x branch since 5.6.1. To provide backward compatibilities
# memory permissions are only checked since 6.0.
gpverstr = gp.GpVersion.local("", os.getenv("GPHOME"))
gpver = gpversion.GpVersion(gpverstr)
if gpver.version >= [6, 0, 0]:
self.validate_permission("memory/gpdb/", "rwx")
self.validate_permission("memory/gpdb/memory.limit_in_bytes", "rw")
self.validate_permission("memory/gpdb/memory.usage_in_bytes", "r")
def die(self, msg):
exit(self.impl + self.error_prefix + msg)
......
......@@ -14,6 +14,11 @@ gpcheckresgroupimpl_path = os.path.abspath('gpcheckresgroupimpl')
gpcheckresgroupimpl = imp.load_source('gpcheckresgroupimpl', gpcheckresgroupimpl_path)
import gpcheckresgroupimpl
from gppylib.commands import gp
from gppylib import gpversion
gpverstr = gp.GpVersion.local("", os.getenv("GPHOME"))
gpver = gpversion.GpVersion(gpverstr)
@unittest.skipUnless(sys.platform.startswith("linux"), "requires linux")
class GpCheckResGroupImplCGroup(unittest.TestCase):
cgroup_mntpnt = None
......@@ -44,6 +49,10 @@ class GpCheckResGroupImplCGroup(unittest.TestCase):
self.touch(os.path.join(self.cgroup_mntpnt, "memory", "memory.limit_in_bytes"), 0400)
self.touch(os.path.join(self.cgroup_mntpnt, "memory", "memory.memsw.limit_in_bytes"), 0400)
os.mkdir(os.path.join(self.cgroup_mntpnt, "memory", "gpdb"), 0700)
self.touch(os.path.join(self.cgroup_mntpnt, "memory", "gpdb", "memory.limit_in_bytes"), 0600)
self.touch(os.path.join(self.cgroup_mntpnt, "memory", "gpdb", "memory.usage_in_bytes"), 0400)
def tearDown(self):
shutil.rmtree(self.cgroup_mntpnt)
self.cgroup = None
......@@ -160,6 +169,61 @@ class GpCheckResGroupImplCGroup(unittest.TestCase):
with self.assertRaisesRegexp(AssertionError, "file '.*/memory/memory.limit_in_bytes' does not exist"):
self.cgroup.validate_all()
def test_when_memory_limit_in_bytes_bad_permission(self):
os.chmod(os.path.join(self.cgroup_mntpnt, "memory", "memory.limit_in_bytes"), 0100)
with self.assertRaisesRegexp(AssertionError, "file '.*/memory/memory.limit_in_bytes' permission denied: require permission 'r'"):
self.cgroup.validate_all()
# A missing memory/gpdb dir must fail validation on 6.0+ (where the
# memory auditor is mandatory) but pass on 5.x for backward
# compatibility.  NOTE(review): indentation was lost in this extract;
# code is kept byte-identical to the diff.
def test_when_memory_gpdb_dir_missing(self):
shutil.rmtree(os.path.join(self.cgroup_mntpnt, "memory", "gpdb"))
if gpver.version >= [6, 0, 0]:
with self.assertRaisesRegexp(AssertionError, "directory '.*/memory/gpdb/' does not exist"):
self.cgroup.validate_all()
else:
self.cgroup.validate_all()
# memory/gpdb without write/search permission (0500) must fail the
# rwx check on 6.0+ but be tolerated on 5.x.  NOTE(review):
# indentation was lost in this extract; code kept byte-identical.
def test_when_memory_gpdb_dir_bad_permission(self):
os.chmod(os.path.join(self.cgroup_mntpnt, "memory", "gpdb"), 0500)
if gpver.version >= [6, 0, 0]:
with self.assertRaisesRegexp(AssertionError, "directory '.*/memory/gpdb/' permission denied: require permission 'rwx'"):
self.cgroup.validate_all()
else:
self.cgroup.validate_all()
# restore permission for the dir to be removed in tearDown()
os.chmod(os.path.join(self.cgroup_mntpnt, "memory", "gpdb"), 0700)
def test_when_memory_gpdb_limit_in_bytes_missing(self):
os.unlink(os.path.join(self.cgroup_mntpnt, "memory", "gpdb", "memory.limit_in_bytes"))
if gpver.version >= [6, 0, 0]:
with self.assertRaisesRegexp(AssertionError, "file '.*/memory/gpdb/memory.limit_in_bytes' does not exist"):
self.cgroup.validate_all()
else:
self.cgroup.validate_all()
def test_when_memory_gpdb_limit_in_bytes_bad_permission(self):
os.chmod(os.path.join(self.cgroup_mntpnt, "memory", "gpdb", "memory.limit_in_bytes"), 0100)
if gpver.version >= [6, 0, 0]:
with self.assertRaisesRegexp(AssertionError, "file '.*/memory/gpdb/memory.limit_in_bytes' permission denied: require permission 'rw'"):
self.cgroup.validate_all()
else:
self.cgroup.validate_all()
def test_when_memory_gpdb_usage_in_bytes_missing(self):
os.unlink(os.path.join(self.cgroup_mntpnt, "memory", "gpdb", "memory.usage_in_bytes"))
if gpver.version >= [6, 0, 0]:
with self.assertRaisesRegexp(AssertionError, "file '.*/memory/gpdb/memory.usage_in_bytes' does not exist"):
self.cgroup.validate_all()
else:
self.cgroup.validate_all()
def test_when_memory_gpdb_usage_in_bytes_bad_permission(self):
os.chmod(os.path.join(self.cgroup_mntpnt, "memory", "gpdb", "memory.usage_in_bytes"), 0100)
if gpver.version >= [6, 0, 0]:
with self.assertRaisesRegexp(AssertionError, "file '.*/memory/gpdb/memory.usage_in_bytes' permission denied: require permission 'r'"):
self.cgroup.validate_all()
else:
self.cgroup.validate_all()
if __name__ == '__main__':
unittest.main()
......@@ -757,6 +757,13 @@ gpvars_assign_gp_resource_manager_policy(const char *newval, bool doit, GucSourc
{
ResourceManagerPolicy newtype = RESOURCE_MANAGER_POLICY_QUEUE;
/*
* Probe resgroup configurations even not in resgroup mode,
* variables like gp_resource_group_enable_cgroup_memory need to
* be properly set in all modes.
*/
ResGroupOps_Probe();
if (newval == NULL || newval[0] == 0)
newtype = RESOURCE_MANAGER_POLICY_QUEUE;
else if (!pg_strcasecmp("queue", newval))
......
......@@ -878,6 +878,29 @@ checkResgroupMemAuditor(int32 memAuditor)
errmsg("memory_auditor should be \"%s\" or \"%s\"",
ResGroupMemAuditorName[RESGROUP_MEMORY_AUDITOR_VMTRACKER],
ResGroupMemAuditorName[RESGROUP_MEMORY_AUDITOR_CGROUP])));
if (memAuditor == RESGROUP_MEMORY_AUDITOR_CGROUP &&
!gp_resource_group_enable_cgroup_memory)
{
/*
* Suppose the user has reconfigured the cgroup dirs by following
* the gpdb documents, could it take effect at runtime (e.g. create
* the resgroup again) without restart the cluster?
*
* It's possible but might not be reliable, as the user might
* introduced unwanted changes to other cgroup dirs during the
* reconfiguration (e.g. changed the permissions, moved processes
* in/out).
*
* So we do not recheck the permissions here.
*/
ereport(ERROR,
(errcode(ERRCODE_GP_FEATURE_NOT_CONFIGURED),
errmsg("cgroup is not properly configured for the 'cgroup' memory auditor"),
errhint("Extra cgroup configurations are required to enable this feature, "
"please refer to the Greenplum Documentations for details")));
}
}
/*
......
......@@ -36,6 +36,19 @@ ResGroupOps_Name(void)
return "unsupported";
}
/*
 * Probe the configuration for the OS group implementation.
 *
 * Return true if everything is OK, or false if some requirements are not
 * satisfied.  Will not fail in either case.
 *
 * This is the stub for platforms without cgroup support: it reports the
 * lack of support and always answers false.
 */
bool
ResGroupOps_Probe(void)
{
unsupported_system();
return false;
}
/* Check whether the OS group implementation is available and useable */
void
ResGroupOps_Bless(void)
......@@ -69,10 +82,11 @@ ResGroupOps_CreateGroup(Oid group)
/*
* Destroy the OS group for group.
*
* Fail if any process is running under it.
* One OS group can not be dropped if there are processes running under it,
* if migrate is true these processes will be moved out automatically.
*/
void
ResGroupOps_DestroyGroup(Oid group)
ResGroupOps_DestroyGroup(Oid group, bool migrate)
{
unsupported_system();
}
......
......@@ -42,15 +42,43 @@
* So far these operations are mainly for CPU rate limitation and accounting.
*/
#define CGROUP_ERROR_PREFIX "cgroup is not properly configured: "
#define CGROUP_ERROR(...) do { \
elog(ERROR, CGROUP_ERROR_PREFIX __VA_ARGS__); \
} while (false)
#define CGROUP_ERROR(...) elog(ERROR, __VA_ARGS__)
#define CGROUP_CONFIG_ERROR(...) \
CGROUP_ERROR("cgroup is not properly configured: " __VA_ARGS__)
#define PROC_MOUNTS "/proc/self/mounts"
#define MAX_INT_STRING_LEN 20
#define MAX_RETRY 10
/*
* cgroup memory permission is only mandatory on 6.x and master;
* on 5.x we need to make it optional to provide backward compatibilities.
*/
#define CGROUP_MEMORY_IS_OPTIONAL (GP_VERSION_NUM < 60000)
typedef struct PermItem PermItem;
typedef struct PermList PermList;
struct PermItem
{
const char *comp;
const char *prop;
int perm;
};
struct PermList
{
const PermItem *items;
bool optional;
bool *presult;
};
#define foreach_perm_list(i, lists) \
for ((i) = 0; (lists)[(i)].items; (i)++)
#define foreach_perm_item(i, items) \
for ((i) = 0; (items)[(i)].comp; (i)++)
static char * buildPath(Oid group, const char *base, const char *comp, const char *prop, char *path, size_t pathsize);
static int lockDir(const char *path, bool block);
static void unassignGroup(Oid group, const char *comp, int fddir);
......@@ -61,16 +89,87 @@ static size_t readData(const char *path, char *data, size_t datasize);
static void writeData(const char *path, char *data, size_t datasize);
static int64 readInt64(Oid group, const char *base, const char *comp, const char *prop);
static void writeInt64(Oid group, const char *base, const char *comp, const char *prop, int64 x);
static bool permListCheck(const PermList *permlist, Oid group, bool report);
static bool checkPermission(Oid group, bool report);
static void getMemoryInfo(unsigned long *ram, unsigned long *swap);
static void getCgMemoryInfo(uint64 *cgram, uint64 *cgmemsw);
static int getOvercommitRatio(void);
static void detectCgroupMountPoint(void);
static void detectCgroupMemSwap(void);
static bool detectCgroupMountPoint(void);
static Oid currentGroupIdInCGroup = InvalidOid;
static char cgdir[MAXPGPATH];
static bool cgmemswap = false;
bool gp_resource_group_enable_cgroup_memory = false;
bool gp_resource_group_enable_cgroup_swap = false;
/*
* These checks should keep in sync with gpMgmt/bin/gpcheckresgroupimpl
*/
static const PermItem perm_items_cpu[] =
{
{ "cpu", "", R_OK | W_OK | X_OK },
{ "cpu", "cgroup.procs", R_OK | W_OK },
{ "cpu", "cpu.cfs_period_us", R_OK | W_OK },
{ "cpu", "cpu.cfs_quota_us", R_OK | W_OK },
{ "cpu", "cpu.shares", R_OK | W_OK },
{ NULL, NULL, 0 }
};
static const PermItem perm_items_cpu_acct[] =
{
{ "cpuacct", "", R_OK | W_OK | X_OK },
{ "cpuacct", "cgroup.procs", R_OK | W_OK },
{ "cpuacct", "cpuacct.usage", R_OK },
{ "cpuacct", "cpuacct.stat", R_OK },
{ NULL, NULL, 0 }
};
static const PermItem perm_items_memory[] =
{
{ "memory", "", R_OK | W_OK | X_OK },
{ "memory", "memory.limit_in_bytes", R_OK | W_OK },
{ "memory", "memory.usage_in_bytes", R_OK },
{ NULL, NULL, 0 }
};
static const PermItem perm_items_swap[] =
{
{ "memory", "", R_OK | W_OK | X_OK },
{ "memory", "memory.memsw.limit_in_bytes", R_OK | W_OK },
{ "memory", "memory.memsw.usage_in_bytes", R_OK },
{ NULL, NULL, 0 }
};
/*
* Permission groups.
*/
static const PermList permlists[] =
{
/*
* swap permissions are optional.
*
* cgroup/memory/memory.memsw.* is only available if
* - CONFIG_MEMCG_SWAP_ENABLED=on in kernel config, or
* - swapaccount=1 in kernel cmdline.
*
* Without these interfaces the swap usage can not be limited or accounted
* via cgroup.
*/
{ perm_items_swap, true, &gp_resource_group_enable_cgroup_swap },
/*
* memory permissions can be mandatory or optional depends on the switch.
*
* resgroup memory auditor is introduced in 6.0 devel and backported
* to 5.x branch since 5.6.1. To provide backward compatibilities memory
* permissions are optional on 5.x branch.
*/
{ perm_items_memory, CGROUP_MEMORY_IS_OPTIONAL,
&gp_resource_group_enable_cgroup_memory },
/* cpu/cpuacct permissions are mandatory */
{ perm_items_cpu, false, NULL },
{ perm_items_cpu_acct, false, NULL },
{ NULL, false, NULL }
};
/*
* Build path string with parameters.
......@@ -324,7 +423,7 @@ removeDir(Oid group, const char *comp, const char *prop, bool unassign)
{
char path[MAXPGPATH];
size_t pathsize = sizeof(path);
int retry = 0;
int retry = unassign ? 0 : MAX_RETRY - 1;
int fddir;
buildPath(group, NULL, comp, "", path, pathsize);
......@@ -506,66 +605,71 @@ writeInt64(Oid group, const char *base, const char *comp, const char *prop, int6
}
/*
* Check permissions on group's cgroup dir & interface files.
* Check a list of permissions on group.
*
* - if report is true then raise an error on and bad permission,
* otherwise only return false;
* - if all the permissions are met then return true;
* - otherwise:
* - raise an error if report is true and permlist is not optional;
* - or return false;
*/
static bool
checkPermission(Oid group, bool report)
permListCheck(const PermList *permlist, Oid group, bool report)
{
char path[MAXPGPATH];
size_t pathsize = sizeof(path);
const char *comp;
int i;
#define __CHECK(prop, perm) do { \
buildPath(group, NULL, comp, prop, path, pathsize); \
if (access(path, perm)) \
{ \
if (report) \
{ \
CGROUP_ERROR("can't access %s '%s': %s", \
prop[0] ? "file" : "directory", \
path, \
strerror(errno)); \
} \
return false; \
} \
} while (0)
if (group == RESGROUP_ROOT_ID && permlist->presult)
*permlist->presult = false;
/*
* These checks should keep in sync with
* gpMgmt/bin/gpcheckresgroupimpl
*/
foreach_perm_item(i, permlist->items)
{
const char *comp = permlist->items[i].comp;
const char *prop = permlist->items[i].prop;
int perm = permlist->items[i].perm;
comp = "cpu";
buildPath(group, NULL, comp, prop, path, pathsize);
__CHECK("", R_OK | W_OK | X_OK);
__CHECK("cgroup.procs", R_OK | W_OK);
__CHECK("cpu.cfs_period_us", R_OK | W_OK);
__CHECK("cpu.cfs_quota_us", R_OK | W_OK);
__CHECK("cpu.shares", R_OK | W_OK);
if (access(path, perm))
{
/* No such file or directory / Permission denied */
comp = "cpuacct";
if (report && !permlist->optional)
{
CGROUP_CONFIG_ERROR("can't access %s '%s': %s",
prop[0] ? "file" : "directory",
path,
strerror(errno));
}
return false;
}
}
__CHECK("", R_OK | W_OK | X_OK);
__CHECK("cgroup.procs", R_OK | W_OK);
__CHECK("cpuacct.usage", R_OK);
__CHECK("cpuacct.stat", R_OK);
if (group == RESGROUP_ROOT_ID && permlist->presult)
*permlist->presult = true;
comp = "memory";
return true;
}
__CHECK("", R_OK | W_OK | X_OK);
__CHECK("memory.limit_in_bytes", R_OK | W_OK);
__CHECK("memory.usage_in_bytes", R_OK);
/*
* Check permissions on group's cgroup dir & interface files.
*
* - if report is true then raise an error if any mandatory permission
* is not met;
* - otherwise only return false;
*/
static bool
checkPermission(Oid group, bool report)
{
int i;
if (cgmemswap)
foreach_perm_list(i, permlists)
{
__CHECK("memory.memsw.limit_in_bytes", R_OK | W_OK);
__CHECK("memory.memsw.usage_in_bytes", R_OK);
}
const PermList *permlist = &permlists[i];
#undef __CHECK
if (!permListCheck(permlist, group, report) && !permlist->optional)
return false;
}
return true;
}
......@@ -585,20 +689,9 @@ getMemoryInfo(unsigned long *ram, unsigned long *swap)
static void
getCgMemoryInfo(uint64 *cgram, uint64 *cgmemsw)
{
char path[MAXPGPATH];
size_t pathsize = sizeof(path);
*cgram = readInt64(RESGROUP_ROOT_ID, "", "memory", "memory.limit_in_bytes");
/*
* cgroup/memory/memory.memsw.limit_in_bytes is only available if
* CONFIG_MEMCG_SWAP_ENABLED is on in kernel config or
* swapaccount=1 in cmdline. Without this file we have to assume swap is
* unlimited in container.
*/
buildPath(RESGROUP_ROOT_ID, "",
"memory", "memory.memsw.limit_in_bytes", path, pathsize);
if (access(path, R_OK) == 0)
if (gp_resource_group_enable_cgroup_swap)
{
*cgmemsw = readInt64(RESGROUP_ROOT_ID, "",
"memory", "memory.memsw.limit_in_bytes");
......@@ -627,37 +720,19 @@ getOvercommitRatio(void)
return ratio;
}
/*
* detect if cgroup supports swap memory limit
*/
static void
detectCgroupMemSwap(void)
{
char path[MAXPGPATH];
size_t pathsize = sizeof(path);
buildPath(RESGROUP_ROOT_ID, "",
"memory", "memory.memsw.limit_in_bytes", path, pathsize);
if (access(path, F_OK))
cgmemswap = false;
else
cgmemswap = true;
}
/* detect cgroup mount point */
static void
static bool
detectCgroupMountPoint(void)
{
struct mntent *me;
FILE *fp;
if (cgdir[0])
return;
return true;
fp = setmntent(PROC_MOUNTS, "r");
if (fp == NULL)
CGROUP_ERROR("can not open '%s' for read", PROC_MOUNTS);
CGROUP_CONFIG_ERROR("can not open '%s' for read", PROC_MOUNTS);
while ((me = getmntent(fp)))
......@@ -671,7 +746,7 @@ detectCgroupMountPoint(void)
p = strrchr(cgdir, '/');
if (p == NULL)
CGROUP_ERROR("cgroup mount point parse error: %s", cgdir);
CGROUP_CONFIG_ERROR("cgroup mount point parse error: %s", cgdir);
else
*p = 0;
break;
......@@ -679,8 +754,7 @@ detectCgroupMountPoint(void)
endmntent(fp);
if (!cgdir[0])
CGROUP_ERROR("can not find cgroup mount point");
return !!cgdir[0];
}
/* Return the name for the OS group implementation */
......@@ -690,6 +764,41 @@ ResGroupOps_Name(void)
return "cgroup";
}
/*
 * Probe the configuration for the OS group implementation.
 *
 * Return true if everything is OK, or false if some requirements are not
 * satisfied.  Will not fail in either case.
 */
bool
ResGroupOps_Probe(void)
{
	bool		result = true;

	/*
	 * We only have to do these checks and initialization once on each host,
	 * so only let postmaster do the job.
	 */
	if (IsUnderPostmaster)
		return result;

	/*
	 * Ignore the error even if cgroup mount point can not be successfully
	 * probed, the error will be reported in Bless() later.
	 */
	if (!detectCgroupMountPoint())
		result = false;

	/*
	 * Probe for optional features like the 'cgroup' memory auditor; the
	 * call's real purpose is its side effect of setting the
	 * gp_resource_group_enable_cgroup_* flags.  Its return value does not
	 * affect the probe result, so discard it explicitly rather than via
	 * a conditional whose branches both return the same value.
	 */
	(void) checkPermission(RESGROUP_ROOT_ID, false);

	return result;
}
/* Check whether the OS group implementation is available and useable */
void
ResGroupOps_Bless(void)
......@@ -701,8 +810,18 @@ ResGroupOps_Bless(void)
if (IsUnderPostmaster)
return;
detectCgroupMountPoint();
detectCgroupMemSwap();
/*
* We should have already detected for cgroup mount point in Probe(),
* it was not an error if the detection failed at that step. But once
* we call Bless() we know we want to make use of cgroup then we must
* know the mount point, otherwise it's a critical error.
*/
if (!cgdir[0])
CGROUP_CONFIG_ERROR("can not find cgroup mount point");
/*
* Check again, this time we will fail on unmet requirements.
*/
checkPermission(RESGROUP_ROOT_ID, true);
/*
......@@ -766,7 +885,8 @@ ResGroupOps_CreateGroup(Oid group)
if (!createDir(group, "cpu")
|| !createDir(group, "cpuacct")
|| !createDir(group, "memory"))
|| (gp_resource_group_enable_cgroup_memory &&
!createDir(group, "memory")))
{
CGROUP_ERROR("can't create cgroup for resgroup '%d': %s",
group, strerror(errno));
......@@ -792,14 +912,16 @@ ResGroupOps_CreateGroup(Oid group)
/*
* Destroy the OS group for group.
*
* Fail if any process is running under it.
* One OS group can not be dropped if there are processes running under it,
* if migrate is true these processes will be moved out automatically.
*/
void
ResGroupOps_DestroyGroup(Oid group)
ResGroupOps_DestroyGroup(Oid group, bool migrate)
{
if (!removeDir(group, "cpu", "cpu.shares", true)
|| !removeDir(group, "cpuacct", NULL, true)
|| !removeDir(group, "memory", "memory.limit_in_bytes", true))
if (!removeDir(group, "cpu", "cpu.shares", migrate)
|| !removeDir(group, "cpuacct", NULL, migrate)
|| (gp_resource_group_enable_cgroup_memory &&
!removeDir(group, "memory", "memory.limit_in_bytes", migrate)))
{
CGROUP_ERROR("can't remove cgroup for resgroup '%d': %s",
group, strerror(errno));
......@@ -911,32 +1033,43 @@ ResGroupOps_SetMemoryLimitByValue(Oid group, int32 memory_limit)
const char *comp = "memory";
int64 memory_limit_in_bytes;
if (!gp_resource_group_enable_cgroup_memory)
return;
memory_limit_in_bytes = VmemTracker_ConvertVmemChunksToBytes(memory_limit);
if (cgmemswap == false)
/* Is swap interfaces enabled? */
if (!gp_resource_group_enable_cgroup_swap)
{
/* No, then we only need to setup the memory limit */
writeInt64(group, NULL, comp, "memory.limit_in_bytes",
memory_limit_in_bytes);
}
else
{
/* Yes, then we have to setup both the memory and mem+swap limits */
int64 memory_limit_in_bytes_old;
/*
* Memory limit should always <= mem+swap limit, then the limits
* must be set in a proper order depending on the relation between
* new and old limits.
*/
memory_limit_in_bytes_old = readInt64(group, NULL,
comp, "memory.limit_in_bytes");
if (memory_limit_in_bytes == memory_limit_in_bytes_old)
return;
if (memory_limit_in_bytes > memory_limit_in_bytes_old)
{
/* When new value > old memory limit, write mem+swap limit first */
writeInt64(group, NULL, comp, "memory.memsw.limit_in_bytes",
memory_limit_in_bytes);
writeInt64(group, NULL, comp, "memory.limit_in_bytes",
memory_limit_in_bytes);
}
else
else if (memory_limit_in_bytes < memory_limit_in_bytes_old)
{
/* When new value < old memory limit, write memory limit first */
writeInt64(group, NULL, comp, "memory.limit_in_bytes",
memory_limit_in_bytes);
writeInt64(group, NULL, comp, "memory.memsw.limit_in_bytes",
......@@ -969,7 +1102,13 @@ ResGroupOps_GetMemoryUsage(Oid group)
int64 memory_usage_in_bytes;
char *prop;
prop = cgmemswap ? "memory.memsw.usage_in_bytes" : "memory.usage_in_bytes";
/* Report 0 if cgroup memory is not enabled */
if (!gp_resource_group_enable_cgroup_memory)
return 0;
prop = gp_resource_group_enable_cgroup_swap
? "memory.memsw.usage_in_bytes"
: "memory.usage_in_bytes";
memory_usage_in_bytes = readInt64(group, NULL, comp, prop);
......@@ -987,6 +1126,10 @@ ResGroupOps_GetMemoryLimit(Oid group)
const char *comp = "memory";
int64 memory_limit_in_bytes;
/* Report unlimited (max int32) if cgroup memory is not enabled */
if (!gp_resource_group_enable_cgroup_memory)
return (int32) ((1U << 31) - 1);
memory_limit_in_bytes = readInt64(group, NULL,
comp, "memory.limit_in_bytes");
......
......@@ -648,8 +648,14 @@ ResGroupDropFinish(Oid groupId, bool isCommit)
if (isCommit)
{
bool migrate;
removeGroup(groupId);
ResGroupOps_DestroyGroup(groupId);
/* Only migrate processes out of vmtracker groups */
migrate = group->memAuditor == RESGROUP_MEMORY_AUDITOR_VMTRACKER;
ResGroupOps_DestroyGroup(groupId, migrate);
}
}
PG_CATCH();
......@@ -687,7 +693,7 @@ ResGroupCreateOnAbort(Oid groupId)
savedInterruptHoldoffCount = InterruptHoldoffCount;
removeGroup(groupId);
/* remove the os dependent part for this resource group */
ResGroupOps_DestroyGroup(groupId);
ResGroupOps_DestroyGroup(groupId, true);
}
PG_CATCH();
{
......
......@@ -20,11 +20,12 @@
*/
extern const char * ResGroupOps_Name(void);
extern bool ResGroupOps_Probe(void);
extern void ResGroupOps_Bless(void);
extern void ResGroupOps_Init(void);
extern void ResGroupOps_AdjustGUCs(void);
extern void ResGroupOps_CreateGroup(Oid group);
extern void ResGroupOps_DestroyGroup(Oid group);
extern void ResGroupOps_DestroyGroup(Oid group, bool migrate);
extern void ResGroupOps_AssignGroup(Oid group, int pid);
extern int ResGroupOps_LockGroup(Oid group, const char *comp, bool block);
extern void ResGroupOps_UnLockGroup(Oid group, int fd);
......
......@@ -78,6 +78,12 @@ extern int gp_resource_group_cpu_priority;
extern double gp_resource_group_cpu_limit;
extern double gp_resource_group_memory_limit;
/*
* Non-GUC global variables.
*/
extern bool gp_resource_group_enable_cgroup_memory;
extern bool gp_resource_group_enable_cgroup_swap;
/*
* Resource Group assignment hook.
*
......
-- start_ignore
drop resource group rg1;
drop resource group rg2;
\! gpconfig -c gp_resource_manager -v queue
20180409:14:08:27:030072 gpconfig:nyu-vm-centos:gpadmin-[INFO]:-completed successfully with parameters '-c gp_resource_manager -v queue'
-- end_ignore
\! echo $?
0
-- start_ignore
\! gpstop -rai
20180409:14:08:27:030207 gpstop:nyu-vm-centos:gpadmin-[INFO]:-Starting gpstop with args: -rai
20180409:14:08:27:030207 gpstop:nyu-vm-centos:gpadmin-[INFO]:-Gathering information and validating the environment...
20180409:14:08:27:030207 gpstop:nyu-vm-centos:gpadmin-[INFO]:-Obtaining Greenplum Master catalog information
20180409:14:08:27:030207 gpstop:nyu-vm-centos:gpadmin-[INFO]:-Obtaining Segment details from master...
20180409:14:08:27:030207 gpstop:nyu-vm-centos:gpadmin-[INFO]:-Greenplum Version: 'postgres (Greenplum Database) 5.6.1+dev.26.g8353723c5c build dev'
20180409:14:08:27:030207 gpstop:nyu-vm-centos:gpadmin-[INFO]:-There are 1 connections to the database
20180409:14:08:27:030207 gpstop:nyu-vm-centos:gpadmin-[INFO]:-Commencing Master instance shutdown with mode='immediate'
20180409:14:08:27:030207 gpstop:nyu-vm-centos:gpadmin-[INFO]:-Master host=nyu-vm-centos
20180409:14:08:27:030207 gpstop:nyu-vm-centos:gpadmin-[INFO]:-Commencing Master instance shutdown with mode=immediate
20180409:14:08:27:030207 gpstop:nyu-vm-centos:gpadmin-[INFO]:-Master segment instance directory=/home/gpadmin/src/gpdb5.git/gpAux/gpdemo/datadirs/qddir/demoDataDir-1
20180409:14:08:28:030207 gpstop:nyu-vm-centos:gpadmin-[INFO]:-Attempting forceful termination of any leftover master process
20180409:14:08:28:030207 gpstop:nyu-vm-centos:gpadmin-[INFO]:-Terminating processes for segment /home/gpadmin/src/gpdb5.git/gpAux/gpdemo/datadirs/qddir/demoDataDir-1
20180409:14:08:28:030207 gpstop:nyu-vm-centos:gpadmin-[INFO]:-Stopping master standby host nyu-vm-centos mode=fast
20180409:14:08:29:030207 gpstop:nyu-vm-centos:gpadmin-[INFO]:-Successfully shutdown standby process on nyu-vm-centos
20180409:14:08:29:030207 gpstop:nyu-vm-centos:gpadmin-[INFO]:-Targeting dbid [2, 5, 3, 6, 4, 7] for shutdown
20180409:14:08:29:030207 gpstop:nyu-vm-centos:gpadmin-[INFO]:-Commencing parallel primary segment instance shutdown, please wait...
20180409:14:08:29:030207 gpstop:nyu-vm-centos:gpadmin-[INFO]:-0.00% of jobs completed
20180409:14:08:31:030207 gpstop:nyu-vm-centos:gpadmin-[INFO]:-100.00% of jobs completed
20180409:14:08:31:030207 gpstop:nyu-vm-centos:gpadmin-[INFO]:-Commencing parallel mirror segment instance shutdown, please wait...
20180409:14:08:31:030207 gpstop:nyu-vm-centos:gpadmin-[INFO]:-0.00% of jobs completed
20180409:14:08:32:030207 gpstop:nyu-vm-centos:gpadmin-[INFO]:-100.00% of jobs completed
20180409:14:08:32:030207 gpstop:nyu-vm-centos:gpadmin-[INFO]:-----------------------------------------------------
20180409:14:08:32:030207 gpstop:nyu-vm-centos:gpadmin-[INFO]:- Segments stopped successfully = 6
20180409:14:08:32:030207 gpstop:nyu-vm-centos:gpadmin-[INFO]:- Segments with errors during stop = 0
20180409:14:08:32:030207 gpstop:nyu-vm-centos:gpadmin-[INFO]:-----------------------------------------------------
20180409:14:08:32:030207 gpstop:nyu-vm-centos:gpadmin-[INFO]:-Successfully shutdown 6 of 6 segment instances
20180409:14:08:32:030207 gpstop:nyu-vm-centos:gpadmin-[INFO]:-Database successfully shutdown with no errors reported
20180409:14:08:32:030207 gpstop:nyu-vm-centos:gpadmin-[INFO]:-Cleaning up leftover gpmmon process
20180409:14:08:32:030207 gpstop:nyu-vm-centos:gpadmin-[INFO]:-No leftover gpmmon process found
20180409:14:08:32:030207 gpstop:nyu-vm-centos:gpadmin-[INFO]:-Cleaning up leftover gpsmon processes
20180409:14:08:32:030207 gpstop:nyu-vm-centos:gpadmin-[INFO]:-No leftover gpsmon processes on some hosts. not attempting forceful termination on these hosts
20180409:14:08:32:030207 gpstop:nyu-vm-centos:gpadmin-[INFO]:-Cleaning up leftover shared memory
20180409:14:08:33:030207 gpstop:nyu-vm-centos:gpadmin-[INFO]:-Restarting System...
-- end_ignore
\! echo $?
0
-- start_ignore
drop resource group rg1;
drop resource group rg2;
-- end_ignore
show gp_resource_manager;
gp_resource_manager
---------------------
group
(1 row)
select * from pg_resgroup
where rsgname not like 'rg_dump_test%'
order by oid;
rsgname | parent
---------------+--------
default_group | 0
admin_group | 0
rg1 | 0
rg2 | 0
(4 rows)
select avg(reslimittype)
from pg_resgroupcapability
where reslimittype <= 1;
avg
-----
1
(1 row)
select groupname from gp_toolkit.gp_resgroup_config
where groupname not like 'rg_dump_test%'
order by groupid;
groupname
---------------
default_group
admin_group
rg1
rg2
(4 rows)
select rsgname from gp_toolkit.gp_resgroup_status
where rsgname not like 'rg_dump_test%'
order by groupid;
rsgname
---------------
default_group
admin_group
rg1
rg2
(4 rows)
alter resource group rg1 set cpu_rate_limit 20;
alter resource group rg1 set cpu_rate_limit 10;
drop resource group rg1;
create resource group rg1 with (cpu_rate_limit=10, memory_limit=10);
alter resource group rg2 set cpu_rate_limit 20;
alter resource group rg2 set cpu_rate_limit 10;
-- TODO: change to the resgroup
show gp_resource_manager;
gp_resource_manager
---------------------
queue
(1 row)
select * from pg_resgroup
where rsgname not like 'rg_dump_test%'
order by oid;
rsgname | parent
---------------+--------
default_group | 0
admin_group | 0
(2 rows)
select avg(reslimittype)
from pg_resgroupcapability
where reslimittype <= 1;
avg
-----
1
(1 row)
select groupname from gp_toolkit.gp_resgroup_config
where groupname not like 'rg_dump_test%'
order by groupid;
groupname
---------------
default_group
admin_group
(2 rows)
select rsgname from gp_toolkit.gp_resgroup_status
where rsgname not like 'rg_dump_test%'
order by groupid;
rsgname
---------
(0 rows)
create resource group rg1 with (cpu_rate_limit=10, memory_limit=10);
WARNING: resource group is disabled
HINT: To enable set gp_resource_manager=group
alter resource group rg1 set cpu_rate_limit 20;
alter resource group rg1 set cpu_rate_limit 10;
drop resource group rg1;
create resource group rg1 with (cpu_rate_limit=10, memory_limit=10);
WARNING: resource group is disabled
HINT: To enable set gp_resource_manager=group
create resource group rg2 with (cpu_rate_limit=10, memory_limit=10);
WARNING: resource group is disabled
HINT: To enable set gp_resource_manager=group
show gp_resource_manager;
gp_resource_manager
---------------------
group
(1 row)
select * from pg_resgroup
where rsgname not like 'rg_dump_test%'
order by oid;
rsgname | parent
---------------+--------
default_group | 0
admin_group | 0
rg1 | 0
rg2 | 0
(4 rows)
select avg(reslimittype)
from pg_resgroupcapability
where reslimittype <= 1;
avg
-----
1
(1 row)
select groupname from gp_toolkit.gp_resgroup_config
where groupname not like 'rg_dump_test%'
order by groupid;
groupname
---------------
default_group
admin_group
rg1
rg2
(4 rows)
select rsgname from gp_toolkit.gp_resgroup_status
where rsgname not like 'rg_dump_test%'
order by groupid;
rsgname
---------------
default_group
admin_group
rg1
rg2
(4 rows)
alter resource group rg1 set cpu_rate_limit 20;
alter resource group rg1 set cpu_rate_limit 10;
drop resource group rg1;
create resource group rg1 with (cpu_rate_limit=10, memory_limit=10);
alter resource group rg2 set cpu_rate_limit 20;
alter resource group rg2 set cpu_rate_limit 10;
-- TODO: change to the resgroup
show gp_resource_manager;
gp_resource_manager
---------------------
queue
(1 row)
select * from pg_resgroup
where rsgname not like 'rg_dump_test%'
order by oid;
rsgname | parent
---------------+--------
default_group | 0
admin_group | 0
rg1 | 0
rg2 | 0
(4 rows)
select avg(reslimittype)
from pg_resgroupcapability
where reslimittype <= 1;
avg
-----
1
(1 row)
select groupname from gp_toolkit.gp_resgroup_config
where groupname not like 'rg_dump_test%'
order by groupid;
groupname
---------------
default_group
admin_group
rg1
rg2
(4 rows)
select rsgname from gp_toolkit.gp_resgroup_status
where rsgname not like 'rg_dump_test%'
order by groupid;
rsgname
---------
(0 rows)
alter resource group rg1 set cpu_rate_limit 20;
alter resource group rg1 set cpu_rate_limit 10;
drop resource group rg1;
create resource group rg1 with (cpu_rate_limit=10, memory_limit=10);
WARNING: resource group is disabled
HINT: To enable set gp_resource_manager=group
alter resource group rg2 set cpu_rate_limit 20;
alter resource group rg2 set cpu_rate_limit 10;
show gp_resource_manager;
gp_resource_manager
---------------------
group
(1 row)
select * from pg_resgroup
where rsgname not like 'rg_dump_test%'
order by oid;
rsgname | parent
---------------+--------
default_group | 0
admin_group | 0
rg1 | 0
rg2 | 0
(4 rows)
select avg(reslimittype)
from pg_resgroupcapability
where reslimittype <= 1;
avg
-----
1
(1 row)
select groupname from gp_toolkit.gp_resgroup_config
where groupname not like 'rg_dump_test%'
order by groupid;
groupname
---------------
default_group
admin_group
rg1
rg2
(4 rows)
select rsgname from gp_toolkit.gp_resgroup_status
where rsgname not like 'rg_dump_test%'
order by groupid;
rsgname
---------------
default_group
admin_group
rg1
rg2
(4 rows)
alter resource group rg1 set cpu_rate_limit 20;
alter resource group rg1 set cpu_rate_limit 10;
drop resource group rg1;
create resource group rg1 with (cpu_rate_limit=10, memory_limit=10);
alter resource group rg2 set cpu_rate_limit 20;
alter resource group rg2 set cpu_rate_limit 10;
-- TODO: change to the resgroup
show gp_resource_manager;
gp_resource_manager
---------------------
queue
(1 row)
select * from pg_resgroup
where rsgname not like 'rg_dump_test%'
order by oid;
rsgname | parent
---------------+--------
default_group | 0
admin_group | 0
rg1 | 0
rg2 | 0
(4 rows)
select avg(reslimittype)
from pg_resgroupcapability
where reslimittype <= 1;
avg
-----
1
(1 row)
select groupname from gp_toolkit.gp_resgroup_config
where groupname not like 'rg_dump_test%'
order by groupid;
groupname
---------------
default_group
admin_group
rg1
rg2
(4 rows)
select rsgname from gp_toolkit.gp_resgroup_status
where rsgname not like 'rg_dump_test%'
order by groupid;
rsgname
---------
(0 rows)
alter resource group rg1 set cpu_rate_limit 20;
alter resource group rg1 set cpu_rate_limit 10;
drop resource group rg1;
create resource group rg1 with (cpu_rate_limit=10, memory_limit=10);
WARNING: resource group is disabled
HINT: To enable set gp_resource_manager=group
alter resource group rg2 set cpu_rate_limit 20;
alter resource group rg2 set cpu_rate_limit 10;
-- start_ignore
\! gpconfig -c gp_resource_manager -v group
20180409:14:08:40:031018 gpconfig:nyu-vm-centos:gpadmin-[INFO]:-completed successfully with parameters '-c gp_resource_manager -v group'
-- end_ignore
\! echo $?
0
-- start_ignore
\! gpstop -rai
20180409:14:08:41:031194 gpstop:nyu-vm-centos:gpadmin-[INFO]:-Starting gpstop with args: -rai
20180409:14:08:41:031194 gpstop:nyu-vm-centos:gpadmin-[INFO]:-Gathering information and validating the environment...
20180409:14:08:41:031194 gpstop:nyu-vm-centos:gpadmin-[INFO]:-Obtaining Greenplum Master catalog information
20180409:14:08:41:031194 gpstop:nyu-vm-centos:gpadmin-[INFO]:-Obtaining Segment details from master...
20180409:14:08:41:031194 gpstop:nyu-vm-centos:gpadmin-[INFO]:-Greenplum Version: 'postgres (Greenplum Database) 5.6.1+dev.26.g8353723c5c build dev'
20180409:14:08:41:031194 gpstop:nyu-vm-centos:gpadmin-[INFO]:-There are 1 connections to the database
20180409:14:08:41:031194 gpstop:nyu-vm-centos:gpadmin-[INFO]:-Commencing Master instance shutdown with mode='immediate'
20180409:14:08:41:031194 gpstop:nyu-vm-centos:gpadmin-[INFO]:-Master host=nyu-vm-centos
20180409:14:08:41:031194 gpstop:nyu-vm-centos:gpadmin-[INFO]:-Commencing Master instance shutdown with mode=immediate
20180409:14:08:41:031194 gpstop:nyu-vm-centos:gpadmin-[INFO]:-Master segment instance directory=/home/gpadmin/src/gpdb5.git/gpAux/gpdemo/datadirs/qddir/demoDataDir-1
20180409:14:08:42:031194 gpstop:nyu-vm-centos:gpadmin-[INFO]:-Attempting forceful termination of any leftover master process
20180409:14:08:42:031194 gpstop:nyu-vm-centos:gpadmin-[INFO]:-Terminating processes for segment /home/gpadmin/src/gpdb5.git/gpAux/gpdemo/datadirs/qddir/demoDataDir-1
20180409:14:08:42:031194 gpstop:nyu-vm-centos:gpadmin-[INFO]:-Stopping master standby host nyu-vm-centos mode=fast
20180409:14:08:43:031194 gpstop:nyu-vm-centos:gpadmin-[INFO]:-Successfully shutdown standby process on nyu-vm-centos
20180409:14:08:43:031194 gpstop:nyu-vm-centos:gpadmin-[INFO]:-Targeting dbid [2, 5, 3, 6, 4, 7] for shutdown
20180409:14:08:43:031194 gpstop:nyu-vm-centos:gpadmin-[INFO]:-Commencing parallel primary segment instance shutdown, please wait...
20180409:14:08:43:031194 gpstop:nyu-vm-centos:gpadmin-[INFO]:-0.00% of jobs completed
20180409:14:08:44:031194 gpstop:nyu-vm-centos:gpadmin-[INFO]:-100.00% of jobs completed
20180409:14:08:44:031194 gpstop:nyu-vm-centos:gpadmin-[INFO]:-Commencing parallel mirror segment instance shutdown, please wait...
20180409:14:08:44:031194 gpstop:nyu-vm-centos:gpadmin-[INFO]:-0.00% of jobs completed
20180409:14:08:46:031194 gpstop:nyu-vm-centos:gpadmin-[INFO]:-100.00% of jobs completed
20180409:14:08:46:031194 gpstop:nyu-vm-centos:gpadmin-[INFO]:-----------------------------------------------------
20180409:14:08:46:031194 gpstop:nyu-vm-centos:gpadmin-[INFO]:- Segments stopped successfully = 6
20180409:14:08:46:031194 gpstop:nyu-vm-centos:gpadmin-[INFO]:- Segments with errors during stop = 0
20180409:14:08:46:031194 gpstop:nyu-vm-centos:gpadmin-[INFO]:-----------------------------------------------------
20180409:14:08:46:031194 gpstop:nyu-vm-centos:gpadmin-[INFO]:-Successfully shutdown 6 of 6 segment instances
20180409:14:08:46:031194 gpstop:nyu-vm-centos:gpadmin-[INFO]:-Database successfully shutdown with no errors reported
20180409:14:08:46:031194 gpstop:nyu-vm-centos:gpadmin-[INFO]:-Cleaning up leftover gpmmon process
20180409:14:08:46:031194 gpstop:nyu-vm-centos:gpadmin-[INFO]:-No leftover gpmmon process found
20180409:14:08:46:031194 gpstop:nyu-vm-centos:gpadmin-[INFO]:-Cleaning up leftover gpsmon processes
20180409:14:08:46:031194 gpstop:nyu-vm-centos:gpadmin-[INFO]:-No leftover gpsmon processes on some hosts. not attempting forceful termination on these hosts
20180409:14:08:46:031194 gpstop:nyu-vm-centos:gpadmin-[INFO]:-Cleaning up leftover shared memory
20180409:14:08:46:031194 gpstop:nyu-vm-centos:gpadmin-[INFO]:-Restarting System...
-- end_ignore
\! echo $?
0
-- start_ignore
\! gpconfig -c gp_resource_manager -v queue
20180409:14:57:19:021894 gpconfig:nyu-vm-centos:gpadmin-[INFO]:-completed successfully with parameters '-c gp_resource_manager -v queue'
-- end_ignore
\! echo $?
0
-- start_ignore
\! gpstop -rai
20180409:14:57:20:022030 gpstop:nyu-vm-centos:gpadmin-[INFO]:-Starting gpstop with args: -rai
20180409:14:57:20:022030 gpstop:nyu-vm-centos:gpadmin-[INFO]:-Gathering information and validating the environment...
20180409:14:57:20:022030 gpstop:nyu-vm-centos:gpadmin-[INFO]:-Obtaining Greenplum Master catalog information
20180409:14:57:20:022030 gpstop:nyu-vm-centos:gpadmin-[INFO]:-Obtaining Segment details from master...
20180409:14:57:20:022030 gpstop:nyu-vm-centos:gpadmin-[INFO]:-Greenplum Version: 'postgres (Greenplum Database) 5.6.1+dev.26.g8353723c5c build dev'
20180409:14:57:20:022030 gpstop:nyu-vm-centos:gpadmin-[INFO]:-There are 1 connections to the database
20180409:14:57:20:022030 gpstop:nyu-vm-centos:gpadmin-[INFO]:-Commencing Master instance shutdown with mode='immediate'
20180409:14:57:20:022030 gpstop:nyu-vm-centos:gpadmin-[INFO]:-Master host=nyu-vm-centos
20180409:14:57:20:022030 gpstop:nyu-vm-centos:gpadmin-[INFO]:-Commencing Master instance shutdown with mode=immediate
20180409:14:57:20:022030 gpstop:nyu-vm-centos:gpadmin-[INFO]:-Master segment instance directory=/home/gpadmin/src/gpdb5.git/gpAux/gpdemo/datadirs/qddir/demoDataDir-1
20180409:14:57:21:022030 gpstop:nyu-vm-centos:gpadmin-[INFO]:-Attempting forceful termination of any leftover master process
20180409:14:57:21:022030 gpstop:nyu-vm-centos:gpadmin-[INFO]:-Terminating processes for segment /home/gpadmin/src/gpdb5.git/gpAux/gpdemo/datadirs/qddir/demoDataDir-1
20180409:14:57:21:022030 gpstop:nyu-vm-centos:gpadmin-[INFO]:-Stopping master standby host nyu-vm-centos mode=fast
20180409:14:57:22:022030 gpstop:nyu-vm-centos:gpadmin-[INFO]:-Successfully shutdown standby process on nyu-vm-centos
20180409:14:57:22:022030 gpstop:nyu-vm-centos:gpadmin-[INFO]:-Targeting dbid [2, 5, 3, 6, 4, 7] for shutdown
20180409:14:57:22:022030 gpstop:nyu-vm-centos:gpadmin-[INFO]:-Commencing parallel primary segment instance shutdown, please wait...
20180409:14:57:22:022030 gpstop:nyu-vm-centos:gpadmin-[INFO]:-0.00% of jobs completed
20180409:14:57:24:022030 gpstop:nyu-vm-centos:gpadmin-[INFO]:-100.00% of jobs completed
20180409:14:57:24:022030 gpstop:nyu-vm-centos:gpadmin-[INFO]:-Commencing parallel mirror segment instance shutdown, please wait...
20180409:14:57:24:022030 gpstop:nyu-vm-centos:gpadmin-[INFO]:-0.00% of jobs completed
20180409:14:57:25:022030 gpstop:nyu-vm-centos:gpadmin-[INFO]:-100.00% of jobs completed
20180409:14:57:25:022030 gpstop:nyu-vm-centos:gpadmin-[INFO]:-----------------------------------------------------
20180409:14:57:25:022030 gpstop:nyu-vm-centos:gpadmin-[INFO]:- Segments stopped successfully = 6
20180409:14:57:25:022030 gpstop:nyu-vm-centos:gpadmin-[INFO]:- Segments with errors during stop = 0
20180409:14:57:25:022030 gpstop:nyu-vm-centos:gpadmin-[INFO]:-----------------------------------------------------
20180409:14:57:25:022030 gpstop:nyu-vm-centos:gpadmin-[INFO]:-Successfully shutdown 6 of 6 segment instances
20180409:14:57:25:022030 gpstop:nyu-vm-centos:gpadmin-[INFO]:-Database successfully shutdown with no errors reported
20180409:14:57:25:022030 gpstop:nyu-vm-centos:gpadmin-[INFO]:-Cleaning up leftover gpmmon process
20180409:14:57:25:022030 gpstop:nyu-vm-centos:gpadmin-[INFO]:-No leftover gpmmon process found
20180409:14:57:25:022030 gpstop:nyu-vm-centos:gpadmin-[INFO]:-Cleaning up leftover gpsmon processes
20180409:14:57:25:022030 gpstop:nyu-vm-centos:gpadmin-[INFO]:-No leftover gpsmon processes on some hosts. not attempting forceful termination on these hosts
20180409:14:57:25:022030 gpstop:nyu-vm-centos:gpadmin-[INFO]:-Cleaning up leftover shared memory
20180409:14:57:26:022030 gpstop:nyu-vm-centos:gpadmin-[INFO]:-Restarting System...
-- end_ignore
\! echo $?
0
test: pg_dumpall_current
test: gpcheckcat
test: resgroup_cleanup_basic
test: resgroup_current_1_queue
test: resgroup_cleanup
test: pg_dumpall_current
test: gpcheckcat
test: resgroup_current_1_queue
test: resgroup_switch_group
test: resgroup_current_1_group
......@@ -3,3 +3,5 @@ test: diff_dumps
test: inserts
test: pg_dumpall_other
test: gpcheckcat
test: resgroup_other_2_queue
test: resgroup_switch_queue
test: pg_dumpall_other
test: diff_dumps
test: pg_dumpall_other
test: gpcheckcat
test: resgroup_other_2_queue
test: resgroup_switch_group
test: resgroup_other_2_group
......@@ -3,3 +3,6 @@ test: diff_dumps
test: inserts
test: pg_dumpall_current
test: gpcheckcat
test: resgroup_current_3_queue
test: resgroup_cleanup_basic
test: resgroup_switch_queue
test: pg_dumpall_current
test: diff_dumps
test: pg_dumpall_current
test: gpcheckcat
test: resgroup_current_3_queue
test: resgroup_switch_group
test: resgroup_current_3_group
test: resgroup_cleanup
-- start_ignore
drop resource group rg1;
drop resource group rg2;
\! gpconfig -c gp_resource_manager -v queue
-- end_ignore
\! echo $?
-- start_ignore
\! gpstop -rai
-- end_ignore
\! echo $?
-- start_ignore
drop resource group rg1;
drop resource group rg2;
-- end_ignore
show gp_resource_manager;
select * from pg_resgroup
where rsgname not like 'rg_dump_test%'
order by oid;
select avg(reslimittype)
from pg_resgroupcapability
where reslimittype <= 1;
select groupname from gp_toolkit.gp_resgroup_config
where groupname not like 'rg_dump_test%'
order by groupid;
select rsgname from gp_toolkit.gp_resgroup_status
where rsgname not like 'rg_dump_test%'
order by groupid;
alter resource group rg1 set cpu_rate_limit 20;
alter resource group rg1 set cpu_rate_limit 10;
drop resource group rg1;
create resource group rg1 with (cpu_rate_limit=10, memory_limit=10);
alter resource group rg2 set cpu_rate_limit 20;
alter resource group rg2 set cpu_rate_limit 10;
-- TODO: change to the resgroup
show gp_resource_manager;
select * from pg_resgroup
where rsgname not like 'rg_dump_test%'
order by oid;
select avg(reslimittype)
from pg_resgroupcapability
where reslimittype <= 1;
select groupname from gp_toolkit.gp_resgroup_config
where groupname not like 'rg_dump_test%'
order by groupid;
select rsgname from gp_toolkit.gp_resgroup_status
where rsgname not like 'rg_dump_test%'
order by groupid;
create resource group rg1 with (cpu_rate_limit=10, memory_limit=10);
alter resource group rg1 set cpu_rate_limit 20;
alter resource group rg1 set cpu_rate_limit 10;
drop resource group rg1;
create resource group rg1 with (cpu_rate_limit=10, memory_limit=10);
create resource group rg2 with (cpu_rate_limit=10, memory_limit=10);
show gp_resource_manager;
select * from pg_resgroup
where rsgname not like 'rg_dump_test%'
order by oid;
select avg(reslimittype)
from pg_resgroupcapability
where reslimittype <= 1;
select groupname from gp_toolkit.gp_resgroup_config
where groupname not like 'rg_dump_test%'
order by groupid;
select rsgname from gp_toolkit.gp_resgroup_status
where rsgname not like 'rg_dump_test%'
order by groupid;
alter resource group rg1 set cpu_rate_limit 20;
alter resource group rg1 set cpu_rate_limit 10;
drop resource group rg1;
create resource group rg1 with (cpu_rate_limit=10, memory_limit=10);
alter resource group rg2 set cpu_rate_limit 20;
alter resource group rg2 set cpu_rate_limit 10;
-- TODO: change to the resgroup
show gp_resource_manager;
select * from pg_resgroup
where rsgname not like 'rg_dump_test%'
order by oid;
select avg(reslimittype)
from pg_resgroupcapability
where reslimittype <= 1;
select groupname from gp_toolkit.gp_resgroup_config
where groupname not like 'rg_dump_test%'
order by groupid;
select rsgname from gp_toolkit.gp_resgroup_status
where rsgname not like 'rg_dump_test%'
order by groupid;
alter resource group rg1 set cpu_rate_limit 20;
alter resource group rg1 set cpu_rate_limit 10;
drop resource group rg1;
create resource group rg1 with (cpu_rate_limit=10, memory_limit=10);
alter resource group rg2 set cpu_rate_limit 20;
alter resource group rg2 set cpu_rate_limit 10;
show gp_resource_manager;
select * from pg_resgroup
where rsgname not like 'rg_dump_test%'
order by oid;
select avg(reslimittype)
from pg_resgroupcapability
where reslimittype <= 1;
select groupname from gp_toolkit.gp_resgroup_config
where groupname not like 'rg_dump_test%'
order by groupid;
select rsgname from gp_toolkit.gp_resgroup_status
where rsgname not like 'rg_dump_test%'
order by groupid;
alter resource group rg1 set cpu_rate_limit 20;
alter resource group rg1 set cpu_rate_limit 10;
drop resource group rg1;
create resource group rg1 with (cpu_rate_limit=10, memory_limit=10);
alter resource group rg2 set cpu_rate_limit 20;
alter resource group rg2 set cpu_rate_limit 10;
-- TODO: change to the resgroup
show gp_resource_manager;
select * from pg_resgroup
where rsgname not like 'rg_dump_test%'
order by oid;
select avg(reslimittype)
from pg_resgroupcapability
where reslimittype <= 1;
select groupname from gp_toolkit.gp_resgroup_config
where groupname not like 'rg_dump_test%'
order by groupid;
select rsgname from gp_toolkit.gp_resgroup_status
where rsgname not like 'rg_dump_test%'
order by groupid;
alter resource group rg1 set cpu_rate_limit 20;
alter resource group rg1 set cpu_rate_limit 10;
drop resource group rg1;
create resource group rg1 with (cpu_rate_limit=10, memory_limit=10);
alter resource group rg2 set cpu_rate_limit 20;
alter resource group rg2 set cpu_rate_limit 10;
-- start_ignore
\! gpconfig -c gp_resource_manager -v group
-- end_ignore
\! echo $?
-- start_ignore
\! gpstop -rai
-- end_ignore
\! echo $?
-- start_ignore
\! gpconfig -c gp_resource_manager -v queue
-- end_ignore
\! echo $?
-- start_ignore
\! gpstop -rai
-- end_ignore
\! echo $?
......@@ -54,10 +54,11 @@ usage()
echo " -c <dir> Greenplum install path for current binary to test upgrade/downgrade to (Default: \$GPHOME)"
echo " -m <dir> Greenplum Master Data Directory (Default: \$MASTER_DATA_DIRECTORY)"
echo " -p <port> Greenplum Master Port (Default: \$PGPORT)"
echo " -v <variant> Variant of the test plan (Default: '')"
exit 0
}
while getopts ":c:b:m:p" opt; do
while getopts ":c:b:m:p:v:" opt; do
case ${opt} in
c)
GPHOME_CURRENT=$OPTARG
......@@ -71,6 +72,9 @@ while getopts ":c:b:m:p" opt; do
p)
PGPORT_CURRENT=$OPTARG
;;
v)
VARIANT=$OPTARG
;;
*)
usage
;;
......@@ -105,6 +109,13 @@ if [ "${PGPORT_CURRENT}x" == "x" ]; then
exit 1
fi
# Fail fast unless all three schedule files for the requested variant
# exist in the current directory (default variant is the empty string,
# i.e. plain schedule1/2/3).
# NOTE: the original used `[ ... -a ... ]`; the `-a`/`-o` operators are
# marked obsolescent by POSIX and ambiguous with some operands, so each
# existence test is performed separately.  Expansions are quoted so an
# unset/empty VARIANT (or one containing spaces) cannot break the test.
if ! [ -e "schedule1${VARIANT}" ] || \
   ! [ -e "schedule2${VARIANT}" ] || \
   ! [ -e "schedule3${VARIANT}" ]; then
  echo "Use -v to provide a valid variant of the test plan (Default: '')"
  exit 1
fi
## Grab the Greenplum versions of each binary for display.
## `gpstart --version` prints e.g. "gpstart (Greenplum Database) X.Y.Z";
## the awk program drops the first two words and keeps the rest.
## Modern `$( )` command substitution replaces the legacy backticks, and
## the binary paths are quoted in case an install dir contains spaces.
CURRENT_VERSION=$("$GPHOME_CURRENT/bin/gpstart" --version | awk '{ for (i=3; i<NF; i++) printf $i " "; print $NF }')
OTHER_VERSION=$("$GPHOME_OTHER/bin/gpstart" --version | awk '{ for (i=3; i<NF; i++) printf $i " "; print $NF }')
......@@ -115,6 +126,7 @@ echo "Current binaries: ${CURRENT_VERSION}"
echo " ${GPHOME_CURRENT}"
echo " Other binaries: ${OTHER_VERSION}"
echo " ${GPHOME_OTHER}"
echo " Variant: ${VARIANT}"
echo "=================================================="
## Clean our directory of any previous test output
......@@ -122,7 +134,7 @@ clean_output
## Start/restart current Greenplum and do initial dump to compare against
start_binary $GPHOME_CURRENT
run_tests schedule1
run_tests schedule1${VARIANT}
## Change the binary, dump, and then compare the two dumps generated
## by both binaries. Then we do some inserts and dump again. We source
......@@ -134,13 +146,13 @@ run_tests schedule1
## test.
start_binary $GPHOME_OTHER
source $GPHOME_CURRENT/greenplum_path.sh
run_tests schedule2
run_tests schedule2${VARIANT}
## Change the binary back, dump, and then compare the two new dumps
## generated by both binaries. Then we do some inserts and check to see
## if dump still works fine.
start_binary $GPHOME_CURRENT
run_tests schedule3
run_tests schedule3${VARIANT}
## Print unnecessary success output
echo "SUCCESS! Provided binaries are swappable."
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册