diff --git a/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp b/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp
index 0bd720ea054d4b5225d59f6975f5c4fbfe3db033..8cecf3576bf7fbdfddc972ce41f3dd1db3861826 100644
--- a/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp
+++ b/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp
@@ -2477,10 +2477,11 @@ CTranslatorRelcacheToDXL::PimdobjColStats
 	CDouble dNDVBuckets(0.0);
 	CDouble dFreqBuckets(0.0);
+	CDouble dDistinctRemain(0.0);
+	CDouble dFreqRemain(0.0);
 
 	// We only want to create statistics buckets if the column is NOT a text, varchar, char or bpchar type
 	// For the above column types we will use NDVRemain and NullFreq to do cardinality estimation.
-
 	if (CTranslatorUtils::FCreateStatsBucket(oidAttType))
 	{
 		// transform all the bits and pieces from pg_statistic
@@ -2511,18 +2512,23 @@ CTranslatorRelcacheToDXL::PimdobjColStats
 		CUtils::AddRefAppend(pdrgpdxlbucket, pdrgpdxlbucketTransformed);
 		pdrgpdxlbucketTransformed->Release();
-	}
 
-	// there will be remaining tuples if the merged histogram and the NULLS do not cover
-	// the total number of distinct values
-	CDouble dDistinctRemain(0.0);
-	CDouble dFreqRemain(0.0);
-
-	if ((1 - CStatistics::DEpsilon > dFreqBuckets + dNullFrequency) &&
-		(0 < dDistinct - dNDVBuckets - iNullNDV))
+		// there will be remaining tuples if the merged histogram and the NULLs do not cover
+		// the total number of distinct values
+		if ((1 - CStatistics::DEpsilon > dFreqBuckets + dNullFrequency) &&
+			(0 < dDistinct - dNDVBuckets - iNullNDV))
+		{
+			dDistinctRemain = std::max(CDouble(0.0), (dDistinct - dNDVBuckets - iNullNDV));
+			dFreqRemain = std::max(CDouble(0.0), (1 - dFreqBuckets - dNullFrequency));
+		}
+	}
+	else
 	{
-		dDistinctRemain = std::max(CDouble(0.0), (dDistinct - dNDVBuckets - iNullNDV));
-		dFreqRemain = std::max(CDouble(0.0), (1 - dFreqBuckets - dNullFrequency));
+		// in case of text, varchar, char or bpchar columns there are no stats buckets, so the
+		// remaining frequency is everything excluding NULLs, and the remaining distinct values
+		// are the stadistinct as available in pg_statistic
+		dDistinctRemain = dDistinct;
+		dFreqRemain = 1 - dNullFrequency;
 	}
 
 	// free up allocated datum and float4 arrays
diff --git a/src/test/regress/expected/bfv_statistic.out b/src/test/regress/expected/bfv_statistic.out
index 9d58cf26f26fa3743406e40c981184ff28f2cf11..cee4632939d678b529f70eba7aba711486959b24 100644
--- a/src/test/regress/expected/bfv_statistic.out
+++ b/src/test/regress/expected/bfv_statistic.out
@@ -447,3 +447,50 @@ SELECT * FROM test_broken_stats t1, good_tab t2 WHERE t1.b = t2.b;
 (0 rows)
 
 RESET allow_system_table_mods;
+-- cardinality estimation for joins on varchar, text, char and bpchar columns must account for FreqRemain and NDVRemain,
+-- resulting in better cardinality numbers for the joins in orca
+-- start_ignore
+DROP TABLE IF EXISTS test_join_card1;
+NOTICE: table "test_join_card1" does not exist, skipping
+DROP TABLE IF EXISTS test_join_card2;
+NOTICE: table "test_join_card2" does not exist, skipping
+-- end_ignore
+CREATE TABLE test_join_card1 (a varchar, b varchar);
+NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table.
+HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
+CREATE TABLE test_join_card2 (a varchar, b varchar);
+NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table.
+HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
+CREATE TABLE test_join_card3 (a varchar, b varchar);
+NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table.
+HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
+INSERT INTO test_join_card1 SELECT i::text, i::text FROM generate_series(1, 20000)i;
+INSERT INTO test_join_card2 SELECT i::text, NULL FROM generate_series(1, 179)i;
+INSERT INTO test_join_card2 SELECT 1::text, 'a' FROM generate_series(1, 5820)i;
+INSERT INTO test_join_card3 SELECT i::text, i::text FROM generate_series(1, 10000)i;
+ANALYZE test_join_card1;
+ANALYZE test_join_card2;
+ANALYZE test_join_card3;
+EXPLAIN SELECT * FROM test_join_card1 t1, test_join_card2 t2, test_join_card3 t3 WHERE t1.b = t2.b and t3.b = t2.b;
+                                                 QUERY PLAN
+------------------------------------------------------------------------------------------------------------
+ Gather Motion 3:1  (slice3; segments: 3)  (cost=717.00..1479.01 rows=2910 width=22)
+   ->  Hash Join  (cost=717.00..1479.01 rows=970 width=22)
+         Hash Cond: t2.b::text = t1.b::text
+         ->  Broadcast Motion 3:3  (slice2; segments: 3)  (cost=240.00..907.44 rows=5820 width=12)
+               ->  Hash Join  (cost=240.00..674.64 rows=1940 width=12)
+                     Hash Cond: t2.b::text = t3.b::text
+                     ->  Broadcast Motion 3:3  (slice1; segments: 3)  (cost=0.00..308.95 rows=5999 width=4)
+                           ->  Seq Scan on test_join_card2 t2  (cost=0.00..68.99 rows=2000 width=4)
+                     ->  Hash  (cost=115.00..115.00 rows=3334 width=8)
+                           ->  Seq Scan on test_join_card3 t3  (cost=0.00..115.00 rows=3334 width=8)
+         ->  Hash  (cost=227.00..227.00 rows=6667 width=10)
+               ->  Seq Scan on test_join_card1 t1  (cost=0.00..227.00 rows=6667 width=10)
+ Optimizer: legacy query optimizer
+(13 rows)
+
+-- start_ignore
+DROP TABLE IF EXISTS test_join_card1;
+DROP TABLE IF EXISTS test_join_card2;
+DROP TABLE IF EXISTS test_join_card3;
+-- end_ignore
diff --git a/src/test/regress/expected/bfv_statistic_optimizer.out b/src/test/regress/expected/bfv_statistic_optimizer.out
index 9f1b53fdd02f64bae628cf3087a6315d0ab944fe..6b3817a0c685dc9e1ae60207208ecfb34a3a118c 100644
--- a/src/test/regress/expected/bfv_statistic_optimizer.out
+++ b/src/test/regress/expected/bfv_statistic_optimizer.out
@@ -447,3 +447,54 @@ SELECT * FROM test_broken_stats t1, good_tab t2 WHERE t1.b = t2.b;
 (0 rows)
 
 RESET allow_system_table_mods;
+-- cardinality estimation for joins on varchar, text, char and bpchar columns must account for FreqRemain and NDVRemain,
+-- resulting in better cardinality numbers for the joins in orca
+-- start_ignore
+DROP TABLE IF EXISTS test_join_card1;
+NOTICE: table "test_join_card1" does not exist, skipping
+DROP TABLE IF EXISTS test_join_card2;
+NOTICE: table "test_join_card2" does not exist, skipping
+-- end_ignore
+CREATE TABLE test_join_card1 (a varchar, b varchar);
+NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table.
+HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
+CREATE TABLE test_join_card2 (a varchar, b varchar);
+NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table.
+HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
+CREATE TABLE test_join_card3 (a varchar, b varchar);
+NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table.
+HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
+INSERT INTO test_join_card1 SELECT i::text, i::text FROM generate_series(1, 20000)i;
+INSERT INTO test_join_card2 SELECT i::text, NULL FROM generate_series(1, 179)i;
+INSERT INTO test_join_card2 SELECT 1::text, 'a' FROM generate_series(1, 5820)i;
+INSERT INTO test_join_card3 SELECT i::text, i::text FROM generate_series(1, 10000)i;
+ANALYZE test_join_card1;
+ANALYZE test_join_card2;
+ANALYZE test_join_card3;
+EXPLAIN SELECT * FROM test_join_card1 t1, test_join_card2 t2, test_join_card3 t3 WHERE t1.b = t2.b and t3.b = t2.b;
+                                                   QUERY PLAN
+---------------------------------------------------------------------------------------------------------------
+ Gather Motion 3:1  (slice4; segments: 3)  (cost=0.00..1297.58 rows=5999 width=22)
+   ->  Hash Join  (cost=0.00..1297.09 rows=2000 width=22)
+         Hash Cond: test_join_card2.b::text = test_join_card3.b::text
+         ->  Hash Join  (cost=0.00..864.43 rows=2000 width=14)
+               Hash Cond: test_join_card1.b::text = test_join_card2.b::text
+               ->  Redistribute Motion 3:3  (slice1; segments: 3)  (cost=0.00..431.48 rows=6667 width=10)
+                     Hash Key: test_join_card1.b::text
+                     ->  Table Scan on test_join_card1  (cost=0.00..431.15 rows=6667 width=10)
+               ->  Hash  (cost=431.08..431.08 rows=2000 width=4)
+                     ->  Redistribute Motion 3:3  (slice2; segments: 3)  (cost=0.00..431.08 rows=2000 width=4)
+                           Hash Key: test_join_card2.b
+                           ->  Table Scan on test_join_card2  (cost=0.00..431.04 rows=2000 width=4)
+         ->  Hash  (cost=431.20..431.20 rows=3334 width=8)
+               ->  Redistribute Motion 3:3  (slice3; segments: 3)  (cost=0.00..431.20 rows=3334 width=8)
+                     Hash Key: test_join_card3.b::text
+                     ->  Table Scan on test_join_card3  (cost=0.00..431.07 rows=3334 width=8)
+ Optimizer: PQO version 2.56.0
+(17 rows)
+
+-- start_ignore
+DROP TABLE IF EXISTS test_join_card1;
+DROP TABLE IF EXISTS test_join_card2;
+DROP TABLE IF EXISTS test_join_card3;
+-- end_ignore
diff --git a/src/test/regress/sql/bfv_statistic.sql b/src/test/regress/sql/bfv_statistic.sql
index 0f18318f8de91383f894903e50c6448b9f8632f4..693fd963e391e3d8496f5062c3bdfa59ddb4e884 100644
--- a/src/test/regress/sql/bfv_statistic.sql
+++ b/src/test/regress/sql/bfv_statistic.sql
@@ -277,3 +277,26 @@ UPDATE pg_statistic SET stavalues1='{1,2,3}'::int[] WHERE starelid ='bfv_statist
 SELECT * FROM test_broken_stats t1, good_tab t2 WHERE t1.b = t2.b;
 
 RESET allow_system_table_mods;
+
+-- cardinality estimation for joins on varchar, text, char and bpchar columns must account for FreqRemain and NDVRemain,
+-- resulting in better cardinality numbers for the joins in orca
+-- start_ignore
+DROP TABLE IF EXISTS test_join_card1;
+DROP TABLE IF EXISTS test_join_card2;
+-- end_ignore
+CREATE TABLE test_join_card1 (a varchar, b varchar);
+CREATE TABLE test_join_card2 (a varchar, b varchar);
+CREATE TABLE test_join_card3 (a varchar, b varchar);
+INSERT INTO test_join_card1 SELECT i::text, i::text FROM generate_series(1, 20000)i;
+INSERT INTO test_join_card2 SELECT i::text, NULL FROM generate_series(1, 179)i;
+INSERT INTO test_join_card2 SELECT 1::text, 'a' FROM generate_series(1, 5820)i;
+INSERT INTO test_join_card3 SELECT i::text, i::text FROM generate_series(1, 10000)i;
+ANALYZE test_join_card1;
+ANALYZE test_join_card2;
+ANALYZE test_join_card3;
+EXPLAIN SELECT * FROM test_join_card1 t1, test_join_card2 t2, test_join_card3 t3 WHERE t1.b = t2.b and t3.b = t2.b;
+-- start_ignore
+DROP TABLE IF EXISTS test_join_card1;
+DROP TABLE IF EXISTS test_join_card2;
+DROP TABLE IF EXISTS test_join_card3;
+-- end_ignore
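
Note on the translator change above: the following is a minimal standalone sketch of the remaining-stats computation that CTranslatorRelcacheToDXL::PimdobjColStats performs after this patch. It is for illustration only, not the actual ORCA code: the function, struct, and parameter names below are hypothetical, EPSILON stands in for CStatistics::DEpsilon, and plain doubles stand in for CDouble.

// sketch_remaining_stats.cpp -- illustrative only, not ORCA source
#include <algorithm>
#include <cstdio>

struct ColStatsRemain
{
	double distinct_remain;	// NDVRemain: distinct values not covered by any bucket
	double freq_remain;	// FreqRemain: fraction of tuples not covered by buckets or NULLs
};

static const double EPSILON = 1e-10;	// stands in for CStatistics::DEpsilon

static ColStatsRemain
ComputeRemain(bool create_buckets,	// FCreateStatsBucket(): false for text, varchar, char, bpchar
	      double distinct,		// stadistinct from pg_statistic
	      double null_freq,		// stanullfrac from pg_statistic
	      double ndv_buckets,	// NDVs already covered by histogram buckets
	      double freq_buckets,	// frequency already covered by histogram buckets
	      int null_ndv)		// 1 if the column has NULLs, else 0
{
	ColStatsRemain r = {0.0, 0.0};

	if (create_buckets)
	{
		// buckets exist: there is a remainder only if the buckets plus the
		// NULLs do not already cover all tuples and all distinct values
		if (1 - EPSILON > freq_buckets + null_freq &&
		    0 < distinct - ndv_buckets - null_ndv)
		{
			r.distinct_remain = std::max(0.0, distinct - ndv_buckets - null_ndv);
			r.freq_remain = std::max(0.0, 1 - freq_buckets - null_freq);
		}
	}
	else
	{
		// text-like types get no buckets, so everything except NULLs is remaining
		r.distinct_remain = distinct;
		r.freq_remain = 1 - null_freq;
	}

	return r;
}

int
main()
{
	// roughly the b column of test_join_card2 above: one distinct non-NULL
	// value ('a') and 179 NULLs out of 5999 rows -- assumed values for
	// illustration, not taken from an actual pg_statistic entry
	ColStatsRemain r = ComputeRemain(false, 1.0, 179.0 / 5999.0, 0.0, 0.0, 1);
	printf("NDVRemain = %.2f, FreqRemain = %.4f\n", r.distinct_remain, r.freq_remain);
	return 0;
}

Under those assumed inputs the sketch yields NDVRemain = 1 and FreqRemain ≈ 0.97 for test_join_card2.b, which is what lets ORCA cost the joins in the regression test sensibly instead of treating the varchar column as having no usable statistics.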