""" 演示RDD的reduceByKey成员方法的使用 """ from pyspark import SparkConf, SparkContext import os os.environ['PYSPARK_PYTHON'] = "D:/dev/python/python310/python.exe" conf = SparkConf().setMaster("local[*]").setAppName("test_spark") sc = SparkContext(conf=conf) # 准备一个RDD rdd = sc.parallelize([('男', 99), ('男', 88), ('女', 99), ('女', 66)]) # 求男生和女生两个组的成绩之和 rdd2 = rdd.reduceByKey(lambda a, b: a + b) print(rdd2.collect())