Improve synchronized shuffle in datasets (#8325)

* Improve synchronized shuffle
* pep8

Improve synchronized shuffle in datasets (#8325)
* Improve synchronized shuffle
* pep8
ee4b0bcc · Oleg Zabluda · François Chollet · de17ff3b · ee4b0bcc · ee4b0bcc
隐藏空白更改
内联并排

Showing with 17 additions and 15 deletions

keras/datasets/boston_housing.py keras/datasets/boston_housing.py +4 -3

keras/datasets/imdb.py keras/datasets/imdb.py +9 -9

keras/datasets/reuters.py keras/datasets/reuters.py +4 -3

未找到文件。
--- a/keras/datasets/boston_housing.py
+++ b/keras/datasets/boston_housing.py
@@ -25,9 +25,10 @@ def load_data(path='boston_housing.npz', test_split=0.2, seed=113):
    f.close()

    np.random.seed(seed)
-    np.random.shuffle(x)
-    np.random.seed(seed)
-    np.random.shuffle(y)
+    indices = np.arange(len(x))
+    np.random.shuffle(indices)
+    x = x[indices]
+    y = y[indices]

    x_train = np.array(x[:int(len(x) * (1 - test_split))])
    y_train = np.array(y[:int(len(x) * (1 - test_split))])

--- a/keras/datasets/imdb.py
+++ b/keras/datasets/imdb.py
@@ -55,15 +55,15 @@ def load_data(path='imdb.npz', num_words=None, skip_top=0,
        x_train, labels_train = f['x_train'], f['y_train']
        x_test, labels_test = f['x_test'], f['y_test']

-    np.random.seed(seed)
-    np.random.shuffle(x_train)
-    np.random.seed(seed)
-    np.random.shuffle(labels_train)
-
-    np.random.seed(seed * 2)
-    np.random.shuffle(x_test)
-    np.random.seed(seed * 2)
-    np.random.shuffle(labels_test)
+    indices = np.arange(len(x_train))
+    np.random.shuffle(indices)
+    x_train = x_train[indices]
+    labels_train = labels_train[indices]
+
+    indices = np.arange(len(x_test))
+    np.random.shuffle(indices)
+    x_test = x_test[indices]
+    labels_test = labels_test[indices]

    xs = np.concatenate([x_train, x_test])
    labels = np.concatenate([labels_train, labels_test])

--- a/keras/datasets/reuters.py
+++ b/keras/datasets/reuters.py
@@ -53,9 +53,10 @@ def load_data(path='reuters.npz', num_words=None, skip_top=0,
        xs, labels = f['x'], f['y']

    np.random.seed(seed)
-    np.random.shuffle(xs)
-    np.random.seed(seed)
-    np.random.shuffle(labels)
+    indices = np.arange(len(xs))
+    np.random.shuffle(indices)
+    xs = xs[indices]
+    labels = labels[indices]

    if start_char is not None:
        xs = [[start_char] + [w + index_from for w in x] for x in xs]