提交 daad5778 编写于 作者: S Stephan Ewen

[hotfix] Add comments to BroadcastVariableInitializer and SplittableIterator

上级 f414e55e
......@@ -18,7 +18,66 @@
package org.apache.flink.api.common.functions;
/**
* A broadcast variable initializer can be used to transform a broadcast variable
* into another format during initialization. The transformed variable is shared
* among the parallel instances of a function inside one TaskManager, the
* same way as the plain broadcast variables (lists) are shared.
*
* <p>The broadcast variable initializer will in many cases only be executed by one
* parallel function instance per TaskManager, when acquiring the broadcast variable
* for the first time inside that particular TaskManager. It is possible that a
* broadcast variable is read and initialized multiple times, if the tasks that use
* the variables are not overlapping in their execution time; in such cases, it can
* happen that one function instance released the broadcast variable, and another
* function instance materializes it again.
*
* <p>This is an example of how to use a broadcast variable initializer, transforming
* the shared list of values into a shared map.
*
* <pre>{@code
* public class MyFunction extends RichMapFunction<Long, String> {
*
* private Map<Long, String> map;
*
* public void open(Configuration cfg) throws Exception {
* getRuntimeContext().getBroadcastVariableWithInitializer("mapvar",
* new BroadcastVariableInitializer<Tuple2<Long, String>, Map<Long, String>>() {
*
* public Map<Long, String> initializeBroadcastVariable(Iterable<Tuple2<Long, String>> data) {
* Map<Long, String> map = new HashMap<>();
*
* for (Tuple2<Long, String> t : data) {
* map.put(t.f0, t.f1);
* }
*
* return map;
* }
* });
* }
*
* public String map(Long value) {
* // replace the long by the String, based on the map
* return map.get(value);
* }
* }
*
* }</pre>
*
* @param <T> The type of the elements in the list of the original untransformed broadcast variable.
* @param <O> The type of the transformed broadcast variable.
*/
public interface BroadcastVariableInitializer<T, O> {
/**
* The method that reads the data elements from the broadcast variable and
* creates the transformed data structure.
*
* <p>The iterable with the data elements can be traversed only once, i.e.,
* only the first call to {@code iterator()} will succeed.
*
* @param data The sequence of elements in the broadcast variable.
* @return The transformed broadcast variable.
*/
O initializeBroadcastVariable(Iterable<T> data);
}
......@@ -16,10 +16,8 @@
* limitations under the License.
*/
package org.apache.flink.core.memory;
/**
* Interface describing entities that can provide memory segments.
*/
......
......@@ -29,13 +29,16 @@ public class NumberSequenceIterator extends SplittableIterator<Long> {
private static final long serialVersionUID = 1L;
/** The last number returned by the iterator */
private final long to;
/** The next number to be returned */
private long current;
/**
* Internal constructor to allow for empty iterators.
* Creates a new splittable iterator, returning the range [from, to].
* Both boundaries of the interval are inclusive.
*
* @param from The first number returned by the iterator.
* @param to The last number returned by the iterator.
......
......@@ -21,13 +21,33 @@ package org.apache.flink.util;
import java.io.Serializable;
import java.util.Iterator;
/**
* Abstract base class for iterators that can split themselves into multiple disjoint
* iterators. The union of these iterators returns the original iterator values.
*
* @param <T> The type of elements returned by the iterator.
*/
public abstract class SplittableIterator<T> implements Iterator<T>, Serializable {
private static final long serialVersionUID = 200377674313072307L;
/**
* Splits this iterator into a number disjoint iterators.
* The union of these iterators returns the original iterator values.
*
* @param numPartitions The number of iterators to split into.
* @return An array with the split iterators.
*/
public abstract Iterator<T>[] split(int numPartitions);
/**
* Splits this iterator into <i>n</i> partitions and returns the <i>i-th</i> partition
* out of those.
*
* @param num The partition to return (<i>i</i>).
* @param numPartitions The number of partitions to split into (<i>n</i>).
* @return The iterator for the partition.
*/
public Iterator<T> getSplit(int num, int numPartitions) {
if (numPartitions < 1 || num < 0 || num >= numPartitions) {
throw new IllegalArgumentException();
......@@ -36,5 +56,10 @@ public abstract class SplittableIterator<T> implements Iterator<T>, Serializable
return split(numPartitions)[num];
}
/**
* The maximum number of splits into which this iterator can be split up.
*
* @return The maximum number of splits into which this iterator can be split up.
*/
public abstract int getMaximumNumberOfSplits();
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册