Commit e03e60da authored by jaoki, committed by Fabian Hueske

[FLINK-3059] Improve JavaDocs for DataSet.writeAsText()

Currently the JavaDocs of writeAsText() state that it simply generates a file,
but this is not always true; the output depends on the environment configuration.
This commit improves the JavaDocs of writeAsText().

This closes #1392
Parent c787a037
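To illustrate the behavior that the new JavaDocs describe, here is a minimal sketch (not part of this commit; the class name and the file:///tmp/... paths are placeholders) showing how the sink's parallelism determines whether writeAsText() produces a directory of files or a single file:

```java
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;

public class WriteAsTextExample {
    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<String> data = env.fromElements("a", "b", "c");

        // Default behavior: "out" is created as a directory with one file
        // ("1", "2", ...) per parallel subtask of the sink.
        data.writeAsText("file:///tmp/out");

        // With the sink's parallelism set to 1, a single file "out-single" is
        // created instead (unless fs.output.always-create-directory is true).
        data.writeAsText("file:///tmp/out-single").setParallelism(1);

        env.execute("writeAsText output example");
    }
}
```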
......@@ -1343,10 +1343,49 @@ public abstract class DataSet<T> {
// --------------------------------------------------------------------------------------------
/**
* Writes a DataSet as a text file to the specified location.<br>
* For each element of the DataSet the result of {@link Object#toString()} is written.
* Writes a DataSet as text file(s) to the specified location.<br>
* For each element of the DataSet the result of {@link Object#toString()} is written.<br/>
* <br/>
* <span class="strong">Output files and directories</span><br/>
 * The output that the writeAsText() method produces depends on the following circumstances:
* <ul>
* <li>
* A directory is created and multiple files are written underneath. (Default behavior)<br/>
 * This sink creates a directory called "path1", and files "1", "2", ... are written underneath, depending on the <a href="https://flink.apache.org/faq.html#what-is-the-parallelism-how-do-i-set-it">parallelism</a>.
* <pre>{@code .
* └── path1/
* ├── 1
* ├── 2
* └── ...}</pre>
* Code Example
* <pre>{@code dataset.writeAsText("file:///path1");}</pre>
* </li>
* <li>
* A single file called "path1" is created when parallelism is set to 1
* <pre>{@code .
* └── path1 }</pre>
* Code Example
 * <pre>{@code // Parallelism is set to 1 for this particular operation only
*dataset.writeAsText("file:///path1").setParallelism(1);
*
 * // This has the same effect, but note that the parallelism of all operators is set to one
*env.setParallelism(1);
*...
*dataset.writeAsText("file:///path1"); }</pre>
* </li>
* <li>
* A directory is always created when <a href="https://ci.apache.org/projects/flink/flink-docs-master/setup/config.html#file-systems">fs.output.always-create-directory</a>
 * is set to true in the flink-conf.yaml file, even when parallelism is set to 1.
* <pre>{@code .
* └── path1/
* └── 1 }</pre>
* Code Example
* <pre>{@code // fs.output.always-create-directory = true
*dataset.writeAsText("file:///path1").setParallelism(1); }</pre>
* </li>
* </ul>
*
* @param filePath The path pointing to the location the text file is written to.
 * @param filePath The path pointing to the location to which the text file or the directory of files is written.
* @return The DataSink that writes the DataSet.
*
* @see TextOutputFormat
......@@ -1356,7 +1395,7 @@ public abstract class DataSet<T> {
}
/**
* Writes a DataSet as a text file to the specified location.<br>
* Writes a DataSet as text file(s) to the specified location.<br>
* For each element of the DataSet the result of {@link Object#toString()} is written.
*
* @param filePath The path pointing to the location the text file is written to.
......@@ -1364,6 +1403,7 @@ public abstract class DataSet<T> {
* @return The DataSink that writes the DataSet.
*
* @see TextOutputFormat
* @see DataSet#writeAsText(String) Output files and directories
*/
public DataSink<T> writeAsText(String filePath, WriteMode writeMode) {
TextOutputFormat<T> tof = new TextOutputFormat<T>(new Path(filePath));
......@@ -1372,7 +1412,7 @@ public abstract class DataSet<T> {
}
/**
* Writes a DataSet as a text file to the specified location.<br>
* Writes a DataSet as text file(s) to the specified location.<br>
* For each element of the DataSet the result of {@link TextFormatter#format(Object)} is written.
*
* @param filePath The path pointing to the location the text file is written to.
......@@ -1380,13 +1420,14 @@ public abstract class DataSet<T> {
* @return The DataSink that writes the DataSet.
*
* @see TextOutputFormat
* @see DataSet#writeAsText(String) Output files and directories
*/
public DataSink<String> writeAsFormattedText(String filePath, TextFormatter<T> formatter) {
return map(new FormattingMapper<T>(clean(formatter))).writeAsText(filePath);
}
/**
* Writes a DataSet as a text file to the specified location.<br>
* Writes a DataSet as text file(s) to the specified location.<br>
* For each element of the DataSet the result of {@link TextFormatter#format(Object)} is written.
*
* @param filePath The path pointing to the location the text file is written to.
......@@ -1395,13 +1436,14 @@ public abstract class DataSet<T> {
* @return The DataSink that writes the DataSet.
*
* @see TextOutputFormat
* @see DataSet#writeAsText(String) Output files and directories
*/
public DataSink<String> writeAsFormattedText(String filePath, WriteMode writeMode, TextFormatter<T> formatter) {
return map(new FormattingMapper<T>(clean(formatter))).writeAsText(filePath, writeMode);
}
/**
* Writes a {@link Tuple} DataSet as a CSV file to the specified location.<br>
* Writes a {@link Tuple} DataSet as CSV file(s) to the specified location.<br>
 * <b>Note: Only a Tuple DataSet can be written as a CSV file.</b><br>
* For each Tuple field the result of {@link Object#toString()} is written.
* Tuple fields are separated by the default field delimiter {@code "comma" (,)}.<br>
......@@ -1412,13 +1454,14 @@ public abstract class DataSet<T> {
*
* @see Tuple
* @see CsvOutputFormat
* @see DataSet#writeAsText(String) Output files and directories
*/
public DataSink<T> writeAsCsv(String filePath) {
return writeAsCsv(filePath, CsvOutputFormat.DEFAULT_LINE_DELIMITER, CsvOutputFormat.DEFAULT_FIELD_DELIMITER);
}
/**
* Writes a {@link Tuple} DataSet as a CSV file to the specified location.<br>
* Writes a {@link Tuple} DataSet as CSV file(s) to the specified location.<br>
 * <b>Note: Only a Tuple DataSet can be written as a CSV file.</b><br>
* For each Tuple field the result of {@link Object#toString()} is written.
* Tuple fields are separated by the default field delimiter {@code "comma" (,)}.<br>
......@@ -1430,13 +1473,14 @@ public abstract class DataSet<T> {
*
* @see Tuple
* @see CsvOutputFormat
* @see DataSet#writeAsText(String) Output files and directories
*/
public DataSink<T> writeAsCsv(String filePath, WriteMode writeMode) {
return internalWriteAsCsv(new Path(filePath),CsvOutputFormat.DEFAULT_LINE_DELIMITER, CsvOutputFormat.DEFAULT_FIELD_DELIMITER, writeMode);
}
/**
* Writes a {@link Tuple} DataSet as a CSV file to the specified location with the specified field and line delimiters.<br>
* Writes a {@link Tuple} DataSet as CSV file(s) to the specified location with the specified field and line delimiters.<br>
 * <b>Note: Only a Tuple DataSet can be written as a CSV file.</b><br>
* For each Tuple field the result of {@link Object#toString()} is written.
*
......@@ -1446,13 +1490,14 @@ public abstract class DataSet<T> {
*
* @see Tuple
* @see CsvOutputFormat
* @see DataSet#writeAsText(String) Output files and directories
*/
public DataSink<T> writeAsCsv(String filePath, String rowDelimiter, String fieldDelimiter) {
return internalWriteAsCsv(new Path(filePath), rowDelimiter, fieldDelimiter, null);
}
/**
* Writes a {@link Tuple} DataSet as a CSV file to the specified location with the specified field and line delimiters.<br>
* Writes a {@link Tuple} DataSet as CSV file(s) to the specified location with the specified field and line delimiters.<br>
 * <b>Note: Only a Tuple DataSet can be written as a CSV file.</b><br>
 * For each Tuple field the result of {@link Object#toString()} is written.
*
......@@ -1463,6 +1508,7 @@ public abstract class DataSet<T> {
*
* @see Tuple
* @see CsvOutputFormat
* @see DataSet#writeAsText(String) Output files and directories
*/
public DataSink<T> writeAsCsv(String filePath, String rowDelimiter, String fieldDelimiter, WriteMode writeMode) {
return internalWriteAsCsv(new Path(filePath), rowDelimiter, fieldDelimiter, writeMode);
......
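The @see pointer is also added to the writeAsCsv() variants above. As an illustration (not part of this commit; the class name, path, and delimiters are placeholders), writing a Tuple DataSet as CSV with explicit delimiters and an overwrite mode might look like this:

```java
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.core.fs.FileSystem.WriteMode;

public class WriteAsCsvExample {
    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // Only a Tuple DataSet can be written as a CSV file.
        DataSet<Tuple2<String, Integer>> data =
                env.fromElements(new Tuple2<>("a", 1), new Tuple2<>("b", 2));

        // Explicit row/field delimiters; OVERWRITE replaces existing output.
        data.writeAsCsv("file:///tmp/out-csv", "\n", "|", WriteMode.OVERWRITE)
            .setParallelism(1);

        env.execute("writeAsCsv example");
    }
}
```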
......@@ -1461,6 +1461,7 @@ class DataSet[T: ClassTag](set: JavaDataSet[T]) {
/**
* Writes `this` DataSet to the specified location. This uses [[AnyRef.toString]] on
* each element.
* @see org.apache.flink.api.java.DataSet#writeAsText(String)
*/
def writeAsText(
filePath: String,
......@@ -1473,9 +1474,10 @@ class DataSet[T: ClassTag](set: JavaDataSet[T]) {
}
/**
* Writes `this` DataSet to the specified location as a CSV file.
* Writes `this` DataSet to the specified location as CSV file(s).
*
* This only works on Tuple DataSets. For individual tuple fields [[AnyRef.toString]] is used.
* @see org.apache.flink.api.java.DataSet#writeAsText(String)
*/
def writeAsCsv(
filePath: String,
......
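For completeness, the writeAsFormattedText() methods touched in this commit render each element through a TextFormatter instead of Object#toString(). A minimal sketch (not part of this commit; the class name and path are placeholders) could look like this:

```java
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.io.TextOutputFormat.TextFormatter;

public class WriteAsFormattedTextExample {
    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Integer> data = env.fromElements(1, 2, 3);

        // Each element is rendered by the TextFormatter instead of toString().
        data.writeAsFormattedText("file:///tmp/out-formatted",
                new TextFormatter<Integer>() {
                    @Override
                    public String format(Integer value) {
                        return "value=" + value;
                    }
                }).setParallelism(1);

        env.execute("writeAsFormattedText example");
    }
}
```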