提交 862b20b3 编写于 作者: B Bruce Momjian

Complete TODO item:

        o -Allow dump/load of CSV format

This adds new keywords to COPY and \copy:

        CSV - enable CSV mode (comma separated variable)
        QUOTE - specify quote character
        ESCAPE - specify escape character
        FORCE - force quoting of specified column
        LITERAL - suppress null comparison for columns

Doc changes included.  Regression updates coming from Andrew.
上级 83ab1c04
<!--
$PostgreSQL: pgsql/doc/src/sgml/ref/copy.sgml,v 1.55 2003/12/13 23:59:07 neilc Exp $
$PostgreSQL: pgsql/doc/src/sgml/ref/copy.sgml,v 1.56 2004/04/19 17:22:30 momjian Exp $
PostgreSQL documentation
-->
......@@ -26,7 +26,10 @@ COPY <replaceable class="parameter">tablename</replaceable> [ ( <replaceable cla
[ BINARY ]
[ OIDS ]
[ DELIMITER [ AS ] '<replaceable class="parameter">delimiter</replaceable>' ]
[ NULL [ AS ] '<replaceable class="parameter">null string</replaceable>' ] ]
[ NULL [ AS ] '<replaceable class="parameter">null string</replaceable>' ]
[ CSV [ QUOTE [ AS ] '<replaceable class="parameter">quote</replaceable>' ]
[ ESCAPE [ AS ] '<replaceable class="parameter">escape</replaceable>' ]
[ LITERAL <replaceable class="parameter">column</replaceable> [, ...] ] ] ]
COPY <replaceable class="parameter">tablename</replaceable> [ ( <replaceable class="parameter">column</replaceable> [, ...] ) ]
TO { '<replaceable class="parameter">filename</replaceable>' | STDOUT }
......@@ -34,7 +37,10 @@ COPY <replaceable class="parameter">tablename</replaceable> [ ( <replaceable cla
[ BINARY ]
[ OIDS ]
[ DELIMITER [ AS ] '<replaceable class="parameter">delimiter</replaceable>' ]
[ NULL [ AS ] '<replaceable class="parameter">null string</replaceable>' ] ]
[ NULL [ AS ] '<replaceable class="parameter">null string</replaceable>' ]
[ CSV [ QUOTE [ AS ] '<replaceable class="parameter">quote</replaceable>' ]
[ ESCAPE [ AS ] '<replaceable class="parameter">escape</replaceable>' ]
[ FORCE <replaceable class="parameter">column</replaceable> [, ...] ] ] ]
</synopsis>
</refsynopsisdiv>
......@@ -146,7 +152,8 @@ COPY <replaceable class="parameter">tablename</replaceable> [ ( <replaceable cla
<listitem>
<para>
The single character that separates columns within each row
(line) of the file. The default is a tab character.
(line) of the file. The default is a tab character in text mode,
a comma in <literal>CSV</> mode.
</para>
</listitem>
</varlistentry>
......@@ -156,20 +163,86 @@ COPY <replaceable class="parameter">tablename</replaceable> [ ( <replaceable cla
<listitem>
<para>
The string that represents a null value. The default is
<literal>\N</literal> (backslash-N). You might prefer an empty
string, for example.
<literal>\N</literal> (backslash-N) in text mode, and an empty
value with no quotes in <literal>CSV</> mode. You might prefer an
empty string even in text mode for cases where you don't want to
distinguish nulls from empty strings.
</para>
<note>
<para>
On a <command>COPY FROM</command>, any data item that matches
When using <command>COPY FROM</command>, any data item that matches
this string will be stored as a null value, so you should make
sure that you use the same string as you used with
<command>COPY TO</command>.
</para>
</note>
</listitem>
</varlistentry>
<varlistentry>
<term><literal>CSV</literal></term>
<listitem>
<para>
Enables Comma Separated Variable (<literal>CSV</>) mode (also called
Comma Separated Value). It sets the default <literal>DELIMITER</> to
comma, and <literal>QUOTE</> and <literal>ESCAPE</> values to
double-quote.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><replaceable class="parameter">quote</replaceable></term>
<listitem>
<para>
Specifies the quotation character in <literal>CSV</> mode.
The default is double-quote.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><replaceable class="parameter">escape</replaceable></term>
<listitem>
<para>
Specifies the character that should appear before a <literal>QUOTE</>
data character value in <literal>CSV</> mode. The default is the
<literal>QUOTE</> value (usually double-quote).
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><literal>FORCE</></term>
<listitem>
<para>
In <literal>CSV</> <command>COPY TO</> mode, forces quoting
to be used for all non-<literal>NULL</> values in each specified
column. <literal>NULL</> output is never quoted.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><literal>LITERAL</></term>
<listitem>
<para>
In <literal>CSV</> <command>COPY FROM</> mode, for each column specified,
do not do a <literal>null string</> comparison; instead load the value
literally. <literal>QUOTE</> and <literal>ESCAPE</> processing are still
performed.
</para>
<para>
If the <literal>null string</> is <literal>''</> (the default
in <literal>CSV</> mode), a missing input value (<literal>delimiter,
delimiter</>) will load as a zero-length string. <literal>Delimiter, quote,
quote, delimiter</> is always treated as a zero-length string on input.
</para>
</listitem>
</varlistentry>
</variablelist>
</refsect1>
......@@ -233,6 +306,17 @@ COPY <replaceable class="parameter">tablename</replaceable> [ ( <replaceable cla
constraints on the destination table. However, it will not invoke rules.
</para>
<para>
<command>COPY</command> input and output are affected by
<varname>DateStyle</varname>. For portability with other
<productname>PostgreSQL</productname> installations which might use
non-default <varname>DateStyle</varname> settings,
<varname>DateStyle</varname> should be set to <literal>ISO</> before
using <command>COPY</>. In <literal>CSV</> mode, use <literal>ISO</>
or a <varname>DateStyle</varname> setting appropriate for the
external application.
</para>
<para>
<command>COPY</command> stops operation at the first error. This
should not lead to problems in the event of a <command>COPY
......@@ -253,7 +337,8 @@ COPY <replaceable class="parameter">tablename</replaceable> [ ( <replaceable cla
<para>
When <command>COPY</command> is used without the <literal>BINARY</literal> option,
the data read or written is a text file with one line per table row.
the data read or written is a text file with one line per table row,
unless <literal>CSV</> mode is used.
Columns in a row are separated by the delimiter character.
The column values themselves are strings generated by the
output function, or acceptable to the input function, of each
......@@ -379,6 +464,63 @@ COPY <replaceable class="parameter">tablename</replaceable> [ ( <replaceable cla
</para>
</refsect2>
<refsect2>
<title>CSV Format</title>
<para>
This format is used for importing and exporting the Comma
Separated Variable (<literal>CSV</>) file format used by many other
programs, such as spreadsheets. Instead of the escaping used by
<productname>PostgreSQL</productname>'s standard text mode, it
produces and recognizes the common CSV escaping mechanism.
</para>
<para>
The values in each record are separated by the <literal>DELIMITER</>
character. If the value contains the delimiter character, the
<literal>QUOTE</> character, the <literal>NULL</> string, a carriage
return, or line feed character, then the whole value is prefixed and
suffixed by the <literal>QUOTE</> character, and any occurrence
within the value of a <literal>QUOTE</> character or the
<literal>ESCAPE</> character is preceded by the escape character.
You can also use <literal>FORCE</> to force quotes when outputting
non-<literal>NULL</> values in specific columns.
</para>
<para>
In general, the <literal>CSV</> format has no way to distinguish a
<literal>NULL</> from an empty string.
<productname>PostgreSQL</productname>'s COPY handles this by
quoting. A <literal>NULL</> is output as the <literal>NULL</> string
and is not quoted, while a data value matching the <literal>NULL</> string
is quoted. Therefore, using the default settings, a <literal>NULL</> is
written as an unquoted empty string, while an empty string is
written with double quotes (<literal>""</>). Reading values follows
similar rules. You can use <literal>LITERAL</> to prevent <literal>NULL</>
input comparisons for specific columns.
</para>
<note>
<para>
CSV mode will both recognize and produce CSV files with quoted
values containing embedded carriage returns and line feeds. Thus
the files are not strictly one line per table row like text-mode
files.
</para>
</note>
<note>
<para>
Many programs produce strange and occasionally perverse CSV files,
so the file format is more a convention than a standard. Thus you
might encounter some files that cannot be imported using this
mechanism, and <command>COPY</> might produce files that other
programs cannot process.
</para>
</note>
</refsect2>
<refsect2>
<title>Binary Format</title>
......
<!--
$PostgreSQL: pgsql/doc/src/sgml/ref/psql-ref.sgml,v 1.110 2004/04/12 15:58:52 momjian Exp $
$PostgreSQL: pgsql/doc/src/sgml/ref/psql-ref.sgml,v 1.111 2004/04/19 17:22:30 momjian Exp $
PostgreSQL documentation
-->
......@@ -711,6 +711,10 @@ testdb=>
[ <literal>oids</literal> ]
[ <literal>delimiter [as] </literal> '<replaceable class="parameter">character</replaceable>' ]
[ <literal>null [as] </literal> '<replaceable class="parameter">string</replaceable>' ]</literal>
[ <literal>csv [ quote [as] </literal> '<replaceable class="parameter">character</replaceable>' ]
[ <literal>escape [as] </literal> '<replaceable class="parameter">character</replaceable>' ]
[ <literal>force</> <replaceable class="parameter">column_list</replaceable> ]
[ <literal>literal</> <replaceable class="parameter">column_list</replaceable> ] ]
</term>
<listitem>
......
此差异已折叠。
......@@ -11,7 +11,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/parser/gram.y,v 2.450 2004/04/05 03:07:26 momjian Exp $
* $PostgreSQL: pgsql/src/backend/parser/gram.y,v 2.451 2004/04/19 17:22:30 momjian Exp $
*
* HISTORY
* AUTHOR DATE MAJOR EVENT
......@@ -343,7 +343,7 @@ static void doNegateFloat(Value *v);
CHARACTER CHARACTERISTICS CHECK CHECKPOINT CLASS CLOSE
CLUSTER COALESCE COLLATE COLUMN COMMENT COMMIT
COMMITTED CONSTRAINT CONSTRAINTS CONVERSION_P CONVERT COPY CREATE CREATEDB
CREATEUSER CROSS CURRENT_DATE CURRENT_TIME
CREATEUSER CROSS CSV CURRENT_DATE CURRENT_TIME
CURRENT_TIMESTAMP CURRENT_USER CURSOR CYCLE
DATABASE DAY_P DEALLOCATE DEC DECIMAL_P DECLARE DEFAULT DEFAULTS
......@@ -370,7 +370,7 @@ static void doNegateFloat(Value *v);
KEY
LANCOMPILER LANGUAGE LARGE_P LAST_P LEADING LEFT LEVEL LIKE LIMIT
LISTEN LOAD LOCAL LOCALTIME LOCALTIMESTAMP LOCATION
LISTEN LITERAL LOAD LOCAL LOCALTIME LOCALTIMESTAMP LOCATION
LOCK_P
MATCH MAXVALUE MINUTE_P MINVALUE MODE MONTH_P MOVE
......@@ -386,6 +386,8 @@ static void doNegateFloat(Value *v);
PRECISION PRESERVE PREPARE PRIMARY
PRIOR PRIVILEGES PROCEDURAL PROCEDURE
QUOTE
READ REAL RECHECK REFERENCES REINDEX RELATIVE_P RENAME REPEATABLE REPLACE
RESET RESTART RESTRICT RETURNS REVOKE RIGHT ROLLBACK ROW ROWS
RULE
......@@ -1360,6 +1362,26 @@ copy_opt_item:
{
$$ = makeDefElem("null", (Node *)makeString($3));
}
| CSV
{
$$ = makeDefElem("csv", (Node *)makeInteger(TRUE));
}
| QUOTE opt_as Sconst
{
$$ = makeDefElem("quote", (Node *)makeString($3));
}
| ESCAPE opt_as Sconst
{
$$ = makeDefElem("escape", (Node *)makeString($3));
}
| FORCE columnList
{
$$ = makeDefElem("force", (Node *)$2);
}
| LITERAL columnList
{
$$ = makeDefElem("literal", (Node *)$2);
}
;
/* The following exist for backward compatibility */
......@@ -7420,6 +7442,7 @@ unreserved_keyword:
| COPY
| CREATEDB
| CREATEUSER
| CSV
| CURSOR
| CYCLE
| DATABASE
......@@ -7473,6 +7496,7 @@ unreserved_keyword:
| LAST_P
| LEVEL
| LISTEN
| LITERAL
| LOAD
| LOCAL
| LOCATION
......@@ -7507,6 +7531,7 @@ unreserved_keyword:
| PRIVILEGES
| PROCEDURAL
| PROCEDURE
| QUOTE
| READ
| RECHECK
| REINDEX
......
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/parser/keywords.c,v 1.147 2004/03/11 01:47:40 ishii Exp $
* $PostgreSQL: pgsql/src/backend/parser/keywords.c,v 1.148 2004/04/19 17:22:31 momjian Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -90,6 +90,7 @@ static const ScanKeyword ScanKeywords[] = {
{"createdb", CREATEDB},
{"createuser", CREATEUSER},
{"cross", CROSS},
{"csv", CSV},
{"current_date", CURRENT_DATE},
{"current_time", CURRENT_TIME},
{"current_timestamp", CURRENT_TIMESTAMP},
......@@ -186,6 +187,7 @@ static const ScanKeyword ScanKeywords[] = {
{"like", LIKE},
{"limit", LIMIT},
{"listen", LISTEN},
{"literal", LITERAL},
{"load", LOAD},
{"local", LOCAL},
{"localtime", LOCALTIME},
......@@ -248,6 +250,7 @@ static const ScanKeyword ScanKeywords[] = {
{"privileges", PRIVILEGES},
{"procedural", PROCEDURAL},
{"procedure", PROCEDURE},
{"quote", QUOTE},
{"read", READ},
{"real", REAL},
{"recheck", RECHECK},
......
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/tcop/fastpath.c,v 1.71 2004/01/07 18:56:27 neilc Exp $
* $PostgreSQL: pgsql/src/backend/tcop/fastpath.c,v 1.72 2004/04/19 17:22:31 momjian Exp $
*
* NOTES
* This cruft is the server side of PQfn.
......@@ -154,8 +154,7 @@ SendFunctionResult(Datum retval, bool isnull, Oid rettype, int16 format)
bool typisvarlena;
char *outputstr;
getTypeOutputInfo(rettype,
&typoutput, &typelem, &typisvarlena);
getTypeOutputInfo(rettype, &typoutput, &typelem, &typisvarlena);
outputstr = DatumGetCString(OidFunctionCall3(typoutput,
retval,
ObjectIdGetDatum(typelem),
......
......@@ -3,7 +3,7 @@
*
* Copyright (c) 2000-2003, PostgreSQL Global Development Group
*
* $PostgreSQL: pgsql/src/bin/psql/copy.c,v 1.43 2004/04/12 15:58:52 momjian Exp $
* $PostgreSQL: pgsql/src/bin/psql/copy.c,v 1.44 2004/04/19 17:22:31 momjian Exp $
*/
#include "postgres_fe.h"
#include "copy.h"
......@@ -66,8 +66,13 @@ struct copy_options
bool from;
bool binary;
bool oids;
bool csv_mode;
char *delim;
char *null;
char *quote;
char *escape;
char *force_list;
char *literal_list;
};
......@@ -81,6 +86,10 @@ free_copy_options(struct copy_options * ptr)
free(ptr->file);
free(ptr->delim);
free(ptr->null);
free(ptr->quote);
free(ptr->escape);
free(ptr->force_list);
free(ptr->literal_list);
free(ptr);
}
......@@ -272,11 +281,19 @@ parse_slash_copy(const char *args)
while (token)
{
bool fetch_next;
fetch_next = true;
/* someday allow BINARY here */
if (strcasecmp(token, "oids") == 0)
{
result->oids = true;
}
else if (strcasecmp(token, "csv") == 0)
{
result->csv_mode = true;
}
else if (strcasecmp(token, "delimiter") == 0)
{
token = strtokx(NULL, whitespace, NULL, "'",
......@@ -301,11 +318,78 @@ parse_slash_copy(const char *args)
else
goto error;
}
else if (strcasecmp(token, "quote") == 0)
{
token = strtokx(NULL, whitespace, NULL, "'",
'\\', false, pset.encoding);
if (token && strcasecmp(token, "as") == 0)
token = strtokx(NULL, whitespace, NULL, "'",
'\\', false, pset.encoding);
if (token)
result->quote = pg_strdup(token);
else
goto error;
}
else if (strcasecmp(token, "escape") == 0)
{
token = strtokx(NULL, whitespace, NULL, "'",
'\\', false, pset.encoding);
if (token && strcasecmp(token, "as") == 0)
token = strtokx(NULL, whitespace, NULL, "'",
'\\', false, pset.encoding);
if (token)
result->escape = pg_strdup(token);
else
goto error;
}
else if (strcasecmp(token, "force") == 0)
{
/* handle column list */
fetch_next = false;
for (;;)
{
token = strtokx(NULL, whitespace, ",", "\"",
0, false, pset.encoding);
if (!token || strchr(",", token[0]))
goto error;
if (!result->force_list)
result->force_list = pg_strdup(token);
else
xstrcat(&result->force_list, token);
token = strtokx(NULL, whitespace, ",", "\"",
0, false, pset.encoding);
if (!token || token[0] != ',')
break;
xstrcat(&result->force_list, token);
}
}
else if (strcasecmp(token, "literal") == 0)
{
/* handle column list */
fetch_next = false;
for (;;)
{
token = strtokx(NULL, whitespace, ",", "\"",
0, false, pset.encoding);
if (!token || strchr(",", token[0]))
goto error;
if (!result->literal_list)
result->literal_list = pg_strdup(token);
else
xstrcat(&result->literal_list, token);
token = strtokx(NULL, whitespace, ",", "\"",
0, false, pset.encoding);
if (!token || token[0] != ',')
break;
xstrcat(&result->literal_list, token);
}
}
else
goto error;
token = strtokx(NULL, whitespace, NULL, NULL,
0, false, pset.encoding);
if (fetch_next)
token = strtokx(NULL, whitespace, NULL, NULL,
0, false, pset.encoding);
}
}
......@@ -340,7 +424,7 @@ do_copy(const char *args)
PGresult *result;
bool success;
struct stat st;
/* parse options */
options = parse_slash_copy(args);
......@@ -379,6 +463,7 @@ do_copy(const char *args)
options->delim);
}
/* There is no backward-compatible CSV syntax */
if (options->null)
{
if (options->null[0] == '\'')
......@@ -387,6 +472,37 @@ do_copy(const char *args)
appendPQExpBuffer(&query, " WITH NULL AS '%s'", options->null);
}
if (options->csv_mode)
{
appendPQExpBuffer(&query, " CSV");
}
if (options->quote)
{
if (options->quote[0] == '\'')
appendPQExpBuffer(&query, " QUOTE AS %s", options->quote);
else
appendPQExpBuffer(&query, " QUOTE AS '%s'", options->quote);
}
if (options->escape)
{
if (options->escape[0] == '\'')
appendPQExpBuffer(&query, " ESCAPE AS %s", options->escape);
else
appendPQExpBuffer(&query, " ESCAPE AS '%s'", options->escape);
}
if (options->force_list)
{
appendPQExpBuffer(&query, " FORCE %s", options->force_list);
}
if (options->literal_list)
{
appendPQExpBuffer(&query, " LITERAL %s", options->literal_list);
}
if (options->from)
{
if (options->file)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册