Commit 99075f87 authored by Peifeng Qiu, committed by Adam Lee

s3ext: refactor s3url-related code for both reader and writer

Add a set of URL-related utilities, such as getSchemaFromURL(), getRegionFromURL(),
getBucketFromURL(), getPrefixFromURL(), and replaceSchemaFromURL(), which are used
by both the S3 writer and the S3 reader. Merge the URL parser and the URL utilities
into a single set of s3url files.
Signed-off-by: Kuien Liu <kliu@pivotal.io>
Parent d48e624b
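For orientation, a minimal sketch of how call sites look after this refactoring. It is illustrative only: printUrlParts and its parameters are hypothetical, while the S3UrlUtility methods are the ones introduced below.

#include <iostream>
#include <string>

#include "s3url.h"  // merged URL parser + utility header introduced by this commit

// Hypothetical helper (not part of the patch) mirroring how S3BucketReader::parseURL()
// and GPWriter now obtain URL components from the static S3UrlUtility methods.
void printUrlParts(const std::string &url, bool useHttps) {
    std::cout << "schema: " << S3UrlUtility::getDefaultSchema(useHttps) << "\n"
              << "region: " << S3UrlUtility::getRegionFromURL(url) << "\n"
              << "bucket: " << S3UrlUtility::getBucketFromURL(url) << "\n"
              << "prefix: " << S3UrlUtility::getPrefixFromURL(url) << "\n"
              << "rewritten: " << S3UrlUtility::replaceSchemaFromURL(url, useHttps) << "\n";
}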
@@ -54,34 +54,4 @@ GPWriter *writer_init(const char *url_with_options);
bool writer_transfer_data(GPWriter *writer, char *data_buf, int &data_len);
bool writer_cleanup(GPWriter **writer);
// Set schema to 'https' or 'http'
inline string replaceSchemaFromURL(const string &url) {
size_t iend = url.find("://");
if (iend == string::npos) {
return url;
}
return "https" + url.substr(iend);
}
inline string getRegionFromURL(const string &url) {
size_t ibegin =
url.find("://s3") + strlen("://s3"); // index of character('.' or '-') after "3"
size_t iend = url.find(".amazonaws.com");
string region;
if (iend == string::npos) {
return region;
} else if (ibegin == iend) { // "s3.amazonaws.com"
return "external-1";
} else {
// ibegin + 1 is the character after "s3." or "s3-"
// for instance: s3-us-west-2.amazonaws.com
region = url.substr(ibegin + 1, iend - (ibegin + 1));
}
if (region.compare("us-east-1") == 0) {
region = "external-1";
}
return region;
}
#endif /* INCLUDE_GPWRITER_H_ */
COMMON_OBJS = gpreader.o gpwriter.o s3conf.o s3common.o s3utils.o s3log.o s3url_parser.o s3http_headers.o s3interface.o s3restful_service.o decompress_reader.o s3key_reader.o s3key_writer.o s3bucket_reader.o s3common_reader.o
COMMON_OBJS = gpreader.o gpwriter.o s3conf.o s3common.o s3utils.o s3log.o s3url.o s3http_headers.o s3interface.o s3restful_service.o decompress_reader.o s3key_reader.o s3key_writer.o s3bucket_reader.o s3common_reader.o
COMMON_LINK_OPTIONS = -lstdc++ -lxml2 -lpthread -lcrypto -lcurl -lz
......
@@ -73,9 +73,6 @@ class S3BucketReader : public Reader {
ListBucketResult keyList; // List of matched keys/files.
uint64_t keyIndex; // BucketContent index of keylist->contents.
void SetSchema();
void SetRegion();
void SetBucketAndPrefix();
BucketContent *getNextKey();
ReaderParams getReaderParams(BucketContent *key);
};
......
@@ -9,7 +9,7 @@ using std::string;
#include "s3interface.h"
#include "s3macros.h"
#include "s3restful_service.h"
#include "s3url_parser.h"
#include "s3url.h"
#include <vector>
......
@@ -5,7 +5,7 @@
#include "s3interface.h"
#include "s3macros.h"
#include "s3restful_service.h"
#include "s3url_parser.h"
#include "s3url.h"
#include "writer.h"
#include <string>
......
#ifndef __S3_URL_PARSER_H__
#define __S3_URL_PARSER_H__
#ifndef __S3_URL_H__
#define __S3_URL_H__
#include "http_parser.h"
@@ -34,4 +34,17 @@ class UrlParser {
string fullurl;
};
class S3UrlUtility {
public:
// Replace the URL schema with 'https' or 'http'
static string replaceSchemaFromURL(const string &url, bool useHttps = true);
static string getDefaultSchema(bool useHttps = true);
static string getRegionFromURL(const string &url);
static string getBucketFromURL(const string &url);
static string getPrefixFromURL(const string &url);
};
#endif
@@ -12,13 +12,14 @@
#include "s3conf.h"
#include "s3log.h"
#include "s3macros.h"
#include "s3url.h"
#include "s3utils.h"
using std::string;
using std::stringstream;
GPWriter::GPWriter(const string& url) {
string file = replaceSchemaFromURL(url);
string file = S3UrlUtility::replaceSchemaFromURL(url, s3ext_encryption);
constructWriterParams(file);
restfulServicePtr = &restfulService;
}
@@ -29,7 +30,7 @@ void GPWriter::constructWriterParams(const string& url) {
this->params.setSegNum(s3ext_segnum);
this->params.setNumOfChunks(s3ext_threadnum);
this->params.setChunkSize(s3ext_chunksize);
this->params.setRegion(getRegionFromURL(url));
this->params.setRegion(S3UrlUtility::getRegionFromURL(url));
this->cred.accessID = s3ext_accessid;
this->cred.secret = s3ext_secret;
......
@@ -11,6 +11,7 @@
#include "s3log.h"
#include "s3macros.h"
#include "s3params.h"
#include "s3url.h"
#include "s3utils.h"
using std::string;
@@ -111,19 +112,6 @@ void S3BucketReader::close() {
return;
}
// Set schema to 'https' or 'http'
void S3BucketReader::SetSchema() {
size_t iend = this->url.find("://");
if (iend == string::npos) {
return;
}
this->schema = this->url.substr(0, iend);
if (this->schema == "s3") {
this->schema = s3ext_encryption ? "https" : "http";
}
}
string S3BucketReader::getKeyURL(const string &key) {
stringstream sstr;
sstr << this->schema << "://"
@@ -132,60 +120,11 @@ string S3BucketReader::getKeyURL(const string &key) {
return sstr.str();
}
// Set AWS region, use 'external-1' if it is 'us-east-1' or not present
// http://docs.aws.amazon.com/general/latest/gr/rande.html#s3_region
void S3BucketReader::SetRegion() {
size_t ibegin =
this->url.find("://s3") + strlen("://s3"); // index of character('.' or '-') after "3"
size_t iend = this->url.find(".amazonaws.com");
if (iend == string::npos) {
return;
} else if (ibegin == iend) { // "s3.amazonaws.com"
this->region = "external-1";
} else {
// ibegin + 1 is the character after "s3." or "s3-"
// for instance: s3-us-west-2.amazonaws.com
this->region = this->url.substr(ibegin + 1, iend - (ibegin + 1));
}
if (this->region.compare("us-east-1") == 0) {
this->region = "external-1";
}
}
void S3BucketReader::SetBucketAndPrefix() {
size_t ibegin = find_Nth(this->url, 3, "/");
size_t iend = find_Nth(this->url, 4, "/");
if (ibegin == string::npos) {
return;
}
// s3://s3-region.amazonaws.com/bucket
if (iend == string::npos) {
this->bucket = url.substr(ibegin + 1, url.length() - ibegin - 1);
this->prefix = "";
return;
}
this->bucket = url.substr(ibegin + 1, iend - ibegin - 1);
// s3://s3-region.amazonaws.com/bucket/
if (iend == url.length() - 1) {
this->prefix = "";
return;
}
ibegin = find_Nth(url, 4, "/");
// s3://s3-region.amazonaws.com/bucket/prefix
// s3://s3-region.amazonaws.com/bucket/prefix/whatever
this->prefix = url.substr(ibegin + 1, url.length() - ibegin - 1);
}
void S3BucketReader::parseURL() {
this->SetSchema();
this->SetRegion();
this->SetBucketAndPrefix();
this->schema = s3ext_encryption ? "https" : "http";
this->region = S3UrlUtility::getRegionFromURL(this->url);
this->bucket = S3UrlUtility::getBucketFromURL(this->url);
this->prefix = S3UrlUtility::getPrefixFromURL(this->url);
bool ok = !(this->schema.empty() || this->region.empty() || this->bucket.empty());
CHECK_OR_DIE_MSG(ok, "'%s' is not valid", this->url.c_str());
......
@@ -13,7 +13,7 @@
#include "s3key_reader.h"
#include "s3log.h"
#include "s3macros.h"
#include "s3url_parser.h"
#include "s3url.h"
#include "s3utils.h"
#include "s3interface.h"
......
@@ -10,7 +10,8 @@
#include "s3log.h"
#include "s3macros.h"
#include "s3url_parser.h"
#include "s3url.h"
#include "s3utils.h"
using std::string;
using std::stringstream;
@@ -38,4 +39,75 @@ string UrlParser::extractField(const struct http_parser_url *url_parser, http_pa
}
return this->fullurl.substr(url_parser->field_data[i].off, url_parser->field_data[i].len);
}
string S3UrlUtility::replaceSchemaFromURL(const string &url, bool useHttps) {
size_t iend = url.find("://");
if (iend == string::npos) {
return url;
}
return getDefaultSchema(useHttps) + url.substr(iend);
}
string S3UrlUtility::getDefaultSchema(bool useHttps) {
return useHttps ? "https" : "http";
}
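// Illustrative examples (not part of this patch), following the logic above:
//   replaceSchemaFromURL("s3://s3.amazonaws.com/bucket", true)  -> "https://s3.amazonaws.com/bucket"
//   replaceSchemaFromURL("s3://s3.amazonaws.com/bucket", false) -> "http://s3.amazonaws.com/bucket"
//   replaceSchemaFromURL("abcd")                                -> "abcd" (no "://", returned unchanged)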
// Return the AWS region; use 'external-1' if it is 'us-east-1' or not present
// http://docs.aws.amazon.com/general/latest/gr/rande.html#s3_region
string S3UrlUtility::getRegionFromURL(const string &url) {
size_t ibegin =
    url.find("://s3") + strlen("://s3");  // index of the character ('.' or '-') after "s3"
size_t iend = url.find(".amazonaws.com");
string region;
if (iend == string::npos) {
return region;
} else if (ibegin == iend) { // "s3.amazonaws.com"
return "external-1";
} else {
// ibegin + 1 is the character after "s3." or "s3-"
// for instance: s3-us-west-2.amazonaws.com
region = url.substr(ibegin + 1, iend - (ibegin + 1));
}
if (region.compare("us-east-1") == 0) {
region = "external-1";
}
return region;
}
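// Illustrative examples (not part of this patch), following the logic above:
//   getRegionFromURL("s3://s3.amazonaws.com/bucket")           -> "external-1"
//   getRegionFromURL("s3://s3-us-west-2.amazonaws.com/bucket") -> "us-west-2"
//   getRegionFromURL("s3://s3-us-east-1.amazonaws.com/bucket") -> "external-1"
//   getRegionFromURL("https://example.com/bucket")             -> "" (no ".amazonaws.com")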
string S3UrlUtility::getBucketFromURL(const string &url) {
size_t ibegin = find_Nth(url, 3, "/");
size_t iend = find_Nth(url, 4, "/");
if (ibegin == string::npos) {
return string();
}
// s3://s3-region.amazonaws.com/bucket
if (iend == string::npos) {
return url.substr(ibegin + 1, url.length() - ibegin - 1);
}
return url.substr(ibegin + 1, iend - ibegin - 1);
}
string S3UrlUtility::getPrefixFromURL(const string &url) {
size_t ibegin = find_Nth(url, 3, "/");
size_t iend = find_Nth(url, 4, "/");
// s3://s3-region.amazonaws.com/bucket
if (ibegin == string::npos || iend == string::npos) {
return string();
}
// s3://s3-region.amazonaws.com/bucket/
if (iend == url.length() - 1) {
return string();
}
ibegin = find_Nth(url, 4, "/");
// s3://s3-region.amazonaws.com/bucket/prefix
// s3://s3-region.amazonaws.com/bucket/prefix/whatever
return url.substr(ibegin + 1, url.length() - ibegin - 1);
}
\ No newline at end of file
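For reference, a minimal standalone sketch of the bucket/prefix splitting implemented above. checkBucketAndPrefix and the sample URL are hypothetical and not part of this commit.

#include <cassert>
#include <string>

#include "s3url.h"

// Hypothetical sanity check of getBucketFromURL()/getPrefixFromURL(), based on the
// slash-counting logic in s3url.cpp above.
void checkBucketAndPrefix() {
    const std::string base = "s3://s3-us-west-2.amazonaws.com/mybucket";
    assert(S3UrlUtility::getBucketFromURL(base) == "mybucket");
    assert(S3UrlUtility::getPrefixFromURL(base).empty());        // no fourth '/'
    assert(S3UrlUtility::getPrefixFromURL(base + "/").empty());  // trailing '/' only
    assert(S3UrlUtility::getBucketFromURL(base + "/a/b.csv") == "mybucket");
    assert(S3UrlUtility::getPrefixFromURL(base + "/a/b.csv") == "a/b.csv");
}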
@@ -74,6 +74,12 @@ TEST_F(S3BucketReaderTest, ParseURL_normal) {
EXPECT_EQ("dataset1/normal", this->bucketReader->getPrefix());
}
// Cannot find '://', so return the url itself
TEST_F(S3BucketReaderTest, ParseURL_noSchema) {
string url = "abcd";
EXPECT_EQ(url, S3UrlUtility::replaceSchemaFromURL(url));
}
TEST_F(S3BucketReaderTest, ParseURL_NoPrefixAndSlash) {
EXPECT_NO_THROW(
this->bucketReader->parseURL("s3://s3-us-west-2.amazonaws.com/s3test.pivotal.io"));
......
#include "s3url_parser.cpp"
#include "s3url.cpp"
#include "gtest/gtest.h"
#include "s3macros.h"
......