/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.dolphinscheduler.common.utils;

import org.apache.dolphinscheduler.common.Constants;
import org.apache.dolphinscheduler.common.enums.ExecutionStatus;
import org.apache.dolphinscheduler.common.enums.ResUploadType;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONException;
import com.alibaba.fastjson.JSONObject;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.yarn.client.cli.RMAdminCLI;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.*;
import java.security.PrivilegedExceptionAction;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.Stream;

/**
 * hadoop utils
 * single instance
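 * <p>typical usage (illustrative path): {@code HadoopUtils.getInstance().exists("/dolphinscheduler")}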
 */
public class HadoopUtils implements Closeable {

    private static final Logger logger = LoggerFactory.getLogger(HadoopUtils.class);

    private static String hdfsUser = PropertyUtils.getString(Constants.HDFS_ROOT_USER);
    private static volatile HadoopUtils instance = new HadoopUtils();
    private static volatile Configuration configuration;
    private static FileSystem fs;

    private HadoopUtils(){
        if(StringUtils.isEmpty(hdfsUser)){
            hdfsUser = PropertyUtils.getString(Constants.HDFS_ROOT_USER);
        }
        init();
        initHdfsPath();
    }

    public static HadoopUtils getInstance(){
        // if kerberos is enabled, return a fresh HadoopUtils so the kerberos login is renewed
        if (CommonUtils.getKerberosStartupState()){
            return new HadoopUtils();
        }
        return instance;
    }

    /**
     * init dolphinscheduler root path in hdfs
     */
    private void initHdfsPath(){
        String hdfsPath = PropertyUtils.getString(Constants.DATA_STORE_2_HDFS_BASEPATH);
        Path path = new Path(hdfsPath);

        try {
            if (!fs.exists(path)) {
                fs.mkdirs(path);
            }
        } catch (Exception e) {
            logger.error(e.getMessage(),e);
        }
    }

    /**
     * init hadoop configuration
     */
    private void init() {
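        // double-checked locking: only the first call builds the shared Configuration and FileSystem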
        if (configuration == null) {
            synchronized (HadoopUtils.class) {
                if (configuration == null) {
                    try {
                        configuration = new Configuration();

                        String resUploadStartupType = PropertyUtils.getString(Constants.RES_UPLOAD_STARTUP_TYPE);
                        ResUploadType resUploadType = ResUploadType.valueOf(resUploadStartupType);

                        if (resUploadType == ResUploadType.HDFS){
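                            // HDFS mode: optionally log in via Kerberos, then resolve fs.defaultFS and create the FileSystem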
                            if (PropertyUtils.getBoolean(Constants.HADOOP_SECURITY_AUTHENTICATION_STARTUP_STATE)){
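                                // point the JVM at the configured krb5.conf and log in from the configured keytab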
                                System.setProperty(Constants.JAVA_SECURITY_KRB5_CONF,
                                        PropertyUtils.getString(Constants.JAVA_SECURITY_KRB5_CONF_PATH));
                                configuration.set(Constants.HADOOP_SECURITY_AUTHENTICATION,"kerberos");
                                UserGroupInformation.setConfiguration(configuration);
                                UserGroupInformation.loginUserFromKeytab(PropertyUtils.getString(Constants.LOGIN_USER_KEY_TAB_USERNAME),
                                        PropertyUtils.getString(Constants.LOGIN_USER_KEY_TAB_PATH));
                            }

                            String defaultFS = configuration.get(Constants.FS_DEFAULTFS);
                            // first read fs.defaultFS from core-site.xml / hdfs-site.xml; if it still points to the
                            // local file system (the hadoop default), fall back to the value in the properties file
                            if(defaultFS.startsWith("file")){
                                String defaultFSProp = PropertyUtils.getString(Constants.FS_DEFAULTFS);
                                if(StringUtils.isNotBlank(defaultFSProp)){
                                    Map<String, String> fsRelatedProps = PropertyUtils.getPrefixedProperties("fs.");
                                    configuration.set(Constants.FS_DEFAULTFS,defaultFSProp);
                                    fsRelatedProps.entrySet().stream().forEach(entry -> configuration.set(entry.getKey(), entry.getValue()));
                                }else{
                                    logger.error("property:{} can not be empty, please set it!", Constants.FS_DEFAULTFS);
                                    throw new RuntimeException(
                                            String.format("property: %s can not be empty, please set it!", Constants.FS_DEFAULTFS));
                                }
                            }else{
                                logger.info("get property:{} -> {} from core-site.xml and hdfs-site.xml", Constants.FS_DEFAULTFS, defaultFS);
                            }

                            if (fs == null) {
                                if(StringUtils.isNotEmpty(hdfsUser)){
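                                    // create the FileSystem as the configured hdfs.root.user so created paths are owned by that user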
                                    //UserGroupInformation ugi = UserGroupInformation.createProxyUser(hdfsUser,UserGroupInformation.getLoginUser());
                                    UserGroupInformation ugi = UserGroupInformation.createRemoteUser(hdfsUser);
                                    ugi.doAs(new PrivilegedExceptionAction<Boolean>() {
                                        @Override
                                        public Boolean run() throws Exception {
                                            fs = FileSystem.get(configuration);
                                            return true;
                                        }
                                    });
                                }else{
                                    logger.warn("hdfs.root.user is not set, the FileSystem will be created as the current login user");
                                    fs = FileSystem.get(configuration);
                                }
                            }
                        }else if (resUploadType == ResUploadType.S3){
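                            // S3 mode: point the Hadoop FileSystem at the configured s3a endpoint and credentials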
                            configuration.set(Constants.FS_DEFAULTFS, PropertyUtils.getString(Constants.FS_DEFAULTFS));
                            configuration.set(Constants.FS_S3A_ENDPOINT, PropertyUtils.getString(Constants.FS_S3A_ENDPOINT));
                            configuration.set(Constants.FS_S3A_ACCESS_KEY, PropertyUtils.getString(Constants.FS_S3A_ACCESS_KEY));
                            configuration.set(Constants.FS_S3A_SECRET_KEY, PropertyUtils.getString(Constants.FS_S3A_SECRET_KEY));
                            fs = FileSystem.get(configuration);
                        }


                        String rmHaIds = PropertyUtils.getString(Constants.YARN_RESOURCEMANAGER_HA_RM_IDS);
                        String appAddress = PropertyUtils.getString(Constants.YARN_APPLICATION_STATUS_ADDRESS);
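                        // with ResourceManager HA, replace the host in the status address with the currently active RM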
                        if (!StringUtils.isEmpty(rmHaIds)) {
                            appAddress = getAppAddress(appAddress, rmHaIds);
                            logger.info("appAddress : {}", appAddress);
                        }
                        configuration.set(Constants.YARN_APPLICATION_STATUS_ADDRESS, appAddress);
                    } catch (Exception e) {
                        logger.error(e.getMessage(), e);
                    }

                }
            }
        }
    }

    /**
     * @return Configuration
     */
    public Configuration getConfiguration() {
        return configuration;
    }

    /**
     * get application url
     *
     * @param applicationId yarn application id
     * @return the application status url on yarn
     */
    public String getApplicationUrl(String applicationId) {
        return String.format(configuration.get(Constants.YARN_APPLICATION_STATUS_ADDRESS), applicationId);
    }

    /**
     * cat file on hdfs
     *
     * @param hdfsFilePath  hdfs file path
     * @return byte[]
     */
    public byte[] catFile(String hdfsFilePath) throws IOException {

        if(StringUtils.isBlank(hdfsFilePath)){
            logger.error("hdfs file path:{} is blank",hdfsFilePath);
            return null;
        }

        try (FSDataInputStream fsDataInputStream = fs.open(new Path(hdfsFilePath))) {
            return IOUtils.toByteArray(fsDataInputStream);
        }
    }



    /**
     * cat file on hdfs
     *
     * @param hdfsFilePath  hdfs file path
     * @param skipLineNums  skip line numbers
     * @param limit         read how many lines
     * @return the requested lines of the file
     */
    public List<String> catFile(String hdfsFilePath, int skipLineNums, int limit) throws IOException {

        if (StringUtils.isBlank(hdfsFilePath)){
            logger.error("hdfs file path:{} is blank",hdfsFilePath);
            return null;
        }

        try (FSDataInputStream in = fs.open(new Path(hdfsFilePath));
             BufferedReader br = new BufferedReader(new InputStreamReader(in))) {
            Stream<String> stream = br.lines().skip(skipLineNums).limit(limit);
            return stream.collect(Collectors.toList());
        }
    }

    /**
     * make the given file and all non-existent parents into
     * directories. Has the semantics of Unix 'mkdir -p'.
     * Existence of the directory hierarchy is not an error.
     *
     * @param hdfsPath path to create
     */
    public boolean mkdir(String hdfsPath) throws IOException {
        return fs.mkdirs(new Path(hdfsPath));
    }

    /**
     * copy files between FileSystems
     *
     * @param srcPath      source hdfs path
     * @param dstPath      destination hdfs path
     * @param deleteSource whether to delete the src
     * @param overwrite    whether to overwrite an existing file
     * @return true if the copy succeeded
     */
    public boolean copy(String srcPath, String dstPath, boolean deleteSource, boolean overwrite) throws IOException {
        return FileUtil.copy(fs, new Path(srcPath), fs, new Path(dstPath), deleteSource, overwrite, fs.getConf());
    }

    /**
     * the src file is on the local disk.  Add it to FS at
     * the given dst name.

     * @param srcFile       local file
     * @param dstHdfsPath   destination hdfs path
     * @param deleteSource  whether to delete the src
     * @param overwrite     whether to overwrite an existing file
     */
    public boolean copyLocalToHdfs(String srcFile, String dstHdfsPath, boolean deleteSource, boolean overwrite) throws IOException {
        Path srcPath = new Path(srcFile);
        Path dstPath= new Path(dstHdfsPath);

        fs.copyFromLocalFile(deleteSource, overwrite, srcPath, dstPath);

        return true;
    }

    /**
     * copy hdfs file to local
     *
     * @param srcHdfsFilePath   source hdfs file path
     * @param dstFile           destination file
     * @param deleteSource      whether to delete the source file after copy
     * @param overwrite         whether to overwrite an existing destination file
     * @return true if the copy succeeded
     * @throws IOException errors
     */
    public boolean copyHdfsToLocal(String srcHdfsFilePath, String dstFile, boolean deleteSource, boolean overwrite) throws IOException {
        Path srcPath = new Path(srcHdfsFilePath);
        File dstPath = new File(dstFile);

        if (dstPath.exists()) {
            if (dstPath.isFile()) {
                if (overwrite) {
                    dstPath.delete();
                }
            } else {
                logger.error("destination file must be a file");
            }
        }

        if(!dstPath.getParentFile().exists()){
            dstPath.getParentFile().mkdirs();
        }

        return FileUtil.copy(fs, srcPath, dstPath, deleteSource, fs.getConf());
    }

    /**
     *
     * delete a file
     *
     * @param hdfsFilePath the path to delete.
     * @param recursive if path is a directory and set to
     * true, the directory is deleted else throws an exception. In
     * case of a file the recursive can be set to either true or false.
     * @return  true if delete is successful else false.
     * @throws IOException
     */
    public boolean delete(String hdfsFilePath, boolean recursive) throws IOException {
        return fs.delete(new Path(hdfsFilePath), recursive);
    }

    /**
     * check if exists
     *
     * @param hdfsFilePath source file path
     * @return true if the path exists
     */
    public boolean exists(String hdfsFilePath) throws IOException {
        return fs.exists(new Path(hdfsFilePath));
    }

    /**
     * Gets a list of files in the directory
     *
     * @param filePath file path
     * @return {@link FileStatus} array for the given path
     */
    public FileStatus[] listFileStatus(String filePath) throws Exception {
        try {
            return fs.listStatus(new Path(filePath));
        } catch (IOException e) {
            logger.error("Get file list exception", e);
            throw new Exception("Get file list exception", e);
        }
    }

    /**
     * Renames Path src to Path dst.  Can take place on local fs
     * or remote DFS.
     * @param src path to be renamed
     * @param dst new path after rename
     * @throws IOException on failure
     * @return true if rename is successful
     */
    public boolean rename(String src, String dst) throws IOException {
        return fs.rename(new Path(src), new Path(dst));
    }


    /**
     * get the state of an application
     *
     * @param applicationId yarn application id
     * @return the execution status, or null if it can not be determined
     * @throws JSONException
     * @throws IOException
     */
    public ExecutionStatus getApplicationStatus(String applicationId) throws JSONException {
        if (StringUtils.isEmpty(applicationId)) {
            return null;
        }

        String applicationUrl = getApplicationUrl(applicationId);

        String responseContent = HttpUtils.get(applicationUrl);
        if (StringUtils.isEmpty(responseContent)) {
            return null;
        }

        JSONObject jsonObject = JSONObject.parseObject(responseContent);
        String result = jsonObject.getJSONObject("app").getString("finalStatus");

        switch (result) {
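            // map the yarn final status to a dolphinscheduler execution status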
            case Constants.ACCEPTED:
                return ExecutionStatus.SUBMITTED_SUCCESS;
            case Constants.SUCCEEDED:
                return ExecutionStatus.SUCCESS;
            case Constants.NEW:
            case Constants.NEW_SAVING:
            case Constants.SUBMITTED:
            case Constants.FAILED:
                return ExecutionStatus.FAILURE;
            case Constants.KILLED:
                return ExecutionStatus.KILL;

            case Constants.RUNNING:
            default:
                return ExecutionStatus.RUNNING_EXEUTION;
        }
    }

    /**
     *
     * @return data hdfs path
     */
    public static String getHdfsDataBasePath() {
        String basePath = PropertyUtils.getString(Constants.DATA_STORE_2_HDFS_BASEPATH);
        if ("/".equals(basePath)) {
            // if basepath is configured to /,  the generated url may be  //default/resources (with extra leading /)
            return "";
        } else {
            return basePath;
        }
    }

    /**
     * hdfs resource dir
     *
     * @param tenantCode tenant code
     * @return hdfs resource dir
     */
    public static String getHdfsResDir(String tenantCode) {
        return String.format("%s/resources", getHdfsTenantDir(tenantCode));
    }

    /**
     * hdfs user dir
     *
     * @param tenantCode tenant code
     * @param userId     user id
     * @return hdfs user dir
     */
    public static String getHdfsUserDir(String tenantCode,int userId) {
        return String.format("%s/home/%d", getHdfsTenantDir(tenantCode),userId);
    }

    /**
     * hdfs udf dir
     *
     * @param tenantCode tenant code
     * @return get udf dir on hdfs
     */
    public static String getHdfsUdfDir(String tenantCode) {
        return String.format("%s/udfs", getHdfsTenantDir(tenantCode));
    }

    /**
     * get absolute path and name for file on hdfs
     *
     * @param tenantCode tenant code
     * @param filename   file name
     * @return get absolute path and name for file on hdfs
     */
    public static String getHdfsFilename(String tenantCode, String filename) {
        return String.format("%s/%s", getHdfsResDir(tenantCode), filename);
    }

    /**
     * get absolute path and name for udf file on hdfs
     *
     * @param tenantCode tenant code
     * @param filename   file name
     * @return get absolute path and name for udf file on hdfs
     */
    public static String getHdfsUdfFilename(String tenantCode, String filename) {
        return String.format("%s/%s", getHdfsUdfDir(tenantCode), filename);
    }

    /**
     * @param tenantCode tenant code
     * @return file directory of the tenant on hdfs
     */
    public static String getHdfsTenantDir(String tenantCode) {
        return String.format("%s/%s", getHdfsDataBasePath(), tenantCode);
    }


    /**
     * getAppAddress
     *
     * @param appAddress
     * @param rmHa
     * @return
     */
    public static String getAppAddress(String appAddress, String rmHa) {

        //get active ResourceManager
        String activeRM = YarnHAAdminUtils.getAcitveRMName(rmHa);
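        // the status address has the form <scheme>://<host>:<port>/...; swap the host for the active RM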

        String[] split1 = appAddress.split(Constants.DOUBLE_SLASH);

        if (split1.length != 2) {
            return null;
        }

        String start = split1[0] + Constants.DOUBLE_SLASH;
        String[] split2 = split1[1].split(Constants.COLON);

        if (split2.length != 2) {
            return null;
        }

        String end = Constants.COLON + split2[1];

        return start + activeRM + end;
    }


    @Override
    public void close() throws IOException {
        if (fs != null) {
            try {
                fs.close();
            } catch (IOException e) {
                logger.error("Close HadoopUtils instance failed", e);
                throw new IOException("Close HadoopUtils instance failed", e);
            }
        }
    }


    /**
     * yarn ha admin utils
     */
    private static final class YarnHAAdminUtils extends RMAdminCLI {

        private static final Logger logger = LoggerFactory.getLogger(YarnHAAdminUtils.class);

        /**
         * get active resourcemanager
         *
         * @param rmIds comma separated resource manager ids
         * @return the active resource manager id, or null if none is active
         */
        public static String getAcitveRMName(String rmIds) {

            String[] rmIdArr = rmIds.split(Constants.COMMA);

            int activeResourceManagerPort = PropertyUtils.getInt(Constants.HADOOP_RESOURCE_MANAGER_HTTPADDRESS_PORT, 8088);

            String yarnUrl = "http://%s:" + activeResourceManagerPort + "/ws/v1/cluster/info";
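            // probe each RM's cluster info endpoint and pick the one reporting haState ACTIVE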

            String state = null;
            try {
                // send http get request to rm1
                state = getRMState(String.format(yarnUrl, rmIdArr[0]));

                if (Constants.HADOOP_RM_STATE_ACTIVE.equals(state)) {
                    return rmIdArr[0];
                } else if (Constants.HADOOP_RM_STATE_STANDBY.equals(state)) {
                    state = getRMState(String.format(yarnUrl, rmIdArr[1]));
                    if (Constants.HADOOP_RM_STATE_ACTIVE.equals(state)) {
                        return rmIdArr[1];
                    }
                } else {
                    return null;
                }
            } catch (Exception e) {
                state = getRMState(String.format(yarnUrl, rmIdArr[1]));
                if (Constants.HADOOP_RM_STATE_ACTIVE.equals(state)) {
                    return rmIdArr[1];
                }
            }
            return null;
        }


        /**
         * get ResourceManager state
         *
         * @param url resource manager cluster info url
         * @return the resource manager ha state, or null if the request failed
         */
        public static String getRMState(String url) {

            String retStr = HttpUtils.get(url);

            if (StringUtils.isEmpty(retStr)) {
                return null;
            }
            //to json
            JSONObject jsonObject = JSON.parseObject(retStr);

            //get ResourceManager state
            String state = jsonObject.getJSONObject("clusterInfo").getString("haState");
            return state;
        }

    }
}