AnalyzerLib.hs 33.3 KB
Newer Older
1 2 3 4
{-
    Copyright 2012-2015 Vidar Holen

    This file is part of ShellCheck.
M
Mike Frysinger 已提交
5
    https://www.shellcheck.net
6 7 8 9 10 11 12 13 14 15 16 17

    ShellCheck is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    ShellCheck is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
M
Mike Frysinger 已提交
18
    along with this program.  If not, see <https://www.gnu.org/licenses/>.
19 20
-}
{-# LANGUAGE FlexibleContexts #-}
V
Vanessa McHale 已提交
21
{-# LANGUAGE TemplateHaskell  #-}
22
module ShellCheck.AnalyzerLib where
V
Vanessa McHale 已提交
23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41
import           ShellCheck.AST
import           ShellCheck.ASTLib
import           ShellCheck.Data
import           ShellCheck.Interface
import           ShellCheck.Parser
import           ShellCheck.Regex

import           Control.Arrow          (first)
import           Control.Monad.Identity
import           Control.Monad.RWS
import           Control.Monad.State
import           Control.Monad.Writer
import           Data.Char
import           Data.List
import qualified Data.Map               as Map
import           Data.Maybe
import           Data.Semigroup

import           Test.QuickCheck.All    (forAllProperties)
V
Vidar Holen 已提交
42
import           Test.QuickCheck.Test   (maxSuccess, quickCheckWithResult, stdArgs)
43

V
Vidar Holen 已提交
44 45 46
type Analysis = AnalyzerM ()
type AnalyzerM a = RWS Parameters [TokenComment] Cache a
nullCheck = const $ return ()
47 48


V
Vidar Holen 已提交
49 50
data Checker = Checker {
    perScript :: Root -> Analysis,
V
Vanessa McHale 已提交
51
    perToken  :: Token -> Analysis
V
Vidar Holen 已提交
52 53 54 55 56 57 58 59 60
}

runChecker :: Parameters -> Checker -> [TokenComment]
runChecker params checker = notes
    where
        root = rootNode params
        check = perScript checker `composeAnalyzers` (\(Root x) -> void $ doAnalysis (perToken checker) x)
        notes = snd $ evalRWS (check $ Root root) params Cache

V
Vanessa McHale 已提交
61 62 63 64 65 66
instance Semigroup Checker where
    (<>) x y = Checker {
        perScript = perScript x `composeAnalyzers` perScript y,
        perToken = perToken x `composeAnalyzers` perToken y
        }

V
Vidar Holen 已提交
67 68 69 70 71
instance Monoid Checker where
    mempty = Checker {
        perScript = nullCheck,
        perToken = nullCheck
        }
V
Vanessa McHale 已提交
72
    mappend = (Data.Semigroup.<>)
V
Vidar Holen 已提交
73 74 75 76

composeAnalyzers :: (a -> Analysis) -> (a -> Analysis) -> a -> Analysis
composeAnalyzers f g x = f x >> g x

77
data Parameters = Parameters {
V
Vanessa McHale 已提交
78 79 80 81 82
    hasLastpipe        :: Bool,           -- Whether this script has the 'lastpipe' option set/default.
    hasSetE            :: Bool,           -- Whether this script has 'set -e' anywhere.
    variableFlow       :: [StackData],   -- A linear (bad) analysis of data flow
    parentMap          :: Map.Map Id Token, -- A map from Id to parent Token
    shellType          :: Shell,            -- The shell type, such as Bash or Ksh
83
    shellTypeSpecified :: Bool,    -- True if shell type was forced via flags
V
Vanessa McHale 已提交
84
    rootNode           :: Token              -- The root node of the AST
85 86
    }

V
Vidar Holen 已提交
87 88 89
-- TODO: Cache results of common AST ops here
data Cache = Cache {}

90 91 92 93 94 95 96 97 98 99 100 101
data Scope = SubshellScope String | NoneScope deriving (Show, Eq)
data StackData =
    StackScope Scope
    | StackScopeEnd
    -- (Base expression, specific position, var name, assigned values)
    | Assignment (Token, Token, String, DataType)
    | Reference (Token, Token, String)
  deriving (Show)

data DataType = DataString DataSource | DataArray DataSource
  deriving (Show)

102 103 104 105 106 107
data DataSource =
    SourceFrom [Token]
    | SourceExternal
    | SourceDeclaration
    | SourceInteger
    | SourceChecked
108 109 110 111
  deriving (Show)

data VariableState = Dead Token String | Alive deriving (Show)

N
Ng Zhi An 已提交
112
defaultSpec root = spec {
113
    asShellType = Nothing,
114
    asCheckSourced = False,
115
    asExecutionMode = Executed
N
Ng Zhi An 已提交
116
} where spec = newAnalysisSpec root
117 118 119

pScript s =
  let
120
    pSpec = newParseSpec {
121
        psFilename = "script",
122
        psScript = s
123 124 125
    }
  in prRoot . runIdentity $ parseScript (mockedSystemInterface []) pSpec

V
Vidar Holen 已提交
126 127 128 129 130 131 132 133
-- For testing. If parsed, returns whether there are any comments
producesComments :: Checker -> String -> Maybe Bool
producesComments c s = do
        root <- pScript s
        let spec = defaultSpec root
        let params = makeParameters spec
        return . not . null $ runChecker params c

134 135
makeComment :: Severity -> Id -> Code -> String -> TokenComment
makeComment severity id code note =
N
Ng Zhi An 已提交
136 137 138 139 140 141 142 143
    newTokenComment {
        tcId = id,
        tcComment = newComment {
            cSeverity = severity,
            cCode = code,
            cMessage = note
        }
    }
144 145 146 147 148 149 150 151 152

addComment note = tell [note]

warn :: MonadWriter [TokenComment] m => Id -> Code -> String -> m ()
warn  id code str = addComment $ makeComment WarningC id code str
err   id code str = addComment $ makeComment ErrorC id code str
info  id code str = addComment $ makeComment InfoC id code str
style id code str = addComment $ makeComment StyleC id code str

N
Ng Zhi An 已提交
153 154 155 156 157 158 159 160 161 162 163 164 165
warnWithFix id code str fix = addComment $
    let comment = makeComment WarningC id code str in
    comment {
        tcFix = Just fix
    }

makeCommentWithFix :: Severity -> Id -> Code -> String -> Fix -> TokenComment
makeCommentWithFix severity id code str fix =
    let comment = makeComment severity id code str in
    comment {
        tcFix = Just fix
    }

166 167
makeParameters spec =
    let params = Parameters {
V
Vidar Holen 已提交
168
        rootNode = root,
169
        shellType = fromMaybe (determineShell root) $ asShellType spec,
170 171 172 173 174
        hasSetE = containsSetE root,
        hasLastpipe =
            case shellType params of
                Bash -> containsLastpipe root
                Dash -> False
V
Vanessa McHale 已提交
175 176
                Sh   -> False
                Ksh  -> True,
177

178 179
        shellTypeSpecified = isJust $ asShellType spec,
        parentMap = getParentTree root,
180
        variableFlow = getVariableFlow params root
181 182 183
    } in params
  where root = asScript spec

184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211

-- Does this script mention 'set -e' anywhere?
-- Used as a hack to disable certain warnings.
containsSetE root = isNothing $ doAnalysis (guard . not . isSetE) root
  where
    isSetE t =
        case t of
            T_Script _ str _ -> str `matches` re
            T_SimpleCommand {}  ->
                t `isUnqualifiedCommand` "set" &&
                    ("errexit" `elem` oversimplify t ||
                        "e" `elem` map snd (getAllFlags t))
            _ -> False
    re = mkRegex "[[:space:]]-[^-]*e"

-- Does this script mention 'shopt -s lastpipe' anywhere?
-- Also used as a hack.
containsLastpipe root =
        isNothing $ doAnalysis (guard . not . isShoptLastPipe) root
    where
        isShoptLastPipe t =
            case t of
                T_SimpleCommand {}  ->
                    t `isUnqualifiedCommand` "shopt" &&
                        ("lastpipe" `elem` oversimplify t)
                _ -> False


212 213 214 215 216 217 218 219
prop_determineShell0 = determineShell (fromJust $ pScript "#!/bin/sh") == Sh
prop_determineShell1 = determineShell (fromJust $ pScript "#!/usr/bin/env ksh") == Ksh
prop_determineShell2 = determineShell (fromJust $ pScript "") == Bash
prop_determineShell3 = determineShell (fromJust $ pScript "#!/bin/sh -e") == Sh
prop_determineShell4 = determineShell (fromJust $ pScript
    "#!/bin/ksh\n#shellcheck shell=sh\nfoo") == Sh
prop_determineShell5 = determineShell (fromJust $ pScript
    "#shellcheck shell=sh\nfoo") == Sh
220
prop_determineShell6 = determineShell (fromJust $ pScript "#! /bin/sh") == Sh
V
Vidar Holen 已提交
221
prop_determineShell7 = determineShell (fromJust $ pScript "#! /bin/ash") == Dash
222 223 224 225 226 227 228
determineShell t = fromMaybe Bash $ do
    shellString <- foldl mplus Nothing $ getCandidates t
    shellForExecutable shellString
  where
    forAnnotation t =
        case t of
            (ShellOverride s) -> return s
V
Vanessa McHale 已提交
229
            _                 -> fail ""
230
    getCandidates :: Token -> [Maybe String]
231
    getCandidates t@T_Script {} = [Just $ fromShebang t]
232 233 234
    getCandidates (T_Annotation _ annotations s) =
        map forAnnotation annotations ++
           [Just $ fromShebang s]
V
Vidar Holen 已提交
235
    fromShebang (T_Script _ s t) = executableFromShebang s
236

V
Vidar Holen 已提交
237 238 239 240 241
-- Given a string like "/bin/bash" or "/usr/bin/env dash",
-- return the shell basename like "bash" or "dash"
executableFromShebang :: String -> String
executableFromShebang = shellFor
  where
242 243 244 245 246 247
    shellFor s | "/env " `isInfixOf` s = head (drop 1 (words s)++[""])
    shellFor s | ' ' `elem` s = shellFor $ takeWhile (/= ' ') s
    shellFor s = reverse . takeWhile (/= '/') . reverse $ s



248 249 250
-- Given a root node, make a map from Id to parent Token.
-- This is used to populate parentMap in Parameters
getParentTree :: Token -> Map.Map Id Token
251 252 253 254 255
getParentTree t =
    snd . snd $ runState (doStackAnalysis pre post t) ([], Map.empty)
  where
    pre t = modify (first ((:) t))
    post t = do
256 257 258 259
        (x, map) <- get
        case x of
          _:rest -> case rest of []    -> put (rest, map)
                                 (x:_) -> put (rest, Map.insert (getId t) x map)
260

261 262
-- Given a root node, make a map from Id to Token
getTokenMap :: Token -> Map.Map Id Token
263 264 265 266 267 268
getTokenMap t =
    execState (doAnalysis f t) Map.empty
  where
    f t = modify (Map.insert (getId t) t)


269 270 271
-- Is this token in a quoting free context? (i.e. would variable expansion split)
-- True:  Assignments, [[ .. ]], here docs, already in double quotes
-- False: Regular words
272 273
isStrictlyQuoteFree = isQuoteFreeNode True

274 275 276 277 278
-- Like above, but also allow some cases where splitting may be desired.
-- True:  Like above + for loops
-- False: Like above
isQuoteFree = isQuoteFreeNode False

279 280 281 282 283 284 285 286 287 288

isQuoteFreeNode strict tree t =
    (isQuoteFreeElement t == Just True) ||
        head (mapMaybe isQuoteFreeContext (drop 1 $ getPath tree t) ++ [False])
  where
    -- Is this node self-quoting in itself?
    isQuoteFreeElement t =
        case t of
            T_Assignment {} -> return True
            T_FdRedirect {} -> return True
V
Vanessa McHale 已提交
289
            _               -> Nothing
290 291 292 293

    -- Are any subnodes inherently self-quoting?
    isQuoteFreeContext t =
        case t of
V
Vanessa McHale 已提交
294 295
            TC_Nullary _ DoubleBracket _    -> return True
            TC_Unary _ DoubleBracket _ _    -> return True
296
            TC_Binary _ DoubleBracket _ _ _ -> return True
V
Vanessa McHale 已提交
297 298 299 300 301 302 303 304 305
            TA_Sequence {}                  -> return True
            T_Arithmetic {}                 -> return True
            T_Assignment {}                 -> return True
            T_Redirecting {}                -> return False
            T_DoubleQuoted _ _              -> return True
            T_DollarDoubleQuoted _ _        -> return True
            T_CaseExpression {}             -> return True
            T_HereDoc {}                    -> return True
            T_DollarBraced {}               -> return True
306
            -- When non-strict, pragmatically assume it's desirable to split here
V
Vanessa McHale 已提交
307 308 309
            T_ForIn {}                      -> return (not strict)
            T_SelectIn {}                   -> return (not strict)
            _                               -> Nothing
310

311 312 313
-- Check if a token is a parameter to a certain command by name:
-- Example: isParamTo (parentMap params) "sed" t
isParamTo :: Map.Map Id Token -> String -> Token -> Bool
314 315 316 317
isParamTo tree cmd =
    go
  where
    go x = case Map.lookup (getId x) tree of
V
Vanessa McHale 已提交
318
                Nothing     -> False
319 320 321 322 323
                Just parent -> check parent
    check t =
        case t of
            T_SingleQuoted _ _ -> go t
            T_DoubleQuoted _ _ -> go t
V
Vanessa McHale 已提交
324
            T_NormalWord _ _   -> go t
325
            T_SimpleCommand {} -> isCommand t cmd
V
Vanessa McHale 已提交
326 327
            T_Redirecting {}   -> isCommand t cmd
            _                  -> False
328

329 330
-- Get the parent command (T_Redirecting) of a Token, if any.
getClosestCommand :: Map.Map Id Token -> Token -> Maybe Token
331
getClosestCommand tree t =
332
    findFirst findCommand $ getPath tree t
333
  where
334 335 336
    findCommand t =
        case t of
            T_Redirecting {} -> return True
V
Vanessa McHale 已提交
337 338
            T_Script {}      -> return False
            _                -> Nothing
339

340
-- Like above, if koala_man knew Haskell when starting this project.
V
Vidar Holen 已提交
341 342 343 344
getClosestCommandM t = do
    tree <- asks parentMap
    return $ getClosestCommand tree t

345
-- Is the token used as a command name (the first word in a T_SimpleCommand)?
346 347 348 349 350 351 352 353 354 355
usedAsCommandName tree token = go (getId token) (tail $ getPath tree token)
  where
    go currentId (T_NormalWord id [word]:rest)
        | currentId == getId word = go id rest
    go currentId (T_DoubleQuoted id [word]:rest)
        | currentId == getId word = go id rest
    go currentId (T_SimpleCommand _ _ (word:_):_)
        | currentId == getId word = True
    go _ _ = False

356
-- A list of the element and all its parents up to the root node.
357 358
getPath tree t = t :
    case Map.lookup (getId t) tree of
V
Vanessa McHale 已提交
359
        Nothing     -> []
360 361
        Just parent -> getPath tree parent

V
Vidar Holen 已提交
362 363 364 365 366 367
-- Version of the above taking the map from the current context
-- Todo: give this the name "getPath"
getPathM t = do
    map <- asks parentMap
    return $ getPath map t

368 369 370 371 372 373 374 375 376
isParentOf tree parent child =
    elem (getId parent) . map getId $ getPath tree child

parents params = getPath (parentMap params)

pathTo t = do
    parents <- reader parentMap
    return $ getPath parents t

377 378 379 380 381 382 383 384
-- Find the first match in a list where the predicate is Just True.
-- Stops if it's Just False and ignores Nothing.
findFirst :: (a -> Maybe Bool) -> [a] -> Maybe a
findFirst p l =
    case l of
        [] -> Nothing
        (x:xs) ->
            case p x of
V
Vanessa McHale 已提交
385
                Just True  -> return x
386
                Just False -> Nothing
V
Vanessa McHale 已提交
387
                Nothing    -> findFirst p xs
388

389 390 391 392 393 394 395 396 397
-- Check whether a word is entirely output from a single command
tokenIsJustCommandOutput t = case t of
    T_NormalWord id [T_DollarExpansion _ cmds] -> check cmds
    T_NormalWord id [T_DoubleQuoted _ [T_DollarExpansion _ cmds]] -> check cmds
    T_NormalWord id [T_Backticked _ cmds] -> check cmds
    T_NormalWord id [T_DoubleQuoted _ [T_Backticked _ cmds]] -> check cmds
    _ -> False
  where
    check [x] = not $ isOnlyRedirection x
V
Vanessa McHale 已提交
398
    check _   = False
399 400

-- TODO: Replace this with a proper Control Flow Graph
401
getVariableFlow params t =
402 403 404 405
    let (_, stack) = runState (doStackAnalysis startScope endScope t) []
    in reverse stack
  where
    startScope t =
406
        let scopeType = leadType params t
407 408 409 410 411
        in do
            when (scopeType /= NoneScope) $ modify (StackScope scopeType:)
            when (assignFirst t) $ setWritten t

    endScope t =
412
        let scopeType = leadType params t
413 414 415 416 417
        in do
            setRead t
            unless (assignFirst t) $ setWritten t
            when (scopeType /= NoneScope) $ modify (StackScopeEnd:)

V
Vanessa McHale 已提交
418
    assignFirst T_ForIn {}    = True
419
    assignFirst T_SelectIn {} = True
V
Vanessa McHale 已提交
420
    assignFirst _             = False
421 422

    setRead t =
423
        let read    = getReferencedVariables (parentMap params) t
424 425 426 427 428 429 430
        in mapM_ (\v -> modify (Reference v:)) read

    setWritten t =
        let written = getModifiedVariables t
        in mapM_ (\v -> modify (Assignment v:)) written


431
leadType params t =
432 433 434 435 436 437 438 439 440 441 442 443 444
    case t of
        T_DollarExpansion _ _  -> SubshellScope "$(..) expansion"
        T_Backticked _ _  -> SubshellScope "`..` expansion"
        T_Backgrounded _ _  -> SubshellScope "backgrounding &"
        T_Subshell _ _  -> SubshellScope "(..) group"
        T_CoProcBody _ _  -> SubshellScope "coproc"
        T_Redirecting {}  ->
            if fromMaybe False causesSubshell
            then SubshellScope "pipeline"
            else NoneScope
        _ -> NoneScope
  where
    parentPipeline = do
445
        parent <- Map.lookup (getId t) (parentMap params)
446 447
        case parent of
            T_Pipeline {} -> return parent
V
Vanessa McHale 已提交
448
            _             -> Nothing
449 450 451 452 453

    causesSubshell = do
        (T_Pipeline _ _ list) <- parentPipeline
        if length list <= 1
            then return False
454
            else if not $ hasLastpipe params
455 456 457 458 459 460 461 462 463 464 465
                then return True
                else return . not $ (getId . head $ reverse list) == getId t

getModifiedVariables t =
    case t of
        T_SimpleCommand _ vars [] ->
            concatMap (\x -> case x of
                                T_Assignment id _ name _ w  ->
                                    [(x, x, name, dataTypeFrom DataString w)]
                                _ -> []
                      ) vars
466
        c@T_SimpleCommand {} ->
467 468
            getModifiedVariableCommand c

469 470 471 472 473
        TA_Unary _ "++|" v@(TA_Variable _ name _)  ->
            [(t, v, name, DataString $ SourceFrom [v])]
        TA_Unary _ "|++" v@(TA_Variable _ name _)  ->
            [(t, v, name, DataString $ SourceFrom [v])]
        TA_Assignment _ op (TA_Variable _ name _) rhs -> maybeToList $ do
474 475 476
            guard $ op `elem` ["=", "*=", "/=", "%=", "+=", "-=", "<<=", ">>=", "&=", "^=", "|="]
            return (t, t, name, DataString $ SourceFrom [rhs])

477 478 479 480 481 482 483
        -- Count [[ -v foo ]] as an "assignment".
        -- This is to prevent [ -v foo ] being unassigned or unused.
        TC_Unary id _ "-v" token -> maybeToList $ do
            str <- fmap (takeWhile (/= '[')) $ -- Quoted index
                    flip getLiteralStringExt token $ \x ->
                case x of
                    T_Glob _ s -> return s -- Unquoted index
V
Vanessa McHale 已提交
484
                    _          -> Nothing
485 486

            guard . not . null $ str
V
hlints  
Vanessa McHale 已提交
487
            return (t, token, str, DataString SourceChecked)
488

489 490 491 492 493 494
        T_DollarBraced _ l -> maybeToList $ do
            let string = bracedString t
            let modifier = getBracedModifier string
            guard $ ":=" `isPrefixOf` modifier
            return (t, t, getBracedReference string, DataString $ SourceFrom [l])

495 496 497 498 499 500 501
        t@(T_FdRedirect _ ('{':var) op) -> -- {foo}>&2 modifies foo
            [(t, t, takeWhile (/= '}') var, DataString SourceInteger) | not $ isClosingFileOp op]

        t@(T_CoProc _ name _) ->
            [(t, t, fromMaybe "COPROC" name, DataArray SourceInteger)]

        --Points to 'for' rather than variable
502
        T_ForIn id str [] _ -> [(t, t, str, DataString SourceExternal)]
503 504 505 506 507 508
        T_ForIn id str words _ -> [(t, t, str, DataString $ SourceFrom words)]
        T_SelectIn id str words _ -> [(t, t, str, DataString $ SourceFrom words)]
        _ -> []

isClosingFileOp op =
    case op of
509 510
        T_IoDuplicate _ (T_GREATAND _) "-" -> True
        T_IoDuplicate _ (T_LESSAND  _) "-" -> True
V
Vanessa McHale 已提交
511
        _                                  -> False
512 513 514 515 516 517 518 519


-- Consider 'export/declare -x' a reference, since it makes the var available
getReferencedVariableCommand base@(T_SimpleCommand _ _ (T_NormalWord _ (T_Literal _ x:_):rest)) =
    case x of
        "export" -> if "f" `elem` flags
            then []
            else concatMap getReference rest
520 521 522
        "declare" -> if
                any (`elem` flags) ["x", "p"] &&
                    (not $ any (`elem` flags) ["f", "F"])
523 524
            then concatMap getReference rest
            else []
525 526 527 528
        "readonly" ->
            if any (`elem` flags) ["f", "p"]
            then []
            else concatMap getReference rest
529 530 531 532 533 534 535 536 537 538 539 540 541
        "trap" ->
            case rest of
                head:_ -> map (\x -> (head, head, x)) $ getVariablesFromLiteralToken head
                _ -> []
        _ -> []
  where
    getReference t@(T_Assignment _ _ name _ value) = [(t, t, name)]
    getReference t@(T_NormalWord _ [T_Literal _ name]) | not ("-" `isPrefixOf` name) = [(t, t, name)]
    getReference _ = []
    flags = map snd $ getAllFlags base

getReferencedVariableCommand _ = []

542 543 544 545 546 547 548 549
-- The function returns a tuple consisting of four items describing an assignment.
-- Given e.g. declare foo=bar
-- (
--   BaseCommand :: Token,     -- The command/structure assigning the variable, i.e. declare foo=bar
--   AssignmentToken :: Token, -- The specific part that assigns this variable, i.e. foo=bar
--   VariableName :: String,   -- The variable name, i.e. foo
--   VariableValue :: DataType -- A description of the value being assigned, i.e. "Literal string with value foo"
-- )
550 551 552 553
getModifiedVariableCommand base@(T_SimpleCommand _ _ (T_NormalWord _ (T_Literal _ x:_):rest)) =
   filter (\(_,_,s,_) -> not ("-" `isPrefixOf` s)) $
    case x of
        "read" ->
554 555 556 557
            let params = map getLiteral rest
                readArrayVars = getReadArrayVariables rest
            in
                catMaybes . (++ readArrayVars) . takeWhile isJust . reverse $ params
558 559 560
        "getopts" ->
            case rest of
                opts:var:_ -> maybeToList $ getLiteral var
V
Vanessa McHale 已提交
561
                _          -> []
562 563 564 565 566 567 568 569 570 571

        "let" -> concatMap letParamToLiteral rest

        "export" ->
            if "f" `elem` flags then [] else concatMap getModifierParamString rest

        "declare" -> if any (`elem` flags) ["F", "f", "p"] then [] else declaredVars
        "typeset" -> declaredVars

        "local" -> concatMap getModifierParamString rest
572 573 574 575
        "readonly" ->
            if any (`elem` flags) ["f", "p"]
            then []
            else concatMap getModifierParamString rest
576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599
        "set" -> maybeToList $ do
            params <- getSetParams rest
            return (base, base, "@", DataString $ SourceFrom params)

        "printf" -> maybeToList $ getPrintfVariable rest

        "mapfile" -> maybeToList $ getMapfileArray base rest
        "readarray" -> maybeToList $ getMapfileArray base rest

        _ -> []
  where
    flags = map snd $ getAllFlags base
    stripEquals s = let rest = dropWhile (/= '=') s in
        if rest == "" then "" else tail rest
    stripEqualsFrom (T_NormalWord id1 (T_Literal id2 s:rs)) =
        T_NormalWord id1 (T_Literal id2 (stripEquals s):rs)
    stripEqualsFrom (T_NormalWord id1 [T_DoubleQuoted id2 [T_Literal id3 s]]) =
        T_NormalWord id1 [T_DoubleQuoted id2 [T_Literal id3 (stripEquals s)]]
    stripEqualsFrom t = t

    declaredVars = concatMap (getModifierParam defaultType) rest
      where
        defaultType = if any (`elem` flags) ["a", "A"] then DataArray else DataString

600
    getLiteralOfDataType t d = do
601 602
        s <- getLiteralString t
        when ("-" `isPrefixOf` s) $ fail "argument"
603 604 605 606 607
        return (base, t, s, d)

    getLiteral t = getLiteralOfDataType t (DataString SourceExternal)

    getLiteralArray t = getLiteralOfDataType t (DataArray SourceExternal)
608 609 610 611 612

    getModifierParamString = getModifierParam DataString

    getModifierParam def t@(T_Assignment _ _ name _ value) =
        [(base, t, name, dataTypeFrom def value)]
613
    getModifierParam def t@T_NormalWord {} = maybeToList $ do
614 615 616 617 618 619 620 621 622 623 624 625 626 627 628
        name <- getLiteralString t
        guard $ isVariableName name
        return (base, t, name, def SourceDeclaration)
    getModifierParam _ _ = []

    letParamToLiteral token =
          if var == ""
            then []
            else [(base, token, var, DataString $ SourceFrom [stripEqualsFrom token])]
        where var = takeWhile isVariableChar $ dropWhile (`elem` "+-") $ concat $ oversimplify token

    getSetParams (t:_:rest) | getLiteralString t == Just "-o" = getSetParams rest
    getSetParams (t:rest) =
        let s = getLiteralString t in
            case s of
V
Vanessa McHale 已提交
629
                Just "--"    -> return rest
630
                Just ('-':_) -> getSetParams rest
V
Vanessa McHale 已提交
631
                _            -> return (t:fromMaybe [] (getSetParams rest))
632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648
    getSetParams [] = Nothing

    getPrintfVariable list = f $ map (\x -> (x, getLiteralString x)) list
      where
        f ((_, Just "-v") : (t, Just var) : _) = return (base, t, var, DataString $ SourceFrom list)
        f (_:rest) = f rest
        f [] = fail "not found"

    -- mapfile has some curious syntax allowing flags plus 0..n variable names
    -- where only the first non-option one is used if any. Here we cheat and
    -- just get the last one, if it's a variable name.
    getMapfileArray base arguments = do
        lastArg <- listToMaybe (reverse arguments)
        name <- getLiteralString lastArg
        guard $ isVariableName name
        return (base, lastArg, name, DataArray SourceExternal)

649 650 651 652 653
    -- get all the array variables used in read, e.g. read -a arr
    getReadArrayVariables args = do
        map (getLiteralArray . snd)
            (filter (\(x,_) -> getLiteralString x == Just "-a") (zip (args) (tail args)))

654 655 656 657 658 659 660 661 662
getModifiedVariableCommand _ = []

getIndexReferences s = fromMaybe [] $ do
    match <- matchRegex re s
    index <- match !!! 0
    return $ matchAllStrings variableNameRegex index
  where
    re = mkRegex "(\\[.*\\])"

663 664 665 666
prop_getOffsetReferences1 = getOffsetReferences ":bar" == ["bar"]
prop_getOffsetReferences2 = getOffsetReferences ":bar:baz" == ["bar", "baz"]
prop_getOffsetReferences3 = getOffsetReferences "[foo]:bar" == ["bar"]
prop_getOffsetReferences4 = getOffsetReferences "[foo]:bar:baz" == ["bar", "baz"]
V
Vidar Holen 已提交
667
getOffsetReferences mods = fromMaybe [] $ do
668
-- if mods start with [, then drop until ]
V
Vidar Holen 已提交
669
    match <- matchRegex re mods
670
    offsets <- match !!! 1
V
Vidar Holen 已提交
671 672
    return $ matchAllStrings variableNameRegex offsets
  where
673
    re = mkRegex "^(\\[.+\\])? *:([^-=?+].*)"
V
Vidar Holen 已提交
674

675
getReferencedVariables parents t =
676 677 678
    case t of
        T_DollarBraced id l -> let str = bracedString t in
            (t, t, getBracedReference str) :
V
Vidar Holen 已提交
679 680 681
                map (\x -> (l, l, x)) (
                    getIndexReferences str
                    ++ getOffsetReferences (getBracedModifier str))
682
        TA_Variable id name _ ->
683 684
            if isArithmeticAssignment t
            then []
685
            else [(t, t, name)]
686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710
        T_Assignment id mode str _ word ->
            [(t, t, str) | mode == Append] ++ specialReferences str t word

        TC_Unary id _ "-v" token -> getIfReference t token
        TC_Unary id _ "-R" token -> getIfReference t token
        TC_Binary id DoubleBracket op lhs rhs ->
            if isDereferencing op
            then concatMap (getIfReference t) [lhs, rhs]
            else []

        t@(T_FdRedirect _ ('{':var) op) -> -- {foo}>&- references and closes foo
            [(t, t, takeWhile (/= '}') var) | isClosingFileOp op]
        x -> getReferencedVariableCommand x
  where
    -- Try to reduce false positives for unused vars only referenced from evaluated vars
    specialReferences name base word =
        if name `elem` [
            "PS1", "PS2", "PS3", "PS4",
            "PROMPT_COMMAND"
          ]
        then
            map (\x -> (base, base, x)) $
                getVariablesFromLiteralToken word
        else []

711 712
    literalizer t = case t of
        T_Glob _ s -> return s    -- Also when parsed as globs
V
Vanessa McHale 已提交
713
        _          -> Nothing
714 715 716 717 718 719 720 721 722

    getIfReference context token = maybeToList $ do
            str <- getLiteralStringExt literalizer token
            guard . not $ null str
            when (isDigit $ head str) $ fail "is a number"
            return (context, token, getBracedReference str)

    isDereferencing = (`elem` ["-eq", "-ne", "-lt", "-le", "-gt", "-ge"])

723
    isArithmeticAssignment t = case getPath parents t of
724
        this: TA_Assignment _ "=" lhs _ :_ -> lhs == t
V
Vanessa McHale 已提交
725
        _                                  -> False
726

727 728 729 730 731
dataTypeFrom defaultType v = (case v of T_Array {} -> DataArray; _ -> defaultType) $ SourceFrom [v]


--- Command specific checks

732
-- Compare a command to a string: t `isCommand` "sed" (also matches /usr/bin/sed)
733
isCommand token str = isCommandMatch token (\cmd -> cmd  == str || ('/' : str) `isSuffixOf` cmd)
734 735

-- Compare a command to a literal. Like above, but checks full path.
736 737
isUnqualifiedCommand token str = isCommandMatch token (== str)

V
hlints  
Vanessa McHale 已提交
738 739
isCommandMatch token matcher = fromMaybe False $
    fmap matcher (getCommandName token)
740

741 742 743 744
-- Does this regex look like it was intended as a glob?
-- True:  *foo*
-- False: .*foo.*
isConfusedGlobRegex :: String -> Bool
745 746
isConfusedGlobRegex ('*':_) = True
isConfusedGlobRegex [x,'*'] | x /= '\\' = True
V
Vanessa McHale 已提交
747
isConfusedGlobRegex _       = False
748 749 750 751 752 753 754 755 756

isVariableStartChar x = x == '_' || isAsciiLower x || isAsciiUpper x
isVariableChar x = isVariableStartChar x || isDigit x
variableNameRegex = mkRegex "[_a-zA-Z][_a-zA-Z0-9]*"

prop_isVariableName1 = isVariableName "_fo123"
prop_isVariableName2 = not $ isVariableName "4"
prop_isVariableName3 = not $ isVariableName "test: "
isVariableName (x:r) = isVariableStartChar x && all isVariableChar r
V
Vanessa McHale 已提交
757
isVariableName _     = False
758 759 760 761 762 763 764 765 766 767 768 769 770

getVariablesFromLiteralToken token =
    getVariablesFromLiteral (fromJust $ getLiteralStringExt (const $ return " ") token)

-- Try to get referenced variables from a literal string like "$foo"
-- Ignores tons of cases like arithmetic evaluation and array indices.
prop_getVariablesFromLiteral1 =
    getVariablesFromLiteral "$foo${bar//a/b}$BAZ" == ["foo", "bar", "BAZ"]
getVariablesFromLiteral string =
    map (!! 0) $ matchAllSubgroups variableRegex string
  where
    variableRegex = mkRegex "\\$\\{?([A-Za-z0-9_]+)"

771
-- Get the variable name from an expansion like ${var:-foo}
772 773 774 775 776 777 778 779 780 781 782 783
prop_getBracedReference1 = getBracedReference "foo" == "foo"
prop_getBracedReference2 = getBracedReference "#foo" == "foo"
prop_getBracedReference3 = getBracedReference "#" == "#"
prop_getBracedReference4 = getBracedReference "##" == "#"
prop_getBracedReference5 = getBracedReference "#!" == "!"
prop_getBracedReference6 = getBracedReference "!#" == "#"
prop_getBracedReference7 = getBracedReference "!foo#?" == "foo"
prop_getBracedReference8 = getBracedReference "foo-bar" == "foo"
prop_getBracedReference9 = getBracedReference "foo:-bar" == "foo"
prop_getBracedReference10= getBracedReference "foo: -1" == "foo"
prop_getBracedReference11= getBracedReference "!os*" == ""
prop_getBracedReference12= getBracedReference "!os?bar**" == ""
784
prop_getBracedReference13= getBracedReference "foo[bar]" == "foo"
785 786 787 788 789
getBracedReference s = fromMaybe s $
    nameExpansion s `mplus` takeName noPrefix `mplus` getSpecial noPrefix `mplus` getSpecial s
  where
    noPrefix = dropPrefix s
    dropPrefix (c:rest) = if c `elem` "!#" then rest else c:rest
V
Vanessa McHale 已提交
790
    dropPrefix ""       = ""
791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806
    takeName s = do
        let name = takeWhile isVariableChar s
        guard . not $ null name
        return name
    getSpecial (c:_) =
        if c `elem` "*@#?-$!" then return [c] else fail "not special"
    getSpecial _ = fail "empty"

    nameExpansion ('!':rest) = do -- e.g. ${!foo*bar*}
        let suffix = dropWhile isVariableChar rest
        guard $ suffix /= rest -- e.g. ${!@}
        first <- suffix !!! 0
        guard $ first `elem` "*?"
        return ""
    nameExpansion _ = Nothing

807 808 809 810 811 812 813 814
prop_getBracedModifier1 = getBracedModifier "foo:bar:baz" == ":bar:baz"
prop_getBracedModifier2 = getBracedModifier "!var:-foo" == ":-foo"
prop_getBracedModifier3 = getBracedModifier "foo[bar]" == "[bar]"
getBracedModifier s = fromMaybe "" . listToMaybe $ do
    let var = getBracedReference s
    a <- dropModifier s
    dropPrefix var a
  where
V
Vanessa McHale 已提交
815
    dropPrefix [] t        = return t
816
    dropPrefix (a:b) (c:d) | a == c = dropPrefix b d
V
Vanessa McHale 已提交
817
    dropPrefix _ _         = []
818 819

    dropModifier (c:rest) | c `elem` "#!" = [rest, c:rest]
V
Vanessa McHale 已提交
820
    dropModifier x        = [x]
821

822 823 824 825 826 827 828 829
-- Useful generic functions.

-- Run an action in a Maybe (or do nothing).
-- Example:
-- potentially $ do
--   s <- getLiteralString cmd
--   guard $ s `elem` ["--recursive", "-r"]
--   return $ warn .. "Something something recursive"
830 831 832
potentially :: Monad m => Maybe (m ()) -> m ()
potentially = fromMaybe (return ())

833
-- Get element 0 or a default. Like `head` but safe.
834
headOrDefault _ (a:_) = a
V
Vanessa McHale 已提交
835
headOrDefault def _   = def
836

837
--- Get element n of a list, or Nothing. Like `!!` but safe.
838 839
(!!!) list i =
    case drop i list of
V
Vanessa McHale 已提交
840
        []    -> Nothing
841 842
        (r:_) -> Just r

V
Vidar Holen 已提交
843 844 845 846
-- Run a command if the shell is in the given list
whenShell l c = do
    shell <- asks shellType
    when (shell `elem` l ) c
847 848


849
filterByAnnotation asSpec params =
850 851
    filter (not . shouldIgnore)
  where
852
    token = asScript asSpec
853 854
    shouldIgnore note =
        any (shouldIgnoreFor (getCode note)) $
N
Ng Zhi An 已提交
855
            getPath parents (T_Bang $ tcId note)
856 857 858 859
    shouldIgnoreFor num (T_Annotation _ anns _) =
        any hasNum anns
      where
        hasNum (DisableComment ts) = num == ts
V
Vanessa McHale 已提交
860
        hasNum _                   = False
861
    shouldIgnoreFor _ T_Include {} = not $ asCheckSourced asSpec
862
    shouldIgnoreFor _ _ = False
863
    parents = parentMap params
N
Ng Zhi An 已提交
864
    getCode = cCode . tcComment
865

866 867 868 869
-- Is this a ${#anything}, to get string length or array count?
isCountingReference (T_DollarBraced id token) =
    case concat $ oversimplify token of
        '#':_ -> True
V
Vanessa McHale 已提交
870
        _     -> False
871 872 873 874 875 876
isCountingReference _ = False

-- FIXME: doesn't handle ${a:+$var} vs ${a:+"$var"}
isQuotedAlternativeReference t =
    case t of
        T_DollarBraced _ _ ->
V
Vidar Holen 已提交
877
            getBracedModifier (bracedString t) `matches` re
878
        _ -> False
V
Vidar Holen 已提交
879 880
  where
    re = mkRegex "(^|\\]):?\\+"
881

882
-- getGnuOpts "erd:u:" will parse a SimpleCommand like
V
Vidar Holen 已提交
883 884 885 886 887
--     read -re -d : -u 3 bar
-- into
--     Just [("r", -re), ("e", -re), ("d", :), ("u", 3), ("", bar)]
-- where flags with arguments map to arguments, while others map to themselves.
-- Any unrecognized flag will result in Nothing.
888 889 890 891
getGnuOpts = getOpts getAllFlags
getBsdOpts = getOpts getLeadingFlags
getOpts :: (Token -> [(Token, String)]) -> String -> Token -> Maybe [(String, Token)]
getOpts flagTokenizer string cmd = process flags
V
Vidar Holen 已提交
892
  where
893
    flags = flagTokenizer cmd
V
Vidar Holen 已提交
894
    flagList (c:':':rest) = ([c], True) : flagList rest
V
Vanessa McHale 已提交
895 896
    flagList (c:rest)     = ([c], False) : flagList rest
    flagList []           = []
V
Vidar Holen 已提交
897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913
    flagMap = Map.fromList $ ("", False) : flagList string

    process [] = return []
    process [(token, flag)] = do
        takesArg <- Map.lookup flag flagMap
        guard $ not takesArg
        return [(flag, token)]
    process ((token1, flag1):rest2@((token2, flag2):rest)) = do
        takesArg <- Map.lookup flag1 flagMap
        if takesArg
            then do
                guard $ flag2 == ""
                more <- process rest
                return $ (flag1, token2) : more
            else do
                more <- process rest2
                return $ (flag1, token1) : more
914

915
return []
916
runTests =  $( [| $(forAllProperties) (quickCheckWithResult (stdArgs { maxSuccess = 1 }) ) |])