Parser.hs 114.2 KB
Newer Older
V
Vidar Holen 已提交
1
{-
2 3
    Copyright 2012-2015 Vidar Holen

V
Vidar Holen 已提交
4
    This file is part of ShellCheck.
M
Mike Frysinger 已提交
5
    https://www.shellcheck.net
V
Vidar Holen 已提交
6 7

    ShellCheck is free software: you can redistribute it and/or modify
V
Vidar Holen 已提交
8
    it under the terms of the GNU General Public License as published by
V
Vidar Holen 已提交
9 10 11 12 13 14
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    ShellCheck is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
V
Vidar Holen 已提交
15
    GNU General Public License for more details.
V
Vidar Holen 已提交
16

17
    You should have received a copy of the GNU General Public License
M
Mike Frysinger 已提交
18
    along with this program.  If not, see <https://www.gnu.org/licenses/>.
V
Vidar Holen 已提交
19
-}
V
Vidar Holen 已提交
20 21 22
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE NoMonomorphismRestriction #-}
{-# LANGUAGE FlexibleContexts #-}
23
{-# LANGUAGE MultiWayIf #-}
24
module ShellCheck.Parser (parseScript, runTests) where
V
Vidar Holen 已提交
25

V
Vidar Holen 已提交
26
import ShellCheck.AST
27
import ShellCheck.ASTLib
28
import ShellCheck.Data
29
import ShellCheck.Interface
30

V
Vidar Holen 已提交
31
import Control.Applicative ((<*), (*>))
V
Vidar Holen 已提交
32
import Control.Monad
33
import Control.Monad.Identity
34
import Control.Monad.Trans
V
Vidar Holen 已提交
35
import Data.Char
36
import Data.Functor
37
import Data.List (isPrefixOf, isInfixOf, isSuffixOf, partition, sortBy, intercalate, nub)
V
Vidar Holen 已提交
38
import Data.Maybe
39
import Data.Monoid
40 41
import Debug.Trace
import GHC.Exts (sortWith)
V
Vidar Holen 已提交
42 43
import Prelude hiding (readList)
import System.IO
44
import Text.Parsec hiding (runParser, (<?>))
45
import Text.Parsec.Error
46 47 48 49 50
import Text.Parsec.Pos
import qualified Control.Monad.Reader as Mr
import qualified Control.Monad.State as Ms
import qualified Data.Map as Map

R
Rodrigo Setti 已提交
51
import Test.QuickCheck.All (quickCheckAll)
V
Vidar Holen 已提交
52

53
type SCBase m = Mr.ReaderT (Environment m) (Ms.StateT SystemState m)
54 55 56
type SCParser m v = ParsecT String UserState (SCBase m) v

backslash :: Monad m => SCParser m Char
V
Vidar Holen 已提交
57
backslash = char '\\'
V
Vidar Holen 已提交
58 59 60 61 62 63
linefeed :: Monad m => SCParser m Char
linefeed = do
    optional carriageReturn
    c <- char '\n'
    readPendingHereDocs
    return c
64 65
singleQuote = char '\''
doubleQuote = char '"'
V
Vidar Holen 已提交
66 67
variableStart = upper <|> lower <|> oneOf "_"
variableChars = upper <|> lower <|> digit <|> oneOf "_"
68 69 70 71
-- Chars to allow in function names
functionChars = variableChars <|> oneOf ":+?-./^"
-- Chars to allow in functions using the 'function' keyword
extendedFunctionChars = functionChars <|> oneOf "[]*=!"
V
Vidar Holen 已提交
72
specialVariable = oneOf "@*#?-$!"
73
paramSubSpecialChars = oneOf "/:+-=%"
74
quotableChars = "|&;<>()\\ '\t\n\r\xA0" ++ doubleQuotableChars
75
quotable = almostSpace <|> oneOf quotableChars
V
Vidar Holen 已提交
76
bracedQuotable = oneOf "}\"$`'"
77
doubleQuotableChars = "\\\"$`"
78
doubleQuotable = oneOf doubleQuotableChars
V
Vidar Holen 已提交
79
whitespace = oneOf " \t" <|> carriageReturn <|> almostSpace <|> linefeed
V
Vidar Holen 已提交
80
linewhitespace = oneOf " \t" <|> almostSpace
81

82 83
suspectCharAfterQuotes = variableChars <|> char '%'

84 85
extglobStartChars = "?*@!+"
extglobStart = oneOf extglobStartChars
V
Vidar Holen 已提交
86

87 88
unicodeDoubleQuotes = "\x201C\x201D\x2033\x2036"
unicodeSingleQuotes = "\x2018\x2019"
89

90
prop_spacing = isOk spacing "  \\\n # Comment"
V
Vidar Holen 已提交
91
spacing = do
92
    x <- many (many1 linewhitespace <|> try (string "\\\n" >> return ""))
V
Vidar Holen 已提交
93 94 95
    optional readComment
    return $ concat x

96 97
spacing1 = do
    spacing <- spacing
V
Vidar Holen 已提交
98
    when (null spacing) $ fail "Expected whitespace"
99 100
    return spacing

101 102 103
prop_allspacing = isOk allspacing "#foo"
prop_allspacing2 = isOk allspacing " #foo\n # bar\n#baz\n"
prop_allspacing3 = isOk allspacing "#foo\n#bar\n#baz\n"
V
Vidar Holen 已提交
104
allspacing = do
105 106 107 108 109 110 111 112 113 114
    s <- spacing
    more <- option False (linefeed >> return True)
    if more then do
        rest <- allspacing
        return $ s ++ "\n" ++ rest
      else
        return s

allspacingOrFail = do
    s <- allspacing
V
Vidar Holen 已提交
115
    when (null s) $ fail "Expected whitespace"
V
Vidar Holen 已提交
116

117
readUnicodeQuote = do
118
    start <- startSpan
119
    c <- oneOf (unicodeSingleQuotes ++ unicodeDoubleQuotes)
120 121
    id <- endSpan start
    parseProblemAtId id WarningC 1110 "This is a unicode quote. Delete and retype it (or quote to make literal)."
122
    return $ T_Literal id [c]
V
Vidar Holen 已提交
123

V
Vidar Holen 已提交
124
carriageReturn = do
V
Vidar Holen 已提交
125
    parseNote ErrorC 1017 "Literal carriage return. Run script through tr -d '\\r' ."
V
Vidar Holen 已提交
126 127
    char '\r'

V
Vidar Holen 已提交
128 129 130 131 132 133 134 135 136 137
almostSpace =
    choice [
        check '\xA0' "unicode non-breaking space",
        check '\x200B' "unicode zerowidth space"
    ]
  where
    check c name = do
        parseNote ErrorC 1018 $ "This is a " ++ name ++ ". Delete and retype it."
        char c
        return ' '
138

V
Vidar Holen 已提交
139
--------- Message/position annotation on top of user state
V
Vidar Holen 已提交
140
data Note = Note Id Severity Code String deriving (Show, Eq)
141
data ParseNote = ParseNote SourcePos SourcePos Severity Code String deriving (Show, Eq)
142 143 144 145 146
data Context =
        ContextName SourcePos String
        | ContextAnnotation [Annotation]
        | ContextSource String
    deriving (Show)
147

V
Vidar Holen 已提交
148
data HereDocContext =
149
        HereDocPending Token [Context] -- on linefeed, read this T_HereDoc
V
Vidar Holen 已提交
150 151
    deriving (Show)

152 153
data UserState = UserState {
    lastId :: Id,
N
Ng Zhi An 已提交
154
    positionMap :: Map.Map Id (SourcePos, SourcePos),
V
Vidar Holen 已提交
155 156 157
    parseNotes :: [ParseNote],
    hereDocMap :: Map.Map Id [Token],
    pendingHereDocs :: [HereDocContext]
158 159 160 161
}
initialUserState = UserState {
    lastId = Id $ -1,
    positionMap = Map.empty,
V
Vidar Holen 已提交
162 163 164
    parseNotes = [],
    hereDocMap = Map.empty,
    pendingHereDocs = []
165
}
V
Vidar Holen 已提交
166

167
codeForParseNote (ParseNote _ _ _ code _) = code
V
Vidar Holen 已提交
168
noteToParseNote map (Note id severity code message) =
169
        ParseNote pos pos severity code message
V
Vidar Holen 已提交
170 171
    where
        pos = fromJust $ Map.lookup id map
172

173
getLastId = lastId <$> getState
174

175
getNextIdBetween startPos endPos = do
176 177
    state <- getState
    let newId = incId (lastId state)
178
    let newMap = Map.insert newId (startPos, endPos) (positionMap state)
179 180 181 182
    putState $ state {
        lastId = newId,
        positionMap = newMap
    }
183
    return newId
R
Rodrigo Setti 已提交
184
  where incId (Id n) = Id $ n+1
185

186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208
getNextIdSpanningTokens startTok endTok = do
    (start, _) <- getSpanForId (getId startTok)
    (_, end)   <- getSpanForId (getId endTok)
    getNextIdBetween start end

-- Get an ID starting from the first token of the list, and ending after the last
getNextIdSpanningTokenList list =
    if null list
    then do
        pos <- getPosition
        getNextIdBetween pos pos
    else
        getNextIdSpanningTokens (head list) (last list)

-- Get the span covered by an id
getSpanForId :: Monad m => Id -> SCParser m (SourcePos, SourcePos)
getSpanForId id =
    Map.findWithDefault (error "Internal error: no position for id. Please report!") id <$>
        getMap

-- Create a new id with the same span as an existing one
getNewIdFor :: Monad m => Id -> SCParser m Id
getNewIdFor id = getSpanForId id >>= uncurry getNextIdBetween
209

210 211 212 213 214 215
data IncompleteInterval = IncompleteInterval SourcePos

startSpan = IncompleteInterval <$> getPosition

endSpan (IncompleteInterval start) = do
    endPos <- getPosition
216
    id <- getNextIdBetween start endPos
217 218
    return id

V
Vidar Holen 已提交
219 220 221 222 223 224 225 226 227
addToHereDocMap id list = do
    state <- getState
    let map = hereDocMap state
    putState $ state {
        hereDocMap = Map.insert id list map
    }

addPendingHereDoc t = do
    state <- getState
228
    context <- getCurrentContexts
V
Vidar Holen 已提交
229 230
    let docs = pendingHereDocs state
    putState $ state {
231
        pendingHereDocs = HereDocPending t context : docs
V
Vidar Holen 已提交
232 233 234 235
    }

popPendingHereDocs = do
    state <- getState
236
    let pending = pendingHereDocs state
V
Vidar Holen 已提交
237
    putState $ state {
238
        pendingHereDocs = []
V
Vidar Holen 已提交
239
    }
240
    return . reverse $ pendingHereDocs state
V
Vidar Holen 已提交
241

242 243
getMap = positionMap <$> getState
getParseNotes = parseNotes <$> getState
V
Vidar Holen 已提交
244

245
addParseNote n = do
246
    irrelevant <- shouldIgnoreCode (codeForParseNote n)
R
Rodrigo Setti 已提交
247
    unless irrelevant $ do
248 249 250 251
        state <- getState
        putState $ state {
            parseNotes = n : parseNotes state
        }
252 253 254

shouldIgnoreCode code = do
    context <- getCurrentContexts
255 256
    checkSourced <- Mr.asks checkSourced
    return $ any (disabling checkSourced) context
257
  where
258 259 260 261 262
    disabling checkSourced item =
        case item of
            ContextAnnotation list -> any disabling' list
            ContextSource _ -> not $ checkSourced
            _ -> False
263
    disabling' (DisableComment n) = code == n
264
    disabling' _ = False
265

266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282
shouldFollow file = do
    context <- getCurrentContexts
    if any isThisFile context
      then return False
      else
        if length (filter isSource context) >= 100
          then do
            parseProblem ErrorC 1092 "Stopping at 100 'source' frames :O"
            return False
          else
            return True
  where
    isSource (ContextSource _) = True
    isSource _ = False
    isThisFile (ContextSource name) | name == file = True
    isThisFile _= False

283 284 285 286 287 288 289 290 291 292 293 294
getSourceOverride = do
    context <- getCurrentContexts
    return . msum . map findFile $ takeWhile isSameFile context
  where
    isSameFile (ContextSource _) = False
    isSameFile _ = True

    findFile (ContextAnnotation list) = msum $ map getFile list
    findFile _ = Nothing
    getFile (SourceOverride str) = Just str
    getFile _ = Nothing

V
Vidar Holen 已提交
295
-- Store potential parse problems outside of parsec
296 297 298 299 300 301 302 303 304 305

data SystemState = SystemState {
    contextStack :: [Context],
    parseProblems :: [ParseNote]
}
initialSystemState = SystemState {
    contextStack = [],
    parseProblems = []
}

306 307
data Environment m = Environment {
    systemInterface :: SystemInterface m,
308 309
    checkSourced :: Bool,
    shellTypeOverride :: Maybe Shell
310 311
}

V
Vidar Holen 已提交
312
parseProblem level code msg = do
V
Vidar Holen 已提交
313
    pos <- getPosition
V
Vidar Holen 已提交
314
    parseProblemAt pos level code msg
V
Vidar Holen 已提交
315

316 317
setCurrentContexts c = Ms.modify (\state -> state { contextStack = c })
getCurrentContexts = contextStack <$> Ms.get
318 319 320 321 322 323 324

popContext = do
    v <- getCurrentContexts
    if not $ null v
        then do
            let (a:r) = v
            setCurrentContexts r
V
Vidar Holen 已提交
325
            return $ Just a
326
        else
V
Vidar Holen 已提交
327
            return Nothing
328 329 330 331 332

pushContext c = do
    v <- getCurrentContexts
    setCurrentContexts (c:v)

333
parseProblemAtWithEnd start end level code msg = do
334
    irrelevant <- shouldIgnoreCode code
R
Rodrigo Setti 已提交
335
    unless irrelevant $
336
        addParseProblem note
337
  where
338 339
    note = ParseNote start end level code msg

340 341 342 343 344
addParseProblem note =
    Ms.modify (\state -> state {
        parseProblems = note:parseProblems state
    })

345
parseProblemAt pos = parseProblemAtWithEnd pos pos
V
Vidar Holen 已提交
346

347 348
parseProblemAtId :: Monad m => Id -> Severity -> Integer -> String -> SCParser m ()
parseProblemAtId id level code msg = do
349
    (start, end) <- getSpanForId id
N
Ng Zhi An 已提交
350
    parseProblemAtWithEnd start end level code msg
351

352
-- Store non-parse problems inside
V
Vidar Holen 已提交
353

V
Vidar Holen 已提交
354
parseNote c l a = do
V
Vidar Holen 已提交
355
    pos <- getPosition
V
Vidar Holen 已提交
356
    parseNoteAt pos c l a
V
Vidar Holen 已提交
357

358
parseNoteAt pos c l a = addParseNote $ ParseNote pos pos c l a
359 360 361
parseNoteAtId id c l a = do
    (start, end) <- getSpanForId id
    addParseNote $ ParseNote start end c l a
362 363

parseNoteAtWithEnd start end c l a = addParseNote $ ParseNote start end c l a
V
Vidar Holen 已提交
364 365 366 367 368 369 370

--------- Convenient combinators
thenSkip main follow = do
    r <- main
    optional follow
    return r

R
Rodrigo Setti 已提交
371
unexpecting s p = try $
372
    (try p >> fail ("Unexpected " ++ s)) <|> return ()
373

374
notFollowedBy2 = unexpecting ""
375

R
Rodrigo Setti 已提交
376
reluctantlyTill p end =
377
    (lookAhead (void (try end) <|> eof) >> return []) <|> do
V
Vidar Holen 已提交
378 379 380 381 382 383
        x <- p
        more <- reluctantlyTill p end
        return $ x:more
      <|> return []

reluctantlyTill1 p end = do
384
    notFollowedBy2 end
V
Vidar Holen 已提交
385 386 387 388
    x <- p
    more <- reluctantlyTill p end
    return $ x:more

R
Rodrigo Setti 已提交
389 390
attempting rest branch =
    (try branch >> rest) <|> rest
V
Vidar Holen 已提交
391

V
Vidar Holen 已提交
392 393
orFail parser errorAction =
    try parser <|> (errorAction >>= fail)
V
Vidar Holen 已提交
394

395 396
-- Construct a node with a parser, e.g. T_Literal `withParser` (readGenericLiteral ",")
withParser node parser = do
397
    start <- startSpan
398
    contents <- parser
399
    id <- endSpan start
400 401
    return $ node id contents

V
Vidar Holen 已提交
402 403
wasIncluded p = option False (p >> return True)

R
Rodrigo Setti 已提交
404
acceptButWarn parser level code note =
V
Vidar Holen 已提交
405
    optional $ try (do
406 407
        pos <- getPosition
        parser
V
Vidar Holen 已提交
408
        parseProblemAt pos level code note
409 410
      )

411 412 413 414 415 416 417 418 419 420
parsecBracket before after op = do
    val <- before
    (op val <* optional (after val)) <|> (after val *> fail "")

swapContext contexts p =
    parsecBracket (getCurrentContexts <* setCurrentContexts contexts)
                  setCurrentContexts
                  (const p)

withContext entry p = parsecBracket (pushContext entry) (const popContext) (const p)
421

422 423 424 425
called s p = do
    pos <- getPosition
    withContext (ContextName pos s) p

R
Rodrigo Setti 已提交
426 427
withAnnotations anns =
    withContext (ContextAnnotation anns)
428

R
Rodrigo Setti 已提交
429 430
readConditionContents single =
    readCondContents `attempting` lookAhead (do
431
                                pos <- getPosition
S
Stefan Knudsen 已提交
432
                                s <- readVariableName
V
Vidar Holen 已提交
433
                                when (s `elem` commonCommands) $
434
                                    parseProblemAt pos WarningC 1014 "Use 'if cmd; then ..' to check exit code, or 'if [[ $(cmd) == .. ]]' to check output.")
435

436
  where
437 438 439 440 441 442 443 444 445 446
    spacingOrLf = condSpacing True
    condSpacing required = do
        pos <- getPosition
        space <- allspacing
        when (required && null space) $
            parseProblemAt pos ErrorC 1035 "You are missing a required space here."
        when (single && '\n' `elem` space) $
            parseProblemAt pos ErrorC 1080 "When breaking lines in [ ], you need \\ before the linefeed."
        return space

V
Vidar Holen 已提交
447
    typ = if single then SingleBracket else DoubleBracket
448
    readCondBinaryOp = try $ do
V
Vidar Holen 已提交
449
        optional guardArithmetic
450
        op <- getOp
451
        spacingOrLf
452
        return op
453
      where
454 455 456
        flaglessOps = [ "==", "!=", "<=", ">=", "=~", ">", "<", "=" ]

        getOp = do
457
            start <- startSpan
458
            op <- readRegularOrEscaped anyOp
459
            id <- endSpan start
460 461 462 463 464
            return $ TC_Binary id typ op

        anyOp = flagOp <|> flaglessOp <|> fail
                    "Expected comparison operator (don't wrap commands in []/[[]])"
        flagOp = try $ do
465
            s <- readOp
V
Vidar Holen 已提交
466
            when (s == "-a" || s == "-o") $ fail "Unexpected operator"
467 468 469
            return s
        flaglessOp =
            choice $ map (try . string) flaglessOps
470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485

        -- hacks to read quoted operators without having to read a shell word
    readEscaped p = try $ withEscape <|> withQuotes
      where
        withEscape = do
            char '\\'
            escaped <$> p
        withQuotes = do
            c <- oneOf "'\""
            s <- p
            char c
            return $ escaped s
        escaped s = if any (`elem` s) "<>()" then '\\':s else s

    readRegularOrEscaped p = readEscaped p <|> p

486

V
Vidar Holen 已提交
487
    guardArithmetic = do
488
        try . lookAhead $ void (oneOf "+*/%") <|> void (string "- ")
V
Vidar Holen 已提交
489 490 491 492 493
        parseProblem ErrorC 1076 $
            if single
            then "Trying to do math? Use e.g. [ $((i/2+7)) -ge 18 ]."
            else "Trying to do math? Use e.g. [[ $((i/2+7)) -ge 18 ]]."

494 495 496
    readCondUnaryExp = do
      op <- readCondUnaryOp
      pos <- getPosition
497
      liftM op readCondWord `orFail` do
V
Vidar Holen 已提交
498 499
          parseProblemAt pos ErrorC 1019 "Expected this to be an argument to the unary condition."
          return "Expected an argument for the unary operator"
500

501
    readCondUnaryOp = try $ do
502
        start <- startSpan
503
        s <- readOp
504
        id <- endSpan start
505
        spacingOrLf
506 507 508
        return $ TC_Unary id typ s

    readOp = try $ do
509 510
        char '-' <|> weirdDash
        s <- many1 letter <|> fail "Expected a test operator"
511
        return ('-':s)
512

513 514 515 516 517 518 519
    weirdDash = do
        pos <- getPosition
        oneOf "\x058A\x05BE\x2010\x2011\x2012\x2013\x2014\x2015\xFE63\xFF0D"
        parseProblemAt pos ErrorC 1100
            "This is a unicode dash. Delete and retype as ASCII minus."
        return '-'

520
    readCondWord = do
R
Rodrigo Setti 已提交
521
        notFollowedBy2 (try (spacing >> string "]"))
522 523
        x <- readNormalWord
        pos <- getPosition
524
        when (endedWith "]" x && notArrayIndex x) $ do
V
Vidar Holen 已提交
525
            parseProblemAt pos ErrorC 1020 $
V
Vidar Holen 已提交
526
                "You need a space before the " ++ (if single then "]" else "]]") ++ "."
527
            fail "Missing space before ]"
V
Vidar Holen 已提交
528
        when (single && endedWith ")" x) $ do
R
Rodrigo Setti 已提交
529
            parseProblemAt pos ErrorC 1021
V
Vidar Holen 已提交
530
                "You need a space before the \\)"
531
            fail "Missing space before )"
532
        void spacing
533
        return x
V
Vidar Holen 已提交
534
      where endedWith str (T_NormalWord id s@(_:_)) =
R
Rodrigo Setti 已提交
535 536
                case last s of T_Literal id s -> str `isSuffixOf` s
                               _ -> False
V
Vidar Holen 已提交
537
            endedWith _ _ = False
538 539
            notArrayIndex (T_NormalWord id s@(_:T_Literal _ t:_)) = t /= "["
            notArrayIndex _ = True
540

541
    readCondAndOp = readAndOrOp TC_And "&&" False <|> readAndOrOp TC_And "-a" True
V
Vidar Holen 已提交
542

543
    readCondOrOp = do
V
Vidar Holen 已提交
544
        optional guardArithmetic
545
        readAndOrOp TC_Or "||" False <|> readAndOrOp TC_Or "-o" True
546

547
    readAndOrOp node op requiresSpacing = do
548
        optional $ lookAhead weirdDash
549 550 551
        start <- startSpan
        x <- try $ string op
        id <- endSpan start
552
        condSpacing requiresSpacing
553
        return $ node id typ x
554

555
    readCondNullaryOrBinary = do
556
      start <- startSpan
557 558 559
      x <- readCondWord `attempting` (do
              pos <- getPosition
              lookAhead (char '[')
V
Vidar Holen 已提交
560
              parseProblemAt pos ErrorC 1026 $ if single
561 562
                  then "If grouping expressions inside [..], use \\( ..\\)."
                  else "If grouping expressions inside [[..]], use ( .. )."
563
            )
564
      id <- endSpan start
565 566
      (do
            pos <- getPosition
567
            isRegex <- regexOperatorAhead
568
            op <- readCondBinaryOp
569 570
            y <- if isRegex
                    then readRegex
R
Rodrigo Setti 已提交
571
                    else  readCondWord <|> (parseProblemAt pos ErrorC 1027 "Expected another argument for this operator." >> mzero)
572
            return (x `op` y)
573 574 575 576 577 578 579 580 581 582
          ) <|> ( do
            checkTrailingOp x
            return $ TC_Nullary id typ x
          )

    checkTrailingOp x = fromMaybe (return ()) $ do
        (T_Literal id str) <- getTrailingUnquotedLiteral x
        trailingOp <- listToMaybe (filter (`isSuffixOf` str) binaryTestOps)
        return $ parseProblemAtId id ErrorC 1108 $
            "You need a space before and after the " ++ trailingOp ++ " ."
583 584

    readCondGroup = do
585
        start <- startSpan
586 587 588 589 590 591 592 593 594 595
        pos <- getPosition
        lparen <- try $ readRegularOrEscaped (string "(")
        when (single && lparen == "(") $
            singleWarning pos
        when (not single && lparen == "\\(") $
            doubleWarning pos
        condSpacing single
        x <- readCondContents
        cpos <- getPosition
        rparen <- readRegularOrEscaped (string ")")
596
        id <- endSpan start
597 598 599 600 601 602 603
        condSpacing single
        when (single && rparen == ")") $
            singleWarning cpos
        when (not single && rparen == "\\)") $
            doubleWarning cpos
        return $ TC_Group id typ x

604
      where
605 606 607 608 609
        singleWarning pos =
            parseProblemAt pos ErrorC 1028 "In [..] you have to escape \\( \\) or preferably combine [..] expressions."
        doubleWarning pos =
            parseProblemAt pos ErrorC 1029 "In [[..]] you shouldn't escape ( or )."

610

611
    -- Currently a bit of a hack since parsing rules are obscure
R
Rodrigo Setti 已提交
612
    regexOperatorAhead = lookAhead (do
613
        try (string "=~") <|> try (string "~=")
V
Vidar Holen 已提交
614
        return True)
615
          <|> return False
V
Vidar Holen 已提交
616
    readRegex = called "regex" $ do
617
        start <- startSpan
618
        parts <- many1 readPart
619
        id <- endSpan start
620
        void spacing
621 622
        return $ T_NormalWord id parts
      where
623 624 625 626 627 628 629 630 631
        readPart = choice [
            readGroup,
            readSingleQuoted,
            readDoubleQuoted,
            readDollarExpression,
            readNormalLiteral "( ",
            readPipeLiteral,
            readGlobLiteral
            ]
632
        readGlobLiteral = do
633
            start <- startSpan
V
Vidar Holen 已提交
634
            s <- extglobStart <|> oneOf "{}[]$"
635
            id <- endSpan start
V
Vidar Holen 已提交
636
            return $ T_Literal id [s]
V
Vidar Holen 已提交
637
        readGroup = called "regex grouping" $ do
638
            start <- startSpan
639
            char '('
640
            parts <- many (readPart <|> readRegexLiteral)
641
            char ')'
642
            id <- endSpan start
V
Vidar Holen 已提交
643 644
            return $ T_NormalWord id parts
        readRegexLiteral = do
645
            start <- startSpan
V
Vidar Holen 已提交
646
            str <- readGenericLiteral1 (singleQuote <|> doubleQuotable <|> oneOf "()")
647
            id <- endSpan start
V
Vidar Holen 已提交
648
            return $ T_Literal id str
649
        readPipeLiteral = do
650
            start <- startSpan
651
            str <- string "|"
652
            id <- endSpan start
653
            return $ T_Literal id str
654

655 656
    readCondTerm = do
        term <- readCondNot <|> readCondExpr
657
        condSpacing False
658 659
        return term

660
    readCondNot = do
661
        start <- startSpan
662
        char '!'
663
        id <- endSpan start
664
        spacingOrLf
665
        expr <- readCondExpr
666
        return $ TC_Unary id typ "!" expr
667

668
    readCondExpr =
669
      readCondGroup <|> readCondUnaryExp <|> readCondNullaryOrBinary
670 671 672

    readCondOr = chainl1 readCondAnd readCondAndOp
    readCondAnd = chainl1 readCondTerm readCondOrOp
673
    readCondContents = readCondOr
674 675


676 677 678 679 680 681 682 683 684
prop_a1 = isOk readArithmeticContents " n++ + ++c"
prop_a2 = isOk readArithmeticContents "$N*4-(3,2)"
prop_a3 = isOk readArithmeticContents "n|=2<<1"
prop_a4 = isOk readArithmeticContents "n &= 2 **3"
prop_a5 = isOk readArithmeticContents "1 |= 4 && n >>= 4"
prop_a6 = isOk readArithmeticContents " 1 | 2 ||3|4"
prop_a7 = isOk readArithmeticContents "3*2**10"
prop_a8 = isOk readArithmeticContents "3"
prop_a9 = isOk readArithmeticContents "a^!-b"
V
Vidar Holen 已提交
685 686 687 688 689 690 691 692 693
prop_a10= isOk readArithmeticContents "! $?"
prop_a11= isOk readArithmeticContents "10#08 * 16#f"
prop_a12= isOk readArithmeticContents "\"$((3+2))\" + '37'"
prop_a13= isOk readArithmeticContents "foo[9*y+x]++"
prop_a14= isOk readArithmeticContents "1+`echo 2`"
prop_a15= isOk readArithmeticContents "foo[`echo foo | sed s/foo/4/g` * 3] + 4"
prop_a16= isOk readArithmeticContents "$foo$bar"
prop_a17= isOk readArithmeticContents "i<(0+(1+1))"
prop_a18= isOk readArithmeticContents "a?b:c"
694
prop_a19= isOk readArithmeticContents "\\\n3 +\\\n  2"
695 696
prop_a20= isOk readArithmeticContents "a ? b ? c : d : e"
prop_a21= isOk readArithmeticContents "a ? b : c ? d : e"
697
prop_a22= isOk readArithmeticContents "!!a"
V
Vidar Holen 已提交
698
readArithmeticContents :: Monad m => SCParser m Token
699 700 701
readArithmeticContents =
    readSequence
  where
702 703 704
    spacing =
        let lf = try (string "\\\n") >> return '\n'
        in many (whitespace <|> lf)
705 706 707 708

    splitBy x ops = chainl1 x (readBinary ops)
    readBinary ops = readComboOp ops TA_Binary
    readComboOp op token = do
709
        start <- startSpan
710 711
        op <- choice (map (\x -> try $ do
                                        s <- string x
V
Vidar Holen 已提交
712
                                        failIfIncompleteOp
713 714
                                        return s
                            ) op)
715
        id <- endSpan start
716 717 718
        spacing
        return $ token id op

V
Vidar Holen 已提交
719 720 721 722
    failIfIncompleteOp = notFollowedBy2 $ oneOf "&|<>="

    -- Read binary minus, but also check for -lt, -gt and friends:
    readMinusOp = do
723
        start <- startSpan
V
Vidar Holen 已提交
724 725 726 727 728 729 730 731 732 733 734 735 736 737
        pos <- getPosition
        try $ do
            char '-'
            failIfIncompleteOp
        optional $ do
            (str, alt) <- lookAhead . choice $ map tryOp [
                ("lt", "<"),
                ("gt", ">"),
                ("le", "<="),
                ("ge", ">="),
                ("eq", "=="),
                ("ne", "!=")
              ]
            parseProblemAt pos ErrorC 1106 $ "In arithmetic contexts, use " ++ alt ++ " instead of -" ++ str
738
        id <- endSpan start
V
Vidar Holen 已提交
739 740 741 742 743 744 745 746
        spacing
        return $ TA_Binary id "-"
      where
        tryOp (str, alt) = try $ do
            string str
            spacing1
            return (str, alt)

747
    readArrayIndex = do
748
        start <- startSpan
749
        char '['
750 751
        pos <- getPosition
        middle <- readStringForParser readArithmeticContents
752
        char ']'
753
        id <- endSpan start
754
        return $ T_UnparsedIndex id pos middle
755

756
    literal s = do
757
        start <- startSpan
758
        string s
759
        id <- endSpan start
760 761
        return $ T_Literal id s

762
    readVariable = do
763
        start <- startSpan
764 765
        name <- readVariableName
        indices <- many readArrayIndex
766
        id <- endSpan start
767 768
        spacing
        return $ TA_Variable id name indices
769 770

    readExpansion = do
771
        start <- startSpan
772 773 774 775 776
        pieces <- many1 $ choice [
            readSingleQuoted,
            readDoubleQuoted,
            readNormalDollar,
            readBraced,
777
            readUnquotedBackTicked,
778
            literal "#",
V
Vidar Holen 已提交
779
            readNormalLiteral "+-*/=%^,]?:"
780
            ]
781
        id <- endSpan start
782
        spacing
783
        return $ TA_Expansion id pieces
784 785 786 787 788 789 790 791

    readGroup = do
        char '('
        s <- readSequence
        char ')'
        spacing
        return s

792
    readArithTerm = readGroup <|> readVariable <|> readExpansion
793 794 795

    readSequence = do
        spacing
796
        start <- startSpan
797
        l <- readAssignment `sepBy` (char ',' >> spacing)
798
        id <- endSpan start
799 800
        return $ TA_Sequence id l

801 802 803
    readAssignment = chainr1 readTrinary readAssignmentOp
    readAssignmentOp = readComboOp ["=", "*=", "/=", "%=", "+=", "-=", "<<=", ">>=", "&=", "^=", "|="] TA_Assignment

804
    readTrinary = do
805
        x <- readLogicalOr
806
        do
807
            start <- startSpan
808 809
            string "?"
            spacing
810
            y <- readTrinary
811 812
            string ":"
            spacing
813
            z <- readTrinary
814
            id <- endSpan start
815 816 817 818 819 820 821 822 823 824 825 826
            return $ TA_Trinary id x y z
         <|>
          return x

    readLogicalOr  = readLogicalAnd `splitBy` ["||"]
    readLogicalAnd = readBitOr `splitBy` ["&&"]
    readBitOr  = readBitXor `splitBy` ["|"]
    readBitXor = readBitAnd `splitBy` ["^"]
    readBitAnd = readEquated `splitBy` ["&"]
    readEquated = readCompared `splitBy` ["==", "!="]
    readCompared = readShift `splitBy` ["<=", ">=", "<", ">"]
    readShift = readAddition `splitBy` ["<<", ">>"]
V
Vidar Holen 已提交
827
    readAddition = chainl1 readMultiplication (readBinary ["+"] <|> readMinusOp)
828 829 830 831 832
    readMultiplication = readExponential `splitBy` ["*", "/", "%"]
    readExponential = readAnyNegated `splitBy` ["**"]

    readAnyNegated = readNegated <|> readAnySigned
    readNegated = do
833
        start <- startSpan
834
        op <- oneOf "!~"
835
        id <- endSpan start
836
        spacing
837
        x <- readAnyNegated
838 839 840 841
        return $ TA_Unary id [op] x

    readAnySigned = readSigned <|> readAnycremented
    readSigned = do
842
        start <- startSpan
843
        op <- choice (map readSignOp "+-")
844
        id <- endSpan start
845 846 847 848 849 850
        spacing
        x <- readAnycremented
        return $ TA_Unary id [op] x
     where
        readSignOp c = try $ do
            char c
851
            notFollowedBy2 $ char c
852 853 854 855 856
            spacing
            return c

    readAnycremented = readNormalOrPostfixIncremented <|> readPrefixIncremented
    readPrefixIncremented = do
857
        start <- startSpan
858
        op <- try $ string "++" <|> string "--"
859
        id <- endSpan start
860 861 862 863 864 865 866 867
        spacing
        x <- readArithTerm
        return $ TA_Unary id (op ++ "|") x

    readNormalOrPostfixIncremented = do
        x <- readArithTerm
        spacing
        do
868
            start <- startSpan
869
            op <- try $ string "++" <|> string "--"
870
            id <- endSpan start
871
            spacing
872
            return $ TA_Unary id ('|':op) x
873 874 875 876 877
         <|>
            return x



V
Vidar Holen 已提交
878 879 880 881 882 883 884 885 886 887 888 889
prop_readCondition   = isOk readCondition "[ \\( a = b \\) -a \\( c = d \\) ]"
prop_readCondition2  = isOk readCondition "[[ (a = b) || (c = d) ]]"
prop_readCondition3  = isOk readCondition "[[ $c = [[:alpha:].~-] ]]"
prop_readCondition4  = isOk readCondition "[[ $c =~ *foo* ]]"
prop_readCondition5  = isOk readCondition "[[ $c =~ f( ]] )* ]]"
prop_readCondition5a = isOk readCondition "[[ $c =~ a(b) ]]"
prop_readCondition5b = isOk readCondition "[[ $c =~ f( ($var ]]) )* ]]"
prop_readCondition6  = isOk readCondition "[[ $c =~ ^[yY]$ ]]"
prop_readCondition7  = isOk readCondition "[[ ${line} =~ ^[[:space:]]*# ]]"
prop_readCondition8  = isOk readCondition "[[ $l =~ ogg|flac ]]"
prop_readCondition9  = isOk readCondition "[ foo -a -f bar ]"
prop_readCondition10 = isOk readCondition "[[\na == b\n||\nc == d ]]"
890 891
prop_readCondition10a= isOk readCondition "[[\na == b  ||\nc == d ]]"
prop_readCondition10b= isOk readCondition "[[ a == b\n||\nc == d ]]"
V
Vidar Holen 已提交
892 893 894 895 896 897 898 899 900 901 902 903
prop_readCondition11 = isOk readCondition "[[ a == b ||\n c == d ]]"
prop_readCondition12 = isWarning readCondition "[ a == b \n -o c == d ]"
prop_readCondition13 = isOk readCondition "[[ foo =~ ^fo{1,3}$ ]]"
prop_readCondition14 = isOk readCondition "[ foo '>' bar ]"
prop_readCondition15 = isOk readCondition "[ foo \">=\" bar ]"
prop_readCondition16 = isOk readCondition "[ foo \\< bar ]"
prop_readCondition17 = isOk readCondition "[[ ${file::1} = [-.\\|/\\\\] ]]"
prop_readCondition18 = isOk readCondition "[ ]"
prop_readCondition19 = isOk readCondition "[ '(' x \")\" ]"
prop_readCondition20 = isOk readCondition "[[ echo_rc -eq 0 ]]"
prop_readCondition21 = isOk readCondition "[[ $1 =~ ^(a\\ b)$ ]]"
prop_readCondition22 = isOk readCondition "[[ $1 =~ \\.a\\.(\\.b\\.)\\.c\\. ]]"
V
Vidar Holen 已提交
904
prop_readCondition23 = isOk readCondition "[[ -v arr[$var] ]]"
905
readCondition = called "test expression" $ do
906
    opos <- getPosition
907
    start <- startSpan
R
Rodrigo Setti 已提交
908
    open <- try (string "[[") <|> string "["
909
    let single = open == "["
V
Vidar Holen 已提交
910
    let typ = if single then SingleBracket else DoubleBracket
911 912 913 914

    pos <- getPosition
    space <- allspacing
    when (null space) $
915
        parseProblemAtWithEnd opos pos ErrorC 1035 $ "You need a space after the " ++
916 917 918 919 920 921
            if single
                then "[ and before the ]."
                else "[[ and before the ]]."
    when (single && '\n' `elem` space) $
        parseProblemAt pos ErrorC 1080 "You need \\ before line feeds to break lines in [ ]."

V
Vidar Holen 已提交
922 923 924
    condition <- readConditionContents single <|> do
        guard . not . null $ space
        lookAhead $ string "]"
925
        id <- endSpan start
V
Vidar Holen 已提交
926
        return $ TC_Empty id typ
927 928

    cpos <- getPosition
929
    close <- try (string "]]") <|> string "]" <|> fail "Expected test to end here (don't wrap commands in []/[[]])"
930
    id <- endSpan start
V
Vidar Holen 已提交
931 932
    when (open == "[[" && close /= "]]") $ parseProblemAt cpos ErrorC 1033 "Did you mean ]] ?"
    when (open == "[" && close /= "]" ) $ parseProblemAt opos ErrorC 1034 "Did you mean [[ ?"
933 934
    spacing
    many readCmdWord -- Read and throw away remainders to get then/do warnings. Fixme?
V
Vidar Holen 已提交
935
    return $ T_Condition id typ condition
936

937 938 939 940 941 942 943
readAnnotationPrefix = do
    char '#'
    many linewhitespace
    string "shellcheck"

prop_readAnnotation1 = isOk readAnnotation "# shellcheck disable=1234,5678\n"
prop_readAnnotation2 = isOk readAnnotation "# shellcheck disable=SC1234 disable=SC5678\n"
944
prop_readAnnotation3 = isOk readAnnotation "# shellcheck disable=SC1234 source=/dev/null disable=SC5678\n"
945
prop_readAnnotation4 = isWarning readAnnotation "# shellcheck cats=dogs disable=SC1234\n"
946 947
prop_readAnnotation5 = isOk readAnnotation "# shellcheck disable=SC2002 # All cats are precious\n"
prop_readAnnotation6 = isOk readAnnotation "# shellcheck disable=SC1234 # shellcheck foo=bar\n"
948
readAnnotation = called "shellcheck directive" $ do
949 950
    try readAnnotationPrefix
    many1 linewhitespace
V
Vidar Holen 已提交
951
    values <- many1 readKey
952
    optional readAnyComment
V
Vidar Holen 已提交
953 954 955 956
    void linefeed <|> do
        parseNote ErrorC 1125 "Invalid key=value pair? Ignoring the rest of this directive starting here."
        many (noneOf "\n")
        void linefeed <|> eof
957 958 959
    many linewhitespace
    return $ concat values
  where
V
Vidar Holen 已提交
960 961 962
    readKey = do
        keyPos <- getPosition
        key <- many1 letter
963
        char '=' <|> fail "Expected '=' after directive key"
V
Vidar Holen 已提交
964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987
        annotations <- case key of
            "disable" -> readCode `sepBy` char ','
              where
                readCode = do
                    optional $ string "SC"
                    int <- many1 digit
                    return $ DisableComment (read int)

            "source" -> do
                filename <- many1 $ noneOf " \n"
                return [SourceOverride filename]

            "shell" -> do
                pos <- getPosition
                shell <- many1 $ noneOf " \n"
                when (isNothing $ shellForExecutable shell) $
                    parseNoteAt pos ErrorC 1103
                        "This shell type is unknown. Use e.g. sh or bash."
                return [ShellOverride shell]

            _ -> do
                parseNoteAt keyPos WarningC 1107 "This directive is unknown. It will be ignored."
                anyChar `reluctantlyTill` whitespace
                return []
988

989
        many linewhitespace
V
Vidar Holen 已提交
990
        return annotations
991

992 993 994 995
readAnnotations = do
    annotations <- many (readAnnotation `thenSkip` allspacing)
    return $ concat annotations

V
Vidar Holen 已提交
996
readComment = do
997
    unexpecting "shellcheck annotation" readAnnotationPrefix
998 999 1000
    readAnyComment

readAnyComment = do
V
Vidar Holen 已提交
1001
    char '#'
1002
    many $ noneOf "\r\n"
V
Vidar Holen 已提交
1003

1004
prop_readNormalWord = isOk readNormalWord "'foo'\"bar\"{1..3}baz$(lol)"
1005
prop_readNormalWord2 = isOk readNormalWord "foo**(foo)!!!(@@(bar))"
1006
prop_readNormalWord3 = isOk readNormalWord "foo#"
V
Vidar Holen 已提交
1007
prop_readNormalWord4 = isOk readNormalWord "$\"foo\"$'foo\nbar'"
V
Vidar Holen 已提交
1008 1009
prop_readNormalWord5 = isWarning readNormalWord "${foo}}"
prop_readNormalWord6 = isOk readNormalWord "foo/{}"
1010 1011 1012
prop_readNormalWord7 = isOk readNormalWord "foo\\\nbar"
prop_readNormalWord8 = isWarning readSubshell "(foo\\ \nbar)"
prop_readNormalWord9 = isOk readSubshell "(foo\\ ;\nbar)"
1013 1014 1015
prop_readNormalWord10 = isWarning readNormalWord "\x201Chello\x201D"
prop_readNormalWord11 = isWarning readNormalWord "\x2018hello\x2019"
prop_readNormalWord12 = isWarning readNormalWord "hello\x2018"
1016 1017 1018
readNormalWord = readNormalishWord ""

readNormalishWord end = do
1019
    start <- startSpan
1020
    pos <- getPosition
1021
    x <- many1 (readNormalWordPart end)
1022
    id <- endSpan start
1023
    checkPossibleTermination pos x
1024
    return $ T_NormalWord id x
V
Vidar Holen 已提交
1025

1026
readIndexSpan = do
1027
    start <- startSpan
1028
    x <- many (readNormalWordPart "]" <|> someSpace <|> otherLiteral)
1029
    id <- endSpan start
1030 1031 1032
    return $ T_NormalWord id x
  where
    someSpace = do
1033
        start <- startSpan
1034
        str <- spacing1
1035
        id <- endSpan start
1036 1037
        return $ T_Literal id str
    otherLiteral = do
1038
        start <- startSpan
1039
        str <- many1 $ oneOf quotableChars
1040
        id <- endSpan start
1041 1042
        return $ T_Literal id str

V
Vidar Holen 已提交
1043
checkPossibleTermination pos [T_Literal _ x] =
R
Rodrigo Setti 已提交
1044 1045
    when (x `elem` ["do", "done", "then", "fi", "esac"]) $
        parseProblemAt pos WarningC 1010 $ "Use semicolon or linefeed before '" ++ x ++ "' (or quote to make it literal)."
1046
checkPossibleTermination _ _ = return ()
V
Vidar Holen 已提交
1047

1048
readNormalWordPart end = do
1049
    notFollowedBy2 $ oneOf end
1050
    checkForParenthesis
V
Vidar Holen 已提交
1051 1052 1053 1054 1055 1056
    choice [
        readSingleQuoted,
        readDoubleQuoted,
        readGlob,
        readNormalDollar,
        readBraced,
1057
        readUnquotedBackTicked,
V
Vidar Holen 已提交
1058
        readProcSub,
1059
        readUnicodeQuote,
V
Vidar Holen 已提交
1060 1061 1062
        readNormalLiteral end,
        readLiteralCurlyBraces
      ]
1063
  where
R
Rodrigo Setti 已提交
1064
    checkForParenthesis =
1065 1066 1067
        return () `attempting` do
            pos <- getPosition
            lookAhead $ char '('
V
Vidar Holen 已提交
1068
            parseProblemAt pos ErrorC 1036 "'(' is invalid here. Did you forget to escape it?"
1069

V
Vidar Holen 已提交
1070
    readLiteralCurlyBraces = do
1071
        start <- startSpan
V
Vidar Holen 已提交
1072
        str <- findParam <|> literalBraces
1073
        id <- endSpan start
V
Vidar Holen 已提交
1074 1075 1076 1077 1078 1079 1080 1081 1082 1083
        return $ T_Literal id str

    findParam = try $ string "{}"
    literalBraces = do
        pos <- getPosition
        c <- oneOf "{}"
        parseProblemAt pos WarningC 1083 $
            "This " ++ [c] ++ " is literal. Check expression (missing ;/\\n?) or quote it."
        return [c]

1084

V
Vidar Holen 已提交
1085
readSpacePart = do
1086
    start <- startSpan
V
Vidar Holen 已提交
1087
    x <- many1 whitespace
1088
    id <- endSpan start
V
Vidar Holen 已提交
1089
    return $ T_Literal id x
V
Vidar Holen 已提交
1090

V
Vidar Holen 已提交
1091
readDollarBracedWord = do
1092
    start <- startSpan
V
Vidar Holen 已提交
1093
    list <- many readDollarBracedPart
1094
    id <- endSpan start
V
Vidar Holen 已提交
1095
    return $ T_NormalWord id list
1096

1097 1098 1099
readDollarBracedPart = readSingleQuoted <|> readDoubleQuoted <|>
                       readParamSubSpecialChar <|> readExtglob <|> readNormalDollar <|>
                       readUnquotedBackTicked <|> readDollarBracedLiteral
V
Vidar Holen 已提交
1100 1101

readDollarBracedLiteral = do
1102
    start <- startSpan
V
Vidar Holen 已提交
1103
    vars <- (readBraceEscaped <|> (anyChar >>= \x -> return [x])) `reluctantlyTill1` bracedQuotable
1104
    id <- endSpan start
V
Vidar Holen 已提交
1105 1106
    return $ T_Literal id $ concat vars

1107
readParamSubSpecialChar = do
1108 1109 1110 1111
    start <- startSpan
    x <- many1 paramSubSpecialChars
    id <- endSpan start
    return $ T_ParamSubSpecialChar id x
1112

1113 1114
prop_readProcSub1 = isOk readProcSub "<(echo test | wc -l)"
prop_readProcSub2 = isOk readProcSub "<(  if true; then true; fi )"
1115
prop_readProcSub3 = isOk readProcSub "<( # nothing here \n)"
1116
readProcSub = called "process substitution" $ do
1117
    start <- startSpan
1118
    dir <- try $ do
V
Vidar Holen 已提交
1119
                    x <- oneOf "<>"
1120 1121
                    char '('
                    return [x]
1122
    list <- readCompoundListOrEmpty
1123 1124
    allspacing
    char ')'
1125
    id <- endSpan start
1126 1127
    return $ T_ProcSub id dir list

1128 1129
prop_readSingleQuoted = isOk readSingleQuoted "'foo bar'"
prop_readSingleQuoted2 = isWarning readSingleQuoted "'foo bar\\'"
1130 1131 1132
prop_readSingleQuoted4 = isWarning readNormalWord "'it's"
prop_readSingleQuoted5 = isWarning readSimpleCommand "foo='bar\ncow 'arg"
prop_readSingleQuoted6 = isOk readSimpleCommand "foo='bar cow 'arg"
1133 1134
prop_readSingleQuoted7 = isOk readSingleQuoted "'foo\x201C\&bar'"
prop_readSingleQuoted8 = isWarning readSingleQuoted "'foo\x2018\&bar'"
1135
readSingleQuoted = called "single quoted string" $ do
1136
    start <- startSpan
1137
    startPos <- getPosition
V
Vidar Holen 已提交
1138
    singleQuote
1139
    s <- many readSingleQuotedPart
1140 1141
    let string = concat s
    endPos <- getPosition
1142
    singleQuote <|> fail "Expected end of single quoted string"
V
Vidar Holen 已提交
1143

1144 1145
    optional $ do
        c <- try . lookAhead $ suspectCharAfterQuotes <|> oneOf "'"
R
Rodrigo Setti 已提交
1146
        if not (null string) && isAlpha c && isAlpha (last string)
1147
          then
R
Rodrigo Setti 已提交
1148
            parseProblemAt endPos WarningC 1011
1149 1150 1151 1152 1153
                "This apostrophe terminated the single quoted string!"
          else
            when ('\n' `elem` string && not ("\n" `isPrefixOf` string)) $
                suggestForgotClosingQuote startPos endPos "single quoted string"

1154
    id <- endSpan start
1155
    return (T_SingleQuoted id string)
V
Vidar Holen 已提交
1156 1157 1158 1159 1160 1161 1162 1163 1164

readSingleQuotedLiteral = do
    singleQuote
    strs <- many1 readSingleQuotedPart
    singleQuote
    return $ concat strs

readSingleQuotedPart =
    readSingleEscaped
1165 1166 1167 1168 1169 1170 1171 1172 1173
    <|> many1 (noneOf $ "'\\" ++ unicodeSingleQuotes)
    <|> readUnicodeQuote
   where
    readUnicodeQuote = do
        pos <- getPosition
        x <- oneOf unicodeSingleQuotes
        parseProblemAt pos WarningC 1112
            "This is a unicode quote. Delete and retype it (or ignore/doublequote for literal)."
        return [x]
V
Vidar Holen 已提交
1174

1175 1176 1177 1178 1179

prop_readBackTicked = isOk (readBackTicked False) "`ls *.mp3`"
prop_readBackTicked2 = isOk (readBackTicked False) "`grep \"\\\"\"`"
prop_readBackTicked3 = isWarning (readBackTicked False) "´grep \"\\\"\"´"
prop_readBackTicked4 = isOk readSimpleCommand "`echo foo\necho bar`"
1180 1181
prop_readBackTicked5 = isOk readSimpleCommand "echo `foo`bar"
prop_readBackTicked6 = isWarning readSimpleCommand "echo `foo\necho `bar"
1182 1183
prop_readBackTicked7 = isOk readSimpleCommand "`#inline comment`"
prop_readBackTicked8 = isOk readSimpleCommand "echo `#comment` \\\nbar baz"
1184 1185 1186
readQuotedBackTicked = readBackTicked True
readUnquotedBackTicked = readBackTicked False
readBackTicked quoted = called "backtick expansion" $ do
1187
    start <- startSpan
1188
    startPos <- getPosition
1189
    backtick
1190
    subStart <- getPosition
1191
    subString <- readGenericLiteral "`´"
1192
    endPos <- getPosition
1193
    backtick
1194
    id <- endSpan start
1195 1196 1197

    optional $ do
        c <- try . lookAhead $ suspectCharAfterQuotes
R
Rodrigo Setti 已提交
1198
        when ('\n' `elem` subString && not ("\n" `isPrefixOf` subString)) $
1199 1200
            suggestForgotClosingQuote startPos endPos "backtick expansion"

1201
    -- Result positions may be off due to escapes
1202
    result <- subParse subStart subParser (unEscape subString)
1203 1204
    return $ T_Backticked id result
  where
1205
    unEscape [] = []
1206
    unEscape ('\\':'"':rest) | quoted = '"' : unEscape rest
1207
    unEscape ('\\':x:rest) | x `elem` "$`\\" = x : unEscape rest
1208 1209
    unEscape ('\\':'\n':rest) = unEscape rest
    unEscape (c:rest) = c : unEscape rest
1210 1211 1212 1213
    subParser = do
        cmds <- readCompoundListOrEmpty
        verifyEof
        return cmds
1214
    backtick =
1215
      void (char '`') <|> do
1216 1217
         pos <- getPosition
         char '´'
1218
         parseProblemAt pos ErrorC 1077
1219
            "For command expansion, the tick should slant left (` vs ´). Use $(..) instead."
1220

1221
-- Run a parser on a new input, such as for `..` or here documents.
1222 1223 1224 1225 1226 1227 1228 1229 1230
subParse pos parser input = do
    lastPosition <- getPosition
    lastInput <- getInput
    setPosition pos
    setInput input
    result <- parser
    setInput lastInput
    setPosition lastPosition
    return result
V
Vidar Holen 已提交
1231

1232 1233 1234 1235 1236 1237
-- Parse something, but forget all parseProblems
inSeparateContext = parseForgettingContext True
-- Parse something, but forget all parseProblems on failure
forgetOnFailure = parseForgettingContext False

parseForgettingContext alsoOnSuccess parser = do
1238 1239 1240 1241 1242
    context <- Ms.get
    success context <|> failure context
  where
    success c = do
        res <- try parser
1243
        when alsoOnSuccess $ Ms.put c
1244 1245 1246 1247 1248
        return res
    failure c = do
        Ms.put c
        fail ""

1249
prop_readDoubleQuoted = isOk readDoubleQuoted "\"Hello $FOO\""
V
Vidar Holen 已提交
1250
prop_readDoubleQuoted2 = isOk readDoubleQuoted "\"$'\""
1251
prop_readDoubleQuoted3 = isOk readDoubleQuoted "\"\x2018hello\x2019\""
1252 1253
prop_readDoubleQuoted4 = isWarning readSimpleCommand "\"foo\nbar\"foo"
prop_readDoubleQuoted5 = isOk readSimpleCommand "lol \"foo\nbar\" etc"
V
Vidar Holen 已提交
1254 1255
prop_readDoubleQuoted6 = isOk readSimpleCommand "echo \"${ ls; }\""
prop_readDoubleQuoted7 = isOk readSimpleCommand "echo \"${ ls;}bar\""
1256
prop_readDoubleQuoted8 = isWarning readDoubleQuoted "\"\x201Chello\x201D\""
1257
prop_readDoubleQuoted10 = isOk readDoubleQuoted "\"foo\\\\n\""
N
Ng Zhi An 已提交
1258
readDoubleQuoted = called "double quoted string" $ do
1259
    start <- startSpan
1260
    startPos <- getPosition
V
Vidar Holen 已提交
1261 1262
    doubleQuote
    x <- many doubleQuotedPart
1263
    endPos <- getPosition
1264
    doubleQuote <|> fail "Expected end of double quoted string"
1265
    id <- endSpan start
1266 1267 1268 1269
    optional $ do
        try . lookAhead $ suspectCharAfterQuotes <|> oneOf "$\""
        when (any hasLineFeed x && not (startsWithLineFeed x)) $
            suggestForgotClosingQuote startPos endPos "double quoted string"
N
Ng Zhi An 已提交
1270
    return $ T_DoubleQuoted id x
1271
  where
R
Rodrigo Setti 已提交
1272
    startsWithLineFeed (T_Literal _ ('\n':_):_) = True
1273 1274 1275 1276 1277 1278 1279
    startsWithLineFeed _ = False
    hasLineFeed (T_Literal _ str) | '\n' `elem` str = True
    hasLineFeed _ = False

suggestForgotClosingQuote startPos endPos name = do
    parseProblemAt startPos WarningC 1078 $
        "Did you forget to close this " ++ name ++ "?"
R
Rodrigo Setti 已提交
1280
    parseProblemAt endPos InfoC 1079
1281
        "This is actually an end quote, but due to next char it looks suspect."
V
Vidar Holen 已提交
1282

1283 1284 1285 1286
doubleQuotedPart = readDoubleLiteral <|> readDoubleQuotedDollar <|> readQuotedBackTicked <|> readUnicodeQuote
  where
    readUnicodeQuote = do
        pos <- getPosition
1287
        start <- startSpan
1288
        c <- oneOf unicodeDoubleQuotes
1289
        id <- endSpan start
1290 1291 1292
        parseProblemAt pos WarningC 1111
            "This is a unicode quote. Delete and retype it (or ignore/singlequote for literal)."
        return $ T_Literal id [c]
V
Vidar Holen 已提交
1293

1294
readDoubleLiteral = do
1295
    start <- startSpan
V
Vidar Holen 已提交
1296
    s <- many1 readDoubleLiteralPart
1297
    id <- endSpan start
1298
    return $ T_Literal id (concat s)
V
Vidar Holen 已提交
1299 1300

readDoubleLiteralPart = do
1301
    x <- many1 (readDoubleEscaped <|> many1 (noneOf (doubleQuotableChars ++ unicodeDoubleQuotes)))
V
Vidar Holen 已提交
1302 1303
    return $ concat x

1304
readNormalLiteral end = do
1305
    start <- startSpan
1306
    s <- many1 (readNormalLiteralPart end)
1307
    id <- endSpan start
1308
    return $ T_Literal id (concat s)
V
Vidar Holen 已提交
1309

1310 1311
prop_readGlob1 = isOk readGlob "*"
prop_readGlob2 = isOk readGlob "[^0-9]"
1312 1313 1314
prop_readGlob3 = isOk readGlob "[a[:alpha:]]"
prop_readGlob4 = isOk readGlob "[[:alnum:]]"
prop_readGlob5 = isOk readGlob "[^[:alpha:]1-9]"
1315 1316 1317
prop_readGlob6 = isOk readGlob "[\\|]"
prop_readGlob7 = isOk readGlob "[^[]"
prop_readGlob8 = isOk readGlob "[*?]"
1318 1319 1320
readGlob = readExtglob <|> readSimple <|> readClass <|> readGlobbyLiteral
    where
        readSimple = do
1321
            start <- startSpan
1322
            c <- oneOf "*?"
1323
            id <- endSpan start
1324 1325 1326
            return $ T_Glob id [c]
        -- Doesn't handle weird things like [^]a] and [$foo]. fixme?
        readClass = try $ do
1327
            start <- startSpan
1328
            char '['
1329
            s <- many1 (predefined <|> readNormalLiteralPart "]" <|> globchars)
1330
            char ']'
1331
            id <- endSpan start
R
Rodrigo Setti 已提交
1332
            return $ T_Glob id $ "[" ++ concat s ++ "]"
1333
          where
1334
           globchars = fmap return . oneOf $ "!$[" ++ extglobStartChars
1335 1336 1337 1338 1339
           predefined = do
              try $ string "[:"
              s <- many1 letter
              string ":]"
              return $ "[:" ++ s ++ ":]"
1340

1341
        readGlobbyLiteral = do
1342
            start <- startSpan
1343
            c <- extglobStart <|> char '['
1344
            id <- endSpan start
1345 1346
            return $ T_Literal id [c]

1347 1348 1349 1350 1351 1352 1353 1354 1355
readNormalLiteralPart customEnd =
    readNormalEscaped <|>
        many1 (noneOf (customEnd ++ standardEnd))
  where
    standardEnd = "[{}"
        ++ quotableChars
        ++ extglobStartChars
        ++ unicodeDoubleQuotes
        ++ unicodeSingleQuotes
V
Vidar Holen 已提交
1356

1357
readNormalEscaped = called "escaped char" $ do
V
Vidar Holen 已提交
1358
    pos <- getPosition
1359
    backslash
V
Vidar Holen 已提交
1360
    do
1361
        next <- quotable <|> oneOf "?*@!+[]{}.,~#"
1362
        when (next == ' ') $ checkTrailingSpaces pos <|> return ()
V
Vidar Holen 已提交
1363 1364 1365
        return $ if next == '\n' then "" else [next]
      <|>
        do
1366
            next <- anyChar
1367
            case escapedChar next of
R
Rodrigo Setti 已提交
1368
                Just name -> parseNoteAt pos WarningC 1012 $ "\\" ++ [next] ++ " is just literal '" ++ [next] ++ "' here. For " ++ name ++ ", use " ++ alternative next ++ " instead."
V
Vidar Holen 已提交
1369
                Nothing -> parseNoteAt pos InfoC 1001 $ "This \\" ++ [next] ++ " will be a regular '" ++ [next] ++ "' in this context."
V
Vidar Holen 已提交
1370
            return [next]
1371
  where
1372
    alternative 'n' = "a quoted, literal line feed"
1373
    alternative t = "\"$(printf '\\" ++ [t] ++ "')\""
1374 1375 1376 1377
    escapedChar 'n' = Just "line feed"
    escapedChar 't' = Just "tab"
    escapedChar 'r' = Just "carriage return"
    escapedChar _ = Nothing
V
Vidar Holen 已提交
1378

1379 1380 1381 1382 1383
    checkTrailingSpaces pos = lookAhead . try $ do
        many linewhitespace
        void linefeed <|> eof
        parseProblemAt pos ErrorC 1101 "Delete trailing spaces after \\ to break line (or use quotes for literal space)."

V
Vidar Holen 已提交
1384 1385 1386 1387 1388

prop_readExtglob1 = isOk readExtglob "!(*.mp3)"
prop_readExtglob2 = isOk readExtglob "!(*.mp3|*.wmv)"
prop_readExtglob4 = isOk readExtglob "+(foo \\) bar)"
prop_readExtglob5 = isOk readExtglob "+(!(foo *(bar)))"
1389 1390 1391
prop_readExtglob6 = isOk readExtglob "*(((||))|())"
prop_readExtglob7 = isOk readExtglob "*(<>)"
prop_readExtglob8 = isOk readExtglob "@(|*())"
1392
readExtglob = called "extglob" $ do
1393
    start <- startSpan
1394 1395 1396 1397
    c <- try $ do
            f <- extglobStart
            char '('
            return f
R
Rodrigo Setti 已提交
1398
    contents <- readExtglobPart `sepBy` char '|'
1399
    char ')'
1400
    id <- endSpan start
1401
    return $ T_Extglob id [c] contents
V
Vidar Holen 已提交
1402 1403

readExtglobPart = do
1404
    start <- startSpan
1405
    x <- many (readExtglobGroup <|> readNormalWordPart "" <|> readSpacePart <|> readExtglobLiteral)
1406
    id <- endSpan start
V
Vidar Holen 已提交
1407
    return $ T_NormalWord id x
1408 1409 1410
  where
    readExtglobGroup = do
        char '('
1411
        start <- startSpan
R
Rodrigo Setti 已提交
1412
        contents <- readExtglobPart `sepBy` char '|'
1413
        id <- endSpan start
1414 1415 1416
        char ')'
        return $ T_Extglob id "" contents
    readExtglobLiteral = do
1417
        start <- startSpan
1418
        str <- many1 (oneOf "<>#;&")
1419
        id <- endSpan start
V
Vidar Holen 已提交
1420
        return $ T_Literal id str
V
Vidar Holen 已提交
1421 1422


V
Vidar Holen 已提交
1423
readSingleEscaped = do
1424
    pos <- getPosition
V
Vidar Holen 已提交
1425
    s <- backslash
1426
    x <- lookAhead anyChar
V
Vidar Holen 已提交
1427

1428
    case x of
1429
        '\'' -> parseProblemAt pos InfoC 1003 "Want to escape a single quote? echo 'This is how it'\\''s done'.";
1430 1431
        '\n' -> parseProblemAt pos InfoC 1004 "This backslash+linefeed is literal. Break outside single quotes if you just want to break the line."
        _ -> return ()
V
Vidar Holen 已提交
1432

1433
    return [s]
V
Vidar Holen 已提交
1434 1435

readDoubleEscaped = do
1436
    pos <- getPosition
V
Vidar Holen 已提交
1437 1438
    bs <- backslash
    (linefeed >> return "")
1439
        <|> fmap return doubleQuotable
1440 1441
        <|> do
            c <- anyChar
1442 1443 1444
            -- This is an invalid escape sequence where the \ is literal.
            -- Previously this caused a SC1117, which may be re-enabled as
            -- as a pedantic warning.
1445
            return [bs, c]
V
Vidar Holen 已提交
1446

V
Vidar Holen 已提交
1447 1448 1449
readBraceEscaped = do
    bs <- backslash
    (linefeed >> return "")
1450 1451
        <|> fmap return bracedQuotable
        <|> fmap (\ x -> [bs, x]) anyChar
V
Vidar Holen 已提交
1452

V
Vidar Holen 已提交
1453

1454
readGenericLiteral endChars = do
R
Rodrigo Setti 已提交
1455
    strings <- many (readGenericEscaped <|> many1 (noneOf ('\\':endChars)))
V
Vidar Holen 已提交
1456 1457 1458
    return $ concat strings

readGenericLiteral1 endExp = do
1459
    strings <- (readGenericEscaped <|> (anyChar >>= \x -> return [x])) `reluctantlyTill1` endExp
V
Vidar Holen 已提交
1460 1461 1462 1463 1464
    return $ concat strings

readGenericEscaped = do
    backslash
    x <- anyChar
1465
    return $ if x == '\n' then [] else ['\\', x]
V
Vidar Holen 已提交
1466

1467 1468
prop_readBraced = isOk readBraced "{1..4}"
prop_readBraced2 = isOk readBraced "{foo,bar,\"baz lol\"}"
1469 1470 1471 1472
prop_readBraced3 = isOk readBraced "{1,\\},2}"
prop_readBraced4 = isOk readBraced "{1,{2,3}}"
prop_readBraced5 = isOk readBraced "{JP{,E}G,jp{,e}g}"
prop_readBraced6 = isOk readBraced "{foo,bar,$((${var}))}"
1473 1474
prop_readBraced7 = isNotOk readBraced "{}"
prop_readBraced8 = isNotOk readBraced "{foo}"
1475 1476 1477 1478 1479 1480
readBraced = try braceExpansion
  where
    braceExpansion =
        T_BraceExpansion `withParser` do
            char '{'
            elements <- bracedElement `sepBy1` char ','
1481 1482 1483 1484 1485
            guard $
                case elements of
                    (_:_:_) -> True
                    [t] -> ".." `isInfixOf` onlyLiteralString t
                    [] -> False
1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498
            char '}'
            return elements
    bracedElement =
        T_NormalWord `withParser` do
            many $ choice [
                braceExpansion,
                readDollarExpression,
                readSingleQuoted,
                readDoubleQuoted,
                braceLiteral
                ]
    braceLiteral =
        T_Literal `withParser` readGenericLiteral1 (oneOf "{}\"$'," <|> whitespace)
V
Vidar Holen 已提交
1499

V
Vidar Holen 已提交
1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512
ensureDollar =
    -- The grammar should have been designed along the lines of readDollarExpr = char '$' >> stuff, but
    -- instead, each subunit parses its own $. This results in ~7 1-3 char lookaheads instead of one 1-char.
    -- Instead of optimizing the grammar, here's a green cut that decreases shellcheck runtime by 10%:
    lookAhead $ char '$'

readNormalDollar = do
    ensureDollar
    readDollarExp <|> readDollarDoubleQuote <|> readDollarSingleQuote <|> readDollarLonely
readDoubleQuotedDollar = do
    ensureDollar
    readDollarExp <|> readDollarLonely

1513 1514 1515

prop_readDollarExpression1 = isOk readDollarExpression "$(((1) && 3))"
prop_readDollarExpression2 = isWarning readDollarExpression "$(((1)) && 3)"
1516 1517
prop_readDollarExpression3 = isWarning readDollarExpression "$((\"$@\" &); foo;)"
readDollarExpression :: Monad m => SCParser m Token
1518
readDollarExpression = do
V
Vidar Holen 已提交
1519 1520 1521 1522
    ensureDollar
    readDollarExp

readDollarExp = arithmetic <|> readDollarExpansion <|> readDollarBracket <|> readDollarBraceCommandExpansion <|> readDollarBraced <|> readDollarVariable
1523 1524
  where
    arithmetic = readAmbiguous "$((" readDollarArithmetic readDollarExpansion (\pos ->
V
Vidar Holen 已提交
1525
        parseNoteAt pos WarningC 1102 "Shells disambiguate $(( differently or not at all. For $(command substition), add space after $( . For $((arithmetics)), fix parsing errors.")
V
Vidar Holen 已提交
1526

1527
prop_readDollarSingleQuote = isOk readDollarSingleQuote "$'foo\\\'lol'"
1528
readDollarSingleQuote = called "$'..' expression" $ do
1529
    start <- startSpan
1530
    try $ string "$'"
1531
    str <- readGenericLiteral "'"
1532
    char '\''
1533
    id <- endSpan start
1534 1535 1536 1537 1538
    return $ T_DollarSingleQuoted id str

prop_readDollarDoubleQuote = isOk readDollarDoubleQuote "$\"hello\""
readDollarDoubleQuote = do
    lookAhead . try $ string "$\""
1539
    start <- startSpan
1540 1541 1542
    char '$'
    doubleQuote
    x <- many doubleQuotedPart
1543
    doubleQuote <|> fail "Expected end of translated double quoted string"
1544
    id <- endSpan start
1545
    return $ T_DollarDoubleQuoted id x
1546

1547 1548
prop_readDollarArithmetic = isOk readDollarArithmetic "$(( 3 * 4 +5))"
prop_readDollarArithmetic2 = isOk readDollarArithmetic "$(((3*4)+(1*2+(3-1))))"
1549
readDollarArithmetic = called "$((..)) expression" $ do
1550
    start <- startSpan
V
Vidar Holen 已提交
1551
    try (string "$((")
1552
    c <- readArithmeticContents
1553 1554 1555
    pos <- getPosition
    char ')'
    char ')' <|> fail "Expected a double )) to end the $((..))"
1556
    id <- endSpan start
1557
    return (T_DollarArithmetic id c)
V
Vidar Holen 已提交
1558

1559
readDollarBracket = called "$[..] expression" $ do
1560
    start <- startSpan
1561 1562 1563
    try (string "$[")
    c <- readArithmeticContents
    string "]"
1564
    id <- endSpan start
1565 1566
    return (T_DollarBracket id c)

V
Vidar Holen 已提交
1567
prop_readArithmeticExpression = isOk readArithmeticExpression "((a?b:c))"
1568
readArithmeticExpression = called "((..)) command" $ do
1569
    start <- startSpan
1570
    try (string "((")
1571
    c <- readArithmeticContents
1572
    string "))"
1573
    id <- endSpan start
1574
    return (T_Arithmetic id c)
1575

1576 1577 1578 1579 1580
-- If the next characters match prefix, try two different parsers and warn if the alternate parser had to be used
readAmbiguous :: Monad m => String -> SCParser m p -> SCParser m p -> (SourcePos -> SCParser m ()) -> SCParser m p
readAmbiguous prefix expected alternative warner = do
    pos <- getPosition
    try . lookAhead $ string prefix
1581 1582
    -- If the expected parser fails, try the alt.
    -- If the alt fails, run the expected one again for the errors.
V
Vidar Holen 已提交
1583
    try expected <|> try (withAlt pos) <|> expected
1584
  where
1585 1586 1587
    withAlt pos = do
        t <- forgetOnFailure alternative
        warner pos
1588 1589
        return t

V
Vidar Holen 已提交
1590 1591 1592
prop_readDollarBraceCommandExpansion1 = isOk readDollarBraceCommandExpansion "${ ls; }"
prop_readDollarBraceCommandExpansion2 = isOk readDollarBraceCommandExpansion "${\nls\n}"
readDollarBraceCommandExpansion = called "ksh ${ ..; } command expansion" $ do
1593
    start <- startSpan
V
Vidar Holen 已提交
1594 1595 1596 1597 1598 1599
    try $ do
        string "${"
        whitespace
    allspacing
    term <- readTerm
    char '}' <|> fail "Expected } to end the ksh ${ ..; } command expansion"
1600
    id <- endSpan start
V
Vidar Holen 已提交
1601 1602
    return $ T_DollarBraceCommandExpansion id term

V
Vidar Holen 已提交
1603 1604
prop_readDollarBraced1 = isOk readDollarBraced "${foo//bar/baz}"
prop_readDollarBraced2 = isOk readDollarBraced "${foo/'{cow}'}"
V
Vidar Holen 已提交
1605
prop_readDollarBraced3 = isOk readDollarBraced "${foo%%$(echo cow\\})}"
V
Vidar Holen 已提交
1606
prop_readDollarBraced4 = isOk readDollarBraced "${foo#\\}}"
1607
readDollarBraced = called "parameter expansion" $ do
1608
    start <- startSpan
V
Vidar Holen 已提交
1609
    try (string "${")
V
Vidar Holen 已提交
1610
    word <- readDollarBracedWord
1611
    char '}'
1612
    id <- endSpan start
V
Vidar Holen 已提交
1613
    return $ T_DollarBraced id word
1614

1615 1616 1617
prop_readDollarExpansion1= isOk readDollarExpansion "$(echo foo; ls\n)"
prop_readDollarExpansion2= isOk readDollarExpansion "$(  )"
prop_readDollarExpansion3= isOk readDollarExpansion "$( command \n#comment \n)"
1618
readDollarExpansion = called "command expansion" $ do
1619
    start <- startSpan
V
Vidar Holen 已提交
1620
    try (string "$(")
1621
    cmds <- readCompoundListOrEmpty
1622
    char ')' <|> fail "Expected end of $(..) expression"
1623
    id <- endSpan start
R
Rodrigo Setti 已提交
1624
    return $ T_DollarExpansion id cmds
V
Vidar Holen 已提交
1625

1626
prop_readDollarVariable = isOk readDollarVariable "$@"
1627 1628 1629
prop_readDollarVariable2 = isOk (readDollarVariable >> anyChar) "$?!"
prop_readDollarVariable3 = isWarning (readDollarVariable >> anyChar) "$10"
prop_readDollarVariable4 = isWarning (readDollarVariable >> string "[@]") "$arr[@]"
1630
prop_readDollarVariable5 = isWarning (readDollarVariable >> string "[f") "$arr[f"
1631

N
Ng Zhi An 已提交
1632
readDollarVariable :: Monad m => SCParser m Token
N
Ng Zhi An 已提交
1633
readDollarVariable = do
1634
    start <- startSpan
1635 1636
    pos <- getPosition

N
Ng Zhi An 已提交
1637
    let singleCharred p = do
1638
        value <- wrapString ((:[]) <$> p)
1639
        id <- endSpan start
N
Ng Zhi An 已提交
1640
        return $ (T_DollarBraced id value)
1641

N
Ng Zhi An 已提交
1642
    let positional = do
1643 1644 1645 1646
        value <- singleCharred digit
        return value `attempting` do
            lookAhead digit
            parseNoteAt pos ErrorC 1037 "Braces are required for positionals over 9, e.g. ${10}."
V
Vidar Holen 已提交
1647 1648 1649 1650

    let special = singleCharred specialVariable

    let regular = do
1651
        value <- wrapString readVariableName
1652
        id <- endSpan start
N
Ng Zhi An 已提交
1653
        return (T_DollarBraced id value) `attempting` do
1654 1655
            lookAhead $ char '['
            parseNoteAt pos ErrorC 1087 "Use braces when expanding arrays, e.g. ${array[idx]} (or ${var}[.. to quiet)."
V
Vidar Holen 已提交
1656

1657
    try $ char '$' >> (positional <|> special <|> regular)
V
Vidar Holen 已提交
1658

V
Vidar Holen 已提交
1659
  where
1660 1661 1662 1663 1664 1665 1666
    wrapString p = do
        start <- getPosition
        s <- p
        end <- getPosition
        id1 <- getNextIdBetween start end
        id2 <- getNextIdBetween start end
        return $ T_NormalWord id1 [T_Literal id2 s]
V
Vidar Holen 已提交
1667

V
Vidar Holen 已提交
1668 1669 1670 1671 1672
readVariableName = do
    f <- variableStart
    rest <- many variableChars
    return (f:rest)

1673
readDollarLonely = do
1674
    start <- startSpan
V
Vidar Holen 已提交
1675
    char '$'
1676
    id <- endSpan start
V
Vidar Holen 已提交
1677
    n <- lookAhead (anyChar <|> (eof >> return '_'))
1678 1679
    return $ T_Literal id "$"

V
Vidar Holen 已提交
1680
prop_readHereDoc = isOk readScript "cat << foo\nlol\ncow\nfoo"
1681
prop_readHereDoc2 = isNotOk readScript "cat <<- EOF\n  cow\n  EOF"
V
Vidar Holen 已提交
1682
prop_readHereDoc3 = isOk readScript "cat << foo\n$\"\nfoo"
1683
prop_readHereDoc4 = isNotOk readScript "cat << foo\n`\nfoo"
V
Vidar Holen 已提交
1684 1685 1686 1687 1688 1689 1690
prop_readHereDoc5 = isOk readScript "cat <<- !foo\nbar\n!foo"
prop_readHereDoc6 = isOk readScript "cat << foo\\ bar\ncow\nfoo bar"
prop_readHereDoc7 = isOk readScript "cat << foo\n\\$(f ())\nfoo"
prop_readHereDoc8 = isOk readScript "cat <<foo>>bar\netc\nfoo"
prop_readHereDoc9 = isOk readScript "if true; then cat << foo; fi\nbar\nfoo\n"
prop_readHereDoc10= isOk readScript "if true; then cat << foo << bar; fi\nfoo\nbar\n"
prop_readHereDoc11= isOk readScript "cat << foo $(\nfoo\n)lol\nfoo\n"
V
Vidar Holen 已提交
1691
prop_readHereDoc12= isOk readScript "cat << foo|cat\nbar\nfoo"
1692
prop_readHereDoc13= isOk readScript "cat <<'#!'\nHello World\n#!\necho Done"
V
Vidar Holen 已提交
1693
prop_readHereDoc14= isWarning readScript "cat << foo\nbar\nfoo \n"
1694
prop_readHereDoc15= isWarning readScript "cat <<foo\nbar\nfoo bar\nfoo"
V
Vidar Holen 已提交
1695
prop_readHereDoc16= isOk readScript "cat <<- ' foo'\nbar\n foo\n"
1696 1697 1698
prop_readHereDoc17= isWarning readScript "cat <<- ' foo'\nbar\n  foo\n foo\n"
prop_readHereDoc20= isWarning readScript "cat << foo\n  foo\n()\nfoo\n"
prop_readHereDoc21= isOk readScript "# shellcheck disable=SC1039\ncat << foo\n  foo\n()\nfoo\n"
1699
readHereDoc = called "here document" $ do
V
Vidar Holen 已提交
1700
    pos <- getPosition
V
Vidar Holen 已提交
1701
    try $ string "<<"
1702
    dashed <- (char '-' >> return Dashed) <|> return Undashed
V
Vidar Holen 已提交
1703 1704 1705 1706
    sp <- spacing
    optional $ do
        try . lookAhead $ char '('
        let message = "Shells are space sensitive. Use '< <(cmd)', not '<<" ++ sp ++ "(cmd)'."
V
Vidar Holen 已提交
1707
        parseProblemAt pos ErrorC 1038 message
1708
    start <- startSpan
V
Vidar Holen 已提交
1709
    (quoted, endToken) <- readToken
1710
    hid <- endSpan start
1711

V
Vidar Holen 已提交
1712 1713 1714
    -- add empty tokens for now, read the rest in readPendingHereDocs
    let doc = T_HereDoc hid dashed quoted endToken []
    addPendingHereDoc doc
1715
    return doc
1716
  where
V
Vidar Holen 已提交
1717 1718 1719 1720 1721 1722 1723
    quotes = "\"'\\"
    -- Fun fact: bash considers << foo"" quoted, but not << <("foo").
    -- Instead of replicating this, just read a token and strip quotes.
    readToken = do
        str <- readStringForParser readNormalWord
        return (if any (`elem` quotes) str then Quoted else Unquoted,
                filter (not . (`elem` quotes)) str)
1724

V
Vidar Holen 已提交
1725 1726 1727 1728 1729

readPendingHereDocs = do
    docs <- popPendingHereDocs
    mapM_ readDoc docs
  where
1730
    readDoc (HereDocPending (T_HereDoc id dashed quoted endToken _) ctx) =
1731 1732
      swapContext ctx $
      do
1733
        docStartPos <- getPosition
1734
        (terminated, wasWarned, lines) <- readDocLines dashed endToken
1735
        docEndPos <- getPosition
1736 1737 1738
        let hereData = unlines lines
        unless terminated $ do
            unless wasWarned $
1739
                debugHereDoc id endToken hereData
1740
            fail "Here document was not correctly terminated"
1741
        list <- parseHereData quoted (docStartPos, docEndPos) hereData
1742 1743 1744 1745 1746
        addToHereDocMap id list

    -- Read the lines making up the here doc. Returns (IsTerminated, Lines)
    readDocLines :: Monad m => Dashed -> String -> SCParser m (Bool, Bool, [String])
    readDocLines dashed endToken = do
V
Vidar Holen 已提交
1747
        pos <- getPosition
1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762
        str <- rawLine
        isEof <- option False (eof >> return True)
        (isEnd, wasWarned) <- subParse pos checkEnd str
        if
            | isEnd -> return (True, wasWarned, [])
            | isEof -> return (False, wasWarned, [str])
            | True -> do
                (ok, previousWarning, rest) <- readDocLines dashed endToken
                return (ok, wasWarned || previousWarning, str:rest)
      where
        -- Check if this is the actual end, or a plausible false end
        checkEnd = option (False, False) $ try $ do
            -- Match what's basically '^( *)token( *)(.*)$'
            leadingSpacePos <- getPosition
            leadingSpace <- linewhitespace `reluctantlyTill` string endToken
V
Vidar Holen 已提交
1763
            string endToken
1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791
            trailingSpacePos <- getPosition
            trailingSpace <- many linewhitespace
            trailerPos <- getPosition
            trailer <- many anyChar

            let leadingSpacesAreTabs = all (== '\t') leadingSpace
            let thereIsNoTrailer = null trailingSpace && null trailer
            let leaderIsOk = null leadingSpace
                    || dashed == Dashed && leadingSpacesAreTabs
            let trailerStart = if null trailer then '\0' else head trailer
            let hasTrailingSpace = not $ null trailingSpace
            let hasTrailer = not $ null trailer
            let ppt = parseProblemAt trailerPos ErrorC

            if leaderIsOk && thereIsNoTrailer
              then return (True, False)
              else do
                let foundCause = return (False, True)
                let skipLine = return (False, False)
                -- This may be intended as an end token. Debug why it isn't.
                if
                    | trailerStart == ')' -> do
                        ppt 1119 $ "Add a linefeed between end token and terminating ')'."
                        foundCause
                    | trailerStart == '#' -> do
                        ppt 1120 "No comments allowed after here-doc token. Comment the next line instead."
                        foundCause
                    | trailerStart `elem` ";>|&" -> do
V
Vidar Holen 已提交
1792
                        ppt 1121 "Add ;/& terminators (and other syntax) on the line with the <<, not here."
1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814
                        foundCause
                    | hasTrailingSpace && hasTrailer -> do
                        ppt 1122 "Nothing allowed after end token. To continue a command, put it on the line with the <<."
                        foundCause
                    | leaderIsOk && hasTrailingSpace && not hasTrailer -> do
                        parseProblemAt trailingSpacePos ErrorC 1118 "Delete whitespace after the here-doc end token."
                        -- Parse as if it's the actual end token. Will koala_man regret this once again?
                        return (True, True)
                    | not hasTrailingSpace && hasTrailer ->
                        -- The end token is just a prefix
                        skipLine
                    | hasTrailer ->
                        error "ShellCheck bug, please report (here doc trailer)."

                    -- The following cases assume no trailing text:
                    | dashed == Undashed && (not $ null leadingSpace) -> do
                        parseProblemAt leadingSpacePos ErrorC 1039 "Remove indentation before end token (or use <<- and indent with tabs)."
                        foundCause
                    | dashed == Dashed && not leadingSpacesAreTabs -> do
                        parseProblemAt leadingSpacePos ErrorC 1040 "When using <<-, you can only indent with tabs."
                        foundCause
                    | True -> skipLine
V
Vidar Holen 已提交
1815

V
Vidar Holen 已提交
1816 1817 1818
    rawLine = do
        c <- many $ noneOf "\n"
        void (char '\n') <|> eof
1819
        return c
V
Vidar Holen 已提交
1820

1821 1822
    parseHereData Quoted (start,end) hereData = do
        id <- getNextIdBetween start end
R
Rodrigo Setti 已提交
1823
        return [T_Literal id hereData]
1824

1825
    parseHereData Unquoted (startPos, _) hereData =
1826 1827
        subParse startPos readHereData hereData

1828
    readHereData = many $ doubleQuotedPart <|> readHereLiteral
1829 1830

    readHereLiteral = do
1831
        start <- startSpan
1832
        chars <- many1 $ noneOf "`$\\"
1833
        id <- endSpan start
1834 1835
        return $ T_Literal id chars

1836
    debugHereDoc tokenId endToken doc
R
Rodrigo Setti 已提交
1837 1838
        | endToken `isInfixOf` doc =
            let lookAt line = when (endToken `isInfixOf` line) $
1839
                      parseProblemAtId tokenId ErrorC 1042 ("Close matches include '" ++ line ++ "' (!= '" ++ endToken ++ "').")
R
Rodrigo Setti 已提交
1840
            in do
1841
                  parseProblemAtId tokenId ErrorC 1041 ("Found '" ++ endToken ++ "' further down, but not on a separate line.")
R
Rodrigo Setti 已提交
1842 1843
                  mapM_ lookAt (lines doc)
        | map toLower endToken `isInfixOf` map toLower doc =
1844
            parseProblemAtId tokenId ErrorC 1043 ("Found " ++ endToken ++ " further down, but with wrong casing.")
R
Rodrigo Setti 已提交
1845
        | otherwise =
1846
            parseProblemAtId tokenId ErrorC 1044 ("Couldn't find end token `" ++ endToken ++ "' in the here document.")
V
Vidar Holen 已提交
1847 1848 1849


readFilename = readNormalWord
1850 1851 1852
readIoFileOp = choice [g_DGREAT, g_LESSGREAT, g_GREATAND, g_LESSAND, g_CLOBBER, redirToken '<' T_Less, redirToken '>' T_Greater ]

readIoDuplicate = try $ do
1853
    start <- startSpan
1854
    op <- g_GREATAND <|> g_LESSAND
1855
    target <- readIoVariable <|> digitsAndOrDash
1856
    id <- endSpan start
1857
    return $ T_IoDuplicate id op target
1858 1859 1860 1861 1862 1863 1864
  where
    -- either digits with optional dash, or a required dash
    digitsAndOrDash = do
        str <- many digit
        dash <- (if null str then id else option "") $ string "-"
        return $ str ++ dash

1865 1866

prop_readIoFile = isOk readIoFile ">> \"$(date +%YYmmDD)\""
1867
readIoFile = called "redirection" $ do
1868
    start <- startSpan
V
Vidar Holen 已提交
1869 1870 1871
    op <- readIoFileOp
    spacing
    file <- readFilename
1872
    id <- endSpan start
1873
    return $ T_IoFile id op file
1874

1875 1876 1877 1878 1879 1880
readIoVariable = try $ do
    char '{'
    x <- readVariableName
    char '}'
    return $ "{" ++ x ++ "}"

1881 1882 1883
readIoSource = try $ do
    x <- string "&" <|> readIoVariable <|> many digit
    lookAhead $ void readIoFileOp <|> void (string "<<")
V
Vidar Holen 已提交
1884
    return x
1885

1886 1887 1888 1889 1890 1891
prop_readIoRedirect = isOk readIoRedirect "3>&2"
prop_readIoRedirect2 = isOk readIoRedirect "2> lol"
prop_readIoRedirect3 = isOk readIoRedirect "4>&-"
prop_readIoRedirect4 = isOk readIoRedirect "&> lol"
prop_readIoRedirect5 = isOk readIoRedirect "{foo}>&2"
prop_readIoRedirect6 = isOk readIoRedirect "{foo}<&-"
1892
prop_readIoRedirect7 = isOk readIoRedirect "{foo}>&1-"
1893
readIoRedirect = do
1894
    start <- startSpan
1895 1896
    n <- readIoSource
    redir <- readHereString <|> readHereDoc <|> readIoDuplicate <|> readIoFile
1897
    id <- endSpan start
1898
    skipAnnotationAndWarn
V
Vidar Holen 已提交
1899
    spacing
1900
    return $ T_FdRedirect id n redir
V
Vidar Holen 已提交
1901 1902 1903

readRedirectList = many1 readIoRedirect

1904
prop_readHereString = isOk readHereString "<<< \"Hello $world\""
1905
readHereString = called "here string" $ do
1906
    start <- startSpan
V
Vidar Holen 已提交
1907
    try $ string "<<<"
1908
    id <- endSpan start
V
Vidar Holen 已提交
1909 1910
    spacing
    word <- readNormalWord
1911
    return $ T_HereString id word
V
Vidar Holen 已提交
1912

V
Vidar Holen 已提交
1913
readNewlineList = many1 ((linefeed <|> carriageReturn) `thenSkip` spacing)
V
Vidar Holen 已提交
1914 1915
readLineBreak = optional readNewlineList

V
Vidar Holen 已提交
1916 1917
prop_readSeparator1 = isWarning readScript "a &; b"
prop_readSeparator2 = isOk readScript "a & b"
1918 1919
prop_readSeparator3 = isWarning readScript "a &amp; b"
prop_readSeparator4 = isWarning readScript "a &gt; file; b"
1920
prop_readSeparator5 = isWarning readScript "curl https://example.com/?foo=moo&bar=cow"
V
Vidar Holen 已提交
1921
readSeparatorOp = do
1922
    notFollowedBy2 (void g_AND_IF <|> void readCaseSeparator)
V
Vidar Holen 已提交
1923
    notFollowedBy2 (string "&>")
1924
    start <- getPosition
R
Rodrigo Setti 已提交
1925
    f <- try (do
1926
                    pos <- getPosition
V
Vidar Holen 已提交
1927
                    char '&'
1928 1929 1930 1931 1932 1933 1934 1935 1936 1937
                    optional $ choice [
                        do
                            s <- lookAhead . choice . map (try . string) $
                                ["amp;", "gt;", "lt;"]
                            parseProblemAt pos ErrorC 1109 "This is an unquoted HTML entity. Replace with corresponding character.",

                        do
                            try . lookAhead $ variableStart
                            parseProblemAt pos WarningC 1132 "This & terminates the command. Escape it or add space after & to silence."
                      ]
1938

V
Vidar Holen 已提交
1939 1940 1941
                    spacing
                    pos <- getPosition
                    char ';'
1942 1943
                    -- In case statements we might have foo & ;;
                    notFollowedBy2 $ char ';'
V
Vidar Holen 已提交
1944
                    parseProblemAt pos ErrorC 1045 "It's not 'foo &; bar', just 'foo & bar'."
V
Vidar Holen 已提交
1945 1946
                    return '&'
            ) <|> char ';' <|> char '&'
1947
    end <- getPosition
V
Vidar Holen 已提交
1948
    spacing
1949
    return (f, (start, end))
V
Vidar Holen 已提交
1950

1951
readSequentialSep = void (g_Semi >> readLineBreak) <|> void readNewlineList
V
Vidar Holen 已提交
1952 1953 1954 1955 1956 1957 1958
readSeparator =
    do
        separator <- readSeparatorOp
        readLineBreak
        return separator
     <|>
        do
1959
            start <- getPosition
V
Vidar Holen 已提交
1960
            readNewlineList
1961 1962
            end <- getPosition
            return ('\n', (start, end))
1963

1964
prop_readSimpleCommand = isOk readSimpleCommand "echo test > file"
V
Vidar Holen 已提交
1965
prop_readSimpleCommand2 = isOk readSimpleCommand "cmd &> file"
1966 1967
prop_readSimpleCommand3 = isOk readSimpleCommand "export foo=(bar baz)"
prop_readSimpleCommand4 = isOk readSimpleCommand "typeset -a foo=(lol)"
1968 1969
prop_readSimpleCommand5 = isOk readSimpleCommand "time if true; then echo foo; fi"
prop_readSimpleCommand6 = isOk readSimpleCommand "time -p ( ls -l; )"
1970
prop_readSimpleCommand7 = isOk readSimpleCommand "\\ls"
V
Vidar Holen 已提交
1971 1972 1973 1974
prop_readSimpleCommand8 = isWarning readSimpleCommand "// Lol"
prop_readSimpleCommand9 = isWarning readSimpleCommand "/* Lolbert */"
prop_readSimpleCommand10 = isWarning readSimpleCommand "/**** Lolbert */"
prop_readSimpleCommand11 = isOk readSimpleCommand "/\\* foo"
1975 1976 1977
prop_readSimpleCommand12 = isWarning readSimpleCommand "elsif foo"
prop_readSimpleCommand13 = isWarning readSimpleCommand "ElseIf foo"
prop_readSimpleCommand14 = isWarning readSimpleCommand "elseif[$i==2]"
1978
readSimpleCommand = called "simple command" $ do
V
Vidar Holen 已提交
1979
    prefix <- option [] readCmdPrefix
1980
    skipAnnotationAndWarn
1981
    cmd <- option Nothing $ Just <$> readCmdName
1982
    when (null prefix && isNothing cmd) $ fail "Expected a command"
V
Vidar Holen 已提交
1983

1984
    case cmd of
1985 1986 1987 1988 1989
      Nothing -> do
        id1 <- getNextIdSpanningTokenList prefix
        id2 <- getNewIdFor id1
        return $ makeSimpleCommand id1 id2 prefix [] []

1990
      Just cmd -> do
1991
            validateCommand cmd
1992 1993 1994
            suffix <- option [] $ getParser readCmdSuffix cmd [
                        (["declare", "export", "local", "readonly", "typeset"], readModifierSuffix),
                        (["time"], readTimeSuffix),
1995 1996
                        (["let"], readLetSuffix),
                        (["eval"], readEvalSuffix)
1997
                    ]
1998

1999 2000 2001
            id1 <- getNextIdSpanningTokenList (prefix ++ (cmd:suffix))
            id2 <- getNewIdFor id1

2002 2003
            let result = makeSimpleCommand id1 id2 prefix [cmd] suffix
            if isCommand ["source", "."] cmd
2004
                then readSource result
2005
                else return result
2006
  where
2007 2008 2009 2010 2011 2012 2013
    isCommand strings (T_NormalWord _ [T_Literal _ s]) = s `elem` strings
    isCommand _ _ = False
    getParser def cmd [] = def
    getParser def cmd ((list, action):rest) =
        if isCommand list cmd
        then action
        else getParser def cmd rest
V
Vidar Holen 已提交
2014

V
Vidar Holen 已提交
2015 2016 2017 2018
    cStyleComment cmd =
        case cmd of
            _ -> False

2019
    validateCommand cmd =
2020
        case cmd of
2021 2022
            (T_NormalWord _ [T_Literal _ "//"]) -> commentWarning (getId cmd)
            (T_NormalWord _ (T_Literal _ "/" : T_Glob _ "*" :_)) -> commentWarning (getId cmd)
2023
            (T_NormalWord _ (T_Literal _ str:_)) -> do
2024 2025 2026
                let cmdString = map toLower $ takeWhile isAlpha str
                when (cmdString `elem` ["elsif", "elseif"]) $
                    parseProblemAtId (getId cmd) ErrorC 1131 "Use 'elif' to start another branch."
2027 2028
            _ -> return ()

2029 2030 2031 2032 2033 2034 2035 2036
    commentWarning id =
        parseProblemAtId id ErrorC 1127 "Was this intended as a comment? Use # in sh."

    makeSimpleCommand id1 id2 prefix cmd suffix =
        let
            (preAssigned, preRest) = partition assignment prefix
            (preRedirected, preRest2) = partition redirection preRest
            (postRedirected, postRest) = partition redirection suffix
2037

2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051
            redirs = preRedirected ++ postRedirected
            assigns = preAssigned
            args = cmd ++ preRest2 ++ postRest
        in
            T_Redirecting id1 redirs $ T_SimpleCommand id2 assigns args
      where
        assignment (T_Assignment {}) = True
        assignment _ = False
        redirection (T_FdRedirect {}) = True
        redirection _ = False


readSource :: Monad m => Token -> SCParser m Token
readSource t@(T_Redirecting _ _ (T_SimpleCommand cmdId _ (cmd:file:_))) = do
2052
    override <- getSourceOverride
2053 2054 2055 2056 2057
    let literalFile = do
        name <- override `mplus` getLiteralString file
        -- Hack to avoid 'source ~/foo' trying to read from literal tilde
        guard . not $ "~/" `isPrefixOf` name
        return name
2058 2059
    case literalFile of
        Nothing -> do
2060
            parseNoteAtId (getId file) WarningC 1090
2061
                "Can't follow non-constant source. Use a directive to specify location."
2062 2063 2064 2065 2066
            return t
        Just filename -> do
            proceed <- shouldFollow filename
            if not proceed
              then do
2067
                parseNoteAtId (getId file) InfoC 1093
2068 2069 2070
                    "This file appears to be recursively sourced. Ignoring."
                return t
              else do
2071
                sys <- Mr.asks systemInterface
2072 2073 2074 2075
                input <-
                    if filename == "/dev/null" -- always allow /dev/null
                    then return (Right "")
                    else system $ siReadFile sys filename
2076 2077
                case input of
                    Left err -> do
2078
                        parseNoteAtId (getId file) InfoC 1091 $
2079 2080 2081
                            "Not following: " ++ err
                        return t
                    Right script -> do
2082 2083
                        id1 <- getNewIdFor cmdId
                        id2 <- getNewIdFor cmdId
2084 2085 2086

                        let included = do
                            src <- subRead filename script
2087
                            return $ T_SourceCommand id1 t (T_Include id2 src)
2088 2089

                        let failed = do
2090
                            parseNoteAtId (getId file) WarningC 1094
2091 2092 2093 2094 2095 2096 2097 2098 2099
                                "Parsing of sourced file failed. Ignoring it."
                            return t

                        included <|> failed
  where
    subRead name script =
        withContext (ContextSource name) $
            inSeparateContext $
                subParse (initialPos name) readScript script
2100
readSource t = return t
2101 2102


2103
prop_readPipeline = isOk readPipeline "! cat /etc/issue | grep -i ubuntu"
2104
prop_readPipeline2 = isWarning readPipeline "!cat /etc/issue | grep -i ubuntu"
2105
prop_readPipeline3 = isOk readPipeline "for f; do :; done|cat"
2106
readPipeline = do
2107
    unexpecting "keyword/token" readKeyword
V
Vidar Holen 已提交
2108
    do
2109
        (T_Bang id) <- g_Bang
V
Vidar Holen 已提交
2110
        pipe <- readPipeSequence
2111
        return $ T_Banged id pipe
R
Rodrigo Setti 已提交
2112
      <|>
V
Vidar Holen 已提交
2113 2114
        readPipeSequence

2115
prop_readAndOr = isOk readAndOr "grep -i lol foo || exit 1"
2116 2117 2118
prop_readAndOr1 = isOk readAndOr "# shellcheck disable=1\nfoo"
prop_readAndOr2 = isOk readAndOr "# shellcheck disable=1\n# lol\n# shellcheck disable=3\nfoo"
readAndOr = do
2119
    start <- startSpan
2120
    apos <- getPosition
2121
    annotations <- readAnnotations
2122
    aid <- endSpan start
2123

2124 2125 2126 2127
    unless (null annotations) $ optional $ do
        try . lookAhead $ readKeyword
        parseProblemAt apos ErrorC 1123 "ShellCheck directives are only valid in front of complete compound commands, like 'if', not e.g. individual 'elif' branches."

R
Rodrigo Setti 已提交
2128
    andOr <- withAnnotations annotations $
2129 2130 2131 2132 2133 2134 2135 2136 2137
        chainr1 readPipeline $ do
            op <- g_AND_IF <|> g_OR_IF
            readLineBreak
            return $ case op of T_AND_IF id -> T_AndIf id
                                T_OR_IF  id -> T_OrIf id

    return $ if null annotations
                then andOr
                else T_Annotation aid annotations andOr
V
Vidar Holen 已提交
2138

2139 2140 2141 2142 2143 2144
readTermOrNone = do
    allspacing
    readTerm <|> do
        eof
        return []

V
Vidar Holen 已提交
2145
prop_readTerm = isOk readTerm "time ( foo; bar; )"
V
Vidar Holen 已提交
2146
readTerm = do
V
Vidar Holen 已提交
2147
    allspacing
V
Vidar Holen 已提交
2148 2149
    m <- readAndOr
    readTerm' m
2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164
  where
    readTerm' current =
        do
            (sep, (start, end)) <- readSeparator
            id <- getNextIdBetween start end
            more <- option (T_EOF id) readAndOr
            case more of (T_EOF _) -> return [transformWithSeparator id sep current]
                         _         -> do
                                    list <- readTerm' more
                                    return (transformWithSeparator id sep current : list)
          <|>
            return [current]
      where
        transformWithSeparator i '&' = T_Backgrounded i
        transformWithSeparator i _  = id
V
Vidar Holen 已提交
2165 2166 2167


readPipeSequence = do
2168
    start <- startSpan
V
Vidar Holen 已提交
2169 2170
    (cmds, pipes) <- sepBy1WithSeparators readCommand
                        (readPipe `thenSkip` (spacing >> readLineBreak))
2171
    id <- endSpan start
V
Vidar Holen 已提交
2172
    spacing
V
Vidar Holen 已提交
2173 2174 2175 2176 2177 2178 2179 2180
    return $ T_Pipeline id pipes cmds
  where
    sepBy1WithSeparators p s = do
        let elems = p >>= \x -> return ([x], [])
        let seps = do
            separator <- s
            return $ \(a,b) (c,d) -> (a++c, b ++ d ++ [separator])
        elems `chainl1` seps
V
Vidar Holen 已提交
2181 2182

readPipe = do
2183
    notFollowedBy2 g_OR_IF
2184
    start <- startSpan
V
Vidar Holen 已提交
2185 2186
    char '|'
    qualifier <- string "&" <|> return ""
2187
    id <- endSpan start
V
Vidar Holen 已提交
2188 2189
    spacing
    return $ T_Pipe id ('|':qualifier)
V
Vidar Holen 已提交
2190

V
Vidar Holen 已提交
2191 2192 2193 2194 2195
readCommand = choice [
    readCompoundCommand,
    readCoProc,
    readSimpleCommand
    ]
V
Vidar Holen 已提交
2196

2197 2198 2199 2200 2201 2202
readCmdName = do
    -- Ignore alias suppression
    optional . try $ do
        char '\\'
        lookAhead $ variableChars
    readCmdWord
2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214

readCmdWord = do
    skipAnnotationAndWarn
    readNormalWord <* spacing

-- Due to poor planning, annotations after commands isn't handled well.
-- At the time this function is used, it's usually too late to skip
-- comments, so you end up with a parse failure instead.
skipAnnotationAndWarn = optional $ do
        try . lookAhead $ readAnnotationPrefix
        parseProblem ErrorC 1126 "Place shellcheck directives before commands, not after."
        readAnyComment
V
Vidar Holen 已提交
2215

2216
prop_readIfClause = isOk readIfClause "if false; then foo; elif true; then stuff; more stuff; else cows; fi"
V
Vidar Holen 已提交
2217
prop_readIfClause2 = isWarning readIfClause "if false; then; echo oo; fi"
V
Vidar Holen 已提交
2218
prop_readIfClause3 = isWarning readIfClause "if false; then true; else; echo lol; fi"
2219
prop_readIfClause4 = isWarning readIfClause "if false; then true; else if true; then echo lol; fi; fi"
V
Vidar Holen 已提交
2220
prop_readIfClause5 = isOk readIfClause "if false; then true; else\nif true; then echo lol; fi; fi"
2221
readIfClause = called "if expression" $ do
2222
    start <- startSpan
2223
    pos <- getPosition
V
Vidar Holen 已提交
2224 2225 2226
    (condition, action) <- readIfPart
    elifs <- many readElifPart
    elses <- option [] readElsePart
V
Vidar Holen 已提交
2227 2228

    g_Fi `orFail` do
V
Vidar Holen 已提交
2229 2230
        parseProblemAt pos ErrorC 1046 "Couldn't find 'fi' for this 'if'."
        parseProblem ErrorC 1047 "Expected 'fi' matching previously mentioned 'if'."
2231
        return "Expected 'fi'"
2232
    id <- endSpan start
V
Vidar Holen 已提交
2233

2234
    return $ T_IfExpression id ((condition, action):elifs) elses
V
Vidar Holen 已提交
2235

2236

2237
verifyNotEmptyIf s =
2238 2239 2240
    optional (do
                emptyPos <- getPosition
                try . lookAhead $ (g_Fi <|> g_Elif <|> g_Else)
V
Vidar Holen 已提交
2241
                parseProblemAt emptyPos ErrorC 1048 $ "Can't have empty " ++ s ++ " clauses (use 'true' as a no-op).")
V
Vidar Holen 已提交
2242
readIfPart = do
2243
    pos <- getPosition
V
Vidar Holen 已提交
2244 2245 2246
    g_If
    allspacing
    condition <- readTerm
2247

2248
    ifNextToken (g_Fi <|> g_Elif <|> g_Else) $
2249
        parseProblemAt pos ErrorC 1049 "Did you forget the 'then' for this 'if'?"
2250

2251
    called "then clause" $ do
V
Vidar Holen 已提交
2252 2253
        g_Then `orFail` do
            parseProblem ErrorC 1050 "Expected 'then'."
2254
            return "Expected 'then'"
2255

2256
        acceptButWarn g_Semi ErrorC 1051 "Semicolons directly after 'then' are not allowed. Just remove it."
2257
        allspacing
2258
        verifyNotEmptyIf "then"
2259

2260 2261
        action <- readTerm
        return (condition, action)
V
Vidar Holen 已提交
2262

2263
readElifPart = called "elif clause" $ do
2264
    pos <- getPosition
2265
    g_Elif
V
Vidar Holen 已提交
2266 2267
    allspacing
    condition <- readTerm
2268
    ifNextToken (g_Fi <|> g_Elif <|> g_Else) $
2269 2270
        parseProblemAt pos ErrorC 1049 "Did you forget the 'then' for this 'elif'?"

2271
    g_Then
2272
    acceptButWarn g_Semi ErrorC 1052 "Semicolons directly after 'then' are not allowed. Just remove it."
V
Vidar Holen 已提交
2273
    allspacing
2274
    verifyNotEmptyIf "then"
V
Vidar Holen 已提交
2275 2276 2277
    action <- readTerm
    return (condition, action)

2278
readElsePart = called "else clause" $ do
V
Vidar Holen 已提交
2279
    pos <- getPosition
V
Vidar Holen 已提交
2280
    g_Else
2281 2282 2283 2284
    optional $ do
        try . lookAhead $ g_If
        parseProblemAt pos ErrorC 1075 "Use 'elif' instead of 'else if' (or put 'if' on new line if nesting)."

2285
    acceptButWarn g_Semi ErrorC 1053 "Semicolons directly after 'else' are not allowed. Just remove it."
V
Vidar Holen 已提交
2286
    allspacing
2287
    verifyNotEmptyIf "else"
V
Vidar Holen 已提交
2288 2289
    readTerm

2290 2291 2292 2293 2294
ifNextToken parser action =
    optional $ do
        try . lookAhead $ parser
        action

2295
prop_readSubshell = isOk readSubshell "( cd /foo; tar cf stuff.tar * )"
2296
readSubshell = called "explicit subshell" $ do
2297
    start <- startSpan
V
Vidar Holen 已提交
2298 2299 2300 2301
    char '('
    allspacing
    list <- readCompoundList
    allspacing
2302
    char ')' <|> fail ") closing the subshell"
2303
    id <- endSpan start
2304
    return $ T_Subshell id list
V
Vidar Holen 已提交
2305

2306
prop_readBraceGroup = isOk readBraceGroup "{ a; b | c | d; e; }"
2307
prop_readBraceGroup2 = isWarning readBraceGroup "{foo;}"
2308
prop_readBraceGroup3 = isOk readBraceGroup "{(foo)}"
2309
readBraceGroup = called "brace group" $ do
2310
    start <- startSpan
V
Vidar Holen 已提交
2311
    char '{'
2312 2313 2314
    void allspacingOrFail <|> optional (do
        lookAhead $ noneOf "(" -- {( is legal
        parseProblem ErrorC 1054 "You need a space after the '{'.")
2315 2316 2317
    optional $ do
        pos <- getPosition
        lookAhead $ char '}'
V
Vidar Holen 已提交
2318
        parseProblemAt pos ErrorC 1055 "You need at least one command here. Use 'true;' as a no-op."
V
Vidar Holen 已提交
2319
    list <- readTerm
2320
    char '}' <|> do
V
Vidar Holen 已提交
2321
        parseProblem ErrorC 1056 "Expected a '}'. If you have one, try a ; or \\n in front of it."
V
Vidar Holen 已提交
2322
        fail "Missing '}'"
2323
    id <- endSpan start
2324
    return $ T_BraceGroup id list
V
Vidar Holen 已提交
2325

2326
prop_readWhileClause = isOk readWhileClause "while [[ -e foo ]]; do sleep 1; done"
2327
readWhileClause = called "while loop" $ do
2328 2329
    start <- startSpan
    kwId <- getId <$> g_While
V
Vidar Holen 已提交
2330
    condition <- readTerm
2331 2332
    statements <- readDoGroup kwId
    id <- endSpan start
2333
    return $ T_WhileExpression id condition statements
V
Vidar Holen 已提交
2334

2335
prop_readUntilClause = isOk readUntilClause "until kill -0 $PID; do sleep 1; done"
2336
readUntilClause = called "until loop" $ do
2337 2338
    start <- startSpan
    kwId <- getId <$> g_Until
V
Vidar Holen 已提交
2339
    condition <- readTerm
2340 2341
    statements <- readDoGroup kwId
    id <- endSpan start
2342
    return $ T_UntilExpression id condition statements
V
Vidar Holen 已提交
2343

2344
readDoGroup kwId = do
V
Vidar Holen 已提交
2345
    optional (do
2346
                try . lookAhead $ g_Done
2347
                parseProblemAtId kwId ErrorC 1057 "Did you forget the 'do' for this loop?")
2348

2349
    doKw <- g_Do `orFail` do
V
Vidar Holen 已提交
2350
        parseProblem ErrorC 1058 "Expected 'do'."
2351
        return "Expected 'do'"
V
Vidar Holen 已提交
2352

V
Vidar Holen 已提交
2353
    acceptButWarn g_Semi ErrorC 1059 "No semicolons directly after 'do'."
V
Vidar Holen 已提交
2354
    allspacing
V
Vidar Holen 已提交
2355

V
Vidar Holen 已提交
2356 2357
    optional (do
                try . lookAhead $ g_Done
2358
                parseProblemAtId (getId doKw) ErrorC 1060 "Can't have empty do clauses (use 'true' as a no-op).")
V
Vidar Holen 已提交
2359

V
Vidar Holen 已提交
2360 2361
    commands <- readCompoundList
    g_Done `orFail` do
2362
            parseProblemAtId (getId doKw) ErrorC 1061 "Couldn't find 'done' for this 'do'."
V
Vidar Holen 已提交
2363
            parseProblem ErrorC 1062 "Expected 'done' matching previously mentioned 'do'."
2364
            return "Expected 'done'"
V
Vidar Holen 已提交
2365
    return commands
2366

2367

2368
prop_readForClause = isOk readForClause "for f in *; do rm \"$f\"; done"
V
Vidar Holen 已提交
2369
prop_readForClause3 = isOk readForClause "for f; do foo; done"
V
Vidar Holen 已提交
2370 2371 2372
prop_readForClause4 = isOk readForClause "for((i=0; i<10; i++)); do echo $i; done"
prop_readForClause5 = isOk readForClause "for ((i=0;i<10 && n>x;i++,--n))\ndo \necho $i\ndone"
prop_readForClause6 = isOk readForClause "for ((;;))\ndo echo $i\ndone"
2373
prop_readForClause7 = isOk readForClause "for ((;;)) do echo $i\ndone"
2374
prop_readForClause8 = isOk readForClause "for ((;;)) ; do echo $i\ndone"
2375
prop_readForClause9 = isOk readForClause "for i do true; done"
V
Vidar Holen 已提交
2376
prop_readForClause10= isOk readForClause "for ((;;)) { true; }"
V
Vidar Holen 已提交
2377
prop_readForClause12= isWarning readForClause "for $a in *; do echo \"$a\"; done"
2378
prop_readForClause13= isOk readForClause "for foo\nin\\\n  bar\\\n  baz\ndo true; done"
2379
readForClause = called "for loop" $ do
2380
    pos <- getPosition
2381
    (T_For id) <- g_For
V
Vidar Holen 已提交
2382
    spacing
2383
    readArithmetic id <|> readRegular id
V
Vidar Holen 已提交
2384
  where
2385
    readArithmetic id = called "arithmetic for condition" $ do
V
Vidar Holen 已提交
2386 2387 2388 2389 2390 2391 2392 2393
        try $ string "(("
        x <- readArithmeticContents
        char ';' >> spacing
        y <- readArithmeticContents
        char ';' >> spacing
        z <- readArithmeticContents
        spacing
        string "))"
2394 2395
        spacing
        optional $ readSequentialSep >> spacing
2396
        group <- readBraced <|> readDoGroup id
V
Vidar Holen 已提交
2397
        return $ T_ForArithmetic id x y z group
V
Vidar Holen 已提交
2398

V
Vidar Holen 已提交
2399 2400 2401 2402
    readBraced = do
        (T_BraceGroup _ list) <- readBraceGroup
        return list

2403
    readRegular id = do
V
Vidar Holen 已提交
2404 2405
        acceptButWarn (char '$') ErrorC 1086
            "Don't use $ on the iterator name in for loops."
2406
        name <- readVariableName `thenSkip` allspacing
V
Vidar Holen 已提交
2407
        values <- readInClause <|> (optional readSequentialSep >> return [])
2408
        group <- readDoGroup id
V
Vidar Holen 已提交
2409
        return $ T_ForIn id name values group
V
Vidar Holen 已提交
2410

2411 2412 2413 2414 2415 2416
prop_readSelectClause1 = isOk readSelectClause "select foo in *; do echo $foo; done"
prop_readSelectClause2 = isOk readSelectClause "select foo; do echo $foo; done"
readSelectClause = called "select loop" $ do
    (T_Select id) <- g_Select
    spacing
    typ <- readRegular
2417
    group <- readDoGroup id
2418 2419 2420 2421 2422 2423 2424 2425
    typ id group
  where
    readRegular = do
        name <- readVariableName
        spacing
        values <- readInClause <|> (readSequentialSep >> return [])
        return $ \id group -> (return $ T_SelectIn id name values group)

V
Vidar Holen 已提交
2426 2427
readInClause = do
    g_In
R
Rodrigo Setti 已提交
2428
    things <- readCmdWord `reluctantlyTill`
2429
                (void g_Semi <|> void linefeed <|> void g_Do)
V
Vidar Holen 已提交
2430 2431

    do {
R
Rodrigo Setti 已提交
2432
        lookAhead g_Do;
V
Vidar Holen 已提交
2433
        parseNote ErrorC 1063 "You need a line feed or semicolon before the 'do'.";
V
Vidar Holen 已提交
2434
    } <|> do {
R
Rodrigo Setti 已提交
2435
        optional g_Semi;
2436
        void allspacing;
V
Vidar Holen 已提交
2437 2438 2439 2440
    }

    return things

2441
prop_readCaseClause = isOk readCaseClause "case foo in a ) lol; cow;; b|d) fooo; esac"
2442
prop_readCaseClause2 = isOk readCaseClause "case foo\n in * ) echo bar;; esac"
2443
prop_readCaseClause3 = isOk readCaseClause "case foo\n in * ) echo bar & ;; esac"
2444 2445
prop_readCaseClause4 = isOk readCaseClause "case foo\n in *) echo bar ;& bar) foo; esac"
prop_readCaseClause5 = isOk readCaseClause "case foo\n in *) echo bar;;& foo) baz;; esac"
2446
readCaseClause = called "case expression" $ do
2447
    start <- startSpan
V
Vidar Holen 已提交
2448 2449
    g_Case
    word <- readNormalWord
2450
    allspacing
2451
    g_In <|> fail "Expected 'in'"
V
Vidar Holen 已提交
2452 2453
    readLineBreak
    list <- readCaseList
2454
    g_Esac <|> fail "Expected 'esac' to close the case statement"
2455
    id <- endSpan start
2456
    return $ T_CaseExpression id word list
V
Vidar Holen 已提交
2457 2458 2459

readCaseList = many readCaseItem

2460 2461
readCaseItem = called "case item" $ do
    notFollowedBy2 g_Esac
2462 2463 2464
    optional $ do
        try . lookAhead $ readAnnotationPrefix
        parseProblem ErrorC 1124 "ShellCheck directives are only valid in front of complete commands like 'case' statements, not individual case branches."
V
Vidar Holen 已提交
2465 2466
    optional g_Lparen
    spacing
2467
    pattern' <- readPattern
V
Vidar Holen 已提交
2468 2469 2470 2471
    void g_Rparen <|> do
        parseProblem ErrorC 1085
            "Did you forget to move the ;; after extending this case item?"
        fail "Expected ) to open a new case item"
V
Vidar Holen 已提交
2472
    readLineBreak
2473 2474
    list <- (lookAhead readCaseSeparator >> return []) <|> readCompoundList
    separator <- readCaseSeparator `attempting` do
2475 2476 2477 2478
        pos <- getPosition
        lookAhead g_Rparen
        parseProblemAt pos ErrorC 1074
            "Did you forget the ;; after the previous case item?"
V
Vidar Holen 已提交
2479
    readLineBreak
2480
    return (separator, pattern', list)
2481 2482 2483 2484 2485 2486 2487

readCaseSeparator = choice [
    tryToken ";;&" (const ()) >> return CaseContinue,
    tryToken ";&" (const ()) >> return CaseFallThrough,
    g_DSEMI >> return CaseBreak,
    lookAhead (readLineBreak >> g_Esac) >> return CaseBreak
    ]
V
Vidar Holen 已提交
2488

2489
prop_readFunctionDefinition = isOk readFunctionDefinition "foo() { command foo --lol \"$@\"; }"
2490 2491
prop_readFunctionDefinition1 = isOk readFunctionDefinition "foo   (){ command foo --lol \"$@\"; }"
prop_readFunctionDefinition4 = isWarning readFunctionDefinition "foo(a, b) { true; }"
2492
prop_readFunctionDefinition5 = isOk readFunctionDefinition ":(){ :|:;}"
V
Vidar Holen 已提交
2493 2494
prop_readFunctionDefinition6 = isOk readFunctionDefinition "?(){ foo; }"
prop_readFunctionDefinition7 = isOk readFunctionDefinition "..(){ cd ..; }"
2495
prop_readFunctionDefinition8 = isOk readFunctionDefinition "foo() (ls)"
2496 2497 2498
prop_readFunctionDefinition9 = isOk readFunctionDefinition "function foo { true; }"
prop_readFunctionDefinition10= isOk readFunctionDefinition "function foo () { true; }"
prop_readFunctionDefinition11= isWarning readFunctionDefinition "function foo{\ntrue\n}"
2499
prop_readFunctionDefinition12= isOk readFunctionDefinition "function []!() { true; }"
2500
readFunctionDefinition = called "function" $ do
2501
    start <- startSpan
2502
    functionSignature <- try readFunctionSignature
V
Vidar Holen 已提交
2503
    allspacing
2504
    void (lookAhead $ oneOf "{(") <|> parseProblem ErrorC 1064 "Expected a { to open the function definition."
2505
    group <- readBraceGroup <|> readSubshell
2506 2507
    id <- endSpan start
    return $ functionSignature id group
2508
  where
R
Rodrigo Setti 已提交
2509
    readFunctionSignature =
2510 2511 2512 2513 2514 2515 2516
        readWithFunction <|> readWithoutFunction
      where
        readWithFunction = do
            try $ do
                string "function"
                whitespace
            spacing
2517
            name <- many1 extendedFunctionChars
2518
            spaces <- spacing
2519
            hasParens <- wasIncluded readParens
2520 2521 2522
            when (not hasParens && null spaces) $
                acceptButWarn (lookAhead (oneOf "{("))
                    ErrorC 1095 "You need a space or linefeed between the function name and body."
2523
            return $ \id -> T_Function id (FunctionKeyword True) (FunctionParentheses hasParens) name
V
Vidar Holen 已提交
2524

2525
        readWithoutFunction = try $ do
2526
            name <- many1 functionChars
V
Vidar Holen 已提交
2527
            guard $ name /= "time"  -- Interfers with time ( foo )
2528
            spacing
2529
            readParens
2530
            return $ \id -> T_Function id (FunctionKeyword False) (FunctionParentheses True) name
2531 2532 2533

        readParens = do
            g_Lparen
2534
            spacing
2535 2536 2537 2538 2539 2540
            g_Rparen <|> do
                parseProblem ErrorC 1065 "Trying to declare parameters? Don't. Use () and refer to params as $1, $2.."
                many $ noneOf "\n){"
                g_Rparen
            return ()

V
Vidar Holen 已提交
2541 2542 2543 2544
prop_readCoProc1 = isOk readCoProc "coproc foo { echo bar; }"
prop_readCoProc2 = isOk readCoProc "coproc { echo bar; }"
prop_readCoProc3 = isOk readCoProc "coproc echo bar"
readCoProc = called "coproc" $ do
2545
    start <- startSpan
V
Vidar Holen 已提交
2546 2547 2548
    try $ do
        string "coproc"
        whitespace
2549
    choice [ try $ readCompoundCoProc start, readSimpleCoProc start ]
V
Vidar Holen 已提交
2550
  where
2551
    readCompoundCoProc start = do
V
Vidar Holen 已提交
2552 2553
        var <- optionMaybe $
            readVariableName `thenSkip` whitespace
2554
        body <- readBody readCompoundCommand
2555
        id <- endSpan start
V
Vidar Holen 已提交
2556
        return $ T_CoProc id var body
2557
    readSimpleCoProc start = do
2558
        body <- readBody readSimpleCommand
2559
        id <- endSpan start
V
Vidar Holen 已提交
2560
        return $ T_CoProc id Nothing body
2561
    readBody parser = do
2562
        start <- startSpan
2563
        body <- parser
2564
        id <- endSpan start
2565
        return $ T_CoProcBody id body
V
Vidar Holen 已提交
2566 2567


V
Vidar Holen 已提交
2568 2569
readPattern = (readNormalWord `thenSkip` spacing) `sepBy1` (char '|' `thenSkip` spacing)

2570
prop_readCompoundCommand = isOk readCompoundCommand "{ echo foo; }>/dev/null"
2571
readCompoundCommand = do
2572 2573
    cmd <- choice [
        readBraceGroup,
2574
        readAmbiguous "((" readArithmeticExpression readSubshell (\pos ->
V
Vidar Holen 已提交
2575
            parseNoteAt pos WarningC 1105 "Shells disambiguate (( differently or not at all. For subshell, add spaces around ( . For ((, fix parsing errors."),
2576 2577 2578 2579 2580 2581 2582 2583 2584 2585
        readSubshell,
        readCondition,
        readWhileClause,
        readUntilClause,
        readIfClause,
        readForClause,
        readSelectClause,
        readCaseClause,
        readFunctionDefinition
        ]
2586
    spacing
V
Vidar Holen 已提交
2587
    redirs <- many readIoRedirect
2588
    id <- getNextIdSpanningTokenList (cmd:redirs)
R
Rodrigo Setti 已提交
2589
    unless (null redirs) $ optional $ do
2590
        lookAhead $ try (spacing >> needsSeparator)
V
Vidar Holen 已提交
2591
        parseProblem WarningC 1013 "Bash requires ; or \\n here, after redirecting nested compound commands."
R
Rodrigo Setti 已提交
2592
    return $ T_Redirecting id redirs cmd
2593 2594 2595
  where
    needsSeparator = choice [ g_Then, g_Else, g_Elif, g_Fi, g_Do, g_Done, g_Esac, g_Rbrace ]

V
Vidar Holen 已提交
2596 2597

readCompoundList = readTerm
2598 2599 2600
readCompoundListOrEmpty = do
    allspacing
    readTerm <|> return []
V
Vidar Holen 已提交
2601 2602

readCmdPrefix = many1 (readIoRedirect <|> readAssignmentWord)
2603
readCmdSuffix = many1 (readIoRedirect <|> readCmdWord)
2604
readModifierSuffix = many1 (readIoRedirect <|> readWellFormedAssignment <|> readCmdWord)
2605 2606 2607 2608 2609 2610 2611 2612 2613
readTimeSuffix = do
    flags <- many readFlag
    pipeline <- readPipeline
    return $ flags ++ [pipeline]
  where
    -- This fails for quoted variables and such. Fixme?
    readFlag = do
        lookAhead $ char '-'
        readCmdWord
2614

2615 2616
-- Fixme: this is a hack that doesn't handle let c='4'"5" or let a\>b
readLetSuffix :: Monad m => SCParser m [Token]
2617 2618
readLetSuffix = many1 (readIoRedirect <|> try readLetExpression <|> readCmdWord)
  where
2619
    readLetExpression :: Monad m => SCParser m Token
2620 2621 2622
    readLetExpression = do
        startPos <- getPosition
        expression <- readStringForParser readCmdWord
2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637
        let (unQuoted, newPos) = kludgeAwayQuotes expression startPos
        subParse newPos readArithmeticContents unQuoted

    kludgeAwayQuotes :: String -> SourcePos -> (String, SourcePos)
    kludgeAwayQuotes s p =
        case s of
            first:rest@(_:_) ->
                let (last:backwards) = reverse rest
                    middle = reverse backwards
                in
                    if first `elem` "'\"" && first == last
                    then (middle, updatePosChar p first)
                    else (s, p)
            x -> (s, p)

2638

2639 2640 2641 2642 2643 2644 2645 2646 2647
-- bash allows a=(b), ksh allows $a=(b). dash allows neither. Let's warn.
readEvalSuffix = many1 (readIoRedirect <|> readCmdWord <|> evalFallback)
  where
    evalFallback = do
        pos <- getPosition
        lookAhead $ char '('
        parseProblemAt pos WarningC 1098 "Quote/escape special characters when using eval, e.g. eval \"a=(b)\"."
        fail "Unexpected parentheses. Make sure to quote when eval'ing as shell parsers differ."

2648 2649
-- Get whatever a parser would parse as a string
readStringForParser parser = do
2650
    pos <- inSeparateContext $ lookAhead (parser >> getPosition)
V
Vidar Holen 已提交
2651
    readUntil pos
2652 2653 2654
  where
    readUntil endPos = anyChar `reluctantlyTill` (getPosition >>= guard . (== endPos))

2655 2656 2657 2658
prop_readAssignmentWord = isOk readAssignmentWord "a=42"
prop_readAssignmentWord2 = isOk readAssignmentWord "b=(1 2 3)"
prop_readAssignmentWord3 = isWarning readAssignmentWord "$b = 13"
prop_readAssignmentWord4 = isWarning readAssignmentWord "b = $(lol)"
V
Vidar Holen 已提交
2659 2660
prop_readAssignmentWord5 = isOk readAssignmentWord "b+=lol"
prop_readAssignmentWord6 = isWarning readAssignmentWord "b += (1 2 3)"
2661 2662
prop_readAssignmentWord7 = isOk readAssignmentWord "a[3$n'']=42"
prop_readAssignmentWord8 = isOk readAssignmentWord "a[4''$(cat foo)]=42"
2663
prop_readAssignmentWord9 = isOk readAssignmentWord "IFS= "
2664
prop_readAssignmentWord9a= isOk readAssignmentWord "foo="
2665 2666
prop_readAssignmentWord9b= isOk readAssignmentWord "foo=  "
prop_readAssignmentWord9c= isOk readAssignmentWord "foo=  #bar"
2667 2668
prop_readAssignmentWord10= isWarning readAssignmentWord "foo$n=42"
prop_readAssignmentWord11= isOk readAssignmentWord "foo=([a]=b [c] [d]= [e f )"
2669
prop_readAssignmentWord12= isOk readAssignmentWord "a[b <<= 3 + c]='thing'"
2670 2671 2672
prop_readAssignmentWord13= isOk readAssignmentWord "var=( (1 2) (3 4) )"
prop_readAssignmentWord14= isOk readAssignmentWord "var=( 1 [2]=(3 4) )"
prop_readAssignmentWord15= isOk readAssignmentWord "var=(1 [2]=(3 4))"
2673 2674 2675
readAssignmentWord = readAssignmentWordExt True
readWellFormedAssignment = readAssignmentWordExt False
readAssignmentWordExt lenient = try $ do
2676
    start <- startSpan
V
Vidar Holen 已提交
2677
    pos <- getPosition
2678 2679
    when lenient $
        optional (char '$' >> parseNote ErrorC 1066 "Don't use $ on the left side of assignments.")
V
Vidar Holen 已提交
2680
    variable <- readVariableName
2681 2682
    when lenient $
        optional (readNormalDollar >> parseNoteAt pos ErrorC
V
Vidar Holen 已提交
2683
                                1067 "For indirection, use (associative) arrays or 'read \"var$n\" <<< \"value\"'")
2684
    indices <- many readArrayIndex
2685
    hasLeftSpace <- fmap (not . null) spacing
V
Vidar Holen 已提交
2686
    pos <- getPosition
2687
    id <- endSpan start
V
Vidar Holen 已提交
2688
    op <- readAssignmentOp
2689 2690
    hasRightSpace <- fmap (not . null) spacing
    isEndOfCommand <- fmap isJust $ optionMaybe (try . lookAhead $ (void (oneOf "\r\n;&|)") <|> eof))
2691
    if not hasLeftSpace && (hasRightSpace || isEndOfCommand)
2692
      then do
2693
        when (variable /= "IFS" && hasRightSpace && not isEndOfCommand) $
2694 2695
            parseNoteAt pos WarningC 1007
                "Remove space after = if trying to assign a value (for empty string, use var='' ... )."
2696
        value <- readEmptyLiteral
2697
        return $ T_Assignment id op variable indices value
2698
      else do
2699
        when (hasLeftSpace || hasRightSpace) $
2700 2701 2702 2703 2704
            parseNoteAt pos ErrorC 1068 $
                "Don't put spaces around the "
                ++ if op == Append
                    then "+= when appending."
                    else "= in assignments."
2705 2706
        value <- readArray <|> readNormalWord
        spacing
2707
        return $ T_Assignment id op variable indices value
2708
  where
2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721
    readAssignmentOp = do
        pos <- getPosition
        unexpecting "" $ string "==="
        choice [
            string "+=" >> return Append,
            do
                try (string "==")
                parseProblemAt pos ErrorC 1097
                    "Unexpected ==. For assignment, use =. For comparison, use [/[[."
                return Assign,

            string "=" >> return Assign
            ]
2722
    readEmptyLiteral = do
2723 2724
        start <- startSpan
        id <- endSpan start
2725
        return $ T_Literal id ""
2726

2727
readArrayIndex = do
2728
    start <- startSpan
2729
    char '['
2730 2731
    pos <- getPosition
    str <- readStringForParser readIndexSpan
2732
    char ']'
2733
    id <- endSpan start
2734
    return $ T_UnparsedIndex id pos str
2735

2736
readArray :: Monad m => SCParser m Token
2737
readArray = called "array assignment" $ do
2738
    start <- startSpan
2739
    opening <- getPosition
2740
    char '('
2741 2742 2743
    optional $ do
        lookAhead $ char '('
        parseProblemAt opening ErrorC 1116 "Missing $ on a $((..)) expression? (or use ( ( for arrays)."
2744
    allspacing
2745
    words <- readElement `reluctantlyTill` char ')'
2746
    char ')' <|> fail "Expected ) to close array assignment"
2747
    id <- endSpan start
2748
    return $ T_Array id words
2749 2750 2751
  where
    readElement = (readIndexed <|> readRegular) `thenSkip` allspacing
    readIndexed = do
2752
        start <- startSpan
2753
        index <- try $ do
2754
            x <- many1 readArrayIndex
2755 2756
            char '='
            return x
2757
        value <- readRegular <|> nothing
2758
        id <- endSpan start
2759
        return $ T_IndexedElement id index value
2760
    readRegular = readArray <|> readNormalWord
2761 2762

    nothing = do
2763 2764
        start <- startSpan
        id <- endSpan start
2765
        return $ T_Literal id ""
2766 2767

tryToken s t = try $ do
2768
    start <- startSpan
2769
    string s
2770
    id <- endSpan start
2771 2772
    spacing
    return $ t id
V
Vidar Holen 已提交
2773

2774
redirToken c t = try $ do
2775
    start <- startSpan
2776
    char c
2777
    id <- endSpan start
2778
    notFollowedBy2 $ char '('
2779 2780
    return $ t id

2781 2782
tryWordToken s t = tryParseWordToken s t `thenSkip` spacing
tryParseWordToken keyword t = try $ do
2783
    start <- startSpan
2784
    str <- anycaseString keyword
2785
    id <- endSpan start
V
Vidar Holen 已提交
2786 2787

    optional $ do
V
Vidar Holen 已提交
2788 2789 2790 2791 2792 2793 2794 2795
        c <- try . lookAhead $ anyChar
        let warning code = parseProblem ErrorC code $ "You need a space before the " ++ [c] ++ "."
        case c of
            '[' -> warning 1069
            '#' -> warning 1099
            '!' -> warning 1129
            ':' -> warning 1130
            _ -> return ()
V
Vidar Holen 已提交
2796

2797
    lookAhead keywordSeparator
2798 2799 2800
    when (str /= keyword) $
        parseProblem ErrorC 1081 $
            "Scripts are case sensitive. Use '" ++ keyword ++ "', not '" ++ str ++ "'."
2801
    return $ t id
V
Vidar Holen 已提交
2802

2803 2804
anycaseString =
    mapM anycaseChar
2805 2806 2807
  where
    anycaseChar c = char (toLower c) <|> char (toUpper c)

V
Vidar Holen 已提交
2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831
g_AND_IF = tryToken "&&" T_AND_IF
g_OR_IF = tryToken "||" T_OR_IF
g_DSEMI = tryToken ";;" T_DSEMI
g_DLESS = tryToken "<<" T_DLESS
g_DGREAT = tryToken ">>" T_DGREAT
g_LESSAND = tryToken "<&" T_LESSAND
g_GREATAND = tryToken ">&" T_GREATAND
g_LESSGREAT = tryToken "<>" T_LESSGREAT
g_DLESSDASH = tryToken "<<-" T_DLESSDASH
g_CLOBBER = tryToken ">|" T_CLOBBER
g_OPERATOR = g_AND_IF <|> g_OR_IF <|> g_DSEMI <|> g_DLESSDASH <|> g_DLESS <|> g_DGREAT <|> g_LESSAND <|> g_GREATAND <|> g_LESSGREAT

g_If = tryWordToken "if" T_If
g_Then = tryWordToken "then" T_Then
g_Else = tryWordToken "else" T_Else
g_Elif = tryWordToken "elif" T_Elif
g_Fi = tryWordToken "fi" T_Fi
g_Do = tryWordToken "do" T_Do
g_Done = tryWordToken "done" T_Done
g_Case = tryWordToken "case" T_Case
g_Esac = tryWordToken "esac" T_Esac
g_While = tryWordToken "while" T_While
g_Until = tryWordToken "until" T_Until
g_For = tryWordToken "for" T_For
2832
g_Select = tryWordToken "select" T_Select
2833
g_In = tryWordToken "in" T_In <* skipAnnotationAndWarn
V
Vidar Holen 已提交
2834
g_Lbrace = tryWordToken "{" T_Lbrace
V
Vidar Holen 已提交
2835
g_Rbrace = do -- handled specially due to ksh echo "${ foo; }bar"
2836
    start <- startSpan
V
Vidar Holen 已提交
2837
    char '}'
2838
    id <- endSpan start
V
Vidar Holen 已提交
2839
    return $ T_Rbrace id
V
Vidar Holen 已提交
2840 2841 2842

g_Lparen = tryToken "(" T_Lparen
g_Rparen = tryToken ")" T_Rparen
2843
g_Bang = do
2844
    start <- startSpan
2845
    char '!'
2846
    id <- endSpan start
2847 2848 2849 2850
    void spacing1 <|> do
        pos <- getPosition
        parseProblemAt pos ErrorC 1035
            "You are missing a required space after the !."
2851
    return $ T_Bang id
V
Vidar Holen 已提交
2852 2853

g_Semi = do
2854
    notFollowedBy2 g_DSEMI
V
Vidar Holen 已提交
2855 2856
    tryToken ";" T_Semi

2857
keywordSeparator =
2858
    eof <|> void (try allspacingOrFail) <|> void (oneOf ";()[<>&|")
V
Vidar Holen 已提交
2859

V
Vidar Holen 已提交
2860 2861
readKeyword = choice [ g_Then, g_Else, g_Elif, g_Fi, g_Do, g_Done, g_Esac, g_Rbrace, g_Rparen, g_DSEMI ]

R
Rodrigo Setti 已提交
2862
ifParse p t f =
V
Vidar Holen 已提交
2863 2864
    (lookAhead (try p) >> t) <|> f

2865 2866
prop_readShebang1 = isOk readShebang "#!/bin/sh\n"
prop_readShebang2 = isWarning readShebang "!# /bin/sh\n"
2867
prop_readShebang3 = isNotOk readShebang "#shellcheck shell=/bin/sh\n"
2868
prop_readShebang4 = isWarning readShebang "! /bin/sh"
2869 2870 2871
prop_readShebang5 = isWarning readShebang "\n#!/bin/sh"
prop_readShebang6 = isWarning readShebang " # Copyright \n!#/bin/bash"
prop_readShebang7 = isNotOk readShebang "# Copyright \nfoo\n#!/bin/bash"
V
Vidar Holen 已提交
2872
readShebang = do
2873
    anyShebang <|> try readMissingBang <|> withHeader
2874
    many linewhitespace
2875
    str <- many $ noneOf "\r\n"
2876 2877
    optional carriageReturn
    optional linefeed
V
Vidar Holen 已提交
2878
    return str
2879
  where
2880 2881 2882 2883 2884 2885
    anyShebang = choice $ map try [
        readCorrect,
        readSwapped,
        readTooManySpaces,
        readMissingHash
        ]
2886
    readCorrect = void $ string "#!"
V
Vidar Holen 已提交
2887

2888
    readSwapped = do
2889
        start <- startSpan
2890
        string "!#"
2891 2892
        id <- endSpan start
        parseProblemAtId id ErrorC 1084
2893
            "Use #!, not !#, for the shebang."
V
Vidar Holen 已提交
2894

2895
    skipSpaces = fmap (not . null) $ many linewhitespace
V
Vidar Holen 已提交
2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909
    readTooManySpaces = do
        startPos <- getPosition
        startSpaces <- skipSpaces
        char '#'
        middlePos <- getPosition
        middleSpaces <- skipSpaces
        char '!'
        when startSpaces $
            parseProblemAt startPos ErrorC 1114
                "Remove leading spaces before the shebang."
        when middleSpaces $
            parseProblemAt middlePos ErrorC 1115
                "Remove spaces between # and ! in the shebang."

2910 2911 2912
    readMissingHash = do
        pos <- getPosition
        char '!'
V
Vidar Holen 已提交
2913
        ensurePathAhead
2914 2915 2916
        parseProblemAt pos ErrorC 1104
            "Use #!, not just !, for the shebang."

V
Vidar Holen 已提交
2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927
    readMissingBang = do
        char '#'
        pos <- getPosition
        ensurePathAhead
        parseProblemAt pos ErrorC 1113
            "Use #!, not just #, for the shebang."

    ensurePathAhead = lookAhead $ do
        many linewhitespace
        char '/'

2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939
    withHeader = try $ do
        many1 headerLine
        pos <- getPosition
        anyShebang <*
            parseProblemAt pos ErrorC 1128 "The shebang must be on the first line. Delete blanks and move comments."

    headerLine = do
        notFollowedBy2 anyShebang
        many linewhitespace
        optional readAnyComment
        linefeed

2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953
verifyEof = eof <|> choice [
        ifParsable g_Lparen $
            parseProblem ErrorC 1088 "Parsing stopped here. Invalid use of parentheses?",

        ifParsable readKeyword $
            parseProblem ErrorC 1089 "Parsing stopped here. Is this keyword correctly matched up?",

        parseProblem ErrorC 1070 "Parsing stopped here. Mismatched keywords or invalid parentheses?"
    ]
  where
    ifParsable p action = do
        try (lookAhead p)
        action

2954 2955 2956 2957 2958 2959
prop_readScript1 = isOk readScriptFile "#!/bin/bash\necho hello world\n"
prop_readScript2 = isWarning readScriptFile "#!/bin/bash\r\necho hello world\n"
prop_readScript3 = isWarning readScriptFile "#!/bin/bash\necho hello\xA0world"
prop_readScript4 = isWarning readScriptFile "#!/usr/bin/perl\nfoo=("
prop_readScript5 = isOk readScriptFile "#!/bin/bash\n#This is an empty script\n\n"
readScriptFile = do
2960
    start <- startSpan
2961
    pos <- getPosition
V
Vidar Holen 已提交
2962 2963
    optional $ do
        readUtf8Bom
R
Rodrigo Setti 已提交
2964
        parseProblem ErrorC 1082
V
Vidar Holen 已提交
2965
            "This file has a UTF-8 BOM. Remove it with: LC_CTYPE=C sed '1s/^...//' < yourscript ."
V
Vidar Holen 已提交
2966
    sb <- option "" readShebang
2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978
    allspacing
    annotationStart <- startSpan
    annotations <- readAnnotations
    annotationId <- endSpan annotationStart
    let shellAnnotationSpecified =
            any (\x -> case x of ShellOverride {} -> True; _ -> False) annotations
    shellFlagSpecified <- isJust <$> Mr.asks shellTypeOverride
    let ignoreShebang = shellAnnotationSpecified || shellFlagSpecified

    unless ignoreShebang $
        verifyShebang pos (getShell sb)
    if ignoreShebang || isValidShell (getShell sb) /= Just False
2979
      then do
2980
            commands <- withAnnotations annotations readCompoundListOrEmpty
2981
            id <- endSpan start
2982
            verifyEof
2983 2984
            let script = T_Annotation annotationId annotations $
                            T_Script id sb commands
2985
            reparseIndices script
2986 2987
        else do
            many anyChar
2988
            id <- endSpan start
V
Vidar Holen 已提交
2989
            return $ T_Script id sb []
2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001

  where
    basename s = reverse . takeWhile (/= '/') . reverse $ s
    getShell sb =
        case words sb of
            [] -> ""
            [x] -> basename x
            (first:second:_) ->
                if basename first == "env"
                    then second
                    else basename first

3002
    verifyShebang pos s = do
3003 3004
        case isValidShell s of
            Just True -> return ()
A
Austin English 已提交
3005 3006
            Just False -> parseProblemAt pos ErrorC 1071 "ShellCheck only supports sh/bash/dash/ksh scripts. Sorry!"
            Nothing -> parseProblemAt pos InfoC 1008 "This shebang was unrecognized. Note that ShellCheck only handles sh/bash/dash/ksh."
3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019

    isValidShell s =
        let good = s == "" || any (`isPrefixOf` s) goodShells
            bad = any (`isPrefixOf` s) badShells
        in
            if good
                then Just True
                else if bad
                        then Just False
                        else Nothing

    goodShells = [
        "sh",
V
Vidar Holen 已提交
3020 3021
        "ash",
        "dash",
3022
        "bash",
V
Vidar Holen 已提交
3023
        "ksh"
3024 3025 3026 3027
        ]
    badShells = [
        "awk",
        "csh",
3028
        "expect",
3029 3030 3031
        "perl",
        "python",
        "ruby",
V
Vidar Holen 已提交
3032 3033
        "tcsh",
        "zsh"
3034
        ]
V
Vidar Holen 已提交
3035

V
Vidar Holen 已提交
3036 3037
    readUtf8Bom = called "Byte Order Mark" $ string "\xFEFF"

3038
readScript = readScriptFile
3039

3040 3041
-- Interactively run a specific parser in ghci:
-- debugParse readSimpleCommand "echo 'hello world'"
3042
debugParse p string = runIdentity $ do
3043
    (res, _) <- runParser testEnvironment p "-" string
3044 3045
    return res

3046 3047 3048 3049 3050 3051
-- Interactively run the complete parser in ghci:
-- debugParseScript "#!/bin/bash\necho 'Hello World'\n"
debugParseScript string =
    result {
        -- Remove the noisiest parts
        prTokenPositions = Map.fromList [
N
Ng Zhi An 已提交
3052
            (Id 0, (Position {
3053 3054 3055
                posFile = "removed for clarity",
                posLine = -1,
                posColumn = -1
N
Ng Zhi An 已提交
3056
            }, Position {
N
Ng Zhi An 已提交
3057 3058 3059 3060
                posFile = "removed for clarity",
                posLine = -1,
                posColumn = -1
            }))]
3061 3062 3063
    }
  where
    result = runIdentity $
3064
        parseScript (mockedSystemInterface []) $ newParseSpec {
3065
            psFilename = "debug",
3066
            psScript = string
3067 3068
        }

3069 3070 3071
testEnvironment =
    Environment {
        systemInterface = (mockedSystemInterface []),
3072 3073
        checkSourced = False,
        shellTypeOverride = Nothing
3074 3075
    }

3076 3077 3078 3079 3080

isOk p s =      parsesCleanly p s == Just True   -- The string parses with no warnings
isWarning p s = parsesCleanly p s == Just False  -- The string parses with warnings
isNotOk p s =   parsesCleanly p s == Nothing     -- The string does not parse

3081
parsesCleanly parser string = runIdentity $ do
3082
    (res, sys) <- runParser testEnvironment
3083 3084 3085 3086 3087
                    (parser >> eof >> getState) "-" string
    case (res, sys) of
        (Right userState, systemState) ->
            return $ Just . null $ parseNotes userState ++ parseProblems systemState
        (Left _, _) -> return Nothing
3088

3089 3090 3091 3092 3093 3094 3095 3096 3097 3098
-- For printf debugging: print the value of an expression
-- Example: return $ dump $ T_Literal id [c]
dump :: Show a => a -> a
dump x = trace (show x) x

-- Like above, but print a specific expression:
-- Example: return $ dumps ("Returning: " ++ [c])  $ T_Literal id [c]
dumps :: Show x => x -> a -> a
dumps t = trace (show t)

3099
parseWithNotes parser = do
3100
    item <- parser
V
Vidar Holen 已提交
3101 3102
    state <- getState
    return (item, state)
V
Vidar Holen 已提交
3103

3104
compareNotes (ParseNote pos1 pos1' level1 _ s1) (ParseNote pos2 pos2' level2 _ s2) = compare (pos1, pos1', level1) (pos2, pos2', level2)
3105 3106
sortNotes = sortBy compareNotes

3107

3108
makeErrorFor parsecError =
3109
    ParseNote pos pos ErrorC 1072 $
V
Vidar Holen 已提交
3110
        getStringFromParsec $ errorMessages parsecError
3111 3112
    where
      pos = errorPos parsecError
3113 3114

getStringFromParsec errors =
V
Vidar Holen 已提交
3115 3116 3117 3118 3119
        case map f errors of
            r -> unwords (take 1 $ catMaybes $ reverse r)  ++
                " Fix any mentioned problems and try again."
    where
        f err =
3120
            case err of
3121 3122 3123
                UnExpect s    ->  Nothing -- Due to not knowing Parsec, none of these
                SysUnExpect s ->  Nothing -- are actually helpful. <?> has been hidden
                Expect s      ->  Nothing -- and we only show explicit fail statements.
V
Vidar Holen 已提交
3124
                Message s     ->  if null s then Nothing else return $ s ++ "."
3125

3126
runParser :: Monad m =>
3127
    Environment m ->
3128 3129 3130 3131 3132
    SCParser m v ->
    String ->
    String ->
    m (Either ParseError v, SystemState)

3133
runParser env p filename contents =
3134 3135 3136
    Ms.runStateT
        (Mr.runReaderT
            (runParserT p initialUserState filename contents)
3137
            env)
3138
        initialSystemState
3139
system = lift . lift . lift
3140

3141 3142
parseShell env name contents = do
    (result, state) <- runParser env (parseWithNotes readScript) name contents
3143
    case result of
V
Vidar Holen 已提交
3144
        Right (script, userstate) ->
3145
            return ParseResult {
V
Vidar Holen 已提交
3146
                prComments = map toPositionedComment $ nub $ parseNotes userstate ++ parseProblems state,
N
Ng Zhi An 已提交
3147
                prTokenPositions = Map.map startEndPosToPos (positionMap userstate),
V
Vidar Holen 已提交
3148 3149
                prRoot = Just $
                    reattachHereDocs script (hereDocMap userstate)
3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160
            }
        Left err ->
            return ParseResult {
                prComments =
                    map toPositionedComment $
                        notesForContext (contextStack state)
                        ++ [makeErrorFor err]
                        ++ parseProblems state,
                prTokenPositions = Map.empty,
                prRoot = Nothing
            }
3161 3162

notesForContext list = zipWith ($) [first, second] $ filter isName list
3163
  where
3164 3165
    isName (ContextName _ _) = True
    isName _ = False
3166
    first (ContextName pos str) = ParseNote pos pos ErrorC 1073 $
3167
        "Couldn't parse this " ++ str ++ ". Fix to allow more checks."
3168
    second (ContextName pos str) = ParseNote pos pos InfoC 1009 $
V
Vidar Holen 已提交
3169
        "The mentioned syntax error was in this " ++ str ++ "."
3170

3171 3172 3173 3174 3175 3176 3177
-- Go over all T_UnparsedIndex and reparse them as either arithmetic or text
-- depending on declare -A statements.
reparseIndices root =
   analyze blank blank f root
  where
    associative = getAssociativeArrays root
    isAssociative s = s `elem` associative
3178 3179 3180 3181 3182 3183 3184 3185
    f (T_Assignment id mode name indices value) = do
        newIndices <- mapM (fixAssignmentIndex name) indices
        newValue <- case value of
            (T_Array id2 words) -> do
                newWords <- mapM (fixIndexElement name) words
                return $ T_Array id2 newWords
            x -> return x
        return $ T_Assignment id mode name newIndices newValue
3186 3187 3188
    f (TA_Variable id name indices) = do
        newIndices <- mapM (fixAssignmentIndex name) indices
        return $ TA_Variable id name newIndices
3189 3190
    f t = return t

3191
    fixIndexElement name word =
3192
        case word of
3193 3194
            T_IndexedElement id indices value -> do
                new <- mapM (fixAssignmentIndex name) indices
3195
                return $ T_IndexedElement id new value
3196
            _ -> return word
3197

3198 3199
    fixAssignmentIndex name word =
        case word of
3200
            T_UnparsedIndex id pos src ->
3201
                parsed name pos src
3202
            _ -> return word
3203

3204 3205
    parsed name pos src =
        if isAssociative name
3206 3207
        then subParse pos (called "associative array index" $ readIndexSpan) src
        else subParse pos (called "arithmetic array index expression" $ optional space >> readArithmeticContents) src
3208

V
Vidar Holen 已提交
3209 3210 3211 3212 3213 3214 3215
reattachHereDocs root map =
    doTransform f root
  where
    f t@(T_HereDoc id dash quote string []) = fromMaybe t $ do
        list <- Map.lookup id map
        return $ T_HereDoc id dash quote string list
    f t = t
3216 3217

toPositionedComment :: ParseNote -> PositionedComment
3218 3219
toPositionedComment (ParseNote start end severity code message) =
    PositionedComment (posToPos start) (posToPos end) $ Comment severity code message
3220 3221 3222 3223 3224 3225 3226 3227

posToPos :: SourcePos -> Position
posToPos sp = Position {
    posFile = sourceName sp,
    posLine = fromIntegral $ sourceLine sp,
    posColumn = fromIntegral $ sourceColumn sp
}

N
Ng Zhi An 已提交
3228 3229
startEndPosToPos :: (SourcePos, SourcePos) -> (Position, Position)
startEndPosToPos (s, e) = (posToPos s, posToPos e)
N
Ng Zhi An 已提交
3230

3231 3232 3233 3234
-- TODO: Clean up crusty old code that this is layered on top of
parseScript :: Monad m =>
        SystemInterface m -> ParseSpec -> m ParseResult
parseScript sys spec =
3235 3236 3237 3238
    parseShell env (psFilename spec) (psScript spec)
  where
    env = Environment {
        systemInterface = sys,
3239 3240
        checkSourced = psCheckSourced spec,
        shellTypeOverride = psShellTypeOverride spec
3241
    }
3242

3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272
-- Same as 'try' but emit syntax errors if the parse fails.
tryWithErrors :: Monad m => SCParser m v -> SCParser m v
tryWithErrors parser = do
    userstate <- getState
    oldContext <- getCurrentContexts
    input <- getInput
    pos <- getPosition
    result <- lift $ runParserT (setPosition pos >> getResult parser) userstate (sourceName pos) input
    case result of
        Right (result, endPos, endInput, endState) -> do
            -- 'many' objects if we don't consume anything at all, so read a dummy value
            void anyChar <|> eof
            putState endState
            setPosition endPos
            setInput endInput
            return result

        Left err -> do
            newContext <- getCurrentContexts
            addParseProblem $ makeErrorFor err
            mapM_ addParseProblem $ notesForContext newContext
            setCurrentContexts oldContext
            fail ""
  where
    getResult p = do
        result <- p
        endPos <- getPosition
        endInput <- getInput
        endState <- getState
        return (result, endPos, endInput, endState)
3273

R
Rodrigo Setti 已提交
3274 3275 3276
return []
runTests = $quickCheckAll