1 {-|
    2 
    3 More generic matching, done in one step, unlike FilterSpec and filterJournal*. 
    4 Currently used only by hledger-web.
    5 
    6 -}
    7 
    8 module Hledger.Data.Matching
    9 where
   10 import Data.Either
   11 import Data.List
   12 -- import Data.Map (findWithDefault, (!))
   13 import Data.Maybe
   14 -- import Data.Ord
   15 import Data.Time.Calendar
   16 -- import Data.Time.LocalTime
   17 -- import Data.Tree
   18 import Safe (readDef, headDef)
   19 -- import System.Time (ClockTime(TOD))
   20 import Test.HUnit
   21 import Text.ParserCombinators.Parsec
   22 -- import Text.Printf
   23 -- import qualified Data.Map as Map
   24 
   25 import Hledger.Utils
   26 import Hledger.Data.Types
   27 import Hledger.Data.AccountName
   28 import Hledger.Data.Amount
   29 -- import Hledger.Data.Commodity (canonicaliseCommodities)
   30 import Hledger.Data.Dates
   31 import Hledger.Data.Posting
   32 import Hledger.Data.Transaction
   33 -- import Hledger.Data.TimeLog
   34 
   35 -- | A matcher is a single, or boolean composition of, search criteria,
   36 -- which can be used to match postings, transactions, accounts and more.
   37 -- Currently used by hledger-web, will likely replace FilterSpec at some point.
   38 data Matcher = MatchAny              -- ^ always match
   39              | MatchNone             -- ^ never match
   40              | MatchNot Matcher      -- ^ negate this match
   41              | MatchOr [Matcher]     -- ^ match if any of these match
   42              | MatchAnd [Matcher]    -- ^ match if all of these match
   43              | MatchDesc String      -- ^ match if description matches this regexp
   44              | MatchAcct String      -- ^ match postings whose account matches this regexp
   45              | MatchDate DateSpan    -- ^ match if actual date in this date span
   46              | MatchEDate DateSpan   -- ^ match if effective date in this date span
   47              | MatchStatus Bool      -- ^ match if cleared status has this value
   48              | MatchReal Bool        -- ^ match if "realness" (involves a real non-virtual account ?) has this value
   49              | MatchEmpty Bool       -- ^ match if "emptiness" (from the --empty command-line flag) has this value.
   50                                      --   Currently this means a posting with zero amount.
   51              | MatchDepth Int        -- ^ match if account depth is less than or equal to this value
   52     deriving (Show, Eq)
   53 
   54 -- | A query option changes a query's/report's behaviour and output in some way.
   55 
   56 -- XXX could use regular CliOpts ?
   57 data QueryOpt = QueryOptInAcctOnly AccountName  -- ^ show an account register focussed on this account
   58               | QueryOptInAcct AccountName      -- ^ as above but include sub-accounts in the account register
   59            -- | QueryOptCostBasis      -- ^ show amounts converted to cost where possible
   60            -- | QueryOptEffectiveDate  -- ^ show effective dates instead of actual dates
   61     deriving (Show, Eq)
   62 
   63 -- | The account we are currently focussed on, if any, and whether subaccounts are included.
   64 -- Just looks at the first query option.
   65 inAccount :: [QueryOpt] -> Maybe (AccountName,Bool)
   66 inAccount [] = Nothing
   67 inAccount (QueryOptInAcctOnly a:_) = Just (a,False)
   68 inAccount (QueryOptInAcct a:_) = Just (a,True)
   69 
   70 -- | A matcher for the account(s) we are currently focussed on, if any.
   71 -- Just looks at the first query option.
   72 inAccountMatcher :: [QueryOpt] -> Maybe Matcher
   73 inAccountMatcher [] = Nothing
   74 inAccountMatcher (QueryOptInAcctOnly a:_) = Just $ MatchAcct $ accountNameToAccountOnlyRegex a
   75 inAccountMatcher (QueryOptInAcct a:_) = Just $ MatchAcct $ accountNameToAccountRegex a
   76 
   77 -- -- | A matcher restricting the account(s) to be shown in the sidebar, if any.
   78 -- -- Just looks at the first query option.
   79 -- showAccountMatcher :: [QueryOpt] -> Maybe Matcher
   80 -- showAccountMatcher (QueryOptInAcctSubsOnly a:_) = Just $ MatchAcct True $ accountNameToAccountRegex a
   81 -- showAccountMatcher _ = Nothing
   82 
   83 -- | Convert a query expression containing zero or more space-separated
   84 -- terms to a matcher and zero or more query options. A query term is either:
   85 --
   86 -- 1. a search criteria, used to match transactions. This is usually a prefixed pattern such as:
   87 --    acct:REGEXP
   88 --    date:PERIODEXP
   89 --    not:desc:REGEXP
   90 --
   91 -- 2. a query option, which changes behaviour in some way. There is currently one of these:
   92 --    inacct:FULLACCTNAME - should appear only once
   93 --
   94 -- Multiple search criteria are AND'ed together.
   95 -- When a pattern contains spaces, it or the whole term should be enclosed in single or double quotes.
   96 -- A reference date is required to interpret relative dates in period expressions.
   97 --
   98 parseQuery :: Day -> String -> (Matcher,[QueryOpt])
   99 parseQuery d s = (m,qopts)
  100   where
  101     terms = words'' prefixes s
  102     (matchers, qopts) = partitionEithers $ map (parseMatcher d) terms
  103     m = case matchers of []      -> MatchAny
  104                          (m':[]) -> m'
  105                          ms      -> MatchAnd ms
  106 
  107 -- | Quote-and-prefix-aware version of words - don't split on spaces which
  108 -- are inside quotes, including quotes which may have one of the specified
  109 -- prefixes in front, and maybe an additional not: prefix in front of that.
  110 words'' :: [String] -> String -> [String]
  111 words'' prefixes = fromparse . parsewith maybeprefixedquotedphrases -- XXX
  112     where
  113       maybeprefixedquotedphrases = choice' [prefixedQuotedPattern, quotedPattern, pattern] `sepBy` many1 spacenonewline
  114       prefixedQuotedPattern = do
  115         not' <- optionMaybe $ string "not:"
  116         prefix <- choice' $ map string prefixes
  117         p <- quotedPattern
  118         return $ fromMaybe "" not' ++ prefix ++ stripquotes p
  119       quotedPattern = do
  120         p <- between (oneOf "'\"") (oneOf "'\"") $ many $ noneOf "'\""
  121         return $ stripquotes p
  122       pattern = many (noneOf " \n\r\"")
  123 
  124 -- -- | Parse the query string as a boolean tree of match patterns.
  125 -- parseMatcher :: String -> Matcher
  126 -- parseMatcher s = either (const (MatchAny)) id $ runParser matcher () "" $ lexmatcher s
  127 
  128 -- lexmatcher :: String -> [String]
  129 -- lexmatcher s = words' s
  130 
  131 -- matcher :: GenParser String () Matcher
  132 -- matcher = undefined
  133 
  134 -- keep synced with patterns below, excluding "not"
  135 prefixes = map (++":") [
  136             "inacct","inacctonly",
  137             "desc","acct","date","edate","status","real","empty","depth"
  138            ]
  139 defaultprefix = "acct"
  140 
  141 -- | Parse a single query term as either a matcher or a query option.
  142 parseMatcher :: Day -> String -> Either Matcher QueryOpt
  143 parseMatcher _ ('i':'n':'a':'c':'c':'t':'o':'n':'l':'y':':':s) = Right $ QueryOptInAcctOnly s
  144 parseMatcher _ ('i':'n':'a':'c':'c':'t':':':s) = Right $ QueryOptInAcct s
  145 parseMatcher d ('n':'o':'t':':':s) = case parseMatcher d s of
  146                                        Left m  -> Left $ MatchNot m
  147                                        Right _ -> Left MatchAny -- not:somequeryoption will be ignored
  148 parseMatcher _ ('d':'e':'s':'c':':':s) = Left $ MatchDesc s
  149 parseMatcher _ ('a':'c':'c':'t':':':s) = Left $ MatchAcct s
  150 parseMatcher d ('d':'a':'t':'e':':':s) =
  151         case parsePeriodExpr d s of Left _ -> Left MatchNone -- XXX should warn
  152                                     Right (_,span) -> Left $ MatchDate span
  153 parseMatcher d ('e':'d':'a':'t':'e':':':s) =
  154         case parsePeriodExpr d s of Left _ -> Left MatchNone -- XXX should warn
  155                                     Right (_,span) -> Left $ MatchEDate span
  156 parseMatcher _ ('s':'t':'a':'t':'u':'s':':':s) = Left $ MatchStatus $ parseStatus s
  157 parseMatcher _ ('r':'e':'a':'l':':':s) = Left $ MatchReal $ parseBool s
  158 parseMatcher _ ('e':'m':'p':'t':'y':':':s) = Left $ MatchEmpty $ parseBool s
  159 parseMatcher _ ('d':'e':'p':'t':'h':':':s) = Left $ MatchDepth $ readDef 0 s
  160 parseMatcher _ "" = Left $ MatchAny
  161 parseMatcher d s = parseMatcher d $ defaultprefix++":"++s
  162 
  163 -- | Parse the boolean value part of a "status:" matcher, allowing "*" as
  164 -- another way to spell True, similar to the journal file format.
  165 parseStatus :: String -> Bool
  166 parseStatus s = s `elem` (truestrings ++ ["*"])
  167 
  168 -- | Parse the boolean value part of a "status:" matcher. A true value can
  169 -- be spelled as "1", "t" or "true".
  170 parseBool :: String -> Bool
  171 parseBool s = s `elem` truestrings
  172 
  173 truestrings :: [String]
  174 truestrings = ["1","t","true"]
  175 
  176 -- | Convert a match expression to its inverse.
  177 negateMatcher :: Matcher -> Matcher
  178 negateMatcher =  MatchNot
  179 
  180 -- | Does the match expression match this posting ?
  181 matchesPosting :: Matcher -> Posting -> Bool
  182 matchesPosting (MatchNot m) p = not $ matchesPosting m p
  183 matchesPosting (MatchAny) _ = True
  184 matchesPosting (MatchNone) _ = False
  185 matchesPosting (MatchOr ms) p = any (`matchesPosting` p) ms
  186 matchesPosting (MatchAnd ms) p = all (`matchesPosting` p) ms
  187 matchesPosting (MatchDesc r) p = regexMatchesCI r $ maybe "" tdescription $ ptransaction p
  188 matchesPosting (MatchAcct r) p = regexMatchesCI r $ paccount p
  189 matchesPosting (MatchDate span) p =
  190     case d of Just d'  -> spanContainsDate span d'
  191               Nothing -> False
  192     where d = maybe Nothing (Just . tdate) $ ptransaction p
  193 matchesPosting (MatchEDate span) p =
  194     case postingEffectiveDate p of Just d  -> spanContainsDate span d
  195                                    Nothing -> False
  196 matchesPosting (MatchStatus v) p = v == postingCleared p
  197 matchesPosting (MatchReal v) p = v == isReal p
  198 matchesPosting (MatchEmpty v) Posting{pamount=a} = v == isZeroMixedAmount a
  199 matchesPosting _ _ = False
  200 
  201 -- | Does the match expression match this transaction ?
  202 matchesTransaction :: Matcher -> Transaction -> Bool
  203 matchesTransaction (MatchNot m) t = not $ matchesTransaction m t
  204 matchesTransaction (MatchAny) _ = True
  205 matchesTransaction (MatchNone) _ = False
  206 matchesTransaction (MatchOr ms) t = any (`matchesTransaction` t) ms
  207 matchesTransaction (MatchAnd ms) t = all (`matchesTransaction` t) ms
  208 matchesTransaction (MatchDesc r) t = regexMatchesCI r $ tdescription t
  209 matchesTransaction m@(MatchAcct _) t = any (m `matchesPosting`) $ tpostings t
  210 matchesTransaction (MatchDate span) t = spanContainsDate span $ tdate t
  211 matchesTransaction (MatchEDate span) t = spanContainsDate span $ transactionEffectiveDate t
  212 matchesTransaction (MatchStatus v) t = v == tstatus t
  213 matchesTransaction (MatchReal v) t = v == hasRealPostings t
  214 matchesTransaction _ _ = False
  215 
  216 postingEffectiveDate :: Posting -> Maybe Day
  217 postingEffectiveDate p = maybe Nothing (Just . transactionEffectiveDate) $ ptransaction p
  218 
  219 transactionEffectiveDate :: Transaction -> Day
  220 transactionEffectiveDate t = case teffectivedate t of Just d  -> d
  221                                                       Nothing -> tdate t
  222 
  223 -- | Does the match expression match this account ?
  224 -- A matching in: clause is also considered a match.
  225 matchesAccount :: Matcher -> AccountName -> Bool
  226 matchesAccount (MatchNot m) a = not $ matchesAccount m a
  227 matchesAccount (MatchAny) _ = True
  228 matchesAccount (MatchNone) _ = False
  229 matchesAccount (MatchOr ms) a = any (`matchesAccount` a) ms
  230 matchesAccount (MatchAnd ms) a = all (`matchesAccount` a) ms
  231 matchesAccount (MatchAcct r) a = regexMatchesCI r a
  232 matchesAccount _ _ = False
  233 
  234 -- | What start date does this matcher specify, if any ?
  235 -- If the matcher is an OR expression, returns the earliest of the alternatives.
  236 -- When the flag is true, look for a starting effective date instead.
  237 matcherStartDate :: Bool -> Matcher -> Maybe Day
  238 matcherStartDate effective (MatchOr ms) = earliestMaybeDate $ map (matcherStartDate effective) ms
  239 matcherStartDate effective (MatchAnd ms) = latestMaybeDate $ map (matcherStartDate effective) ms
  240 matcherStartDate False (MatchDate (DateSpan (Just d) _)) = Just d
  241 matcherStartDate True (MatchEDate (DateSpan (Just d) _)) = Just d
  242 matcherStartDate _ _ = Nothing
  243 
  244 -- | Does this matcher specify a start date and nothing else (that would
  245 -- filter postings prior to the date) ?
  246 -- When the flag is true, look for a starting effective date instead.
  247 matcherIsStartDateOnly :: Bool -> Matcher -> Bool
  248 matcherIsStartDateOnly _ MatchAny = False
  249 matcherIsStartDateOnly _ MatchNone = False
  250 matcherIsStartDateOnly effective (MatchOr ms) = and $ map (matcherIsStartDateOnly effective) ms
  251 matcherIsStartDateOnly effective (MatchAnd ms) = and $ map (matcherIsStartDateOnly effective) ms
  252 matcherIsStartDateOnly False (MatchDate (DateSpan (Just _) _)) = True
  253 matcherIsStartDateOnly True (MatchEDate (DateSpan (Just _) _)) = True
  254 matcherIsStartDateOnly _ _ = False
  255 
  256 -- | Does this matcher match everything ?
  257 matcherIsNull MatchAny = True
  258 matcherIsNull (MatchAnd []) = True
  259 matcherIsNull (MatchNot (MatchOr [])) = True
  260 matcherIsNull _ = False
  261 
  262 -- | What is the earliest of these dates, where Nothing is earliest ?
  263 earliestMaybeDate :: [Maybe Day] -> Maybe Day
  264 earliestMaybeDate = headDef Nothing . sortBy compareMaybeDates
  265 
  266 -- | What is the latest of these dates, where Nothing is earliest ?
  267 latestMaybeDate :: [Maybe Day] -> Maybe Day
  268 latestMaybeDate = headDef Nothing . sortBy (flip compareMaybeDates)
  269 
  270 -- | Compare two maybe dates, Nothing is earliest.
  271 compareMaybeDates :: Maybe Day -> Maybe Day -> Ordering
  272 compareMaybeDates Nothing Nothing = EQ
  273 compareMaybeDates Nothing (Just _) = LT
  274 compareMaybeDates (Just _) Nothing = GT
  275 compareMaybeDates (Just a) (Just b) = compare a b
  276 
  277 tests_Hledger_Data_Matching :: Test
  278 tests_Hledger_Data_Matching = TestList
  279  [
  280 
  281   "parseQuery" ~: do
  282     let d = parsedate "2011/1/1"
  283     parseQuery d "a" `is` (MatchAcct "a", [])
  284     parseQuery d "acct:a" `is` (MatchAcct "a", [])
  285     parseQuery d "acct:a desc:b" `is` (MatchAnd [MatchAcct "a", MatchDesc "b"], [])
  286     parseQuery d "\"acct:expenses:autres d\233penses\"" `is` (MatchAcct "expenses:autres d\233penses", [])
  287     parseQuery d "not:desc:'a b'" `is` (MatchNot $ MatchDesc "a b", [])
  288 
  289     parseQuery d "inacct:a desc:b" `is` (MatchDesc "b", [QueryOptInAcct "a"])
  290     parseQuery d "inacct:a inacct:b" `is` (MatchAny, [QueryOptInAcct "a", QueryOptInAcct "b"])
  291 
  292     parseQuery d "status:1" `is` (MatchStatus True, [])
  293     parseQuery d "status:0" `is` (MatchStatus False, [])
  294     parseQuery d "status:" `is` (MatchStatus False, [])
  295     parseQuery d "real:1" `is` (MatchReal True, [])
  296 
  297   ,"matchesAccount" ~: do
  298     assertBool "positive acct match" $ matchesAccount (MatchAcct "b:c") "a:bb:c:d"
  299     -- assertBool "acct should match at beginning" $ not $ matchesAccount (MatchAcct True "a:b") "c:a:b"
  300 
  301   ,"matchesPosting" ~: do
  302     -- matching posting status..
  303     assertBool "positive match on true posting status"  $
  304                    (MatchStatus True)  `matchesPosting` nullposting{pstatus=True}
  305     assertBool "negative match on true posting status"  $
  306                not $ (MatchNot $ MatchStatus True)  `matchesPosting` nullposting{pstatus=True}
  307     assertBool "positive match on false posting status" $
  308                    (MatchStatus False) `matchesPosting` nullposting{pstatus=False}
  309     assertBool "negative match on false posting status" $
  310                not $ (MatchNot $ MatchStatus False) `matchesPosting` nullposting{pstatus=False}
  311     assertBool "positive match on true posting status acquired from transaction" $
  312                    (MatchStatus True) `matchesPosting` nullposting{pstatus=False,ptransaction=Just nulltransaction{tstatus=True}}
  313     assertBool "real:1 on real posting" $ (MatchReal True) `matchesPosting` nullposting{ptype=RegularPosting}
  314     assertBool "real:1 on virtual posting fails" $ not $ (MatchReal True) `matchesPosting` nullposting{ptype=VirtualPosting}
  315     assertBool "real:1 on balanced virtual posting fails" $ not $ (MatchReal True) `matchesPosting` nullposting{ptype=BalancedVirtualPosting}
  316 
  317  ]